[IMP] html_sanitize: keep email addresses formatted like '\<raoul@grosbedon.fr\>' in the HTML being sanitized; before this revision they were simply removed as invalid HTML.

bzr revid: tde@openerp.com-20130103135649-ngbhs3n7pa86qnl5
This commit is contained in:
Thibault Delavallée 2013-01-03 14:56:49 +01:00
commit 0f4cd7b133
2 changed files with 13 additions and 0 deletions

View File

@ -229,6 +229,14 @@ class TestSanitizer(unittest2.TestCase):
for attr in ['javascript']:
self.assertNotIn(attr, sanitized_html, 'html_sanitize did not remove enough unwanted attributes')
emails =[("Charles <charles.bidule@truc.fr>", "Charles &lt;charles.bidule@truc.fr&gt;"),
("Dupuis <'tr/-: ${dupuis#$'@truc.baz.fr>", "Dupuis &lt;'tr/-: ${dupuis#$'@truc.baz.fr&gt;"),
("Technical <service/technical+2@open.com>", "Technical &lt;service/technical+2@open.com&gt;"),
("Div nico <div-nico@open.com>", "Div nico &lt;div-nico@open.com&gt;")]
for email in emails:
self.assertIn(email[1], html_sanitize(email[0]), 'html_sanitize stripped emails of original html')
def test_edi_source(self):
html = html_sanitize(EDI_LIKE_HTML_SOURCE)
self.assertIn('div style="font-family: \'Lucica Grande\', Ubuntu, Arial, Verdana, sans-serif; font-size: 12px; color: rgb(34, 34, 34); background-color: #FFF;', html,

View File

@ -48,6 +48,11 @@ def html_sanitize(src):
if not src:
return src
src = ustr(src, errors='replace')
# html encode email tags
part = re.compile(r"(<[^<>]+@[^<>]+>)", re.IGNORECASE | re.DOTALL)
src = part.sub(lambda m: cgi.escape(m.group(1)), src)
# some corner cases make the parser crash (such as <SCRIPT/XSS SRC=\"http://ha.ckers.org/xss.js\"></SCRIPT> in test_mail)
try:
cleaner = clean.Cleaner(page_structure=True, style=False, safe_attrs_only=False, forms=False, kill_tags=tags_to_kill, remove_tags=tags_to_remove)