[FIX] tools: html_sanitize

bzr revid: chm@openerp.com-20121231154451-0guqjid92rfndghp
This commit is contained in:
Christophe Matthieu 2012-12-31 16:44:51 +01:00
parent de4ba1084a
commit 71a92f46e4
2 changed files with 13 additions and 0 deletions

View File

@ -229,6 +229,14 @@ class TestSanitizer(unittest2.TestCase):
for attr in ['javascript']:
self.assertNotIn(attr, sanitized_html, 'html_sanitize did not remove enough unwanted attributes')
emails =[("Charles <charles.bidule@truc.fr>", "<p>Charles &lt;charles.bidule@truc.fr&gt;</p>"),
("Dupuis <'tr/-:dupuis><#><$'@truc.baz.fr>", "<p>Dupuis &lt;'tr/-:dupuis&gt;&lt;#&gt;&lt;$'@truc.baz.fr&gt;</p>"),
("Technical <service/technical+2@open.com>", "<p>Technical &lt;service/technical+2@open.com&gt;</p>"),
("Div nico <div-nico@open.com>", "<p>Div nico &lt;div-nico@open.com&gt;</p>")]
for email in emails:
self.assertEqual(email[1], html_sanitize(email[0]), 'html_sanitize stripped emails of original html')
def test_edi_source(self):
html = html_sanitize(EDI_LIKE_HTML_SOURCE)
self.assertIn('div style="font-family: \'Lucica Grande\', Ubuntu, Arial, Verdana, sans-serif; font-size: 12px; color: rgb(34, 34, 34); background-color: #FFF;', html,

View File

@ -48,6 +48,11 @@ def html_sanitize(src):
if not src:
return src
src = ustr(src, errors='replace')
# HTML-escape angle-bracketed email addresses (e.g. "<user@example.com>") so they are kept as text
part = re.compile(r"(<\s*[^\s]+@[^\s]+\s*>)", re.IGNORECASE | re.DOTALL)
src = part.sub(lambda m: cgi.escape(m.group(1)), src)
# some corner cases make the parser crash (such as <SCRIPT/XSS SRC=\"http://ha.ckers.org/xss.js\"></SCRIPT> in test_mail)
try:
cleaner = clean.Cleaner(page_structure=True, style=False, safe_attrs_only=False, forms=False, kill_tags=tags_to_kill, remove_tags=tags_to_remove)