[FIX] tools: html_sanitize
bzr revid: chm@openerp.com-20121231154451-0guqjid92rfndghp
This commit is contained in:
parent
de4ba1084a
commit
71a92f46e4
|
@ -229,6 +229,14 @@ class TestSanitizer(unittest2.TestCase):
|
||||||
for attr in ['javascript']:
|
for attr in ['javascript']:
|
||||||
self.assertNotIn(attr, sanitized_html, 'html_sanitize did not remove enough unwanted attributes')
|
self.assertNotIn(attr, sanitized_html, 'html_sanitize did not remove enough unwanted attributes')
|
||||||
|
|
||||||
|
emails =[("Charles <charles.bidule@truc.fr>", "<p>Charles <charles.bidule@truc.fr></p>"),
|
||||||
|
("Dupuis <'tr/-:dupuis><#><$'@truc.baz.fr>", "<p>Dupuis <'tr/-:dupuis><#><$'@truc.baz.fr></p>"),
|
||||||
|
("Technical <service/technical+2@open.com>", "<p>Technical <service/technical+2@open.com></p>"),
|
||||||
|
("Div nico <div-nico@open.com>", "<p>Div nico <div-nico@open.com></p>")]
|
||||||
|
for email in emails:
|
||||||
|
self.assertEqual(email[1], html_sanitize(email[0]), 'html_sanitize stripped emails of original html')
|
||||||
|
|
||||||
|
|
||||||
def test_edi_source(self):
|
def test_edi_source(self):
|
||||||
html = html_sanitize(EDI_LIKE_HTML_SOURCE)
|
html = html_sanitize(EDI_LIKE_HTML_SOURCE)
|
||||||
self.assertIn('div style="font-family: \'Lucica Grande\', Ubuntu, Arial, Verdana, sans-serif; font-size: 12px; color: rgb(34, 34, 34); background-color: #FFF;', html,
|
self.assertIn('div style="font-family: \'Lucica Grande\', Ubuntu, Arial, Verdana, sans-serif; font-size: 12px; color: rgb(34, 34, 34); background-color: #FFF;', html,
|
||||||
|
|
|
@ -48,6 +48,11 @@ def html_sanitize(src):
|
||||||
if not src:
|
if not src:
|
||||||
return src
|
return src
|
||||||
src = ustr(src, errors='replace')
|
src = ustr(src, errors='replace')
|
||||||
|
|
||||||
|
# HTML-escape e-mail addresses written in angle brackets (e.g. <user@example.com>) so they are not parsed as tags
|
||||||
|
part = re.compile(r"(<\s*[^\s]+@[^\s]+\s*>)", re.IGNORECASE | re.DOTALL)
|
||||||
|
src = part.sub(lambda m: cgi.escape(m.group(1)), src)
|
||||||
|
|
||||||
# some corner cases make the parser crash (such as <SCRIPT/XSS SRC=\"http://ha.ckers.org/xss.js\"></SCRIPT> in test_mail)
|
# some corner cases make the parser crash (such as <SCRIPT/XSS SRC=\"http://ha.ckers.org/xss.js\"></SCRIPT> in test_mail)
|
||||||
try:
|
try:
|
||||||
cleaner = clean.Cleaner(page_structure=True, style=False, safe_attrs_only=False, forms=False, kill_tags=tags_to_kill, remove_tags=tags_to_remove)
|
cleaner = clean.Cleaner(page_structure=True, style=False, safe_attrs_only=False, forms=False, kill_tags=tags_to_kill, remove_tags=tags_to_remove)
|
||||||
|
|
Loading…
Reference in New Issue