[FIX] Don't remove data-... attributes like data-snippet-id

bzr revid: fp@tinyerp.com-20131209185406-newaf16v532l146p
This commit is contained in:
Fabien Pinckaers 2013-12-09 19:54:06 +01:00
parent 1731f6158d
commit 630e9ad64c
1 changed file with 3 additions and 10 deletions

View File

@ -75,22 +75,15 @@ def html_sanitize(src, silent=True):
else:
kwargs['remove_tags'] = tags_to_kill + tags_to_remove
if etree.LXML_VERSION >= (3, 1, 0):
kwargs.update({
'safe_attrs_only': True,
'safe_attrs': safe_attrs,
})
else:
# lxml < 3.1.0 does not allow specifying safe_attrs. We keep all attributes in order to keep "style"
kwargs['safe_attrs_only'] = False
kwargs['safe_attrs_only'] = False
try:
# some corner cases make the parser crash (such as <SCRIPT/XSS SRC=\"http://ha.ckers.org/xss.js\"></SCRIPT> in test_mail)
cleaner = clean.Cleaner(**kwargs)
cleaned = cleaner.clean_html(src)
except etree.ParserError, e:
if 'empty' in str(e):
return ""
if 'empty' in str(e):
return ""
if not silent:
raise
logger.warning('ParserError obtained when sanitizing %r', src, exc_info=True)