[IMP] tools: html_email_clean: a bit more robust.
bzr revid: tde@openerp.com-20121114141452-n5jy3yzbmlbd3811
This commit is contained in:
parent
8e15ab0dff
commit
23743683ea
|
@ -133,9 +133,8 @@ def html_email_clean(html):
|
|||
html = ustr(html)
|
||||
|
||||
# 1. <br[ /]> -> \n, because otherwise the tree is obfuscated
|
||||
br_tags = re.compile(r'([<]\s*br\s*\/?[>])')
|
||||
br_tags = re.compile(r'([<]\s*[bB][rR]\s*\/?[>])')
|
||||
html = _replace_matching_regex(br_tags, html, '__BR_TAG__')
|
||||
# TDE note: there seem to be lots of <div><br></div> in emails... needs to be checked, could be cleaned
|
||||
|
||||
# 2. form a tree, handle (currently ?) pure-text by enclosing them in a pre
|
||||
root = lxml.html.fromstring(html)
|
||||
|
|
Loading…
Reference in New Issue