[IMP] tools: html_email_clean: a bit more robust (match <br> tags case-insensitively).
bzr revid: tde@openerp.com-20121114141452-n5jy3yzbmlbd3811
This commit is contained in:
parent
8e15ab0dff
commit
23743683ea
|
@ -133,9 +133,8 @@ def html_email_clean(html):
|
||||||
html = ustr(html)
|
html = ustr(html)
|
||||||
|
|
||||||
# 1. <br[ /]> -> \n, because otherwise the tree is obfuscated
|
# 1. <br[ /]> -> \n, because otherwise the tree is obfuscated
|
||||||
br_tags = re.compile(r'([<]\s*br\s*\/?[>])')
|
br_tags = re.compile(r'([<]\s*[bB][rR]\s*\/?[>])')
|
||||||
html = _replace_matching_regex(br_tags, html, '__BR_TAG__')
|
html = _replace_matching_regex(br_tags, html, '__BR_TAG__')
|
||||||
# TDE note: there seem to be lots of <div><br></div> in emails... needs to be checked, could be cleaned
|
|
||||||
|
|
||||||
# 2. form a tree, handle (currently ?) pure-text by enclosing them in a pre
|
# 2. form a tree, handle (currently ?) pure-text by enclosing them in a pre
|
||||||
root = lxml.html.fromstring(html)
|
root = lxml.html.fromstring(html)
|
||||||
|
|
Loading…
Reference in New Issue