[IMP] tools.html2plaintext: consistent use of lxml.etree.HTMLParser to convert HTML to plaintext
We used to switch to using BeautifulSoup when available, but that led to inconsistent behavior depending on the installed Python packages, and sometimes led to bad surprises. There is no advantage in using BeautifulSoup rather than HTMLParser, and the latter is always available. bzr revid: odo@openerp.com-20121015120934-njaylf99dc5zekfw
This commit is contained in:
parent
99c4f31111
commit
e06e3aad4a
|
@ -312,16 +312,8 @@ def html2plaintext(html, body_id=None, encoding='utf-8'):
|
|||
|
||||
html = ustr(html)
|
||||
|
||||
from lxml.etree import tostring
|
||||
try:
|
||||
from lxml.html.soupparser import fromstring
|
||||
kwargs = {}
|
||||
except ImportError:
|
||||
_logger.debug('tools.misc.html2plaintext: cannot use BeautifulSoup, fallback to lxml.etree.HTMLParser')
|
||||
from lxml.etree import fromstring, HTMLParser
|
||||
kwargs = dict(parser=HTMLParser())
|
||||
|
||||
tree = fromstring(html, **kwargs)
|
||||
from lxml.etree import tostring, fromstring, HTMLParser
|
||||
tree = fromstring(html, parser=HTMLParser())
|
||||
|
||||
if body_id is not None:
|
||||
source = tree.xpath('//*[@id=%s]'%(body_id,))
|
||||
|
|
Loading…
Reference in New Issue