From e06e3aad4a98450300018ceecf502b789224301a Mon Sep 17 00:00:00 2001 From: Olivier Dony Date: Mon, 15 Oct 2012 14:09:34 +0200 Subject: [PATCH] [IMP] tools.html2plaintext: consistent use of lxml.etree.HTMLParser to convert HTML to plaintext We used to switch to using BeautifulSoup when available, but that led to inconsistent behavior depending on the installed Python packages, and sometimes led to bad surprises. There is no advantage in using BeautifulSoup rather than HTMLParser, and the latter is always available. bzr revid: odo@openerp.com-20121015120934-njaylf99dc5zekfw --- openerp/tools/misc.py | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/openerp/tools/misc.py b/openerp/tools/misc.py index 130db6f0d4e..c9283a839d4 100644 --- a/openerp/tools/misc.py +++ b/openerp/tools/misc.py @@ -312,16 +312,8 @@ def html2plaintext(html, body_id=None, encoding='utf-8'): html = ustr(html) - from lxml.etree import tostring - try: - from lxml.html.soupparser import fromstring - kwargs = {} - except ImportError: - _logger.debug('tools.misc.html2plaintext: cannot use BeautifulSoup, fallback to lxml.etree.HTMLParser') - from lxml.etree import fromstring, HTMLParser - kwargs = dict(parser=HTMLParser()) - - tree = fromstring(html, **kwargs) + from lxml.etree import tostring, fromstring, HTMLParser + tree = fromstring(html, parser=HTMLParser()) if body_id is not None: source = tree.xpath('//*[@id=%s]'%(body_id,))