From 8d1e3d06abae19598ce1bd854789327f56ef2536 Mon Sep 17 00:00:00 2001 From: niv-openerp Date: Wed, 5 Sep 2012 17:32:12 +0200 Subject: [PATCH] [IMP] removed dependency to pyquery bzr revid: nicolas.vanhoren@openerp.com-20120905153212-0gi1wjhf9m4xtnml --- openerp/tests/test_html_sanitize.py | 8 ++++++-- openerp/tools/html_sanitize.py | 21 +++++++++++++-------- setup.py | 1 - 3 files changed, 19 insertions(+), 11 deletions(-) diff --git a/openerp/tests/test_html_sanitize.py b/openerp/tests/test_html_sanitize.py index 90ccc88cc1d..cb46b325144 100755 --- a/openerp/tests/test_html_sanitize.py +++ b/openerp/tests/test_html_sanitize.py @@ -26,11 +26,15 @@ class TestSanitizer(unittest.TestCase): def test_simple(self): x = "yop" self.assertEqual(x, html_sanitize(x)) + + def test_trailing_text(self): + x = 'lala

yop

xxx' + self.assertEqual(x, html_sanitize(x)) - def test_test_case(self): + def test_no_exception(self): html_sanitize(test_case) - def test_crm(self): + def test_unicode(self): html_sanitize("Merci à l'intérêt pour notre produit.nous vous contacterons bientôt. Merci") if __name__ == '__main__': diff --git a/openerp/tools/html_sanitize.py b/openerp/tools/html_sanitize.py index 5164ceb276e..65f17be074f 100644 --- a/openerp/tools/html_sanitize.py +++ b/openerp/tools/html_sanitize.py @@ -1,17 +1,22 @@ -from pyquery import PyQuery as pq +import lxml.html import re def html_sanitize(x): if not x: return x - root = pq("
") if type(x) == str: x = unicode(x, "utf8", "replace") - root.html(x) - result = handle_element(root[0]) - new = pq(result) - return new.html() + root = lxml.html.fromstring("
%s
" % x) + result = handle_element(root) + res = "" + for el in children(result[0]): + if type(el) == str or type(el) == unicode: + res += el + else: + el.tail = "" + res += lxml.html.tostring(el) + return res to_remove = set(["script", "head", "meta", "title", "link", "img"]) to_unwrap = set(["html", "body"]) @@ -33,7 +38,7 @@ def handle_element(el): return [] if el.tag in to_unwrap: return reduce(lambda x,y: x+y, [handle_element(x) for x in children(el)]) - new = pq("<%s />" % el.tag)[0] + new = lxml.html.fromstring("<%s />" % el.tag) for i in children(el): append_to(handle_element(i), new) if el.tag in special: @@ -59,4 +64,4 @@ def append_to(new_ones, el): else: children[-1].tail = i else: - el.append(i) \ No newline at end of file + el.append(i) diff --git a/setup.py b/setup.py index a033921e70d..a26b74e255d 100755 --- a/setup.py +++ b/setup.py @@ -99,7 +99,6 @@ setuptools.setup( 'mako', 'psycopg2', 'pydot', - 'pyquery', 'python-dateutil < 2', 'python-ldap', 'python-openid',