[IMP] removed dependency on pyquery

bzr revid: nicolas.vanhoren@openerp.com-20120905153212-0gi1wjhf9m4xtnml
This commit is contained in:
niv-openerp 2012-09-05 17:32:12 +02:00
parent 4e23bcf80a
commit 8d1e3d06ab
3 changed files with 19 additions and 11 deletions

View File

@ -26,11 +26,15 @@ class TestSanitizer(unittest.TestCase):
def test_simple(self):
x = "yop"
self.assertEqual(x, html_sanitize(x))
def test_trailing_text(self):
x = 'lala<p>yop</p>xxx'
self.assertEqual(x, html_sanitize(x))
def test_test_case(self):
def test_no_exception(self):
html_sanitize(test_case)
def test_crm(self):
def test_unicode(self):
html_sanitize("Merci à l'intérêt pour notre produit.nous vous contacterons bientôt. Merci")
if __name__ == '__main__':

View File

@ -1,17 +1,22 @@
from pyquery import PyQuery as pq
import lxml.html
import re
def html_sanitize(x):
if not x:
return x
root = pq("<div />")
if type(x) == str:
x = unicode(x, "utf8", "replace")
root.html(x)
result = handle_element(root[0])
new = pq(result)
return new.html()
root = lxml.html.fromstring("<div>%s</div>" % x)
result = handle_element(root)
res = ""
for el in children(result[0]):
if type(el) == str or type(el) == unicode:
res += el
else:
el.tail = ""
res += lxml.html.tostring(el)
return res
to_remove = set(["script", "head", "meta", "title", "link", "img"])
to_unwrap = set(["html", "body"])
@ -33,7 +38,7 @@ def handle_element(el):
return []
if el.tag in to_unwrap:
return reduce(lambda x,y: x+y, [handle_element(x) for x in children(el)])
new = pq("<%s />" % el.tag)[0]
new = lxml.html.fromstring("<%s />" % el.tag)
for i in children(el):
append_to(handle_element(i), new)
if el.tag in special:
@ -59,4 +64,4 @@ def append_to(new_ones, el):
else:
children[-1].tail = i
else:
el.append(i)
el.append(i)

View File

@ -99,7 +99,6 @@ setuptools.setup(
'mako',
'psycopg2',
'pydot',
'pyquery',
'python-dateutil < 2',
'python-ldap',
'python-openid',