[IMP] removed dependency to pyquery
bzr revid: nicolas.vanhoren@openerp.com-20120905153212-0gi1wjhf9m4xtnml
This commit is contained in:
parent
4e23bcf80a
commit
8d1e3d06ab
|
@ -26,11 +26,15 @@ class TestSanitizer(unittest.TestCase):
|
||||||
def test_simple(self):
|
def test_simple(self):
|
||||||
x = "yop"
|
x = "yop"
|
||||||
self.assertEqual(x, html_sanitize(x))
|
self.assertEqual(x, html_sanitize(x))
|
||||||
|
|
||||||
|
def test_trailing_text(self):
|
||||||
|
x = 'lala<p>yop</p>xxx'
|
||||||
|
self.assertEqual(x, html_sanitize(x))
|
||||||
|
|
||||||
def test_test_case(self):
|
def test_no_exception(self):
|
||||||
html_sanitize(test_case)
|
html_sanitize(test_case)
|
||||||
|
|
||||||
def test_crm(self):
|
def test_unicode(self):
|
||||||
html_sanitize("Merci à l'intérêt pour notre produit.nous vous contacterons bientôt. Merci")
|
html_sanitize("Merci à l'intérêt pour notre produit.nous vous contacterons bientôt. Merci")
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
|
@ -1,17 +1,22 @@
|
||||||
|
|
||||||
from pyquery import PyQuery as pq
|
import lxml.html
|
||||||
import re
|
import re
|
||||||
|
|
||||||
def html_sanitize(x):
|
def html_sanitize(x):
|
||||||
if not x:
|
if not x:
|
||||||
return x
|
return x
|
||||||
root = pq("<div />")
|
|
||||||
if type(x) == str:
|
if type(x) == str:
|
||||||
x = unicode(x, "utf8", "replace")
|
x = unicode(x, "utf8", "replace")
|
||||||
root.html(x)
|
root = lxml.html.fromstring("<div>%s</div>" % x)
|
||||||
result = handle_element(root[0])
|
result = handle_element(root)
|
||||||
new = pq(result)
|
res = ""
|
||||||
return new.html()
|
for el in children(result[0]):
|
||||||
|
if type(el) == str or type(el) == unicode:
|
||||||
|
res += el
|
||||||
|
else:
|
||||||
|
el.tail = ""
|
||||||
|
res += lxml.html.tostring(el)
|
||||||
|
return res
|
||||||
|
|
||||||
to_remove = set(["script", "head", "meta", "title", "link", "img"])
|
to_remove = set(["script", "head", "meta", "title", "link", "img"])
|
||||||
to_unwrap = set(["html", "body"])
|
to_unwrap = set(["html", "body"])
|
||||||
|
@ -33,7 +38,7 @@ def handle_element(el):
|
||||||
return []
|
return []
|
||||||
if el.tag in to_unwrap:
|
if el.tag in to_unwrap:
|
||||||
return reduce(lambda x,y: x+y, [handle_element(x) for x in children(el)])
|
return reduce(lambda x,y: x+y, [handle_element(x) for x in children(el)])
|
||||||
new = pq("<%s />" % el.tag)[0]
|
new = lxml.html.fromstring("<%s />" % el.tag)
|
||||||
for i in children(el):
|
for i in children(el):
|
||||||
append_to(handle_element(i), new)
|
append_to(handle_element(i), new)
|
||||||
if el.tag in special:
|
if el.tag in special:
|
||||||
|
@ -59,4 +64,4 @@ def append_to(new_ones, el):
|
||||||
else:
|
else:
|
||||||
children[-1].tail = i
|
children[-1].tail = i
|
||||||
else:
|
else:
|
||||||
el.append(i)
|
el.append(i)
|
||||||
|
|
Loading…
Reference in New Issue