First working version of the html sanitizer

bzr revid: nicolas.vanhoren@openerp.com-20120813142232-xn7h0ov7mb3pls4o
This commit is contained in:
niv-openerp 2012-08-13 16:22:32 +02:00
parent 11780a2267
commit e5fb45a329
3 changed files with 70 additions and 1 deletions

View File

@ -0,0 +1,33 @@
#!/usr/bin/env python
import unittest
from openerp.tools.html_sanitize import html_sanitize
# Rich-text HTML fixture fed to html_sanitize() by TestSanitizer.test_test_case.
# It mixes inline styles, <font> attributes, nested lists, nested blockquotes
# and a link; the visible text labels run from "test1" to "test12".
test_case = """
<font size="2" style="color: rgb(31, 31, 31); font-family: monospace; font-variant: normal; line-height: normal; ">test1</font>
<div style="color: rgb(31, 31, 31); font-family: monospace; font-variant: normal; line-height: normal; font-size: 12px; font-style: normal; ">
<b>test2</b></div><div style="color: rgb(31, 31, 31); font-family: monospace; font-variant: normal; line-height: normal; font-size: 12px; ">
<i>test3</i></div><div style="color: rgb(31, 31, 31); font-family: monospace; font-variant: normal; line-height: normal; font-size: 12px; ">
<u>test4</u></div><div style="color: rgb(31, 31, 31); font-family: monospace; font-variant: normal; line-height: normal; font-size: 12px; ">
<strike>test5</strike></div><div style="color: rgb(31, 31, 31); font-family: monospace; font-variant: normal; line-height: normal; ">
<font size="5">test6</font></div><div><ul><li><font color="#1f1f1f" face="monospace" size="2">test7</font></li><li>
<font color="#1f1f1f" face="monospace" size="2">test8</font></li></ul><div><ol><li><font color="#1f1f1f" face="monospace" size="2">test9</font>
</li><li><font color="#1f1f1f" face="monospace" size="2">test10</font></li></ol></div></div>
<blockquote style="margin: 0 0 0 40px; border: none; padding: 0px;"><div><div><div><font color="#1f1f1f" face="monospace" size="2">
test11</font></div></div></div></blockquote><blockquote style="margin: 0 0 0 40px; border: none; padding: 0px;">
<blockquote style="margin: 0 0 0 40px; border: none; padding: 0px;"><div><font color="#1f1f1f" face="monospace" size="2">
test12</font></div><div><font color="#1f1f1f" face="monospace" size="2"><br></font></div></blockquote></blockquote>
<font color="#1f1f1f" face="monospace" size="2"><a href="http://google.com">google</a></font>
"""
class TestSanitizer(unittest.TestCase):
    """Unit tests for ``html_sanitize``."""

    def test_simple(self):
        """Plain text without any markup must come back unchanged."""
        x = "yop"
        self.assertEqual(x, html_sanitize(x))

    def test_test_case(self):
        """Every text label of the rich-text fixture must survive sanitizing."""
        res = html_sanitize(test_case)
        # The fixture carries the labels "test1" .. "test12". Assert on them
        # rather than printing the output, so that this test can actually
        # fail when the sanitizer loses content.
        for number in range(1, 13):
            label = "test%d" % number
            self.assertTrue(
                label in res,
                "%s is missing from the sanitized output" % label,
            )
# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()

View File

@ -33,6 +33,7 @@ from pdf_utils import *
from yaml_import import *
from sql import *
from float_utils import *
from html_sanitize import *
#.apidoc title: Tools

View File

@ -1,4 +1,39 @@
from pyquery import PyQuery as pq
def html_sanitize(x):
    """Return the HTML fragment ``x`` after rebuilding it node by node.

    The fragment is loaded into a temporary ``<div>`` wrapper, the wrapper's
    tree is rebuilt by ``handle_element`` and the inner HTML of the rebuilt
    wrapper is returned, so the wrapper itself never shows up in the output.

    :param x: HTML fragment (or plain text) to sanitize.
    :return: inner HTML of the rebuilt wrapper, as given by PyQuery's
        ``html()``.
    """
    # Temporary container so that fragments with several top-level nodes
    # (or with bare text) can be handled as a single tree.
    root = pq("<div />")
    root.html(x)
    rebuilt = handle_element(root[0])
    return pq(rebuilt).html()
def handle_element(el):
    """Rebuild ``el`` as a fresh node and return it wrapped in a list.

    Text nodes are returned as they are. An element is re-created from its
    tag name only (its attributes are not copied), and its child nodes, as
    listed by ``children``, are rebuilt recursively and attached to the new
    element with ``append_to``.

    :param el: a text string, or an element exposing ``tag``.
    :return: a one-item list holding the text or the rebuilt element.
    """
    # (str, type(u"")) is (str, unicode) on Python 2; isinstance also
    # accepts subclasses, unlike the exact ``type(...) ==`` comparison.
    if isinstance(el, (str, type(u""))):
        return [el]
    # NOTE(review): nodes whose ``tag`` is not a string (lxml comments, for
    # instance) are not special-cased here -- confirm whether the input can
    # contain them.
    new = pq("<%s />" % el.tag)[0]
    for node in children(el):
        append_to(handle_element(node), new)
    return [new]
def children(el):
    """Return the child nodes of ``el`` in document order, text included.

    The result interleaves the leading text of ``el``, each child element
    and the tail text that follows each child. Missing (``None``) text and
    tails are skipped.

    :param el: element exposing ``text`` and iterable over its children.
    :return: list mixing text strings and child elements.
    """
    res = []
    if el.text is not None:
        res.append(el.text)
    # Iterate over the element itself: ``getchildren()`` is deprecated in
    # favour of direct iteration.
    for child in el:
        res.append(child)
        if child.tail is not None:
            res.append(child.tail)
    return res
def append_to(new_ones, el):
    """Append every node of ``new_ones`` at the end of element ``el``.

    Elements are appended as children. Text is stored where the element
    tree model keeps it: in ``el.text`` while ``el`` has no child yet,
    otherwise in the ``tail`` of the last child. Text is concatenated to
    whatever is already stored there, so consecutive text nodes do not
    overwrite each other.

    :param new_ones: iterable of text strings and/or elements.
    :param el: element that receives the nodes; it is modified in place.
    """
    # (str, type(u"")) is (str, unicode) on Python 2.
    text_types = (str, type(u""))
    for node in new_ones:
        if not isinstance(node, text_types):
            el.append(node)
        elif len(el) == 0:
            # No child yet: the text belongs to the element itself.
            el.text = (el.text or "") + node
        else:
            # Text following a child lives in that child's tail.
            last = el[-1]
            last.tail = (last.tail or "") + node