bzr revid: fp@openerp.com-20120814163901-chassx588kktn17y
This commit is contained in:
Fabien Pinckaers 2012-08-14 18:39:01 +02:00
commit d6a6522c34
6 changed files with 111 additions and 0 deletions

View File

@ -45,6 +45,7 @@ import openerp.tools as tools
from openerp.tools.translate import _
from openerp.tools import float_round, float_repr
import simplejson
from openerp.tools.html_sanitize import html_sanitize
_logger = logging.getLogger(__name__)
@ -228,6 +229,14 @@ class char(_column):
class text(_column):
_type = 'text'
class html(text):
_type = 'html'
_symbol_c = '%s'
def _symbol_f(x):
return html_sanitize(x)
_symbol_set = (_symbol_c, _symbol_f)
import __builtin__
class float(_column):

View File

@ -545,6 +545,7 @@ FIELDS_TO_PGTYPES = {
fields.boolean: 'bool',
fields.integer: 'int4',
fields.text: 'text',
fields.html: 'text',
fields.date: 'date',
fields.datetime: 'timestamp',
fields.binary: 'bytea',

View File

@ -0,0 +1,37 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import unittest
from openerp.tools.html_sanitize import html_sanitize
test_case = """
<font size="2" style="color: rgb(31, 31, 31); font-family: monospace; font-variant: normal; line-height: normal; ">test1</font>
<div style="color: rgb(31, 31, 31); font-family: monospace; font-variant: normal; line-height: normal; font-size: 12px; font-style: normal; ">
<b>test2</b></div><div style="color: rgb(31, 31, 31); font-family: monospace; font-variant: normal; line-height: normal; font-size: 12px; ">
<i>test3</i></div><div style="color: rgb(31, 31, 31); font-family: monospace; font-variant: normal; line-height: normal; font-size: 12px; ">
<u>test4</u></div><div style="color: rgb(31, 31, 31); font-family: monospace; font-variant: normal; line-height: normal; font-size: 12px; ">
<strike>test5</strike></div><div style="color: rgb(31, 31, 31); font-family: monospace; font-variant: normal; line-height: normal; ">
<font size="5">test6</font></div><div><ul><li><font color="#1f1f1f" face="monospace" size="2">test7</font></li><li>
<font color="#1f1f1f" face="monospace" size="2">test8</font></li></ul><div><ol><li><font color="#1f1f1f" face="monospace" size="2">test9</font>
</li><li><font color="#1f1f1f" face="monospace" size="2">test10</font></li></ol></div></div>
<blockquote style="margin: 0 0 0 40px; border: none; padding: 0px;"><div><div><div><font color="#1f1f1f" face="monospace" size="2">
test11</font></div></div></div></blockquote><blockquote style="margin: 0 0 0 40px; border: none; padding: 0px;">
<blockquote style="margin: 0 0 0 40px; border: none; padding: 0px;"><div><font color="#1f1f1f" face="monospace" size="2">
test12</font></div><div><font color="#1f1f1f" face="monospace" size="2"><br></font></div></blockquote></blockquote>
<font color="#1f1f1f" face="monospace" size="2"><a href="http://google.com">google</a></font>
<a href="javascript:alert('malicious code')">test link</a>
"""
class TestSanitizer(unittest.TestCase):
def test_simple(self):
x = "yop"
self.assertEqual(x, html_sanitize(x))
def test_test_case(self):
html_sanitize(test_case)
def test_crm(self):
html_sanitize("Merci à l'intérêt pour notre produit.nous vous contacterons bientôt. Merci")
if __name__ == '__main__':
unittest.main()

View File

@ -33,6 +33,7 @@ from pdf_utils import *
from yaml_import import *
from sql import *
from float_utils import *
from html_sanitize import *
#.apidoc title: Tools

View File

@ -0,0 +1,62 @@
from pyquery import PyQuery as pq
import re
def html_sanitize(x):
if not x:
return x
root = pq("<div />")
if type(x) == str:
x = unicode(x, "utf8", "replace")
root.html(x)
result = handle_element(root[0])
new = pq(result)
return new.html()
to_remove = set(["script", "head", "meta", "title", "link", "img"])
to_unwrap = set(["html", "body"])
javascript_regex = re.compile("""^\s*javascript\s*\:.*$""")
def handle_a(el, new):
href = el.get("href", "#")
if javascript_regex.search(href):
href = "#"
new.set("href", href)
special = {
"a": handle_a,
}
def handle_element(el):
if type(el) == str or type(el) == unicode:
return [el]
if el.tag in to_remove:
return []
if el.tag in to_unwrap:
return reduce(lambda x,y: x+y, [handle_element(x) for x in children(el)])
new = pq("<%s />" % el.tag)[0]
for i in children(el):
append_to(handle_element(i), new)
if el.tag in special:
special[el.tag](el, new)
return [new]
def children(el):
res = []
if el.text is not None:
res.append(el.text)
for i in el.getchildren():
res.append(i)
if i.tail is not None:
res.append(i.tail)
return res
def append_to(new_ones, el):
for i in new_ones:
if type(i) == str or type(i) == unicode:
children = el.getchildren()
if len(children) == 0:
el.text = i
else:
children[-1].tail = i
else:
el.append(i)

View File

@ -102,6 +102,7 @@ setuptools.setup(
'mako',
'psycopg2',
'pydot',
'pyquery',
'python-dateutil < 2',
'python-ldap',
'python-openid',