commit
d6a6522c34
|
@ -45,6 +45,7 @@ import openerp.tools as tools
|
||||||
from openerp.tools.translate import _
|
from openerp.tools.translate import _
|
||||||
from openerp.tools import float_round, float_repr
|
from openerp.tools import float_round, float_repr
|
||||||
import simplejson
|
import simplejson
|
||||||
|
from openerp.tools.html_sanitize import html_sanitize
|
||||||
|
|
||||||
_logger = logging.getLogger(__name__)
|
_logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
@ -228,6 +229,14 @@ class char(_column):
|
||||||
class text(_column):
|
class text(_column):
|
||||||
_type = 'text'
|
_type = 'text'
|
||||||
|
|
||||||
|
class html(text):
    """Column type for HTML content, derived from ``text``.

    The class only overrides the type tag and the ``_symbol_*`` trio; the
    conversion function hands every value to ``html_sanitize``.
    NOTE(review): presumably the ORM applies ``_symbol_set[1]`` to values on
    their way to the database -- confirm against ``_column``.
    """
    _type = "html"

    # Placeholder half of ``_symbol_set``.
    _symbol_c = "%s"

    def _symbol_f(x):
        # Written without ``self`` on purpose: the plain function object is
        # stored in ``_symbol_set`` below and called with the value only.
        return html_sanitize(x)

    _symbol_set = (_symbol_c, _symbol_f)
|
||||||
|
|
||||||
import __builtin__
|
import __builtin__
|
||||||
|
|
||||||
class float(_column):
|
class float(_column):
|
||||||
|
|
|
@ -545,6 +545,7 @@ FIELDS_TO_PGTYPES = {
|
||||||
fields.boolean: 'bool',
|
fields.boolean: 'bool',
|
||||||
fields.integer: 'int4',
|
fields.integer: 'int4',
|
||||||
fields.text: 'text',
|
fields.text: 'text',
|
||||||
|
fields.html: 'text',
|
||||||
fields.date: 'date',
|
fields.date: 'date',
|
||||||
fields.datetime: 'timestamp',
|
fields.datetime: 'timestamp',
|
||||||
fields.binary: 'bytea',
|
fields.binary: 'bytea',
|
||||||
|
|
|
@ -0,0 +1,37 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
import unittest
|
||||||
|
from openerp.tools.html_sanitize import html_sanitize
|
||||||
|
|
||||||
|
test_case = """
|
||||||
|
<font size="2" style="color: rgb(31, 31, 31); font-family: monospace; font-variant: normal; line-height: normal; ">test1</font>
|
||||||
|
<div style="color: rgb(31, 31, 31); font-family: monospace; font-variant: normal; line-height: normal; font-size: 12px; font-style: normal; ">
|
||||||
|
<b>test2</b></div><div style="color: rgb(31, 31, 31); font-family: monospace; font-variant: normal; line-height: normal; font-size: 12px; ">
|
||||||
|
<i>test3</i></div><div style="color: rgb(31, 31, 31); font-family: monospace; font-variant: normal; line-height: normal; font-size: 12px; ">
|
||||||
|
<u>test4</u></div><div style="color: rgb(31, 31, 31); font-family: monospace; font-variant: normal; line-height: normal; font-size: 12px; ">
|
||||||
|
<strike>test5</strike></div><div style="color: rgb(31, 31, 31); font-family: monospace; font-variant: normal; line-height: normal; ">
|
||||||
|
<font size="5">test6</font></div><div><ul><li><font color="#1f1f1f" face="monospace" size="2">test7</font></li><li>
|
||||||
|
<font color="#1f1f1f" face="monospace" size="2">test8</font></li></ul><div><ol><li><font color="#1f1f1f" face="monospace" size="2">test9</font>
|
||||||
|
</li><li><font color="#1f1f1f" face="monospace" size="2">test10</font></li></ol></div></div>
|
||||||
|
<blockquote style="margin: 0 0 0 40px; border: none; padding: 0px;"><div><div><div><font color="#1f1f1f" face="monospace" size="2">
|
||||||
|
test11</font></div></div></div></blockquote><blockquote style="margin: 0 0 0 40px; border: none; padding: 0px;">
|
||||||
|
<blockquote style="margin: 0 0 0 40px; border: none; padding: 0px;"><div><font color="#1f1f1f" face="monospace" size="2">
|
||||||
|
test12</font></div><div><font color="#1f1f1f" face="monospace" size="2"><br></font></div></blockquote></blockquote>
|
||||||
|
<font color="#1f1f1f" face="monospace" size="2"><a href="http://google.com">google</a></font>
|
||||||
|
<a href="javascript:alert('malicious code')">test link</a>
|
||||||
|
"""
|
||||||
|
|
||||||
|
class TestSanitizer(unittest.TestCase):
    """Checks for ``html_sanitize``."""

    def test_simple(self):
        """Plain text without markup comes back unchanged."""
        x = "yop"
        self.assertEqual(x, html_sanitize(x))

    def test_test_case(self):
        """The rich-text fixture is sanitized, not merely parsed."""
        sanitized = html_sanitize(test_case)
        # The script URL of the last link must have been neutralised ...
        self.assertTrue("javascript" not in sanitized)
        # ... while the harmless link and the visible text survive.
        self.assertTrue("http://google.com" in sanitized)
        self.assertTrue("test link" in sanitized)

    def test_crm(self):
        """A UTF-8 encoded byte string with accents is accepted."""
        sanitized = html_sanitize("Merci à l'intérêt pour notre produit.nous vous contacterons bientôt. Merci")
        self.assertTrue("Merci" in sanitized)


if __name__ == '__main__':
    unittest.main()
|
|
@ -33,6 +33,7 @@ from pdf_utils import *
|
||||||
from yaml_import import *
|
from yaml_import import *
|
||||||
from sql import *
|
from sql import *
|
||||||
from float_utils import *
|
from float_utils import *
|
||||||
|
from html_sanitize import *
|
||||||
|
|
||||||
#.apidoc title: Tools
|
#.apidoc title: Tools
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,62 @@
|
||||||
|
|
||||||
|
from pyquery import PyQuery as pq
|
||||||
|
import re
|
||||||
|
|
||||||
|
def html_sanitize(x):
    """Return a sanitized version of the HTML fragment ``x``.

    The fragment is loaded into a throw-away ``<div>`` wrapper, the wrapper
    is rebuilt through ``handle_element`` and the inner markup of the
    rebuilt wrapper is returned.

    :param x: HTML source. Byte strings are decoded as UTF-8, undecodable
        bytes being replaced.
    :return: ``x`` itself when it is falsy (``None``, ``""``, ...),
        otherwise the result of ``html()`` on the rebuilt wrapper.
    """
    if not x:
        return x
    # ``bytes`` is an alias of ``str`` on Python 2; ``isinstance`` also
    # accepts subclasses, which the former ``type(x) == str`` test missed.
    if isinstance(x, bytes):
        x = x.decode("utf8", "replace")
    wrapper = pq("<div />")
    wrapper.html(x)
    rebuilt = handle_element(wrapper[0])
    return pq(rebuilt).html()
|
||||||
|
|
||||||
|
# Tags for which ``handle_element`` returns nothing at all: the element and
# everything nested in it disappear from the output.
to_remove = set((
    "script",
    "head",
    "meta",
    "title",
    "link",
    "img",
))

# Tags that ``handle_element`` drops while keeping their (processed) children.
to_unwrap = set((
    "html",
    "body",
))
|
||||||
|
|
||||||
|
# Matches a ``javascript:`` scheme at the start of a URL, in any letter case
# and after any leading control characters or spaces. The rest of the URL is
# deliberately not matched, so a newline later in the value cannot make the
# test fail.
javascript_regex = re.compile(r"^[\x00-\x20]*javascript\s*:", re.IGNORECASE)

# Tab, line feed and carriage return are removed by browsers while parsing a
# URL, so ``java\tscript:`` is still a script URL for them.
_href_ignored_chars = re.compile(r"[\t\n\r]+")


def handle_a(el, new):
    """Copy the ``href`` of ``el`` onto ``new``, neutralising script URLs.

    :param el: source element; only read, through ``el.get("href", "#")``.
    :param new: element receiving the attribute through ``new.set``.

    A missing ``href`` and any ``javascript:`` URL both end up as ``"#"``;
    every other value is copied unchanged.
    """
    href = el.get("href", "#")
    # Test a normalised copy so the stored value of harmless links is
    # left exactly as written.
    if javascript_regex.search(_href_ignored_chars.sub("", href)):
        href = "#"
    new.set("href", href)
|
||||||
|
# Tag name -> hook called by ``handle_element`` as ``hook(el, new)`` once the
# replacement element ``new`` has been filled with the children of ``el``.
special = dict(a=handle_a)
|
||||||
|
|
||||||
|
def handle_element(el):
    """Rebuild ``el`` and return the pieces that replace it.

    :param el: either a text string or an element exposing ``tag``.
    :return: a list made of strings and freshly created elements:

        * a string is returned as the only item;
        * a comment or processing instruction yields ``[]``;
        * a tag listed in ``to_remove`` yields ``[]``;
        * a tag listed in ``to_unwrap`` yields its processed children;
        * any other tag yields one new element of the same tag holding the
          processed children. Attributes are not copied; a hook registered
          in ``special`` may set some on the new element.
    """
    # ``type(u"")`` is ``unicode`` on Python 2 and ``str`` on Python 3.
    text_types = (str, type(u""))
    if isinstance(el, text_types):
        return [el]

    tag = el.tag
    if not isinstance(tag, text_types):
        # Comments and processing instructions carry a factory function in
        # ``tag``; formatting it into markup below would produce garbage.
        return []
    if tag in to_remove:
        return []

    if tag in to_unwrap:
        # Explicit accumulation instead of ``reduce`` without an initial
        # value, which raised ``TypeError`` for an element with no children.
        flattened = []
        for child in children(el):
            flattened.extend(handle_element(child))
        return flattened

    new = pq("<%s />" % tag)[0]
    for child in children(el):
        append_to(handle_element(child), new)
    hook = special.get(tag)
    if hook is not None:
        hook(el, new)
    return [new]
|
||||||
|
|
||||||
|
def children(el):
    """Return the mixed content of ``el`` in document order.

    :param el: an element exposing ``text`` and iteration over its child
        elements, each child exposing ``tail``.
    :return: a new list: the leading text of ``el`` (when not ``None``),
        then every child element, each followed by its tail text (when not
        ``None``).
    """
    res = []
    if el.text is not None:
        res.append(el.text)
    # Iterating the element directly replaces the deprecated
    # ``getchildren()`` call.
    for child in el:
        res.append(child)
        if child.tail is not None:
            res.append(child.tail)
    return res
|
||||||
|
|
||||||
|
def append_to(new_ones, el):
    """Append the items of ``new_ones`` to the content of ``el``, in order.

    :param new_ones: iterable of text strings and elements.
    :param el: element modified in place.

    An element is appended as the last child. A string is added after the
    current content: to ``el.text`` while ``el`` has no child, otherwise to
    the ``tail`` of the last child. Text already present there is kept and
    extended, so two strings in a row (for instance the text on both sides
    of a removed element) are both preserved.
    """
    # ``type(u"")`` is ``unicode`` on Python 2 and ``str`` on Python 3.
    text_types = (str, type(u""))
    for item in new_ones:
        if not isinstance(item, text_types):
            el.append(item)
        elif len(el) == 0:
            el.text = (el.text or "") + item
        else:
            last = el[-1]
            last.tail = (last.tail or "") + item
|
Loading…
Reference in New Issue