class html(text):
    """Column type ``'html'`` derived from ``text``.

    The class pairs the ``'%s'`` placeholder with a converter that passes a
    value through ``html_sanitize``.
    NOTE(review): presumably the ORM applies ``_symbol_set[1]`` to a value
    before it is stored -- confirm against the ``_column`` base class.
    """
    _symbol_c = '%s'
    _type = 'html'

    # Deliberately takes no ``self``: the function object itself is stored in
    # the ``_symbol_set`` tuple below.
    _symbol_f = lambda x: html_sanitize(x)
    _symbol_set = (_symbol_c, _symbol_f)
+test2
+test3
+test4
+test5
+test6
  1. test9 +
  2. test10
+
+test11
+
+test12

# HTML sanitizer: rebuilds a parsed fragment out of bare, attribute-free
# elements so that scripts, event-handler attributes and script-carrying links
# never reach the output.  ``html_sanitize`` needs the third-party ``pyquery``
# package; the tree helpers below only rely on the ElementTree-style element
# API (``tag``, ``text``, ``tail``, iteration, ``makeelement``).
import re

# Elements dropped together with everything they contain.
to_remove = set(["script", "head", "meta", "title", "link", "img"])
# Elements whose own tag is dropped while their content is kept.
to_unwrap = set(["html", "body"])

# Text nodes are byte or unicode strings on Python 2 (``str`` on Python 3).
_TEXT_TYPES = (type(u""), type(b""))

# Control characters and blanks are removed before the scheme check so that
# spellings such as "java\tscript:" are still recognised.
_SCHEME_NOISE = re.compile(r"[\x00-\x20]+")

# Matches hrefs whose scheme can carry script.  Case-insensitive, and without
# a trailing ``.*$`` so that a newline later in the value cannot defeat it.
javascript_regex = re.compile(r"^\s*(?:javascript|vbscript|data)\s*:",
                              re.IGNORECASE)


def html_sanitize(x):
    """Return the markup ``x`` reduced to sanitized, attribute-free elements.

    Falsy input (``None``, ``""``) is returned unchanged.  Byte strings are
    decoded as UTF-8 with replacement characters before parsing.

    :param x: HTML fragment as a byte or unicode string
    :return: the inner HTML of the sanitized fragment
    """
    if not x:
        return x
    # Imported here so the tree helpers stay importable without pyquery.
    from pyquery import PyQuery as pq
    if isinstance(x, bytes):
        x = x.decode("utf8", "replace")
    # Wrapper element that receives the fragment; only its inner HTML is
    # returned, so the wrapper tag never shows up in the result.
    root = pq("<div />")
    root.html(x)
    result = handle_element(root[0])
    return pq(result).html()


def handle_a(el, new):
    """Copy a vetted ``href`` from ``el`` onto the rebuilt link ``new``.

    A missing href, or one whose scheme matches ``javascript_regex`` once
    blanks and control characters are ignored, becomes ``"#"``.
    """
    href = el.get("href", "#")
    if javascript_regex.search(_SCHEME_NOISE.sub("", href)):
        href = "#"
    new.set("href", href)


# Per-tag hooks run after an element has been rebuilt: hook(original, rebuilt).
special = {
    "a": handle_a,
}


def _local_name(tag):
    """Return ``tag`` lower-cased and without any ``{namespace}`` prefix."""
    return tag.rsplit("}", 1)[-1].lower()


def handle_element(el):
    """Sanitize one node and return the list of nodes that replace it.

    :param el: a text node (string) or an element
    :return: ``[el]`` for text, ``[]`` for removed nodes, the sanitized
             children for unwrapped elements, otherwise ``[new_element]``
    """
    if isinstance(el, _TEXT_TYPES):
        return [el]
    tag = el.tag
    if not isinstance(tag, _TEXT_TYPES):
        # Comments and processing instructions carry a non-string tag; they
        # are dropped rather than formatted into markup.
        return []
    # Compare case-insensitively: a parser that preserves the author's
    # spelling would otherwise let "<SCRIPT>" through.
    name = _local_name(tag)
    if name in to_remove:
        return []
    if name in to_unwrap:
        kept = []  # stays a valid result when the element has no content
        for child in children(el):
            kept.extend(handle_element(child))
        return kept
    # Fresh element with the same tag and no attributes at all.
    new = el.makeelement(tag, {})
    for child in children(el):
        append_to(handle_element(child), new)
    if name in special:
        special[name](el, new)
    return [new]


def children(el):
    """Return the text nodes and child elements of ``el`` in document order."""
    res = []
    if el.text is not None:
        res.append(el.text)
    for child in el:
        res.append(child)
        if child.tail is not None:
            res.append(child.tail)
    return res


def append_to(new_ones, el):
    """Append the nodes ``new_ones`` (strings or elements) to ``el``.

    Text is added after whatever text is already in place, so strings that
    become adjacent once an element between them was removed are both kept.
    """
    for item in new_ones:
        if not isinstance(item, _TEXT_TYPES):
            el.append(item)
        elif len(el):
            last = el[-1]
            last.tail = last.tail + item if last.tail else item
        else:
            el.text = el.text + item if el.text else item