From 7341bd30cc02a4360406864a17fd2e8b801cd6b3 Mon Sep 17 00:00:00 2001 From: niv-openerp Date: Mon, 13 Aug 2012 13:46:19 +0200 Subject: [PATCH 01/10] Added html field type bzr revid: nicolas.vanhoren@openerp.com-20120813114619-lqr9zu0rkz6737rp --- openerp/osv/fields.py | 3 +++ openerp/osv/orm.py | 1 + 2 files changed, 4 insertions(+) diff --git a/openerp/osv/fields.py b/openerp/osv/fields.py index 4ff23956050..b361ab664b6 100644 --- a/openerp/osv/fields.py +++ b/openerp/osv/fields.py @@ -227,6 +227,9 @@ class char(_column): class text(_column): _type = 'text' +class html(text): + _type = 'html' + import __builtin__ class float(_column): diff --git a/openerp/osv/orm.py b/openerp/osv/orm.py index 563c514726c..069664cfebe 100644 --- a/openerp/osv/orm.py +++ b/openerp/osv/orm.py @@ -545,6 +545,7 @@ FIELDS_TO_PGTYPES = { fields.boolean: 'bool', fields.integer: 'int4', fields.text: 'text', + fields.html: 'text', fields.date: 'date', fields.datetime: 'timestamp', fields.binary: 'bytea', From 11780a2267e699b5fdfd183cad539f051737ec6e Mon Sep 17 00:00:00 2001 From: niv-openerp Date: Mon, 13 Aug 2012 14:53:07 +0200 Subject: [PATCH 02/10] Added some code to sanitize html bzr revid: nicolas.vanhoren@openerp.com-20120813125307-d8cycdvtd7ad0f8e --- openerp/osv/fields.py | 6 ++++++ openerp/tools/html_sanitize.py | 4 ++++ 2 files changed, 10 insertions(+) create mode 100644 openerp/tools/html_sanitize.py diff --git a/openerp/osv/fields.py b/openerp/osv/fields.py index b361ab664b6..249f9d5015d 100644 --- a/openerp/osv/fields.py +++ b/openerp/osv/fields.py @@ -45,6 +45,7 @@ import openerp.tools as tools from openerp.tools.translate import _ from openerp.tools import float_round, float_repr import simplejson +from openerp.tools.html_sanitize import html_sanitize _logger = logging.getLogger(__name__) @@ -229,6 +230,11 @@ class text(_column): class html(text): _type = 'html' + _symbol_c = '%s' + def _symbol_f(x): + return html_sanitize(x) + + _symbol_set = (_symbol_c, _symbol_f) import __builtin__ diff --git a/openerp/tools/html_sanitize.py b/openerp/tools/html_sanitize.py new file mode 100644 index 00000000000..4e6fb1540b1 --- /dev/null +++ b/openerp/tools/html_sanitize.py @@ -0,0 +1,4 @@ + + +def html_sanitize(x): + return x From e5fb45a329db25bb6b6312cd377ddd2a44e7e1b0 Mon Sep 17 00:00:00 2001 From: niv-openerp Date: Mon, 13 Aug 2012 16:22:32 +0200 Subject: [PATCH 03/10] First working version of the html sanitizer bzr revid: nicolas.vanhoren@openerp.com-20120813142232-xn7h0ov7mb3pls4o --- openerp/tests/test_html_sanitize.py | 33 +++++++++++++++++++++++++ openerp/tools/__init__.py | 1 + openerp/tools/html_sanitize.py | 37 ++++++++++++++++++++++++++++- 3 files changed, 70 insertions(+), 1 deletion(-) create mode 100755 openerp/tests/test_html_sanitize.py diff --git a/openerp/tests/test_html_sanitize.py b/openerp/tests/test_html_sanitize.py new file mode 100755 index 00000000000..906c78d1dcf --- /dev/null +++ b/openerp/tests/test_html_sanitize.py @@ -0,0 +1,33 @@ +#!/usr/bin/env python +import unittest +from openerp.tools.html_sanitize import html_sanitize + +test_case = """ +test1 +
+test2
+test3
+test4
+test5
+test6
  • test7
  • +test8
  1. test9 +
  2. test10
+
+test11
+
+test12

+google +""" + +class TestSanitizer(unittest.TestCase): + + def test_simple(self): + x = "yop" + self.assertEqual(x, html_sanitize(x)) + + def test_test_case(self): + res = html_sanitize(test_case) + print res + +if __name__ == '__main__': + unittest.main() \ No newline at end of file diff --git a/openerp/tools/__init__.py b/openerp/tools/__init__.py index a6eeb14ba96..a0ca411a9df 100644 --- a/openerp/tools/__init__.py +++ b/openerp/tools/__init__.py @@ -33,6 +33,7 @@ from pdf_utils import * from yaml_import import * from sql import * from float_utils import * +from html_sanitize import * #.apidoc title: Tools diff --git a/openerp/tools/html_sanitize.py b/openerp/tools/html_sanitize.py index 4e6fb1540b1..d25d4466bb9 100644 --- a/openerp/tools/html_sanitize.py +++ b/openerp/tools/html_sanitize.py @@ -1,4 +1,39 @@ +from pyquery import PyQuery as pq def html_sanitize(x): - return x + root = pq("
") + root.html(x) + result = handle_element(root[0]) + new = pq(result) + return new.html() + +def handle_element(el): + if type(el) == str or type(el) == unicode: + return [el] + else: + new = pq("<%s />" % el.tag)[0] + for i in children(el): + append_to(handle_element(i), new) + return [new] + +def children(el): + res = [] + if el.text is not None: + res.append(el.text) + for i in el.getchildren(): + res.append(i) + if i.tail is not None: + res.append(i.tail) + return res + +def append_to(new_ones, el): + for i in new_ones: + if type(i) == str or type(i) == unicode: + children = el.getchildren() + if len(children) == 0: + el.text = i + else: + children[-1].tail = i + else: + el.append(i) \ No newline at end of file From 10155376c431c408ee8cdcb627d18c3db82a32d5 Mon Sep 17 00:00:00 2001 From: niv-openerp Date: Mon, 13 Aug 2012 16:37:55 +0200 Subject: [PATCH 04/10] Did better stuff bzr revid: nicolas.vanhoren@openerp.com-20120813143755-g9ccs0iubcwvm02i --- openerp/tools/html_sanitize.py | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/openerp/tools/html_sanitize.py b/openerp/tools/html_sanitize.py index d25d4466bb9..3d7206e64a3 100644 --- a/openerp/tools/html_sanitize.py +++ b/openerp/tools/html_sanitize.py @@ -8,14 +8,28 @@ def html_sanitize(x): new = pq(result) return new.html() +to_remove = set(["script", "head", "meta", "title", "link"]) +to_unwrap = set(["html", "body"]) + +def handle_a(el, new): + new.set("href", el.get("href", "#")) +special = { + "a": handle_a, +} + def handle_element(el): if type(el) == str or type(el) == unicode: return [el] - else: - new = pq("<%s />" % el.tag)[0] - for i in children(el): - append_to(handle_element(i), new) - return [new] + if el.tag in to_remove: + return [] + if el.tag in to_unwrap: + return reduce(lambda x,y: x+y, [handle_element(x) for x in children(el)]) + new = pq("<%s />" % el.tag)[0] + for i in children(el): + append_to(handle_element(i), new) + if el.tag in special: + special[el.tag](el, new) + return [new] def children(el): res = [] From 8dfa86afd9e78616740d45c121c98cd3fdf05cd8 Mon Sep 17 00:00:00 2001 From: niv-openerp Date: Mon, 13 Aug 2012 16:44:01 +0200 Subject: [PATCH 05/10] removed images bzr revid: nicolas.vanhoren@openerp.com-20120813144401-cll0enjblqkh8ina --- openerp/tools/html_sanitize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openerp/tools/html_sanitize.py b/openerp/tools/html_sanitize.py index 3d7206e64a3..2010459ca12 100644 --- a/openerp/tools/html_sanitize.py +++ b/openerp/tools/html_sanitize.py @@ -8,7 +8,7 @@ def html_sanitize(x): new = pq(result) return new.html() -to_remove = set(["script", "head", "meta", "title", "link"]) +to_remove = set(["script", "head", "meta", "title", "link", "img"]) to_unwrap = set(["html", "body"]) def handle_a(el, new): From dc170d1a9a0749588cf94f30990cc9b245ef7e2b Mon Sep 17 00:00:00 2001 From: niv-openerp Date: Mon, 13 Aug 2012 17:52:05 +0200 Subject: [PATCH 06/10] Added protection against javascript in bzr revid: nicolas.vanhoren@openerp.com-20120813155205-uohwb39ejn66bgmv --- openerp/tests/test_html_sanitize.py | 1 + openerp/tools/html_sanitize.py | 7 ++++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/openerp/tests/test_html_sanitize.py b/openerp/tests/test_html_sanitize.py index 906c78d1dcf..3efb4a43ca5 100755 --- a/openerp/tests/test_html_sanitize.py +++ b/openerp/tests/test_html_sanitize.py @@ -17,6 +17,7 @@ test11
test12

google +test link """ class TestSanitizer(unittest.TestCase): diff --git a/openerp/tools/html_sanitize.py b/openerp/tools/html_sanitize.py index 2010459ca12..74d83a2c453 100644 --- a/openerp/tools/html_sanitize.py +++ b/openerp/tools/html_sanitize.py @@ -1,5 +1,6 @@ from pyquery import PyQuery as pq +import re def html_sanitize(x): root = pq("
") @@ -11,8 +12,12 @@ def html_sanitize(x): to_remove = set(["script", "head", "meta", "title", "link", "img"]) to_unwrap = set(["html", "body"]) +javascript_regex = re.compile("""^\s*javascript\s*\:.*$""") def handle_a(el, new): - new.set("href", el.get("href", "#")) + href = el.get("href", "#") + if javascript_regex.search(href): + href = "#" + new.set("href", href) special = { "a": handle_a, } From 2522955ef51c01f0429e409b4f7d268993821253 Mon Sep 17 00:00:00 2001 From: niv-openerp Date: Mon, 13 Aug 2012 18:17:34 +0200 Subject: [PATCH 07/10] Added pyquery dependency bzr revid: nicolas.vanhoren@openerp.com-20120813161734-7akxglc5908ant25 --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 6e1adadde82..259207d9a77 100755 --- a/setup.py +++ b/setup.py @@ -102,6 +102,7 @@ setuptools.setup( 'mako', 'psycopg2', 'pydot', + 'pyquery', 'python-dateutil < 2', 'python-ldap', 'python-openid', From cfaeed15816fe9e9b4e65de164b6bbf637d29ea9 Mon Sep 17 00:00:00 2001 From: niv-openerp Date: Tue, 14 Aug 2012 09:14:57 +0200 Subject: [PATCH 08/10] Removed print in test bzr revid: nicolas.vanhoren@openerp.com-20120814071457-hw8d5yp6xzporiuo --- openerp/tests/test_html_sanitize.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/openerp/tests/test_html_sanitize.py b/openerp/tests/test_html_sanitize.py index 3efb4a43ca5..85001996ee1 100755 --- a/openerp/tests/test_html_sanitize.py +++ b/openerp/tests/test_html_sanitize.py @@ -27,8 +27,7 @@ class TestSanitizer(unittest.TestCase): self.assertEqual(x, html_sanitize(x)) def test_test_case(self): - res = html_sanitize(test_case) - print res + html_sanitize(test_case) if __name__ == '__main__': unittest.main() \ No newline at end of file From f111a51f10b23307191ec9aac85438eef596be83 Mon Sep 17 00:00:00 2001 From: niv-openerp Date: Tue, 14 Aug 2012 11:44:01 +0200 Subject: [PATCH 09/10] Fixed problem when message is false (i think) bzr revid: nicolas.vanhoren@openerp.com-20120814094401-uv548xbylkfq8bun --- openerp/tools/html_sanitize.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/openerp/tools/html_sanitize.py b/openerp/tools/html_sanitize.py index 74d83a2c453..e8896774dc1 100644 --- a/openerp/tools/html_sanitize.py +++ b/openerp/tools/html_sanitize.py @@ -3,6 +3,8 @@ from pyquery import PyQuery as pq import re def html_sanitize(x): + if not x: + return x root = pq("
") root.html(x) result = handle_element(root[0]) From 63ae87fb4cc5931e900806677b29366c0c9876fd Mon Sep 17 00:00:00 2001 From: niv-openerp Date: Tue, 14 Aug 2012 14:14:25 +0200 Subject: [PATCH 10/10] Fixed encoding problems bzr revid: nicolas.vanhoren@openerp.com-20120814121425-jq3ieebbsa7rcds5 --- openerp/tests/test_html_sanitize.py | 4 ++++ openerp/tools/html_sanitize.py | 2 ++ 2 files changed, 6 insertions(+) diff --git a/openerp/tests/test_html_sanitize.py b/openerp/tests/test_html_sanitize.py index 85001996ee1..90ccc88cc1d 100755 --- a/openerp/tests/test_html_sanitize.py +++ b/openerp/tests/test_html_sanitize.py @@ -1,4 +1,5 @@ #!/usr/bin/env python +# -*- coding: utf-8 -*- import unittest from openerp.tools.html_sanitize import html_sanitize @@ -28,6 +29,9 @@ class TestSanitizer(unittest.TestCase): def test_test_case(self): html_sanitize(test_case) + + def test_crm(self): + html_sanitize("Merci à l'intérêt pour notre produit.nous vous contacterons bientôt. Merci") if __name__ == '__main__': unittest.main() \ No newline at end of file diff --git a/openerp/tools/html_sanitize.py b/openerp/tools/html_sanitize.py index e8896774dc1..5164ceb276e 100644 --- a/openerp/tools/html_sanitize.py +++ b/openerp/tools/html_sanitize.py @@ -6,6 +6,8 @@ def html_sanitize(x): if not x: return x root = pq("
") + if type(x) == str: + x = unicode(x, "utf8", "replace") root.html(x) result = handle_element(root[0]) new = pq(result)