From 10155376c431c408ee8cdcb627d18c3db82a32d5 Mon Sep 17 00:00:00 2001 From: niv-openerp Date: Mon, 13 Aug 2012 16:37:55 +0200 Subject: [PATCH] Did better stuff bzr revid: nicolas.vanhoren@openerp.com-20120813143755-g9ccs0iubcwvm02i --- openerp/tools/html_sanitize.py | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/openerp/tools/html_sanitize.py b/openerp/tools/html_sanitize.py index d25d4466bb9..3d7206e64a3 100644 --- a/openerp/tools/html_sanitize.py +++ b/openerp/tools/html_sanitize.py @@ -8,14 +8,28 @@ def html_sanitize(x): new = pq(result) return new.html() +to_remove = set(["script", "head", "meta", "title", "link"]) +to_unwrap = set(["html", "body"]) + +def handle_a(el, new): + new.set("href", el.get("href", "#")) +special = { + "a": handle_a, +} + def handle_element(el): if type(el) == str or type(el) == unicode: return [el] - else: - new = pq("<%s />" % el.tag)[0] - for i in children(el): - append_to(handle_element(i), new) - return [new] + if el.tag in to_remove: + return [] + if el.tag in to_unwrap: + return reduce(lambda x,y: x+y, [handle_element(x) for x in children(el)]) + new = pq("<%s />" % el.tag)[0] + for i in children(el): + append_to(handle_element(i), new) + if el.tag in special: + special[el.tag](el, new) + return [new] def children(el): res = []