Added protection against javascript: URLs in the href attribute of <a> elements

bzr revid: nicolas.vanhoren@openerp.com-20120813155205-uohwb39ejn66bgmv
This commit is contained in:
niv-openerp 2012-08-13 17:52:05 +02:00
parent 8dfa86afd9
commit dc170d1a9a
2 changed files with 7 additions and 1 deletions

View File

@ -17,6 +17,7 @@ test11</font></div></div></div></blockquote><blockquote style="margin: 0 0 0 40p
<blockquote style="margin: 0 0 0 40px; border: none; padding: 0px;"><div><font color="#1f1f1f" face="monospace" size="2">
test12</font></div><div><font color="#1f1f1f" face="monospace" size="2"><br></font></div></blockquote></blockquote>
<font color="#1f1f1f" face="monospace" size="2"><a href="http://google.com">google</a></font>
<a href="javascript:alert('malicious code')">test link</a>
"""
class TestSanitizer(unittest.TestCase):

View File

@ -1,5 +1,6 @@
from pyquery import PyQuery as pq
import re
def html_sanitize(x):
root = pq("<div />")
@ -11,8 +12,12 @@ def html_sanitize(x):
to_remove = set(["script", "head", "meta", "title", "link", "img"])
to_unwrap = set(["html", "body"])
# Matches an href whose scheme is "javascript". URL schemes are
# case-insensitive, hence IGNORECASE. Only the prefix is anchored: requiring
# ".*$" to reach the end of the string let payloads containing a newline
# escape the check.
javascript_regex = re.compile(r"^[\x00-\x20\s]*javascript\s*:", re.IGNORECASE)
# Browsers discard tabs and line breaks while parsing a URL, so
# "java\tscript:..." still executes; strip them before testing the scheme.
url_ignored_chars_regex = re.compile(r"[\t\n\r]+")

def handle_a(el, new):
    """Set the ``href`` of ``new`` from ``el``, neutralising javascript: URLs.

    ``el`` must provide ``get(name, default)`` and ``new`` must provide
    ``set(name, value)``. Presumably ``el`` is the original <a> element and
    ``new`` its sanitized counterpart -- confirm against the caller.

    A missing ``href`` becomes ``"#"``. An ``href`` that resolves to the
    ``javascript`` scheme (in any letter case, with leading whitespace or
    control characters, or with tabs/newlines embedded in it) is replaced by
    ``"#"``; any other value is copied unchanged.
    """
    href = el.get("href", "#")
    # Test a normalised copy, but write back the original value when it is
    # harmless so legitimate links are not altered.
    if javascript_regex.match(url_ignored_chars_regex.sub("", href)):
        href = "#"
    new.set("href", href)
special = {
"a": handle_a,
}