2012-08-13 14:22:32 +00:00
#!/usr/bin/env python
2012-08-14 12:14:25 +00:00
# -*- coding: utf-8 -*-
2012-11-06 12:18:24 +00:00
# This test can be run stand-alone with something like:
# > PYTHONPATH=. python2 openerp/tests/test_misc.py
2012-11-06 11:54:20 +00:00
##############################################################################
#
# OpenERP, Open Source Business Applications
# Copyright (c) 2012-TODAY OpenERP S.A. <http://openerp.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
##############################################################################
2012-11-06 12:18:24 +00:00
import unittest2
2013-04-23 14:49:03 +00:00
from . import test_mail_examples
2012-11-09 12:35:21 +00:00
from openerp . tools import html_sanitize , html_email_clean , append_content_to_html , plaintext2html
2012-08-13 14:22:32 +00:00
2012-11-06 12:18:24 +00:00
class TestSanitizer(unittest2.TestCase):
    """ Test the html sanitizer that filters html to remove unwanted attributes """

    def test_basic_sanitizer(self):
        """Check html_sanitize against exact expected output for simple inputs."""
        cases = [
            ("yop", "<p>yop</p>"),  # simple
            ("lala<p>yop</p>xxx", "<div><p>lala</p><p>yop</p>xxx</div>"),  # trailing text
            ("Merci à l'intérêt pour notre produit.nous vous contacterons bientôt. Merci",
                u"<p>Merci à l'intérêt pour notre produit.nous vous contacterons bientôt. Merci</p>"),  # unicode
        ]
        for content, expected in cases:
            html = html_sanitize(content)
            self.assertEqual(html, expected, 'html_sanitize is broken')

    def test_evil_malicious_code(self):
        """Feed known XSS payloads to html_sanitize and check none survives.

        Each payload is a plain string. The encoded variants are kept as
        entities (``&#106;``, ``&#x6A``, ``&#x0D;`` ...) on purpose: the point
        of those cases is that the sanitizer sees the *encoded* form.
        """
        # taken from https://www.owasp.org/index.php/XSS_Filter_Evasion_Cheat_Sheet#Tests
        cases = [
            "<IMG SRC=javascript:alert('XSS')>",  # no quotes and semicolons
            "<IMG SRC=&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;&#97;&#108;&#101;&#114;&#116;&#40;&#39;&#88;&#83;&#83;&#39;&#41;>",  # UTF-8 Unicode encoding
            "<IMG SRC=&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61&#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29>",  # hex encoding
            "<IMG SRC=\"jav&#x0D;ascript:alert('XSS');\">",  # embedded carriage return
            "<IMG SRC=\"jav&#x0A;ascript:alert('XSS');\">",  # embedded newline
            "<IMG SRC=\"jav ascript:alert('XSS');\">",  # embedded tab
            "<IMG SRC=\"jav&#x09;ascript:alert('XSS');\">",  # embedded encoded tab
            "<IMG SRC=\" &#14;  javascript:alert('XSS');\">",  # spaces and meta-characters
            "<IMG SRC=\"javascript:alert('XSS')\"",  # half-open html
            "<IMG \"\"\"><SCRIPT>alert(\"XSS\")</SCRIPT>\">",  # malformed tag
            "<SCRIPT/XSS SRC=\"http://ha.ckers.org/xss.js\"></SCRIPT>",  # non-alpha-non-digits
            "<SCRIPT/SRC=\"http://ha.ckers.org/xss.js\"></SCRIPT>",  # non-alpha-non-digits
            "<<SCRIPT>alert(\"XSS\");//<</SCRIPT>",  # extraneous open brackets
            "<SCRIPT SRC=http://ha.ckers.org/xss.js?< B >",  # non-closing script tags
            "<INPUT TYPE=\"IMAGE\" SRC=\"javascript:alert('XSS');\">",  # input image
            "<BODY BACKGROUND=\"javascript:alert('XSS')\">",  # body image
            "<IMG DYNSRC=\"javascript:alert('XSS')\">",  # img dynsrc
            "<IMG LOWSRC=\"javascript:alert('XSS')\">",  # img lowsrc
            "<TABLE BACKGROUND=\"javascript:alert('XSS')\">",  # table
            "<TABLE><TD BACKGROUND=\"javascript:alert('XSS')\">",  # td
            "<DIV STYLE=\"background-image: url(javascript:alert('XSS'))\">",  # div background
            # NOTE: not a raw string, so Python reads each "\NNN" as an octal escape.
            "<DIV STYLE=\"background-image:\0075\0072\006C\0028'\006a\0061\0076\0061\0073\0063\0072\0069\0070\0074\003a\0061\006c\0065\0072\0074\0028.1027\0058.1053\0053\0027\0029'\0029\">",  # div background with unicoded exploit
            "<DIV STYLE=\"background-image: url(&#1;javascript:alert('XSS'))\">",  # div background + extra characters
            "<IMG SRC='vbscript:msgbox(\"XSS\")'>",  # VBscrip in an image
            "<BODY ONLOAD=alert('XSS')>",  # event handler
            "<BR SIZE=\"&{alert('XSS')}\\>",  # & javascript includes
            "<LINK REL=\"stylesheet\" HREF=\"javascript:alert('XSS');\">",  # style sheet
            "<LINK REL=\"stylesheet\" HREF=\"http://ha.ckers.org/xss.css\">",  # remote style sheet
            "<STYLE>@import'http://ha.ckers.org/xss.css';</STYLE>",  # remote style sheet 2
            "<META HTTP-EQUIV=\"Link\" Content=\"<http://ha.ckers.org/xss.css>; REL=stylesheet\">",  # remote style sheet 3
            "<STYLE>BODY{-moz-binding:url(\"http://ha.ckers.org/xssmoz.xml#xss\")}</STYLE>",  # remote style sheet 4
            "<IMG STYLE=\"xss:expr/*XSS*/ession(alert('XSS'))\">",  # style attribute using a comment to break up expression
            "<!--[if gte IE 4]>\n<SCRIPT>alert('XSS');</SCRIPT>\n<![endif]-->",  # down-level hidden block
        ]
        for content in cases:
            html = html_sanitize(content)
            self.assertNotIn('javascript', html, 'html_sanitize did not remove a malicious javascript')
            # The only tolerated reference to the attacker host is the plain css URL.
            self.assertTrue('ha.ckers.org' not in html or 'http://ha.ckers.org/xss.css' in html,
                            'html_sanitize did not remove a malicious code in %s (%s)' % (content, html))

    def test_html(self):
        """Check that harmless markup and e-mail addresses survive sanitizing."""
        sanitized_html = html_sanitize(test_mail_examples.MISC_HTML_SOURCE)
        for tag in ['<div', '<b', '<i', '<u', '<strike', '<li', '<blockquote', '<a href']:
            self.assertIn(tag, sanitized_html, 'html_sanitize stripped too much of original html')
        for attr in ['javascript']:
            self.assertNotIn(attr, sanitized_html, 'html_sanitize did not remove enough unwanted attributes')

        # (raw input, fragment expected in the sanitized output)
        emails = [
            ("Charles <charles.bidule@truc.fr>", "Charles &lt;charles.bidule@truc.fr&gt;"),
            ("Dupuis <'tr/-: ${dupuis#$'@truc.baz.fr>", "Dupuis &lt;'tr/-: ${dupuis#$'@truc.baz.fr&gt;"),
            ("Technical <service/technical+2@open.com>", "Technical &lt;service/technical+2@open.com&gt;"),
            ("Div nico <div-nico@open.com>", "Div nico &lt;div-nico@open.com&gt;"),
        ]
        for raw, expected in emails:
            self.assertIn(expected, html_sanitize(raw), 'html_sanitize stripped emails of original html')

    def test_edi_source(self):
        """Check that an EDI-like document keeps its valid styles and images."""
        html = html_sanitize(test_mail_examples.EDI_LIKE_HTML_SOURCE)
        self.assertIn('div style="font-family: \'Lucica Grande\', Ubuntu, Arial, Verdana, sans-serif; font-size: 12px; color: rgb(34, 34, 34); background-color: #FFF;', html,
                      'html_sanitize removed valid style attribute')
        self.assertIn('<span style="color: #222; margin-bottom: 5px; display: block; ">', html,
                      'html_sanitize removed valid style attribute')
        self.assertIn('img class="oe_edi_paypal_button" src="https://www.paypal.com/en_US/i/btn/btn_paynowCC_LG.gif"', html,
                      'html_sanitize removed valid img')
        self.assertNotIn('</body></html>', html, 'html_sanitize did not remove extra closing tags')
2012-08-13 14:22:32 +00:00
2012-11-06 11:54:20 +00:00
2012-11-06 12:18:24 +00:00
class TestCleaner(unittest2.TestCase):
    """ Test the email cleaner function that filters the content of incoming emails """

    def _assert_cleaned(self, source, kept, dropped,
                        kept_msg='html_email_cleaner wrongly removed not quoted content',
                        dropped_msg='html_email_cleaner did not erase signature / quoted content',
                        **clean_kwargs):
        """Run html_email_clean on ``source`` and check its output.

        :param source: content handed to html_email_clean
        :param kept: fragments that must still be present in the result
        :param dropped: fragments that must be absent from the result
        :param kept_msg: failure message used for a missing ``kept`` fragment
        :param dropped_msg: failure message used for a surviving ``dropped`` fragment
        :param clean_kwargs: extra keyword arguments forwarded to
            html_email_clean in addition to ``remove=True``
        """
        new_html = html_email_clean(source, remove=True, **clean_kwargs)
        for fragment in kept:
            self.assertIn(fragment, new_html, kept_msg)
        for fragment in dropped:
            self.assertNotIn(fragment, new_html, dropped_msg)

    def test_00_basic_text(self):
        """ html_email_clean test for signatures """
        # (content, fragments that must remain, fragments that must be removed)
        test_data = [
            (
                """This is Sparta!\n--\nAdministrator\n+9988776655""",
                ['This is Sparta!'],
                ['Administrator', '9988776655']
            ), (
                """<p>--\nAdministrator</p>""",
                [],
                ['--', 'Administrator']
            ), (
                """<p>This is Sparta!\n---\nAdministrator</p>""",
                ['This is Sparta!'],
                ['---', 'Administrator']
            ), (
                """<p>--<br>Administrator</p>""",
                [],
                []
            ), (
                """<p>This is Sparta!<br/>--<br>Administrator</p>""",
                ['This is Sparta!'],
                []
            ), (
                """This is Sparta!\n>Ah bon ?\nCertes\n> Chouette !\nClair""",
                ['This is Sparta!', 'Certes', 'Clair'],
                ['Ah bon', 'Chouette']
            )
        ]
        for test, in_lst, out_lst in test_data:
            self._assert_cleaned(
                test, in_lst, out_lst,
                kept_msg='html_email_cleaner wrongly removed content',
                dropped_msg='html_email_cleaner did not remove unwanted content')

    def test_10_email_text(self):
        """ html_email_clean test for text-based emails """
        self._assert_cleaned(test_mail_examples.TEXT_1,
                             test_mail_examples.TEXT_1_IN, test_mail_examples.TEXT_1_OUT)
        self._assert_cleaned(test_mail_examples.TEXT_2,
                             test_mail_examples.TEXT_2_IN, test_mail_examples.TEXT_2_OUT)

    def test_20_email_html(self):
        """ html_email_clean test for the HTML_1 and HTML_2 samples """
        self._assert_cleaned(test_mail_examples.HTML_1,
                             test_mail_examples.HTML_1_IN, test_mail_examples.HTML_1_OUT)
        self._assert_cleaned(test_mail_examples.HTML_2,
                             test_mail_examples.HTML_2_IN, test_mail_examples.HTML_2_OUT)

        # --- MAIL ORIGINAL --- -> can't parse this one currently, too much language-dependent
        # new_html = html_email_clean(test_mail_examples.HTML_3, remove=False)
        # for ext in test_mail_examples.HTML_3_IN:
        #     self.assertIn(ext, new_html, 'html_email_cleaner wrongly removed not quoted content')
        # for ext in test_mail_examples.HTML_3_OUT:
        #     self.assertNotIn(ext, new_html, 'html_email_cleaner did not erase signature / quoted content')

    def test_30_email_msoffice(self):
        """ html_email_clean test for the MSOFFICE_1..3 samples """
        self._assert_cleaned(test_mail_examples.MSOFFICE_1,
                             test_mail_examples.MSOFFICE_1_IN, test_mail_examples.MSOFFICE_1_OUT)
        self._assert_cleaned(test_mail_examples.MSOFFICE_2,
                             test_mail_examples.MSOFFICE_2_IN, test_mail_examples.MSOFFICE_2_OUT)
        self._assert_cleaned(test_mail_examples.MSOFFICE_3,
                             test_mail_examples.MSOFFICE_3_IN, test_mail_examples.MSOFFICE_3_OUT)

    def test_40_email_hotmail(self):
        """ html_email_clean test for the HOTMAIL_1 sample """
        self._assert_cleaned(test_mail_examples.HOTMAIL_1,
                             test_mail_examples.HOTMAIL_1_IN, test_mail_examples.HOTMAIL_1_OUT)

    def test_50_email_gmail(self):
        """ html_email_clean test for the GMAIL_1 sample """
        self._assert_cleaned(test_mail_examples.GMAIL_1,
                             test_mail_examples.GMAIL_1_IN, test_mail_examples.GMAIL_1_OUT)

    def test_60_email_thunderbird(self):
        """ html_email_clean test for the THUNDERBIRD_1 sample """
        self._assert_cleaned(test_mail_examples.THUNDERBIRD_1,
                             test_mail_examples.THUNDERBIRD_1_IN, test_mail_examples.THUNDERBIRD_1_OUT)

    def test_70_read_more(self):
        """ html_email_clean test with shorten=True and max_length=100 """
        # NOTE: the sample is named BUG1 while its expectation lists are BUG_1_*.
        self._assert_cleaned(
            test_mail_examples.BUG1,
            test_mail_examples.BUG_1_IN, test_mail_examples.BUG_1_OUT,
            kept_msg='html_email_cleaner wrongly removed valid content',
            dropped_msg='html_email_cleaner did not removed invalid content',
            shorten=True, max_length=100)

    def test_90_misc(self):
        """ html_email_clean test for a False input and an xml/doctype document """
        # A False input must come back unchanged, i.e. still equal to False
        new_html = html_email_clean(False)
        self.assertEqual(new_html, False, 'html_email_cleaner did change a False in an other value.')

        # Message with xml and doctype tags don't crash
        new_html = html_email_clean(u'<?xml version="1.0" encoding="iso-8859-1"?>\n<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"\n"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">\n<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">\n<head>\n<title>404 - Not Found</title>\n</head>\n<body>\n<h1>404 - Not Found</h1>\n</body>\n</html>\n')
        self.assertNotIn('encoding', new_html, 'html_email_cleaner did not remove correctly encoding attributes')
2012-12-19 11:18:53 +00:00
2012-12-26 16:25:05 +00:00
2012-11-14 10:38:49 +00:00
class TestHtmlTools(unittest2.TestCase):
    """ Test some of our generic utility functions about html """

    def test_plaintext2html(self):
        """Check plaintext2html output for a multi-paragraph text and for text containing markup."""
        # (plain text, container_tag argument, expected html)
        cases = [
            ("First \nSecond \nThird\n \nParagraph\n\r--\nSignature paragraph", 'div',
             "<div><p>First <br/>Second <br/>Third</p><p>Paragraph</p><p>--<br/>Signature paragraph</p></div>"),
            # markup present in the plain text is expected back entity-escaped
            ("First<p>It should be escaped</p>\nSignature", False,
             "<p>First&lt;p&gt;It should be escaped&lt;/p&gt;<br/>Signature</p>"),
        ]
        for content, container_tag, expected in cases:
            html = plaintext2html(content, container_tag)
            self.assertEqual(html, expected, 'plaintext2html is broken')

    def test_append_to_html(self):
        """Check append_content_to_html for plaintext (pre / non-pre) and html content."""
        # (html, content, plaintext_flag, preserve_flag, container_tag, expected)
        test_samples = [
            ('<!DOCTYPE...><HTML encoding="blah">some <b>content</b></HtMl>', '--\nYours truly', True, True, False,
             '<!DOCTYPE...><html encoding="blah">some <b>content</b>\n<pre>--\nYours truly</pre>\n</html>'),
            ('<!DOCTYPE...><HTML encoding="blah">some <b>content</b></HtMl>', '--\nYours truly', True, False, False,
             '<!DOCTYPE...><html encoding="blah">some <b>content</b>\n<p>--<br/>Yours truly</p>\n</html>'),
            ('<html><body>some <b>content</b></body></html>', '<!DOCTYPE...>\n<html><body>\n<p>--</p>\n<p>Yours truly</p>\n</body>\n</html>', False, False, False,
             '<html><body>some <b>content</b>\n\n\n<p>--</p>\n<p>Yours truly</p>\n\n\n</body></html>'),
        ]
        for html, content, plaintext_flag, preserve_flag, container_tag, expected in test_samples:
            result = append_content_to_html(html, content, plaintext_flag, preserve_flag, container_tag)
            self.assertEqual(result, expected, 'append_content_to_html is broken')
2012-11-07 16:41:17 +00:00
2012-08-13 14:22:32 +00:00
# Run the whole module's tests when the file is executed directly as a script.
if __name__ == '__main__':
    unittest2.main()