[REF] html_email_clean: refactoring of the algorithm, now better taking into account hotmail / msoffice emails. Added tests.
bzr revid: tde@openerp.com-20130423144903-he22jz9zs6nc6dsd
This commit is contained in:
parent
261dea6fec
commit
f97bd8bd63
|
@ -23,146 +23,9 @@
|
|||
##############################################################################
|
||||
|
||||
import unittest2
|
||||
from . import test_mail_examples
|
||||
from openerp.tools import html_sanitize, html_email_clean, append_content_to_html, plaintext2html
|
||||
|
||||
HTML_SOURCE = """
|
||||
<font size="2" style="color: rgb(31, 31, 31); font-family: monospace; font-variant: normal; line-height: normal; ">test1</font>
|
||||
<div style="color: rgb(31, 31, 31); font-family: monospace; font-variant: normal; line-height: normal; font-size: 12px; font-style: normal; ">
|
||||
<b>test2</b></div><div style="color: rgb(31, 31, 31); font-family: monospace; font-variant: normal; line-height: normal; font-size: 12px; ">
|
||||
<i>test3</i></div><div style="color: rgb(31, 31, 31); font-family: monospace; font-variant: normal; line-height: normal; font-size: 12px; ">
|
||||
<u>test4</u></div><div style="color: rgb(31, 31, 31); font-family: monospace; font-variant: normal; line-height: normal; font-size: 12px; ">
|
||||
<strike>test5</strike></div><div style="color: rgb(31, 31, 31); font-family: monospace; font-variant: normal; line-height: normal; ">
|
||||
<font size="5">test6</font></div><div><ul><li><font color="#1f1f1f" face="monospace" size="2">test7</font></li><li>
|
||||
<font color="#1f1f1f" face="monospace" size="2">test8</font></li></ul><div><ol><li><font color="#1f1f1f" face="monospace" size="2">test9</font>
|
||||
</li><li><font color="#1f1f1f" face="monospace" size="2">test10</font></li></ol></div></div>
|
||||
<blockquote style="margin: 0 0 0 40px; border: none; padding: 0px;"><div><div><div><font color="#1f1f1f" face="monospace" size="2">
|
||||
test11</font></div></div></div></blockquote><blockquote style="margin: 0 0 0 40px; border: none; padding: 0px;">
|
||||
<blockquote style="margin: 0 0 0 40px; border: none; padding: 0px;"><div><font color="#1f1f1f" face="monospace" size="2">
|
||||
test12</font></div><div><font color="#1f1f1f" face="monospace" size="2"><br></font></div></blockquote></blockquote>
|
||||
<font color="#1f1f1f" face="monospace" size="2"><a href="http://google.com">google</a></font>
|
||||
<a href="javascript:alert('malicious code')">test link</a>
|
||||
"""
|
||||
|
||||
EDI_LIKE_HTML_SOURCE = """<div style="font-family: 'Lucica Grande', Ubuntu, Arial, Verdana, sans-serif; font-size: 12px; color: rgb(34, 34, 34); background-color: #FFF; ">
|
||||
<p>Hello ${object.partner_id.name},</p>
|
||||
<p>A new invoice is available for you: </p>
|
||||
<p style="border-left: 1px solid #8e0000; margin-left: 30px;">
|
||||
<strong>REFERENCES</strong><br />
|
||||
Invoice number: <strong>${object.number}</strong><br />
|
||||
Invoice total: <strong>${object.amount_total} ${object.currency_id.name}</strong><br />
|
||||
Invoice date: ${object.date_invoice}<br />
|
||||
Order reference: ${object.origin}<br />
|
||||
Your contact: <a href="mailto:${object.user_id.email or ''}?subject=Invoice%20${object.number}">${object.user_id.name}</a>
|
||||
</p>
|
||||
<br/>
|
||||
<p>It is also possible to directly pay with Paypal:</p>
|
||||
<a style="margin-left: 120px;" href="${object.paypal_url}">
|
||||
<img class="oe_edi_paypal_button" src="https://www.paypal.com/en_US/i/btn/btn_paynowCC_LG.gif"/>
|
||||
</a>
|
||||
<br/>
|
||||
<p>If you have any question, do not hesitate to contact us.</p>
|
||||
<p>Thank you for choosing ${object.company_id.name or 'us'}!</p>
|
||||
<br/>
|
||||
<br/>
|
||||
<div style="width: 375px; margin: 0px; padding: 0px; background-color: #8E0000; border-top-left-radius: 5px 5px; border-top-right-radius: 5px 5px; background-repeat: repeat no-repeat;">
|
||||
<h3 style="margin: 0px; padding: 2px 14px; font-size: 12px; color: #DDD;">
|
||||
<strong style="text-transform:uppercase;">${object.company_id.name}</strong></h3>
|
||||
</div>
|
||||
<div style="width: 347px; margin: 0px; padding: 5px 14px; line-height: 16px; background-color: #F2F2F2;">
|
||||
<span style="color: #222; margin-bottom: 5px; display: block; ">
|
||||
${object.company_id.street}<br/>
|
||||
${object.company_id.street2}<br/>
|
||||
${object.company_id.zip} ${object.company_id.city}<br/>
|
||||
${object.company_id.state_id and ('%s, ' % object.company_id.state_id.name) or ''} ${object.company_id.country_id.name or ''}<br/>
|
||||
</span>
|
||||
<div style="margin-top: 0px; margin-right: 0px; margin-bottom: 0px; margin-left: 0px; padding-top: 0px; padding-right: 0px; padding-bottom: 0px; padding-left: 0px; ">
|
||||
Phone: ${object.company_id.phone}
|
||||
</div>
|
||||
<div>
|
||||
Web : <a href="${object.company_id.website}">${object.company_id.website}</a>
|
||||
</div>
|
||||
</div>
|
||||
</div></body></html>"""
|
||||
|
||||
TEXT_MAIL1 = """I contact you about our meeting for tomorrow. Here is the schedule I propose:
|
||||
9 AM: brainstorming about our new amazing business app</span></li>
|
||||
9.45 AM: summary
|
||||
10 AM: meeting with Fabien to present our app
|
||||
Is everything ok for you ?
|
||||
--
|
||||
Administrator"""
|
||||
|
||||
HTML_MAIL1 = """<div>
|
||||
<font><span>I contact you about our meeting for tomorrow. Here is the schedule I propose:</span></font>
|
||||
</div>
|
||||
<div><ul>
|
||||
<li><span>9 AM: brainstorming about our new amazing business app</span></li>
|
||||
<li><span>9.45 AM: summary</span></li>
|
||||
<li><span>10 AM: meeting with Fabien to present our app</span></li>
|
||||
</ul></div>
|
||||
<div><font><span>Is everything ok for you ?</span></font></div>"""
|
||||
|
||||
GMAIL_REPLY1_SAN = """Hello,<div><br></div><div>Ok for me. I am replying directly in gmail, without signature.</div><div><br></div><div>Kind regards,</div><div><br></div><div>Demo.<br><br><div>On Thu, Nov 8, 2012 at 5:29 PM, <span><<a href="mailto:dummy@example.com">dummy@example.com</a>></span> wrote:<br><blockquote><div>I contact you about our meeting for tomorrow. Here is the schedule I propose:</div><div><ul><li>9 AM: brainstorming about our new amazing business app</span></li></li>
|
||||
<li>9.45 AM: summary</li><li>10 AM: meeting with Fabien to present our app</li></ul></div><div>Is everything ok for you ?</div>
|
||||
<div><p>--<br>Administrator</p></div>
|
||||
|
||||
<div><p>Log in our portal at: <a href="http://localhost:8069#action=login&db=mail_1&login=demo">http://localhost:8069#action=login&db=mail_1&login=demo</a></p></div>
|
||||
</blockquote></div><br></div>"""
|
||||
|
||||
THUNDERBIRD_16_REPLY1_SAN = """ <div>On 11/08/2012 05:29 PM,
|
||||
<a href="mailto:dummy@example.com">dummy@example.com</a> wrote:<br></div>
|
||||
<blockquote>
|
||||
<div>I contact you about our meeting for tomorrow. Here is the
|
||||
schedule I propose:</div>
|
||||
<div>
|
||||
<ul><li>9 AM: brainstorming about our new amazing business
|
||||
app</span></li></li>
|
||||
<li>9.45 AM: summary</li>
|
||||
<li>10 AM: meeting with Fabien to present our app</li>
|
||||
</ul></div>
|
||||
<div>Is everything ok for you ?</div>
|
||||
<div>
|
||||
<p>--<br>
|
||||
Administrator</p>
|
||||
</div>
|
||||
<div>
|
||||
<p>Log in our portal at:
|
||||
<a href="http://localhost:8069#action=login&db=mail_1&token=rHdWcUART5PhEnJRaXjH">http://localhost:8069#action=login&db=mail_1&token=rHdWcUART5PhEnJRaXjH</a></p>
|
||||
</div>
|
||||
</blockquote>
|
||||
Ok for me. I am replying directly below your mail, using
|
||||
Thunderbird, with a signature.<br><br>
|
||||
Did you receive my email about my new laptop, by the way ?<br><br>
|
||||
Raoul.<br><pre>--
|
||||
Raoul Grosbedonnée
|
||||
</pre>"""
|
||||
|
||||
TEXT_TPL = """Salut Raoul!
|
||||
Le 28 oct. 2012 à 00:02, Raoul Grosbedon a écrit :
|
||||
|
||||
> C'est sûr que je suis intéressé (quote)!
|
||||
|
||||
Trouloulou pouet pouet. Je ne vais quand même pas écrire de vrais mails, non mais ho.
|
||||
|
||||
> 2012/10/27 Bert Tartopoils :
|
||||
>> Diantre, me disè-je en envoyant un message similaire à Martine, mais comment vas-tu (quote)?
|
||||
>>
|
||||
>> A la base le contenu était un vrai mail, mais je l'ai quand même réécrit pour ce test, histoire de dire que, quand même, on ne met pas n'importe quoi ici. (quote)
|
||||
>>
|
||||
>> Et sinon bon courage pour trouver tes clefs (quote).
|
||||
>>
|
||||
>> Bert TARTOPOILS
|
||||
>> bert.tartopoils@miam.miam
|
||||
>>
|
||||
>
|
||||
>
|
||||
> --
|
||||
> Raoul Grosbedon
|
||||
|
||||
Bert TARTOPOILS
|
||||
bert.tartopoils@miam.miam
|
||||
"""
|
||||
|
||||
|
||||
class TestSanitizer(unittest2.TestCase):
|
||||
""" Test the html sanitizer that filters html to remove unwanted attributes """
|
||||
|
@ -223,22 +86,21 @@ class TestSanitizer(unittest2.TestCase):
|
|||
self.assertTrue('ha.ckers.org' not in html or 'http://ha.ckers.org/xss.css' in html, 'html_sanitize did not remove a malicious code in %s (%s)' % (content, html))
|
||||
|
||||
def test_html(self):
|
||||
sanitized_html = html_sanitize(HTML_SOURCE)
|
||||
sanitized_html = html_sanitize(test_mail_examples.MISC_HTML_SOURCE)
|
||||
for tag in ['<div', '<b', '<i', '<u', '<strike', '<li', '<blockquote', '<a href']:
|
||||
self.assertIn(tag, sanitized_html, 'html_sanitize stripped too much of original html')
|
||||
for attr in ['javascript']:
|
||||
self.assertNotIn(attr, sanitized_html, 'html_sanitize did not remove enough unwanted attributes')
|
||||
|
||||
emails =[("Charles <charles.bidule@truc.fr>", "Charles <charles.bidule@truc.fr>"),
|
||||
emails = [("Charles <charles.bidule@truc.fr>", "Charles <charles.bidule@truc.fr>"),
|
||||
("Dupuis <'tr/-: ${dupuis#$'@truc.baz.fr>", "Dupuis <'tr/-: ${dupuis#$'@truc.baz.fr>"),
|
||||
("Technical <service/technical+2@open.com>", "Technical <service/technical+2@open.com>"),
|
||||
("Div nico <div-nico@open.com>", "Div nico <div-nico@open.com>")]
|
||||
for email in emails:
|
||||
self.assertIn(email[1], html_sanitize(email[0]), 'html_sanitize stripped emails of original html')
|
||||
|
||||
|
||||
def test_edi_source(self):
|
||||
html = html_sanitize(EDI_LIKE_HTML_SOURCE)
|
||||
html = html_sanitize(test_mail_examples.EDI_LIKE_HTML_SOURCE)
|
||||
self.assertIn('div style="font-family: \'Lucica Grande\', Ubuntu, Arial, Verdana, sans-serif; font-size: 12px; color: rgb(34, 34, 34); background-color: #FFF;', html,
|
||||
'html_sanitize removed valid style attribute')
|
||||
self.assertIn('<span style="color: #222; margin-bottom: 5px; display: block; ">', html,
|
||||
|
@ -251,36 +113,73 @@ class TestSanitizer(unittest2.TestCase):
|
|||
class TestCleaner(unittest2.TestCase):
|
||||
""" Test the email cleaner function that filters the content of incoming emails """
|
||||
|
||||
def test_html_email_clean(self):
|
||||
# Test1: reply through gmail: quote in blockquote, signature --\nAdministrator
|
||||
new_html = html_email_clean(GMAIL_REPLY1_SAN)
|
||||
self.assertNotIn('blockquote', new_html, 'html_email_cleaner did not remove a blockquote')
|
||||
self.assertNotIn('I contact you about our meeting', new_html, 'html_email_cleaner wrongly removed the quoted content')
|
||||
self.assertNotIn('Administrator', new_html, 'html_email_cleaner did not erase the signature')
|
||||
self.assertIn('Ok for me', new_html, 'html_email_cleaner erased too much content')
|
||||
def test_00_html_email_clean_text(self):
|
||||
""" html_email_clean test for text-based emails """
|
||||
new_html = html_email_clean(test_mail_examples.TEXT_1, remove_unwanted=True)
|
||||
for ext in test_mail_examples.TEXT_1_IN:
|
||||
self.assertIn(ext, new_html, 'html_email_cleaner wrongly removed not quoted content')
|
||||
for ext in test_mail_examples.TEXT_1_OUT:
|
||||
self.assertNotIn(ext, new_html, 'html_email_cleaner did not erase signature / quoted content')
|
||||
|
||||
# Test2: reply through Thunderbird 16.0.2
|
||||
new_html = html_email_clean(THUNDERBIRD_16_REPLY1_SAN)
|
||||
self.assertNotIn('blockquote', new_html, 'html_email_cleaner did not remove a blockquote')
|
||||
self.assertNotIn('I contact you about our meeting', new_html, 'html_email_cleaner wrongly removed the quoted content')
|
||||
self.assertNotIn('Administrator', new_html, 'html_email_cleaner did not erase the signature')
|
||||
self.assertNotIn('Grosbedonn', new_html, 'html_email_cleaner did not erase the signature')
|
||||
self.assertIn('Ok for me', new_html, 'html_email_cleaner erased too much content')
|
||||
new_html = html_email_clean(test_mail_examples.TEXT_2, remove_unwanted=True)
|
||||
for ext in test_mail_examples.TEXT_2_IN:
|
||||
self.assertIn(ext, new_html, 'html_email_cleaner wrongly removed not quoted content')
|
||||
for ext in test_mail_examples.TEXT_2_OUT:
|
||||
self.assertNotIn(ext, new_html, 'html_email_cleaner did not erase signature / quoted content')
|
||||
|
||||
# Test3: text email
|
||||
new_html = html_email_clean(TEXT_MAIL1)
|
||||
self.assertIn('I contact you about our meeting', new_html, 'html_email_cleaner wrongly removed the quoted content')
|
||||
self.assertNotIn('Administrator', new_html, 'html_email_cleaner did not erase the signature')
|
||||
def test_10_html_email_clean_html(self):
|
||||
new_html = html_email_clean(test_mail_examples.HTML_1, remove_unwanted=True)
|
||||
for ext in test_mail_examples.HTML_1_IN:
|
||||
self.assertIn(ext, new_html, 'html_email_cleaner wrongly removed not quoted content')
|
||||
for ext in test_mail_examples.HTML_1_OUT:
|
||||
self.assertNotIn(ext, new_html, 'html_email_cleaner did not erase signature / quoted content')
|
||||
|
||||
# Test4: more complex text email
|
||||
new_html = html_email_clean(TEXT_TPL)
|
||||
self.assertNotIn('quote', new_html, 'html_email_cleaner did not remove correctly plaintext quotes')
|
||||
new_html = html_email_clean(test_mail_examples.HTML_2, remove_unwanted=False)
|
||||
for ext in test_mail_examples.HTML_2_IN:
|
||||
self.assertIn(ext, new_html, 'html_email_cleaner wrongly removed not quoted content')
|
||||
for ext in test_mail_examples.HTML_2_OUT:
|
||||
self.assertNotIn(ext, new_html, 'html_email_cleaner did not erase signature / quoted content')
|
||||
|
||||
# Test5: False boolean for text must return empty string
|
||||
new_html = html_email_clean(test_mail_examples.HTML_3, remove_unwanted=False)
|
||||
for ext in test_mail_examples.HTML_3_IN:
|
||||
self.assertIn(ext, new_html, 'html_email_cleaner wrongly removed not quoted content')
|
||||
# for ext in test_mail_examples.HTML_3_OUT:
|
||||
# self.assertNotIn(ext, new_html, 'html_email_cleaner did not erase signature / quoted content')
|
||||
|
||||
def test_20_html_email_clean_msoffice(self):
    """ html_email_clean test for an MS Office styled email (MSOFFICE_1 fixture) """
    cleaned = html_email_clean(test_mail_examples.MSOFFICE_1, remove_unwanted=True)
    # every snippet listed in MSOFFICE_1_IN must still be present after cleaning
    for kept in test_mail_examples.MSOFFICE_1_IN:
        self.assertIn(kept, cleaned, 'html_email_cleaner wrongly removed not quoted content')
    # every snippet listed in MSOFFICE_1_OUT must have been stripped
    for dropped in test_mail_examples.MSOFFICE_1_OUT:
        self.assertNotIn(dropped, cleaned, 'html_email_cleaner did not erase signature / quoted content')
|
||||
|
||||
def test_30_html_email_clean_hotmail(self):
    """ html_email_clean test for a hotmail styled email (HOTMAIL_1 fixture) """
    cleaned = html_email_clean(test_mail_examples.HOTMAIL_1, remove_unwanted=True)
    # every snippet listed in HOTMAIL_1_IN must still be present after cleaning
    for kept in test_mail_examples.HOTMAIL_1_IN:
        self.assertIn(kept, cleaned, 'html_email_cleaner wrongly removed not quoted content')
    # every snippet listed in HOTMAIL_1_OUT must have been stripped
    for dropped in test_mail_examples.HOTMAIL_1_OUT:
        self.assertNotIn(dropped, cleaned, 'html_email_cleaner did not erase signature / quoted content')
|
||||
|
||||
def test_40_html_email_clean_gmail(self):
    """ html_email_clean test for a gmail styled reply (GMAIL_1 fixture) """
    cleaned = html_email_clean(test_mail_examples.GMAIL_1, remove_unwanted=True)
    # every snippet listed in GMAIL_1_IN must still be present after cleaning
    for kept in test_mail_examples.GMAIL_1_IN:
        self.assertIn(kept, cleaned, 'html_email_cleaner wrongly removed not quoted content')
    # every snippet listed in GMAIL_1_OUT must have been stripped
    for dropped in test_mail_examples.GMAIL_1_OUT:
        self.assertNotIn(dropped, cleaned, 'html_email_cleaner did not erase signature / quoted content')
|
||||
|
||||
def test_50_html_email_clean_thunderbird(self):
    """ html_email_clean test for a thunderbird styled reply (THUNDERBIRD_1 fixture) """
    cleaned = html_email_clean(test_mail_examples.THUNDERBIRD_1, remove_unwanted=True)
    # every snippet listed in THUNDERBIRD_1_IN must still be present after cleaning
    for kept in test_mail_examples.THUNDERBIRD_1_IN:
        self.assertIn(kept, cleaned, 'html_email_cleaner wrongly removed not quoted content')
    # every snippet listed in THUNDERBIRD_1_OUT must have been stripped
    for dropped in test_mail_examples.THUNDERBIRD_1_OUT:
        self.assertNotIn(dropped, cleaned, 'html_email_cleaner did not erase signature / quoted content')
|
||||
|
||||
def test_90_html_email_clean_misc(self):
    """ html_email_clean test for corner cases: False input, xml / doctype headers """
    # A False input must come back as False (the assertion below compares
    # against False, not against an empty string)
    false_result = html_email_clean(False)
    self.assertEqual(false_result, False, 'html_email_cleaner did change a False in an other value.')

    # A message starting with an xml declaration and a doctype must not make
    # the cleaner crash, and the word 'encoding' must not appear in the output
    xml_message = u'<?xml version="1.0" encoding="iso-8859-1"?>\n<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"\n "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">\n<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">\n <head>\n <title>404 - Not Found</title>\n </head>\n <body>\n <h1>404 - Not Found</h1>\n </body>\n</html>\n'
    xml_result = html_email_clean(xml_message)
    self.assertNotIn('encoding', xml_result, 'html_email_cleaner did not remove correctly encoding attributes')
|
||||
|
||||
|
|
|
@ -0,0 +1,354 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
MISC_HTML_SOURCE = """
|
||||
<font size="2" style="color: rgb(31, 31, 31); font-family: monospace; font-variant: normal; line-height: normal; ">test1</font>
|
||||
<div style="color: rgb(31, 31, 31); font-family: monospace; font-variant: normal; line-height: normal; font-size: 12px; font-style: normal; ">
|
||||
<b>test2</b></div><div style="color: rgb(31, 31, 31); font-family: monospace; font-variant: normal; line-height: normal; font-size: 12px; ">
|
||||
<i>test3</i></div><div style="color: rgb(31, 31, 31); font-family: monospace; font-variant: normal; line-height: normal; font-size: 12px; ">
|
||||
<u>test4</u></div><div style="color: rgb(31, 31, 31); font-family: monospace; font-variant: normal; line-height: normal; font-size: 12px; ">
|
||||
<strike>test5</strike></div><div style="color: rgb(31, 31, 31); font-family: monospace; font-variant: normal; line-height: normal; ">
|
||||
<font size="5">test6</font></div><div><ul><li><font color="#1f1f1f" face="monospace" size="2">test7</font></li><li>
|
||||
<font color="#1f1f1f" face="monospace" size="2">test8</font></li></ul><div><ol><li><font color="#1f1f1f" face="monospace" size="2">test9</font>
|
||||
</li><li><font color="#1f1f1f" face="monospace" size="2">test10</font></li></ol></div></div>
|
||||
<blockquote style="margin: 0 0 0 40px; border: none; padding: 0px;"><div><div><div><font color="#1f1f1f" face="monospace" size="2">
|
||||
test11</font></div></div></div></blockquote><blockquote style="margin: 0 0 0 40px; border: none; padding: 0px;">
|
||||
<blockquote style="margin: 0 0 0 40px; border: none; padding: 0px;"><div><font color="#1f1f1f" face="monospace" size="2">
|
||||
test12</font></div><div><font color="#1f1f1f" face="monospace" size="2"><br></font></div></blockquote></blockquote>
|
||||
<font color="#1f1f1f" face="monospace" size="2"><a href="http://google.com">google</a></font>
|
||||
<a href="javascript:alert('malicious code')">test link</a>
|
||||
"""
|
||||
|
||||
EDI_LIKE_HTML_SOURCE = """<div style="font-family: 'Lucica Grande', Ubuntu, Arial, Verdana, sans-serif; font-size: 12px; color: rgb(34, 34, 34); background-color: #FFF; ">
|
||||
<p>Hello ${object.partner_id.name},</p>
|
||||
<p>A new invoice is available for you: </p>
|
||||
<p style="border-left: 1px solid #8e0000; margin-left: 30px;">
|
||||
<strong>REFERENCES</strong><br />
|
||||
Invoice number: <strong>${object.number}</strong><br />
|
||||
Invoice total: <strong>${object.amount_total} ${object.currency_id.name}</strong><br />
|
||||
Invoice date: ${object.date_invoice}<br />
|
||||
Order reference: ${object.origin}<br />
|
||||
Your contact: <a href="mailto:${object.user_id.email or ''}?subject=Invoice%20${object.number}">${object.user_id.name}</a>
|
||||
</p>
|
||||
<br/>
|
||||
<p>It is also possible to directly pay with Paypal:</p>
|
||||
<a style="margin-left: 120px;" href="${object.paypal_url}">
|
||||
<img class="oe_edi_paypal_button" src="https://www.paypal.com/en_US/i/btn/btn_paynowCC_LG.gif"/>
|
||||
</a>
|
||||
<br/>
|
||||
<p>If you have any question, do not hesitate to contact us.</p>
|
||||
<p>Thank you for choosing ${object.company_id.name or 'us'}!</p>
|
||||
<br/>
|
||||
<br/>
|
||||
<div style="width: 375px; margin: 0px; padding: 0px; background-color: #8E0000; border-top-left-radius: 5px 5px; border-top-right-radius: 5px 5px; background-repeat: repeat no-repeat;">
|
||||
<h3 style="margin: 0px; padding: 2px 14px; font-size: 12px; color: #DDD;">
|
||||
<strong style="text-transform:uppercase;">${object.company_id.name}</strong></h3>
|
||||
</div>
|
||||
<div style="width: 347px; margin: 0px; padding: 5px 14px; line-height: 16px; background-color: #F2F2F2;">
|
||||
<span style="color: #222; margin-bottom: 5px; display: block; ">
|
||||
${object.company_id.street}<br/>
|
||||
${object.company_id.street2}<br/>
|
||||
${object.company_id.zip} ${object.company_id.city}<br/>
|
||||
${object.company_id.state_id and ('%s, ' % object.company_id.state_id.name) or ''} ${object.company_id.country_id.name or ''}<br/>
|
||||
</span>
|
||||
<div style="margin-top: 0px; margin-right: 0px; margin-bottom: 0px; margin-left: 0px; padding-top: 0px; padding-right: 0px; padding-bottom: 0px; padding-left: 0px; ">
|
||||
Phone: ${object.company_id.phone}
|
||||
</div>
|
||||
<div>
|
||||
Web : <a href="${object.company_id.website}">${object.company_id.website}</a>
|
||||
</div>
|
||||
</div>
|
||||
</div></body></html>"""
|
||||
|
||||
TEXT_1 = """I contact you about our meeting tomorrow. Here is the schedule I propose:
|
||||
9 AM: brainstorming about our new amazing business app
|
||||
9.45 AM: summary
|
||||
10 AM: meeting with Ignasse to present our app
|
||||
Is everything ok for you ?
|
||||
--
|
||||
MySignature"""
|
||||
|
||||
TEXT_1_IN = ["""I contact you about our meeting tomorrow. Here is the schedule I propose:
|
||||
9 AM: brainstorming about our new amazing business app
|
||||
9.45 AM: summary
|
||||
10 AM: meeting with Ignasse to present our app
|
||||
Is everything ok for you ?"""]
|
||||
TEXT_1_OUT = ["""--
|
||||
MySignature"""]
|
||||
|
||||
TEXT_2 = """Salut Raoul!
|
||||
Le 28 oct. 2012 à 00:02, Raoul Grosbedon a écrit :
|
||||
|
||||
> I contact you about our meeting tomorrow. Here is the schedule I propose: (quote)
|
||||
|
||||
Of course. This seems viable.
|
||||
|
||||
> 2012/10/27 Bert Tartopoils :
|
||||
>> blahblahblah (quote)?
|
||||
>>
|
||||
>> blahblahblah (quote)
|
||||
>>
|
||||
>> Bert TARTOPOILS
|
||||
>> bert.tartopoils@miam.miam
|
||||
>>
|
||||
>
|
||||
>
|
||||
> --
|
||||
> RaoulSignature
|
||||
|
||||
Bert TARTOPOILS
|
||||
bert.tartopoils@miam.miam
|
||||
"""
|
||||
|
||||
TEXT_2_IN = ["Salut Raoul!", "Of course. This seems viable."]
|
||||
TEXT_2_OUT = ["I contact you about our meeting tomorrow. Here is the schedule I propose: (quote)",
|
||||
"""> 2012/10/27 Bert Tartopoils :
|
||||
>> blahblahblah (quote)?
|
||||
>>
|
||||
>> blahblahblah (quote)
|
||||
>>
|
||||
>> Bert TARTOPOILS
|
||||
>> bert.tartopoils@miam.miam
|
||||
>>
|
||||
>
|
||||
>
|
||||
> --
|
||||
> RaoulSignature"""]
|
||||
|
||||
HTML_1 = """<p>I contact you about our meeting for tomorrow. Here is the schedule I propose: (keep)
|
||||
9 AM: brainstorming about our new amazing business app
|
||||
9.45 AM: summary
|
||||
10 AM: meeting with Ignasse to present our app
|
||||
Is everything ok for you ?
|
||||
--
|
||||
MySignature</p>"""
|
||||
|
||||
HTML_1_IN = ["""I contact you about our meeting for tomorrow. Here is the schedule I propose: (keep)
|
||||
9 AM: brainstorming about our new amazing business app
|
||||
9.45 AM: summary
|
||||
10 AM: meeting with Ignasse to present our app
|
||||
Is everything ok for you ?"""]
|
||||
HTML_1_OUT = ["""--
|
||||
MySignature"""]
|
||||
|
||||
HTML_2 = """<div>
|
||||
<font><span>I contact you about our meeting for tomorrow. Here is the schedule I propose:</span></font>
|
||||
</div>
|
||||
<div>
|
||||
<ul>
|
||||
<li><span>9 AM: brainstorming about our new amazing business app</span></li>
|
||||
<li><span>9.45 AM: summary</span></li>
|
||||
<li><span>10 AM: meeting with Fabien to present our app</span></li>
|
||||
</ul>
|
||||
</div>
|
||||
<div>
|
||||
<font><span>Is everything ok for you ?</span></font>
|
||||
</div>"""
|
||||
|
||||
HTML_2_IN = ["<font><span>I contact you about our meeting for tomorrow. Here is the schedule I propose:</span></font>",
|
||||
"<li><span>9 AM: brainstorming about our new amazing business app</span></li>",
|
||||
"<li><span>9.45 AM: summary</span></li>",
|
||||
"<li><span>10 AM: meeting with Fabien to present our app</span></li>",
|
||||
"<font><span>Is everything ok for you ?</span></font>"]
|
||||
HTML_2_OUT = []
|
||||
|
||||
HTML_3 = """<div><pre>This is an answer.
|
||||
|
||||
Regards,
|
||||
XXXXXX
|
||||
----- Mail original -----</pre>
|
||||
|
||||
|
||||
<pre>Hi,
|
||||
|
||||
|
||||
My CRM-related question.
|
||||
|
||||
Regards,
|
||||
|
||||
XXXX</pre></div>"""
|
||||
|
||||
HTML_3_IN = ["""<div><pre>This is an answer.
|
||||
|
||||
Regards,
|
||||
XXXXXX
|
||||
----- Mail original -----</pre>"""]
|
||||
HTML_3_OUT = ["Hi,", "My CRM-related question.",
|
||||
"Regards,"]
|
||||
|
||||
GMAIL_1 = """Hello,<div><br></div><div>Ok for me. I am replying directly in gmail, without signature.</div><div><br></div><div>Kind regards,</div><div><br></div><div>Demo.<br><br><div>On Thu, Nov 8, 2012 at 5:29 PM, <span><<a href="mailto:dummy@example.com">dummy@example.com</a>></span> wrote:<br><blockquote><div>I contact you about our meeting for tomorrow. Here is the schedule I propose:</div><div><ul><li>9 AM: brainstorming about our new amazing business app</span></li></li>
|
||||
<li>9.45 AM: summary</li><li>10 AM: meeting with Fabien to present our app</li></ul></div><div>Is everything ok for you ?</div>
|
||||
<div><p>--<br>Administrator</p></div>
|
||||
|
||||
<div><p>Log in our portal at: <a href="http://localhost:8069#action=login&db=mail_1&login=demo">http://localhost:8069#action=login&db=mail_1&login=demo</a></p></div>
|
||||
</blockquote></div><br></div>"""
|
||||
|
||||
GMAIL_1_IN = ['Ok for me. I am replying directly in gmail, without signature.']
|
||||
GMAIL_1_OUT = ['Administrator', 'Log in our portal at:']
|
||||
|
||||
THUNDERBIRD_1 = """<div>On 11/08/2012 05:29 PM,
|
||||
<a href="mailto:dummy@example.com">dummy@example.com</a> wrote:<br></div>
|
||||
<blockquote>
|
||||
<div>I contact you about our meeting for tomorrow. Here is the
|
||||
schedule I propose:</div>
|
||||
<div>
|
||||
<ul><li>9 AM: brainstorming about our new amazing business
|
||||
app</span></li></li>
|
||||
<li>9.45 AM: summary</li>
|
||||
<li>10 AM: meeting with Fabien to present our app</li>
|
||||
</ul></div>
|
||||
<div>Is everything ok for you ?</div>
|
||||
<div>
|
||||
<p>--<br>
|
||||
Administrator</p>
|
||||
</div>
|
||||
<div>
|
||||
<p>Log in our portal at:
|
||||
<a href="http://localhost:8069#action=login&db=mail_1&token=rHdWcUART5PhEnJRaXjH">http://localhost:8069#action=login&db=mail_1&token=rHdWcUART5PhEnJRaXjH</a></p>
|
||||
</div>
|
||||
</blockquote>
|
||||
Ok for me. I am replying directly below your mail, using Thunderbird, with a signature.<br><br>
|
||||
Did you receive my email about my new laptop, by the way ?<br><br>
|
||||
Raoul.<br><pre>--
|
||||
Raoul Grosbedonnée
|
||||
</pre>"""
|
||||
|
||||
THUNDERBIRD_1_IN = ['Ok for me. I am replying directly below your mail, using Thunderbird, with a signature.']
|
||||
THUNDERBIRD_1_OUT = ['I contact you about our meeting for tomorrow.', 'Raoul Grosbedon']
|
||||
|
||||
HOTMAIL_1 = """<div>
|
||||
<div dir="ltr"><br>
|
||||
I have an amazing company, i'm learning OpenERP, it is a small company yet, but plannig to grow up quickly.
|
||||
<br> <br>Kindest regards,<br>xxx<br>
|
||||
<div>
|
||||
<div id="SkyDrivePlaceholder">
|
||||
</div>
|
||||
<hr id="stopSpelling">
|
||||
Subject: Re: your OpenERP.com registration<br>From: xxx@xxx.xxx<br>To: xxx@xxx.xxx<br>Date: Wed, 27 Mar 2013 17:12:12 +0000
|
||||
<br><br>
|
||||
Hello xxx,
|
||||
<br>
|
||||
I noticed you recently created an OpenERP.com account to access OpenERP Apps.
|
||||
<br>
|
||||
You indicated that you wish to use OpenERP in your own company.
|
||||
We would like to know more about your your business needs and requirements, and see how
|
||||
we can help you. When would you be available to discuss your project ?<br>
|
||||
Best regards,<br>
|
||||
<pre>
|
||||
<a href="http://openerp.com" target="_blank">http://openerp.com</a>
|
||||
Belgium: +32.81.81.37.00
|
||||
U.S.: +1 (650) 307-6736
|
||||
India: +91 (79) 40 500 100
|
||||
</pre>
|
||||
</div>
|
||||
</div>
|
||||
</div>"""
|
||||
|
||||
HOTMAIL_1_IN = ["I have an amazing company, i'm learning OpenERP, it is a small company yet, but plannig to grow up quickly."]
|
||||
HOTMAIL_1_OUT = ["Subject: Re: your OpenERP.com registration", " I noticed you recently created an OpenERP.com account to access OpenERP Apps.",
|
||||
"We would like to know more about your your business needs and requirements", "Belgium: +32.81.81.37.00"]
|
||||
|
||||
MSOFFICE_1 = """
|
||||
<div>
|
||||
<div class="WordSection1">
|
||||
<p class="MsoNormal">
|
||||
<span style="font-size:11.0pt;font-family:"Calibri","sans-serif";color:#1F497D">
|
||||
Our requirements are simple. Just looking to replace some spreadsheets for tracking quotes and possibly using the timecard module.
|
||||
We are a company of 25 engineers providing product design services to clients.
|
||||
</span>
|
||||
</p>
|
||||
<p></p>
|
||||
<p></p>
|
||||
<p class="MsoNormal">
|
||||
<span style="font-size:11.0pt;font-family:"Calibri","sans-serif";color:#1F497D">
|
||||
I’ll install on a windows server and run a very limited trial to see how it works.
|
||||
If we adopt OpenERP we will probably move to Linux or look for a hosted SaaS option.
|
||||
</span>
|
||||
</p>
|
||||
<p></p>
|
||||
<p></p>
|
||||
<p class="MsoNormal">
|
||||
<span style="font-size:11.0pt;font-family:"Calibri","sans-serif";color:#1F497D">
|
||||
<br>
|
||||
I am also evaluating Adempiere and maybe others.
|
||||
</span>
|
||||
</p>
|
||||
<p></p>
|
||||
<p></p>
|
||||
<p class="MsoNormal">
|
||||
<span style="font-size:11.0pt;font-family:"Calibri","sans-serif";color:#1F497D">
|
||||
</span>
|
||||
</p>
|
||||
<p> </p>
|
||||
<p></p>
|
||||
<p class="MsoNormal">
|
||||
<span style="font-size:11.0pt;font-family:"Calibri","sans-serif";color:#1F497D">
|
||||
I expect the trial will take 2-3 months as this is not a high priority for us.
|
||||
</span>
|
||||
</p>
|
||||
<p></p>
|
||||
<p></p>
|
||||
<p class="MsoNormal">
|
||||
<span style="font-size:11.0pt;font-family:"Calibri","sans-serif";color:#1F497D">
|
||||
</span>
|
||||
</p>
|
||||
<p> </p>
|
||||
<p></p>
|
||||
<p class="MsoNormal">
|
||||
<span style="font-size:11.0pt;font-family:"Calibri","sans-serif";color:#1F497D">
|
||||
Alan
|
||||
</span>
|
||||
</p>
|
||||
<p></p>
|
||||
<p></p>
|
||||
<p class="MsoNormal">
|
||||
<span style="font-size:11.0pt;font-family:"Calibri","sans-serif";color:#1F497D">
|
||||
</span>
|
||||
</p>
|
||||
<p> </p>
|
||||
<p></p>
|
||||
<div>
|
||||
<div style="border:none;border-top:solid #B5C4DF 1.0pt;padding:3.0pt 0in 0in 0in">
|
||||
<p class="MsoNormal">
|
||||
<b><span style="font-size:10.0pt;font-family:"Tahoma","sans-serif"">
|
||||
From:
|
||||
</span></b>
|
||||
<span style="font-size:10.0pt;font-family:"Tahoma","sans-serif"">
|
||||
OpenERP Enterprise [mailto:sales@openerp.com]
|
||||
<br><b>Sent:</b> Monday, 11 March, 2013 14:47<br><b>To:</b> Alan Widmer<br><b>Subject:</b> Re: your OpenERP.com registration
|
||||
</span>
|
||||
</p>
|
||||
<p></p>
|
||||
<p></p>
|
||||
</div>
|
||||
</div>
|
||||
<p class="MsoNormal"></p>
|
||||
<p> </p>
|
||||
<p>Hello Alan Widmer, </p>
|
||||
<p></p>
|
||||
<p>I noticed you recently downloaded OpenERP. </p>
|
||||
<p></p>
|
||||
<p>
|
||||
Uou mentioned you wish to use OpenERP in your own company. Please let me more about your
|
||||
business needs and requirements? When will you be available to discuss about your project?
|
||||
</p>
|
||||
<p></p>
|
||||
<p>Thanks for your interest in OpenERP, </p>
|
||||
<p></p>
|
||||
<p>Feel free to contact me if you have any questions, </p>
|
||||
<p></p>
|
||||
<p>Looking forward to hear from you soon. </p>
|
||||
<p></p>
|
||||
<pre><p> </p></pre>
|
||||
<pre>--<p></p></pre>
|
||||
<pre>Nicolas<p></p></pre>
|
||||
<pre><a href="http://openerp.com">http://openerp.com</a><p></p></pre>
|
||||
<pre>Belgium: +32.81.81.37.00<p></p></pre>
|
||||
<pre>U.S.: +1 (650) 307-6736<p></p></pre>
|
||||
<pre>India: +91 (79) 40 500 100<p></p></pre>
|
||||
<pre> <p></p></pre>
|
||||
</div>
|
||||
</div>"""
|
||||
|
||||
MSOFFICE_1_IN = ['Our requirements are simple. Just looking to replace some spreadsheets for tracking quotes and possibly using the timecard module.']
|
||||
MSOFFICE_1_OUT = ['I noticed you recently downloaded OpenERP.', 'Uou mentioned you wish to use OpenERP in your own company.']
|
|
@ -52,7 +52,7 @@ def html_sanitize(src):
|
|||
# html encode email tags
|
||||
part = re.compile(r"(<(([^a<>]|a[^<>\s])[^<>]*)@[^<>]+>)", re.IGNORECASE | re.DOTALL)
|
||||
src = part.sub(lambda m: cgi.escape(m.group(1)), src)
|
||||
|
||||
|
||||
# some corner cases make the parser crash (such as <SCRIPT/XSS SRC=\"http://ha.ckers.org/xss.js\"></SCRIPT> in test_mail)
|
||||
try:
|
||||
cleaner = clean.Cleaner(page_structure=True, style=False, safe_attrs_only=False, forms=False, kill_tags=tags_to_kill, remove_tags=tags_to_remove)
|
||||
|
@ -60,11 +60,14 @@ def html_sanitize(src):
|
|||
except TypeError, e:
|
||||
# lxml.clean version < 2.3.1 does not have a kill_tags attribute
|
||||
# to remove in 2014
|
||||
cleaner = clean.Cleaner(page_structure=True, style=False, safe_attrs_only=False, forms=False, remove_tags=tags_to_kill+tags_to_remove)
|
||||
cleaner = clean.Cleaner(page_structure=True, style=False, safe_attrs_only=False, forms=False, remove_tags=tags_to_kill + tags_to_remove)
|
||||
cleaned = cleaner.clean_html(src)
|
||||
except:
|
||||
_logger.warning('html_sanitize failed to parse %s' % (src))
|
||||
cleaned = '<p>Impossible to parse</p>'
|
||||
except etree.ParserError, e:
|
||||
_logger.warning('html_sanitize: ParserError "%s" obtained when sanitizing "%s"' % (e, src))
|
||||
cleaned = '<p>ParserError when sanitizing</p>'
|
||||
except Exception, e:
|
||||
_logger.warning('html_sanitize: unknown error "%s" obtained when sanitizing "%s"' % (e, src))
|
||||
cleaned = '<p>Unknown error when sanitizing</p>'
|
||||
return cleaned
|
||||
|
||||
|
||||
|
@ -72,7 +75,7 @@ def html_sanitize(src):
|
|||
# HTML Cleaner
|
||||
#----------------------------------------------------------
|
||||
|
||||
def html_email_clean(html):
|
||||
def html_email_clean(html, remove_unwanted=False, use_max_length=False, max_length=300):
|
||||
""" html_email_clean: clean the html to display in the web client.
|
||||
- strip email quotes (remove blockquote nodes)
|
||||
- strip signatures (remove --\n{\n)Blahblah), by replacing <br> by
|
||||
|
@ -83,6 +86,8 @@ def html_email_clean(html):
|
|||
html code coming from a sanitized source, like fields.html.
|
||||
"""
|
||||
def _replace_matching_regex(regex, source, replace=''):
|
||||
if not source:
|
||||
return source
|
||||
dest = ''
|
||||
idx = 0
|
||||
for item in re.finditer(regex, source):
|
||||
|
@ -91,63 +96,114 @@ def html_email_clean(html):
|
|||
dest += source[idx:]
|
||||
return dest
|
||||
|
||||
def _tag_matching_regex_in_text(regex, node, new_node_tag='span', new_node_attrs=None):
    """ Wrap every part of ``node.text`` matching ``regex`` in a new child
        element, so that the matching parts can be identified afterwards.

        Only ``node.text`` is inspected; ``node.tail`` and the text of the
        existing children are left alone. The text is redistributed as
        follows: what precedes the first match stays in ``node.text``, each
        match becomes the text of a new element, and what follows a match
        becomes the tail of that new element. New elements are inserted
        before the children already present, in match order.

        :param regex: pattern (string or compiled) given to ``re.finditer``
        :param node: element whose text is split; it is modified in place
        :param new_node_tag: tag of the elements created around the matches
        :param new_node_attrs: optional dict of attributes set on each
            created element; None means no attribute
        :return: None
    """
    # The default value used to be dereferenced as-is, which raised an
    # AttributeError on the first match when no attributes were given.
    attrs = new_node_attrs or {}

    text = node.text or ''
    # The text is always reset (None becomes ''), then redistributed below.
    node.text = ''
    last_node = None
    idx = 0
    for position, match in enumerate(re.finditer(regex, text)):
        leading = text[idx:match.start()]
        if last_node is None:
            # before the first match: the leading text stays on the node
            node.text = leading
        else:
            # between two matches: the text follows the previous new element
            last_node.tail = leading

        # create element
        new_node = etree.Element(new_node_tag)
        new_node.text = match.group(0)
        for key, val in attrs.items():
            new_node.set(key, val)

        # insert element in DOM: node.text precedes every child, hence the
        # new elements go first, at the rank of their match
        node.insert(position, new_node)
        last_node = new_node
        idx = match.end()

    # remaining text: whole text when nothing matched, else after last match
    if last_node is None:
        node.text = text[idx:]
    else:
        last_node.tail = text[idx:]
|
||||
|
||||
if not html or not isinstance(html, basestring):
|
||||
return html
|
||||
|
||||
html = ustr(html)
|
||||
|
||||
# 0. remove encoding attribute inside tags
|
||||
# Pre processing
|
||||
# ------------------------------------------------------------
|
||||
|
||||
# --- MAIL ORIGINAL ---: '[\-]{4,}([^\-]*)[\-]{4,}'
|
||||
|
||||
# html: remove encoding attribute inside tags
|
||||
doctype = re.compile(r'(<[^>]*\s)(encoding=(["\'][^"\']*?["\']|[^\s\n\r>]+)(\s[^>]*|/)?>)', re.IGNORECASE | re.DOTALL)
|
||||
html = doctype.sub(r"", html)
|
||||
|
||||
# 1. <br[ /]> -> \n, because otherwise the tree is obfuscated
|
||||
# html: ClEditor seems to love using <div><br /><div> -> replace with <br />
|
||||
br_div_tags = re.compile(r'(<div>\s*<br\s*\/>\s*<\/div>)')
|
||||
html = _replace_matching_regex(br_div_tags, html, '<br />')
|
||||
|
||||
# html: <br[ /]> -> \n, to de-obfuscate the tree
|
||||
br_tags = re.compile(r'([<]\s*[bB][rR]\s*\/?[>])')
|
||||
html = _replace_matching_regex(br_tags, html, '__BR_TAG__')
|
||||
|
||||
# 2. form a tree, handle (currently ?) pure-text by enclosing them in a pre
|
||||
# form a tree
|
||||
root = lxml.html.fromstring(html)
|
||||
if not len(root) and root.text is None and root.tail is None:
|
||||
html = '<div>%s</div>' % html
|
||||
root = lxml.html.fromstring(html)
|
||||
|
||||
# 2.5 remove quoted text in nodes
|
||||
# form node and tag text-based quotes and signature
|
||||
quote_tags = re.compile(r'(\n(>)+[^\n\r]*)')
|
||||
for node in root.getiterator():
|
||||
if not node.text:
|
||||
continue
|
||||
node.text = _replace_matching_regex(quote_tags, node.text)
|
||||
|
||||
# 3. remove blockquotes
|
||||
quotes = [el for el in root.getiterator(tag='blockquote')]
|
||||
for node in quotes:
|
||||
# copy the node tail into parent text
|
||||
if node.tail:
|
||||
parent = node.getparent()
|
||||
parent.text = parent.text or '' + node.tail
|
||||
# remove the node
|
||||
node.getparent().remove(node)
|
||||
|
||||
# 4. strip signatures
|
||||
signature = re.compile(r'([-]{2}[\s]?[\r\n]{1,2}[^\z]+)')
|
||||
for elem in root.getiterator():
|
||||
if elem.text:
|
||||
match = re.search(signature, elem.text)
|
||||
if match:
|
||||
elem.text = elem.text[:match.start()] + elem.text[match.end():]
|
||||
if elem.tail:
|
||||
match = re.search(signature, elem.tail)
|
||||
if match:
|
||||
elem.tail = elem.tail[:match.start()] + elem.tail[match.end():]
|
||||
for node in root.getiterator():
|
||||
_tag_matching_regex_in_text(quote_tags, node, 'span', {'text_quote': '1'})
|
||||
_tag_matching_regex_in_text(signature, node, 'span', {'text_signature': '1'})
|
||||
|
||||
# 5. \n back to <br/>
|
||||
# Processing
|
||||
# ------------------------------------------------------------
|
||||
|
||||
# tree: tag nodes
|
||||
quote_begin = False
|
||||
for node in root.getiterator():
|
||||
if node.get('class') in ['WordSection1', 'MsoNormal']:
|
||||
root.set('msoffice', '1')
|
||||
if node.get('class') in ['SkyDrivePlaceholder'] or node.get('id') in ['SkyDrivePlaceholder']:
|
||||
root.set('hotmail', '1')
|
||||
|
||||
if quote_begin:
|
||||
node.set('quote', '1')
|
||||
|
||||
if root.get('msoffice') and node.tag == 'div' and 'border-top:solid' in node.get('style', ''):
|
||||
quote_begin = True
|
||||
if root.get('hotmail') and node.tag == 'hr' and ('stopSpelling' in node.get('class', '') or 'stopSpelling' in node.get('id', '')):
|
||||
quote_begin = True
|
||||
|
||||
if node.tag == 'blockquote' or node.get('text_quote') or node.get('text_signature'):
|
||||
node.set('remove', '1')
|
||||
if quote_begin:
|
||||
node.set('remove', '1')
|
||||
node.set('tail_remove', '1')
|
||||
|
||||
# Post processing
|
||||
# ------------------------------------------------------------
|
||||
|
||||
if remove_unwanted:
|
||||
to_delete = []
|
||||
for node in root.getiterator():
|
||||
if node.get('remove'):
|
||||
# copy the node tail into parent text
|
||||
if node.tail and not node.get('tail_remove'):
|
||||
parent = node.getparent()
|
||||
parent.tail = node.tail + (parent.tail or '')
|
||||
to_delete.append(node)
|
||||
for node in to_delete:
|
||||
node.getparent().remove(node)
|
||||
|
||||
# html: \n back to <br/>
|
||||
html = etree.tostring(root, pretty_print=True)
|
||||
html = html.replace('__BR_TAG__', '<br />')
|
||||
|
||||
# 6. Misc cleaning :
|
||||
# - ClEditor seems to love using <div><br /><div> -> replace with <br />
|
||||
br_div_tags = re.compile(r'(<div>\s*<br\s*\/>\s*<\/div>)')
|
||||
html = _replace_matching_regex(br_div_tags, html, '<br />')
|
||||
|
||||
return html
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue