[IMP] tools: mail: improved append_content_to_html, including plaintext2html. Updated tests.
bzr revid: tde@openerp.com-20121109123521-e04g4zrhtv947l1r
This commit is contained in:
parent
0d027a3f8b
commit
62c419e66f
|
@ -3,7 +3,6 @@
|
||||||
<data noupdate="1">
|
<data noupdate="1">
|
||||||
<record id="partner_demo" model="res.partner">
|
<record id="partner_demo" model="res.partner">
|
||||||
<field name="name">Demo User</field>
|
<field name="name">Demo User</field>
|
||||||
<field name="email">demo@example.com</field>
|
|
||||||
<field name="company_id" ref="main_company"/>
|
<field name="company_id" ref="main_company"/>
|
||||||
<field name="customer" eval="False"/>
|
<field name="customer" eval="False"/>
|
||||||
<field name="email">demo@example.com</field>
|
<field name="email">demo@example.com</field>
|
||||||
|
|
|
@ -44,8 +44,8 @@ import openerp
|
||||||
import openerp.tools as tools
|
import openerp.tools as tools
|
||||||
from openerp.tools.translate import _
|
from openerp.tools.translate import _
|
||||||
from openerp.tools import float_round, float_repr
|
from openerp.tools import float_round, float_repr
|
||||||
|
from openerp.tools import html_sanitize
|
||||||
import simplejson
|
import simplejson
|
||||||
from openerp.tools.mail import html_sanitize
|
|
||||||
from openerp import SUPERUSER_ID
|
from openerp import SUPERUSER_ID
|
||||||
|
|
||||||
_logger = logging.getLogger(__name__)
|
_logger = logging.getLogger(__name__)
|
||||||
|
|
|
@ -23,9 +23,9 @@
|
||||||
##############################################################################
|
##############################################################################
|
||||||
|
|
||||||
import unittest2
|
import unittest2
|
||||||
from openerp.tools.mail import html_sanitize, html_email_clean, append_content_to_html, text2html
|
from openerp.tools import html_sanitize, html_email_clean, append_content_to_html, plaintext2html
|
||||||
|
|
||||||
test_case = """
|
HTML_SOURCE = """
|
||||||
<font size="2" style="color: rgb(31, 31, 31); font-family: monospace; font-variant: normal; line-height: normal; ">test1</font>
|
<font size="2" style="color: rgb(31, 31, 31); font-family: monospace; font-variant: normal; line-height: normal; ">test1</font>
|
||||||
<div style="color: rgb(31, 31, 31); font-family: monospace; font-variant: normal; line-height: normal; font-size: 12px; font-style: normal; ">
|
<div style="color: rgb(31, 31, 31); font-family: monospace; font-variant: normal; line-height: normal; font-size: 12px; font-style: normal; ">
|
||||||
<b>test2</b></div><div style="color: rgb(31, 31, 31); font-family: monospace; font-variant: normal; line-height: normal; font-size: 12px; ">
|
<b>test2</b></div><div style="color: rgb(31, 31, 31); font-family: monospace; font-variant: normal; line-height: normal; font-size: 12px; ">
|
||||||
|
@ -43,88 +43,65 @@ test12</font></div><div><font color="#1f1f1f" face="monospace" size="2"><br></fo
|
||||||
<a href="javascript:alert('malicious code')">test link</a>
|
<a href="javascript:alert('malicious code')">test link</a>
|
||||||
"""
|
"""
|
||||||
|
|
||||||
GMAIL_REPLY_SAN = """<div>Réponse via thunderbird, classique.<br><br>
|
TEXT_MAIL1 = """I contact you about our meeting for tomorrow. Here is the schedule I propose:
|
||||||
On 11/05/2012 10:51 AM, Raoul Tartopoils wrote:<br></div>
|
9 AM: brainstorming about our new amazing business app</span></li>
|
||||||
<blockquote>
|
9.45 AM: summary
|
||||||
<div>Plop !</div>
|
10 AM: meeting with Fabien to present our app
|
||||||
<ul><li>Vive les lapins rapides !<br></li>
|
Is everything ok for you ?
|
||||||
<li>Nouille</li>
|
--
|
||||||
<li>Frites</li>
|
Administrator"""
|
||||||
</ul><div><br></div>
|
|
||||||
<div>Clairement, hein ?</div>
|
|
||||||
-- <br>
|
|
||||||
Raoul Tartopoils<br></blockquote>
|
|
||||||
<br><br><pre>--
|
|
||||||
Raoul Tartopoils
|
|
||||||
</pre>"""
|
|
||||||
|
|
||||||
GMAIL_REPLY2_SAN = """<div>Je réponds, hop, via thunderbird. Mais
|
HTML_MAIL1 = """<div>
|
||||||
je vais répodnre aussi au milieu du thread.<br><br>
|
<font><span>I contact you about our meeting for tomorrow. Here is the schedule I propose:</span></font>
|
||||||
On 11/05/2012 10:53 AM, Raoul Tartopoils wrote:<br></div>
|
</div>
|
||||||
<blockquote>Reply rapide de gmail.</blockquote>
|
<div><ul>
|
||||||
<br>
|
<li><span>9 AM: brainstorming about our new amazing business app</span></li>
|
||||||
Jamais.<br><br><blockquote>
|
<li><span>9.45 AM: summary</span></li>
|
||||||
<div><br><br><div>2012/11/5 Thibault Delavallée <span><<a href="mailto:tde@openerp.com">tde@openerp.com</a>></span><br><blockquote>
|
<li><span>10 AM: meeting with Fabien to present our app</span></li>
|
||||||
<div>
|
</ul></div>
|
||||||
<div>Réponse via thunderbird, classique.
|
<div><font><span>Is everything ok for you ?</span></font></div>"""
|
||||||
<div>
|
|
||||||
<div><br><br>
|
GMAIL_REPLY1_SAN = """Hello,<div><br></div><div>Ok for me. I am replying directly in gmail, without signature.</div><div><br></div><div>Kind regards,</div><div><br></div><div>Demo.<br><br><div>On Thu, Nov 8, 2012 at 5:29 PM, <span><<a href="mailto:dummy@example.com">dummy@example.com</a>></span> wrote:<br><blockquote><div>I contact you about our meeting for tomorrow. Here is the schedule I propose:</div><div><ul><li>9 AM: brainstorming about our new amazing business app</span></li></li>
|
||||||
On 11/05/2012 10:51 AM, Raoul Tartopoils wrote:<br></div>
|
<li>9.45 AM: summary</li><li>10 AM: meeting with Fabien to present our app</li></ul></div><div>Is everything ok for you ?</div>
|
||||||
</div>
|
<div><p>--<br>Administrator</p></div>
|
||||||
</div>
|
|
||||||
<div>
|
<div><p>Log in our portal at: <a href="http://localhost:8069#action=login&db=mail_1&login=demo">http://localhost:8069#action=login&db=mail_1&login=demo</a></p></div>
|
||||||
<div>
|
</blockquote></div><br></div>"""
|
||||||
<blockquote>
|
|
||||||
<div>Plop !</div>
|
THUNDERBIRD_16_REPLY1_SAN = """ <div>On 11/08/2012 05:29 PM,
|
||||||
<ul><li>Vive les lapins rapides !<br></li>
|
<a href="mailto:dummy@example.com">dummy@example.com</a> wrote:<br></div>
|
||||||
<li>Nouille</li>
|
<blockquote>
|
||||||
</ul></blockquote>
|
<div>I contact you about our meeting for tomorrow. Here is the
|
||||||
</div>
|
schedule I propose:</div>
|
||||||
</div>
|
<div>
|
||||||
</div>
|
<ul><li>9 AM: brainstorming about our new amazing business
|
||||||
</blockquote>
|
app</span></li></li>
|
||||||
</div>
|
<li>9.45 AM: summary</li>
|
||||||
|
<li>10 AM: meeting with Fabien to present our app</li>
|
||||||
|
</ul></div>
|
||||||
|
<div>Is everything ok for you ?</div>
|
||||||
|
<div>
|
||||||
|
<p>--<br>
|
||||||
|
Administrator</p>
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<p>Log in our portal at:
|
||||||
|
<a href="http://localhost:8069#action=login&db=mail_1&token=rHdWcUART5PhEnJRaXjH">http://localhost:8069#action=login&db=mail_1&token=rHdWcUART5PhEnJRaXjH</a></p>
|
||||||
</div>
|
</div>
|
||||||
</blockquote>
|
</blockquote>
|
||||||
je rajotuerais bien pommes de terre dans la liste.<br><blockquote>
|
Ok for me. I am replying directly below your mail, using
|
||||||
<div>
|
Thunderbird, with a signature.<br><br>
|
||||||
<div>
|
Did you receive my email about my new laptop, by the way ?<br><br>
|
||||||
<blockquote>
|
Raoul.<br><pre>--
|
||||||
<div>
|
Raoul Grosbedonnée
|
||||||
<div>
|
|
||||||
<div>
|
|
||||||
<blockquote>
|
|
||||||
<ul><li>Frites</li>
|
|
||||||
</ul><div><br></div>
|
|
||||||
<div>Clairement, hein ?</div>
|
|
||||||
-- <br>
|
|
||||||
Raoul Tartopoils<br></blockquote>
|
|
||||||
<br><br></div>
|
|
||||||
</div>
|
|
||||||
<span><font>
|
|
||||||
<pre>--
|
|
||||||
Raoul Tartopoils
|
|
||||||
</pre>
|
|
||||||
</font></span></div>
|
|
||||||
</blockquote>
|
|
||||||
</div>
|
|
||||||
<br><br><div><br></div>
|
|
||||||
-- <br>
|
|
||||||
Raoul Tartopoils<br></div>
|
|
||||||
</blockquote>
|
|
||||||
<br><br><pre>--
|
|
||||||
Raoul Tartopoils
|
|
||||||
</pre>"""
|
</pre>"""
|
||||||
|
|
||||||
|
|
||||||
TEXT_TPL = """Salut Raoul!
|
TEXT_TPL = """Salut Raoul!
|
||||||
Le 28 oct. 2012 à 00:02, Raoul Grosbedon a écrit :
|
Le 28 oct. 2012 à 00:02, Raoul Grosbedon a écrit :
|
||||||
|
|
||||||
> C'est sûr que je suis intéressé (quote)!
|
> C'est sûr que je suis intéressé (quote)!
|
||||||
|
|
||||||
Trouloulou pouet pouet.
|
Trouloulou pouet pouet. Je ne vais quand même pas écrire de vrais mails, non mais ho.
|
||||||
|
|
||||||
Je ne vais quand même pas écrire de vrais mails, non mais ho.
|
|
||||||
|
|
||||||
> 2012/10/27 Bert Tartopoils :
|
> 2012/10/27 Bert Tartopoils :
|
||||||
>> Diantre, me disè-je en envoyant un message similaire à Martine, mais comment vas-tu (quote)?
|
>> Diantre, me disè-je en envoyant un message similaire à Martine, mais comment vas-tu (quote)?
|
||||||
|
@ -138,7 +115,6 @@ Je ne vais quand même pas écrire de vrais mails, non mais ho.
|
||||||
>>
|
>>
|
||||||
>
|
>
|
||||||
>
|
>
|
||||||
>
|
|
||||||
> --
|
> --
|
||||||
> Raoul Grosbedon
|
> Raoul Grosbedon
|
||||||
|
|
||||||
|
@ -147,21 +123,8 @@ bert.tartopoils@miam.miam
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
class TestAppendContentToHtml(unittest2.TestCase):
|
|
||||||
""" Test some of our generic utility functions """
|
|
||||||
|
|
||||||
def test_append_to_html(self):
|
|
||||||
test_samples = [
|
|
||||||
('<!DOCTYPE...><HTML encoding="blah">some <b>content</b></HtMl>', '--\nYours truly', True,
|
|
||||||
'<!DOCTYPE...><html encoding="blah">some <b>content</b>\n<pre>--\nYours truly</pre>\n</html>'),
|
|
||||||
('<html><body>some <b>content</b></body></html>', '<!DOCTYPE...>\n<html><body>\n<p>--</p>\n<p>Yours truly</p>\n</body>\n</html>', False,
|
|
||||||
'<html><body>some <b>content</b>\n\n\n<p>--</p>\n<p>Yours truly</p>\n\n\n</body></html>'),
|
|
||||||
]
|
|
||||||
for html, content, flag, expected in test_samples:
|
|
||||||
self.assertEqual(append_content_to_html(html, content, flag), expected, 'append_content_to_html is broken')
|
|
||||||
|
|
||||||
|
|
||||||
class TestSanitizer(unittest2.TestCase):
|
class TestSanitizer(unittest2.TestCase):
|
||||||
|
""" Test the html sanitizer """
|
||||||
# TDE note: could be improved by actually checking the output
|
# TDE note: could be improved by actually checking the output
|
||||||
|
|
||||||
def test_simple(self):
|
def test_simple(self):
|
||||||
|
@ -173,33 +136,67 @@ class TestSanitizer(unittest2.TestCase):
|
||||||
self.assertEqual(x, html_sanitize(x))
|
self.assertEqual(x, html_sanitize(x))
|
||||||
|
|
||||||
def test_no_exception(self):
|
def test_no_exception(self):
|
||||||
html_sanitize(test_case)
|
html_sanitize(HTML_SOURCE)
|
||||||
|
|
||||||
def test_unicode(self):
|
def test_unicode(self):
|
||||||
html_sanitize("Merci à l'intérêt pour notre produit.nous vous contacterons bientôt. Merci")
|
html_sanitize("Merci à l'intérêt pour notre produit.nous vous contacterons bientôt. Merci")
|
||||||
|
|
||||||
|
|
||||||
class TestCleaner(unittest2.TestCase):
|
class TestCleaner(unittest2.TestCase):
    """ Test the email cleaner function that filters the content of incoming emails """

    def test_html_email_clean(self):
        """ Run html_email_clean on several sample replies and check that
        quotes and signatures are gone while the reply text is kept. """
        # Test1: reply through gmail: quote in blockquote, signature --\nAdministrator
        new_html = html_email_clean(GMAIL_REPLY1_SAN)
        self.assertNotIn('blockquote', new_html, 'html_email_cleaner did not remove a blockquote')
        # the quoted original message must be gone: the failure message says so
        self.assertNotIn('I contact you about our meeting', new_html, 'html_email_cleaner did not remove the quoted content')
        self.assertNotIn('Administrator', new_html, 'html_email_cleaner did not erase the signature')
        self.assertIn('Ok for me', new_html, 'html_email_cleaner erased too much content')

        # Test2: reply through Thunderbird 16.0.2
        new_html = html_email_clean(THUNDERBIRD_16_REPLY1_SAN)
        self.assertNotIn('blockquote', new_html, 'html_email_cleaner did not remove a blockquote')
        self.assertNotIn('I contact you about our meeting', new_html, 'html_email_cleaner did not remove the quoted content')
        self.assertNotIn('Administrator', new_html, 'html_email_cleaner did not erase the signature')
        self.assertNotIn('Grosbedonn', new_html, 'html_email_cleaner did not erase the signature')
        self.assertIn('Ok for me', new_html, 'html_email_cleaner erased too much content')

        # Test3: text email
        new_html = html_email_clean(TEXT_MAIL1)
        self.assertIn('I contact you about our meeting', new_html, 'html_email_cleaner wrongly removed the quoted content')
        self.assertNotIn('Administrator', new_html, 'html_email_cleaner did not erase the signature')

        # Test4: more complex text email
        new_html = html_email_clean(TEXT_TPL)
        self.assertNotIn('quote', new_html, 'html_email_cleaner did not remove correctly plaintext quotes')
|
||||||
|
|
||||||
|
|
||||||
class TestText2Html(unittest2.TestCase):
|
class TestAppendContentToHtml(unittest2.TestCase):
    """ Test some of our generic utility functions about html """

    def test_plaintext2html(self):
        """ Check plaintext2html output for each (content, container_tag, expected) case. """
        cases = [
            ("First \nSecond \nThird\n \nParagraph\n\r--\nSignature paragraph", 'div',
             "<div><p>First <br/>Second <br/>Third</p><p>Paragraph</p><p>--<br/>Signature paragraph</p></div>"),
            # markup found in the plaintext must come out as escaped entities
            ("First<p>It should be escaped</p>\nSignature", False,
             "<p>First&lt;p&gt;It should be escaped&lt;/p&gt;<br/>Signature</p>")
        ]
        for content, container_tag, expected in cases:
            html = plaintext2html(content, container_tag)
            self.assertEqual(html, expected, 'plaintext2html is broken')

    def test_append_to_html(self):
        """ Check append_content_to_html for each
        (html, content, plaintext, preserve, container_tag, expected) sample. """
        test_samples = [
            ('<!DOCTYPE...><HTML encoding="blah">some <b>content</b></HtMl>', '--\nYours truly', True, True, False,
             '<!DOCTYPE...><html encoding="blah">some <b>content</b>\n<pre>--\nYours truly</pre>\n</html>'),
            ('<!DOCTYPE...><HTML encoding="blah">some <b>content</b></HtMl>', '--\nYours truly', True, False, False,
             '<!DOCTYPE...><html encoding="blah">some <b>content</b>\n<p>--<br/>Yours truly</p>\n</html>'),
            ('<html><body>some <b>content</b></body></html>', '<!DOCTYPE...>\n<html><body>\n<p>--</p>\n<p>Yours truly</p>\n</body>\n</html>', False, False, False,
             '<html><body>some <b>content</b>\n\n\n<p>--</p>\n<p>Yours truly</p>\n\n\n</body></html>'),
        ]
        for html, content, plaintext_flag, preserve_flag, container_tag, expected in test_samples:
            self.assertEqual(append_content_to_html(html, content, plaintext_flag, preserve_flag, container_tag), expected, 'append_content_to_html is broken')
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest2.main()
|
unittest2.main()
|
||||||
|
|
|
@ -121,6 +121,7 @@ def html_email_clean(html):
|
||||||
be present in the html string. This method therefore takes as input
|
be present in the html string. This method therefore takes as input
|
||||||
html code coming from a sanitized source, like fields.html.
|
html code coming from a sanitized source, like fields.html.
|
||||||
"""
|
"""
|
||||||
|
html = ustr(html)
|
||||||
modified_html = ''
|
modified_html = ''
|
||||||
|
|
||||||
# 1. <br[ /]> -> \n, because otherwise the tree is obfuscated
|
# 1. <br[ /]> -> \n, because otherwise the tree is obfuscated
|
||||||
|
@ -131,6 +132,7 @@ def html_email_clean(html):
|
||||||
idx = item.end()
|
idx = item.end()
|
||||||
modified_html += html[idx:]
|
modified_html += html[idx:]
|
||||||
html = modified_html
|
html = modified_html
|
||||||
|
# TDE note: there seem to be lots of <div><br></div> in emails... needs to be checked, could be cleaned
|
||||||
|
|
||||||
# 2. form a tree, handle (currently ?) pure-text by enclosing them in a pre
|
# 2. form a tree, handle (currently ?) pure-text by enclosing them in a pre
|
||||||
root = lxml.html.fromstring(html)
|
root = lxml.html.fromstring(html)
|
||||||
|
@ -138,9 +140,28 @@ def html_email_clean(html):
|
||||||
html = '<div>%s</div>' % html
|
html = '<div>%s</div>' % html
|
||||||
root = lxml.html.fromstring(html)
|
root = lxml.html.fromstring(html)
|
||||||
|
|
||||||
|
# 2.5 remove quoted text in nodes
|
||||||
|
quote_tags = re.compile(r'(\n(>)+[^\n\r]*)')
|
||||||
|
for node in root.getiterator():
|
||||||
|
if not node.text:
|
||||||
|
continue
|
||||||
|
idx = 0
|
||||||
|
text = ''
|
||||||
|
for item in re.finditer(quote_tags, node.text):
|
||||||
|
print item
|
||||||
|
text += node.text[idx:item.start()]
|
||||||
|
idx = item.end()
|
||||||
|
text += node.text[idx:]
|
||||||
|
node.text = text
|
||||||
|
|
||||||
# 3. remove blockquotes
|
# 3. remove blockquotes
|
||||||
quotes = [el for el in root.getiterator(tag='blockquote')]
|
quotes = [el for el in root.getiterator(tag='blockquote')]
|
||||||
for node in quotes:
|
for node in quotes:
|
||||||
|
# copy the node tail into parent text
|
||||||
|
if node.tail:
|
||||||
|
parent = node.getparent()
|
||||||
|
parent.text = parent.text or '' + node.tail
|
||||||
|
# remove the node
|
||||||
node.getparent().remove(node)
|
node.getparent().remove(node)
|
||||||
|
|
||||||
# 4. strip signatures
|
# 4. strip signatures
|
||||||
|
@ -187,9 +208,7 @@ def html2plaintext(html, body_id=None, encoding='utf-8'):
|
||||||
## download here: http://www.peterbe.com/plog/html2plaintext
|
## download here: http://www.peterbe.com/plog/html2plaintext
|
||||||
|
|
||||||
html = ustr(html)
|
html = ustr(html)
|
||||||
|
tree = etree.fromstring(html, parser=etree.HTMLParser())
|
||||||
from lxml.etree import tostring, fromstring, HTMLParser
|
|
||||||
tree = fromstring(html, parser=HTMLParser())
|
|
||||||
|
|
||||||
if body_id is not None:
|
if body_id is not None:
|
||||||
source = tree.xpath('//*[@id=%s]' % (body_id,))
|
source = tree.xpath('//*[@id=%s]' % (body_id,))
|
||||||
|
@ -208,7 +227,7 @@ def html2plaintext(html, body_id=None, encoding='utf-8'):
|
||||||
link.text = '%s [%s]' % (link.text, i)
|
link.text = '%s [%s]' % (link.text, i)
|
||||||
url_index.append(url)
|
url_index.append(url)
|
||||||
|
|
||||||
html = ustr(tostring(tree, encoding=encoding))
|
html = ustr(etree.tostring(tree, encoding=encoding))
|
||||||
|
|
||||||
html = html.replace('<strong>', '*').replace('</strong>', '*')
|
html = html.replace('<strong>', '*').replace('</strong>', '*')
|
||||||
html = html.replace('<b>', '*').replace('</b>', '*')
|
html = html.replace('<b>', '*').replace('</b>', '*')
|
||||||
|
@ -233,7 +252,7 @@ def html2plaintext(html, body_id=None, encoding='utf-8'):
|
||||||
|
|
||||||
return html
|
return html
|
||||||
|
|
||||||
def text2html(text, container_tag='div'):
|
def plaintext2html(text, container_tag=False):
|
||||||
""" Convert plaintext into html. Content of the text is escaped to manage
|
""" Convert plaintext into html. Content of the text is escaped to manage
|
||||||
html entities, using cgi.escape().
|
html entities, using cgi.escape().
|
||||||
- all \n,\r are replaced by <br />
|
- all \n,\r are replaced by <br />
|
||||||
|
@ -243,7 +262,7 @@ def text2html(text, container_tag='div'):
|
||||||
:param string container_tag: container of the html; by default the
|
:param string container_tag: container of the html; by default the
|
||||||
content is embedded into a <div>
|
content is embedded into a <div>
|
||||||
"""
|
"""
|
||||||
text = cgi.escape(text)
|
text = cgi.escape(ustr(text))
|
||||||
|
|
||||||
# 1. replace \n and \r
|
# 1. replace \n and \r
|
||||||
text = text.replace('\n', '<br/>')
|
text = text.replace('\n', '<br/>')
|
||||||
|
@ -261,7 +280,45 @@ def text2html(text, container_tag='div'):
|
||||||
# 4. container
|
# 4. container
|
||||||
if container_tag:
|
if container_tag:
|
||||||
final = '<%s>%s</%s>' % (container_tag, final, container_tag)
|
final = '<%s>%s</%s>' % (container_tag, final, container_tag)
|
||||||
return final
|
return ustr(final)
|
||||||
|
|
||||||
|
def append_content_to_html(html, content, plaintext=True, preserve=False, container_tag=False):
    """ Append extra content at the end of an HTML snippet, trying
        to locate the end of the HTML document (</body>, </html>, or
        EOF), and converting the provided content in html unless ``plaintext``
        is False.
        Content conversion can be done in two ways:
        - wrapping it into a pre (preserve=True); the text is html-escaped
          first so that it is displayed literally
        - use plaintext2html (preserve=False, using container_tag to wrap the
          whole content)
        A side-effect of this method is to coerce all HTML tags to
        lowercase in ``html``, and strip enclosing <html> or <body> tags in
        content if ``plaintext`` is False.

        :param str html: html tagsoup (doesn't have to be XHTML)
        :param str content: extra content to append
        :param bool plaintext: whether content is plaintext and must be
            converted to html before being appended
        :param bool preserve: if content is plaintext, wrap it into a <pre>
            instead of converting it into html
        :param container_tag: tag name given to plaintext2html to wrap the
            converted content (only used when plaintext is True and
            preserve is False); a false value means no wrapping tag
        :return: ``html`` with the converted content inserted before
            </body>, else before </html>, else appended at the end
    """
    html = ustr(html)
    if plaintext and preserve:
        # content is plain text, not markup: escape it so that characters
        # such as '<' or '&' do not turn into (broken) html inside the <pre>
        content = u'\n<pre>%s</pre>\n' % cgi.escape(ustr(content))
    elif plaintext:
        content = '\n%s\n' % plaintext2html(content, container_tag)
    else:
        # content is already html: drop its own document-level tags
        content = re.sub(r'(?i)(</?html.*>|</?body.*>|<!\W*DOCTYPE.*>)', '', content)
        content = u'\n%s\n' % ustr(content)
    # Force all tags to lowercase, so that the find() calls below match
    html = re.sub(r'(</?)\W*(\w+)([ >])',
        lambda m: '%s%s%s' % (m.group(1), m.group(2).lower(), m.group(3)), html)
    insert_location = html.find('</body>')
    if insert_location == -1:
        insert_location = html.find('</html>')
    if insert_location == -1:
        # no closing tag found: simply append at the end
        return '%s%s' % (html, content)
    return '%s%s%s' % (html[:insert_location], content, html[insert_location:])
|
||||||
|
|
||||||
#----------------------------------------------------------
|
#----------------------------------------------------------
|
||||||
# Emails
|
# Emails
|
||||||
|
@ -339,33 +396,3 @@ def email_split(text):
|
||||||
if not text:
|
if not text:
|
||||||
return []
|
return []
|
||||||
return re.findall(r'([^ ,<@]+@[^> ,]+)', text)
|
return re.findall(r'([^ ,<@]+@[^> ,]+)', text)
|
||||||
|
|
||||||
def append_content_to_html(html, content, plaintext=True):
|
|
||||||
"""Append extra content at the end of an HTML snippet, trying
|
|
||||||
to locate the end of the HTML document (</body>, </html>, or
|
|
||||||
EOF), and wrapping the provided content in a <pre/> block
|
|
||||||
unless ``plaintext`` is False. A side-effect of this
|
|
||||||
method is to coerce all HTML tags to lowercase in ``html``,
|
|
||||||
and strip enclosing <html> or <body> tags in content if
|
|
||||||
``plaintext`` is False.
|
|
||||||
|
|
||||||
:param str html: html tagsoup (doesn't have to be XHTML)
|
|
||||||
:param str content: extra content to append
|
|
||||||
:param bool plaintext: whether content is plaintext and should
|
|
||||||
be wrapped in a <pre/> tag.
|
|
||||||
"""
|
|
||||||
html = ustr(html)
|
|
||||||
if plaintext:
|
|
||||||
content = u'\n<pre>%s</pre>\n' % ustr(content)
|
|
||||||
else:
|
|
||||||
content = re.sub(r'(?i)(</?html.*>|</?body.*>|<!\W*DOCTYPE.*>)', '', content)
|
|
||||||
content = u'\n%s\n' % ustr(content)
|
|
||||||
# Force all tags to lowercase
|
|
||||||
html = re.sub(r'(</?)\W*(\w+)([ >])',
|
|
||||||
lambda m: '%s%s%s' % (m.group(1), m.group(2).lower(), m.group(3)), html)
|
|
||||||
insert_location = html.find('</body>')
|
|
||||||
if insert_location == -1:
|
|
||||||
insert_location = html.find('</html>')
|
|
||||||
if insert_location == -1:
|
|
||||||
return '%s%s' % (html, content)
|
|
||||||
return '%s%s%s' % (html[:insert_location], content, html[insert_location:])
|
|
||||||
|
|
Loading…
Reference in New Issue