[FIX] tools: html_email_clean: fixed the signature regex,
which broke when the signature contained dots. Also fixed the insertion of the read more link. Added the test case that triggered the error. bzr revid: tde@openerp.com-20131016103516-w44j6r5oaljpwvmx
This commit is contained in:
parent
54f740960e
commit
983d5eb9fa
|
@ -223,6 +223,13 @@ class TestCleaner(unittest2.TestCase):
|
||||||
for ext in test_mail_examples.THUNDERBIRD_1_OUT:
|
for ext in test_mail_examples.THUNDERBIRD_1_OUT:
|
||||||
self.assertNotIn(ext, new_html, 'html_email_cleaner did not erase signature / quoted content')
|
self.assertNotIn(ext, new_html, 'html_email_cleaner did not erase signature / quoted content')
|
||||||
|
|
||||||
|
def test_70_read_more(self):
|
||||||
|
new_html = html_email_clean(test_mail_examples.BUG1, remove=True, shorten=True, max_length=100)
|
||||||
|
for ext in test_mail_examples.BUG_1_IN:
|
||||||
|
self.assertIn(ext, new_html, 'html_email_cleaner wrongly removed valid content')
|
||||||
|
for ext in test_mail_examples.BUG_1_OUT:
|
||||||
|
self.assertNotIn(ext, new_html, 'html_email_cleaner did not removed invalid content')
|
||||||
|
|
||||||
def test_90_misc(self):
|
def test_90_misc(self):
|
||||||
# False boolean for text must return empty string
|
# False boolean for text must return empty string
|
||||||
new_html = html_email_clean(False)
|
new_html = html_email_clean(False)
|
||||||
|
|
|
@ -637,3 +637,59 @@ MSOFFICE_3 = """<div>
|
||||||
|
|
||||||
MSOFFICE_3_IN = ['I saw your boss yesterday']
|
MSOFFICE_3_IN = ['I saw your boss yesterday']
|
||||||
MSOFFICE_3_OUT = ['I noticed you recently downloaded OpenERP.', 'You indicated that you wish', 'Belgium: +32.81.81.37.00']
|
MSOFFICE_3_OUT = ['I noticed you recently downloaded OpenERP.', 'You indicated that you wish', 'Belgium: +32.81.81.37.00']
|
||||||
|
|
||||||
|
|
||||||
|
# ------------------------------------------------------------
|
||||||
|
# Test cases coming from bugs
|
||||||
|
# ------------------------------------------------------------
|
||||||
|
|
||||||
|
# bug: read more not apparent, strange message in read more span
|
||||||
|
BUG1 = """<pre>Hi Migration Team,
|
||||||
|
|
||||||
|
Paragraph 1, blah blah blah blah blah blah blah blah blah blah blah blah
|
||||||
|
blah blah blah blah blah blah blah blah blah blah blah blah blah blah
|
||||||
|
blah blah blah blah blah blah blah blah blah blah blah blah blah blah
|
||||||
|
blah blah blah blah blah blah blah blah blah blah blah blah blah blah
|
||||||
|
blah blah blah blah blah blah blah blah blah blah blah blah blah blah
|
||||||
|
blah blah blah blah blah blah blah blah blah blah blah blah blah blah
|
||||||
|
blah blah blah blah blah blah blah blah.
|
||||||
|
|
||||||
|
Paragraph 2, blah blah blah blah blah blah blah blah blah blah blah blah
|
||||||
|
blah blah blah blah blah blah blah blah blah blah blah blah blah blah
|
||||||
|
blah blah blah blah blah blah blah blah blah blah blah blah blah blah
|
||||||
|
blah blah blah blah blah blah blah blah blah blah blah blah blah blah
|
||||||
|
blah blah blah blah blah blah blah blah blah blah blah blah blah blah
|
||||||
|
blah blah blah blah blah blah blah blah blah blah blah blah blah blah
|
||||||
|
blah blah blah blah blah blah blah blah.
|
||||||
|
|
||||||
|
Paragraph 3, blah blah blah blah blah blah blah blah blah blah blah blah
|
||||||
|
blah blah blah blah blah blah blah blah blah blah blah blah blah blah
|
||||||
|
blah blah blah blah blah blah blah blah blah blah blah blah blah blah
|
||||||
|
blah blah blah blah blah blah blah blah blah blah blah blah blah blah
|
||||||
|
blah blah blah blah blah blah blah blah blah blah blah blah blah blah
|
||||||
|
blah blah blah blah blah blah blah blah blah blah blah blah blah blah
|
||||||
|
blah blah blah blah blah blah blah blah.
|
||||||
|
|
||||||
|
Thanks.
|
||||||
|
|
||||||
|
Regards,
|
||||||
|
|
||||||
|
--
|
||||||
|
Olivier Laurent
|
||||||
|
Migration Manager
|
||||||
|
OpenERP SA
|
||||||
|
Chaussée de Namur, 40
|
||||||
|
B-1367 Gérompont
|
||||||
|
Tel: +32.81.81.37.00
|
||||||
|
Web: http://www.openerp.com</pre>"""
|
||||||
|
|
||||||
|
BUG_1_IN = [
|
||||||
|
'Hi Migration Team',
|
||||||
|
'Paragraph 1'
|
||||||
|
]
|
||||||
|
BUG_1_OUT = [
|
||||||
|
'Olivier Laurent',
|
||||||
|
'Chaussée de Namur',
|
||||||
|
'81.81.37.00',
|
||||||
|
'openerp.com',
|
||||||
|
]
|
||||||
|
|
|
@ -206,7 +206,7 @@ def html_email_clean(html, remove=False, shorten=False, max_length=300):
|
||||||
|
|
||||||
# form node and tag text-based quotes and signature
|
# form node and tag text-based quotes and signature
|
||||||
quote_tags = re.compile(r'(\n(>)+[^\n\r]*)')
|
quote_tags = re.compile(r'(\n(>)+[^\n\r]*)')
|
||||||
signature = re.compile(r'([-]{2,}[\s]?[\r\n]{1,2}[^.]+)')
|
signature = re.compile(r'([-]{2,}[\s]?[\r\n]{1,2}[\s\S]+)')
|
||||||
for node in root.getiterator():
|
for node in root.getiterator():
|
||||||
_tag_matching_regex_in_text(quote_tags, node, 'span', {'text_quote': '1'})
|
_tag_matching_regex_in_text(quote_tags, node, 'span', {'text_quote': '1'})
|
||||||
_tag_matching_regex_in_text(signature, node, 'span', {'text_signature': '1'})
|
_tag_matching_regex_in_text(signature, node, 'span', {'text_signature': '1'})
|
||||||
|
@ -263,8 +263,7 @@ def html_email_clean(html, remove=False, shorten=False, max_length=300):
|
||||||
# create outertext node
|
# create outertext node
|
||||||
new_node = _create_node('span', outertext[stop_idx:])
|
new_node = _create_node('span', outertext[stop_idx:])
|
||||||
# add newly created nodes in dom
|
# add newly created nodes in dom
|
||||||
node.addnext(new_node)
|
node.append(read_more_node)
|
||||||
node.addnext(read_more_node)
|
|
||||||
# tag node
|
# tag node
|
||||||
new_node.set('in_overlength', '1')
|
new_node.set('in_overlength', '1')
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue