[FIX] tools: html_email_clean: fixed the signature regex,
which was buggy when the signature contained dots (matching stopped at the first dot). Also fixed the insertion of the read more link. Added the test case that triggered the error. bzr revid: tde@openerp.com-20131016103516-w44j6r5oaljpwvmx
This commit is contained in:
parent
54f740960e
commit
983d5eb9fa
|
@ -223,6 +223,13 @@ class TestCleaner(unittest2.TestCase):
|
|||
for ext in test_mail_examples.THUNDERBIRD_1_OUT:
|
||||
self.assertNotIn(ext, new_html, 'html_email_cleaner did not erase signature / quoted content')
|
||||
|
||||
def test_70_read_more(self):
    """Regression test for the BUG1 sample mail.

    Cleans the plain-text (<pre>) BUG1 mail with removal and shortening
    enabled (max_length=100), then checks that the leading content listed
    in BUG_1_IN is still present and that the signature fragments listed
    in BUG_1_OUT are gone from the result.
    """
    new_html = html_email_clean(test_mail_examples.BUG1, remove=True, shorten=True, max_length=100)
    # content located before the cut must survive the cleaning
    for ext in test_mail_examples.BUG_1_IN:
        self.assertIn(ext, new_html, 'html_email_cleaner wrongly removed valid content')
    # signature lines (some of them containing dots) must be stripped
    for ext in test_mail_examples.BUG_1_OUT:
        self.assertNotIn(ext, new_html, 'html_email_cleaner did not remove invalid content')
|
||||
|
||||
def test_90_misc(self):
|
||||
# False boolean for text must return empty string
|
||||
new_html = html_email_clean(False)
|
||||
|
|
|
@ -637,3 +637,59 @@ MSOFFICE_3 = """<div>
|
|||
|
||||
MSOFFICE_3_IN = ['I saw your boss yesterday']
|
||||
MSOFFICE_3_OUT = ['I noticed you recently downloaded OpenERP.', 'You indicated that you wish', 'Belgium: +32.81.81.37.00']
|
||||
|
||||
|
||||
# ------------------------------------------------------------
|
||||
# Test cases coming from bugs
|
||||
# ------------------------------------------------------------
|
||||
|
||||
# bug: read more not apparent, strange message in read more span
|
||||
BUG1 = """<pre>Hi Migration Team,
|
||||
|
||||
Paragraph 1, blah blah blah blah blah blah blah blah blah blah blah blah
|
||||
blah blah blah blah blah blah blah blah blah blah blah blah blah blah
|
||||
blah blah blah blah blah blah blah blah blah blah blah blah blah blah
|
||||
blah blah blah blah blah blah blah blah blah blah blah blah blah blah
|
||||
blah blah blah blah blah blah blah blah blah blah blah blah blah blah
|
||||
blah blah blah blah blah blah blah blah blah blah blah blah blah blah
|
||||
blah blah blah blah blah blah blah blah.
|
||||
|
||||
Paragraph 2, blah blah blah blah blah blah blah blah blah blah blah blah
|
||||
blah blah blah blah blah blah blah blah blah blah blah blah blah blah
|
||||
blah blah blah blah blah blah blah blah blah blah blah blah blah blah
|
||||
blah blah blah blah blah blah blah blah blah blah blah blah blah blah
|
||||
blah blah blah blah blah blah blah blah blah blah blah blah blah blah
|
||||
blah blah blah blah blah blah blah blah blah blah blah blah blah blah
|
||||
blah blah blah blah blah blah blah blah.
|
||||
|
||||
Paragraph 3, blah blah blah blah blah blah blah blah blah blah blah blah
|
||||
blah blah blah blah blah blah blah blah blah blah blah blah blah blah
|
||||
blah blah blah blah blah blah blah blah blah blah blah blah blah blah
|
||||
blah blah blah blah blah blah blah blah blah blah blah blah blah blah
|
||||
blah blah blah blah blah blah blah blah blah blah blah blah blah blah
|
||||
blah blah blah blah blah blah blah blah blah blah blah blah blah blah
|
||||
blah blah blah blah blah blah blah blah.
|
||||
|
||||
Thanks.
|
||||
|
||||
Regards,
|
||||
|
||||
--
|
||||
Olivier Laurent
|
||||
Migration Manager
|
||||
OpenERP SA
|
||||
Chaussée de Namur, 40
|
||||
B-1367 Gérompont
|
||||
Tel: +32.81.81.37.00
|
||||
Web: http://www.openerp.com</pre>"""
|
||||
|
||||
BUG_1_IN = [
|
||||
'Hi Migration Team',
|
||||
'Paragraph 1'
|
||||
]
|
||||
BUG_1_OUT = [
|
||||
'Olivier Laurent',
|
||||
'Chaussée de Namur',
|
||||
'81.81.37.00',
|
||||
'openerp.com',
|
||||
]
|
||||
|
|
|
@ -206,7 +206,7 @@ def html_email_clean(html, remove=False, shorten=False, max_length=300):
|
|||
|
||||
# form node and tag text-based quotes and signature
|
||||
quote_tags = re.compile(r'(\n(>)+[^\n\r]*)')
|
||||
signature = re.compile(r'([-]{2,}[\s]?[\r\n]{1,2}[^.]+)')
|
||||
signature = re.compile(r'([-]{2,}[\s]?[\r\n]{1,2}[\s\S]+)')
|
||||
for node in root.getiterator():
|
||||
_tag_matching_regex_in_text(quote_tags, node, 'span', {'text_quote': '1'})
|
||||
_tag_matching_regex_in_text(signature, node, 'span', {'text_signature': '1'})
|
||||
|
@ -263,8 +263,7 @@ def html_email_clean(html, remove=False, shorten=False, max_length=300):
|
|||
# create outertext node
|
||||
new_node = _create_node('span', outertext[stop_idx:])
|
||||
# add newly created nodes in dom
|
||||
node.addnext(new_node)
|
||||
node.addnext(read_more_node)
|
||||
node.append(read_more_node)
|
||||
# tag node
|
||||
new_node.set('in_overlength', '1')
|
||||
|
||||
|
|
Loading…
Reference in New Issue