From bd52298073f054ae817990a840d570b23c1e1a25 Mon Sep 17 00:00:00 2001 From: Martin Trigaux Date: Thu, 9 Oct 2014 09:14:22 +0200 Subject: [PATCH] [IMP] mail: parsing emails with several html parts If an email contains several text/html parts inside a multipart email, the previous code was only keeping the last content part. The Content-Type: multipart/mixed allows several independent parts (RFC1341 7.2.2), so two html parts are technically valid. With this patch, the two parts are concatenated. (opw 614755) Modify append_content_to_html regex to make sure the regex keeps the content of the html instead of removing it. e.g.: "123 456 789" used to be stripped to "123 789" while we expect "123 456 789" --- addons/mail/mail_thread.py | 9 ++++- addons/mail/tests/test_mail_gateway.py | 55 ++++++++++++++++++++++++++ openerp/tools/mail.py | 2 +- 3 files changed, 64 insertions(+), 2 deletions(-) diff --git a/addons/mail/mail_thread.py b/addons/mail/mail_thread.py index 63cb3a845a1..ee039533af6 100644 --- a/addons/mail/mail_thread.py +++ b/addons/mail/mail_thread.py @@ -801,9 +801,13 @@ class mail_thread(osv.AbstractModel): body = tools.append_content_to_html(u'', body, preserve=True) else: alternative = False + mixed = False + html = u'' for part in message.walk(): if part.get_content_type() == 'multipart/alternative': alternative = True + if part.get_content_type() == 'multipart/mixed': + mixed = True if part.get_content_maintype() == 'multipart': continue # skip container # part.get_filename returns decoded value if able to decode, coded otherwise. 
@@ -830,8 +834,11 @@ class mail_thread(osv.AbstractModel): encoding, errors='replace'), preserve=True) # 3) text/html -> raw elif part.get_content_type() == 'text/html': + # mutlipart/alternative have one text and a html part, keep only the second + # mixed allows several html parts, append html content + append_content = not alternative or (html and mixed) html = tools.ustr(part.get_payload(decode=True), encoding, errors='replace') - if alternative: + if not append_content: body = html else: body = tools.append_content_to_html(body, html, plaintext=False) diff --git a/addons/mail/tests/test_mail_gateway.py b/addons/mail/tests/test_mail_gateway.py index bc2f207464c..c952c6ab076 100644 --- a/addons/mail/tests/test_mail_gateway.py +++ b/addons/mail/tests/test_mail_gateway.py @@ -141,6 +141,53 @@ X-Attachment-Id: f_hkpb27k00 dGVzdAo= --089e01536c4ed4d17204e49b8e96--""" +MAIL_MULTIPART_MIXED_TWO = """X-Original-To: raoul@grosbedon.fr +Delivered-To: raoul@grosbedon.fr +Received: by mail1.grosbedon.com (Postfix, from userid 10002) + id E8166BFACA; Fri, 23 Aug 2013 13:18:01 +0200 (CEST) +From: "Bruce Wayne" +Content-Type: multipart/alternative; + boundary="Apple-Mail=_9331E12B-8BD2-4EC7-B53E-01F3FBEC9227" +Message-Id: <6BB1FAB2-2104-438E-9447-07AE2C8C4A92@sexample.com> +Mime-Version: 1.0 (Mac OS X Mail 7.3 \(1878.6\)) + +--Apple-Mail=_9331E12B-8BD2-4EC7-B53E-01F3FBEC9227 +Content-Transfer-Encoding: 7bit +Content-Type: text/plain; + charset=us-ascii + +First and second part + +--Apple-Mail=_9331E12B-8BD2-4EC7-B53E-01F3FBEC9227 +Content-Type: multipart/mixed; + boundary="Apple-Mail=_CA6C687E-6AA0-411E-B0FE-F0ABB4CFED1F" + +--Apple-Mail=_CA6C687E-6AA0-411E-B0FE-F0ABB4CFED1F +Content-Transfer-Encoding: 7bit +Content-Type: text/html; + charset=us-ascii + +First part + +--Apple-Mail=_CA6C687E-6AA0-411E-B0FE-F0ABB4CFED1F +Content-Disposition: inline; + filename=thetruth.pdf +Content-Type: application/pdf; + name="thetruth.pdf" +Content-Transfer-Encoding: base64 + 
+SSBhbSB0aGUgQmF0TWFuCg== + +--Apple-Mail=_CA6C687E-6AA0-411E-B0FE-F0ABB4CFED1F +Content-Transfer-Encoding: 7bit +Content-Type: text/html; + charset=us-ascii + +Second part +--Apple-Mail=_CA6C687E-6AA0-411E-B0FE-F0ABB4CFED1F-- + +--Apple-Mail=_9331E12B-8BD2-4EC7-B53E-01F3FBEC9227-- +""" class TestMailgateway(TestMailBase): @@ -202,6 +249,14 @@ class TestMailgateway(TestMailBase): self.assertIn('
Should create a multipart/mixed: from gmail, bold, with attachment.

', res.get('body', ''), 'message_parse: html version should be in body after parsing multipart/mixed') + res = self.mail_thread.message_parse(cr, uid, MAIL_MULTIPART_MIXED_TWO) + self.assertNotIn('First and second part', res.get('body', ''), + 'message_parse: text version should not be in body after parsing multipart/mixed') + self.assertIn('First part', res.get('body', ''), + 'message_parse: first part of the html version should be in body after parsing multipart/mixed') + self.assertIn('Second part', res.get('body', ''), + 'message_parse: second part of the html version should be in body after parsing multipart/mixed') + def test_10_message_process(self): """ Testing incoming emails processing. """ cr, uid, user_raoul = self.cr, self.uid, self.user_raoul diff --git a/openerp/tools/mail.py b/openerp/tools/mail.py index 49ec5849b18..b0212db11ae 100644 --- a/openerp/tools/mail.py +++ b/openerp/tools/mail.py @@ -282,7 +282,7 @@ def append_content_to_html(html, content, plaintext=True, preserve=False, contai elif plaintext: content = '\n%s\n' % plaintext2html(content, container_tag) else: - content = re.sub(r'(?i)(||)', '', content) + content = re.sub(r'(?i)(]*>)', '', content) content = u'\n%s\n' % ustr(content) # Force all tags to lowercase html = re.sub(r'(])',