[IMP] mail: parsing emails with several html parts
If an email contains several text/html parts inside a multipart email, the previous code kept only the last content part. Content-Type: multipart/mixed allows several independent parts (RFC 1341, section 7.2.2), so two html parts are technically valid. With this patch, the two parts are concatenated. (opw 614755) Modify the append_content_to_html regex so that it keeps the content of the html instead of removing it, e.g. "123 <html> 456 </html> 789" used to be stripped to "123 789" while we expect "123 456 789".
This commit is contained in:
parent
03df412faf
commit
bd52298073
|
@ -801,9 +801,13 @@ class mail_thread(osv.AbstractModel):
|
||||||
body = tools.append_content_to_html(u'', body, preserve=True)
|
body = tools.append_content_to_html(u'', body, preserve=True)
|
||||||
else:
|
else:
|
||||||
alternative = False
|
alternative = False
|
||||||
|
mixed = False
|
||||||
|
html = u''
|
||||||
for part in message.walk():
|
for part in message.walk():
|
||||||
if part.get_content_type() == 'multipart/alternative':
|
if part.get_content_type() == 'multipart/alternative':
|
||||||
alternative = True
|
alternative = True
|
||||||
|
if part.get_content_type() == 'multipart/mixed':
|
||||||
|
mixed = True
|
||||||
if part.get_content_maintype() == 'multipart':
|
if part.get_content_maintype() == 'multipart':
|
||||||
continue # skip container
|
continue # skip container
|
||||||
# part.get_filename returns decoded value if able to decode, coded otherwise.
|
# part.get_filename returns decoded value if able to decode, coded otherwise.
|
||||||
|
@ -830,8 +834,11 @@ class mail_thread(osv.AbstractModel):
|
||||||
encoding, errors='replace'), preserve=True)
|
encoding, errors='replace'), preserve=True)
|
||||||
# 3) text/html -> raw
|
# 3) text/html -> raw
|
||||||
elif part.get_content_type() == 'text/html':
|
elif part.get_content_type() == 'text/html':
|
||||||
|
# multipart/alternative has one text part and one html part, keep only the second
|
||||||
|
# mixed allows several html parts, append html content
|
||||||
|
append_content = not alternative or (html and mixed)
|
||||||
html = tools.ustr(part.get_payload(decode=True), encoding, errors='replace')
|
html = tools.ustr(part.get_payload(decode=True), encoding, errors='replace')
|
||||||
if alternative:
|
if not append_content:
|
||||||
body = html
|
body = html
|
||||||
else:
|
else:
|
||||||
body = tools.append_content_to_html(body, html, plaintext=False)
|
body = tools.append_content_to_html(body, html, plaintext=False)
|
||||||
|
|
|
@ -141,6 +141,53 @@ X-Attachment-Id: f_hkpb27k00
|
||||||
dGVzdAo=
|
dGVzdAo=
|
||||||
--089e01536c4ed4d17204e49b8e96--"""
|
--089e01536c4ed4d17204e49b8e96--"""
|
||||||
|
|
||||||
|
MAIL_MULTIPART_MIXED_TWO = """X-Original-To: raoul@grosbedon.fr
|
||||||
|
Delivered-To: raoul@grosbedon.fr
|
||||||
|
Received: by mail1.grosbedon.com (Postfix, from userid 10002)
|
||||||
|
id E8166BFACA; Fri, 23 Aug 2013 13:18:01 +0200 (CEST)
|
||||||
|
From: "Bruce Wayne" <bruce@wayneenterprises.com>
|
||||||
|
Content-Type: multipart/alternative;
|
||||||
|
boundary="Apple-Mail=_9331E12B-8BD2-4EC7-B53E-01F3FBEC9227"
|
||||||
|
Message-Id: <6BB1FAB2-2104-438E-9447-07AE2C8C4A92@sexample.com>
|
||||||
|
Mime-Version: 1.0 (Mac OS X Mail 7.3 \(1878.6\))
|
||||||
|
|
||||||
|
--Apple-Mail=_9331E12B-8BD2-4EC7-B53E-01F3FBEC9227
|
||||||
|
Content-Transfer-Encoding: 7bit
|
||||||
|
Content-Type: text/plain;
|
||||||
|
charset=us-ascii
|
||||||
|
|
||||||
|
First and second part
|
||||||
|
|
||||||
|
--Apple-Mail=_9331E12B-8BD2-4EC7-B53E-01F3FBEC9227
|
||||||
|
Content-Type: multipart/mixed;
|
||||||
|
boundary="Apple-Mail=_CA6C687E-6AA0-411E-B0FE-F0ABB4CFED1F"
|
||||||
|
|
||||||
|
--Apple-Mail=_CA6C687E-6AA0-411E-B0FE-F0ABB4CFED1F
|
||||||
|
Content-Transfer-Encoding: 7bit
|
||||||
|
Content-Type: text/html;
|
||||||
|
charset=us-ascii
|
||||||
|
|
||||||
|
<html><head></head><body>First part</body></html>
|
||||||
|
|
||||||
|
--Apple-Mail=_CA6C687E-6AA0-411E-B0FE-F0ABB4CFED1F
|
||||||
|
Content-Disposition: inline;
|
||||||
|
filename=thetruth.pdf
|
||||||
|
Content-Type: application/pdf;
|
||||||
|
name="thetruth.pdf"
|
||||||
|
Content-Transfer-Encoding: base64
|
||||||
|
|
||||||
|
SSBhbSB0aGUgQmF0TWFuCg==
|
||||||
|
|
||||||
|
--Apple-Mail=_CA6C687E-6AA0-411E-B0FE-F0ABB4CFED1F
|
||||||
|
Content-Transfer-Encoding: 7bit
|
||||||
|
Content-Type: text/html;
|
||||||
|
charset=us-ascii
|
||||||
|
|
||||||
|
<html><head></head><body>Second part</body></html>
|
||||||
|
--Apple-Mail=_CA6C687E-6AA0-411E-B0FE-F0ABB4CFED1F--
|
||||||
|
|
||||||
|
--Apple-Mail=_9331E12B-8BD2-4EC7-B53E-01F3FBEC9227--
|
||||||
|
"""
|
||||||
|
|
||||||
class TestMailgateway(TestMailBase):
|
class TestMailgateway(TestMailBase):
|
||||||
|
|
||||||
|
@ -202,6 +249,14 @@ class TestMailgateway(TestMailBase):
|
||||||
self.assertIn('<div dir="ltr">Should create a multipart/mixed: from gmail, <b>bold</b>, with attachment.<br clear="all"><div><br></div>', res.get('body', ''),
|
self.assertIn('<div dir="ltr">Should create a multipart/mixed: from gmail, <b>bold</b>, with attachment.<br clear="all"><div><br></div>', res.get('body', ''),
|
||||||
'message_parse: html version should be in body after parsing multipart/mixed')
|
'message_parse: html version should be in body after parsing multipart/mixed')
|
||||||
|
|
||||||
|
res = self.mail_thread.message_parse(cr, uid, MAIL_MULTIPART_MIXED_TWO)
|
||||||
|
self.assertNotIn('First and second part', res.get('body', ''),
|
||||||
|
'message_parse: text version should not be in body after parsing multipart/mixed')
|
||||||
|
self.assertIn('First part', res.get('body', ''),
|
||||||
|
'message_parse: first part of the html version should be in body after parsing multipart/mixed')
|
||||||
|
self.assertIn('Second part', res.get('body', ''),
|
||||||
|
'message_parse: second part of the html version should be in body after parsing multipart/mixed')
|
||||||
|
|
||||||
def test_10_message_process(self):
|
def test_10_message_process(self):
|
||||||
""" Testing incoming emails processing. """
|
""" Testing incoming emails processing. """
|
||||||
cr, uid, user_raoul = self.cr, self.uid, self.user_raoul
|
cr, uid, user_raoul = self.cr, self.uid, self.user_raoul
|
||||||
|
|
|
@ -282,7 +282,7 @@ def append_content_to_html(html, content, plaintext=True, preserve=False, contai
|
||||||
elif plaintext:
|
elif plaintext:
|
||||||
content = '\n%s\n' % plaintext2html(content, container_tag)
|
content = '\n%s\n' % plaintext2html(content, container_tag)
|
||||||
else:
|
else:
|
||||||
content = re.sub(r'(?i)(</?html.*>|</?body.*>|<!\W*DOCTYPE.*>)', '', content)
|
content = re.sub(r'(?i)(</?(?:html|body|head|!\s*DOCTYPE)[^>]*>)', '', content)
|
||||||
content = u'\n%s\n' % ustr(content)
|
content = u'\n%s\n' % ustr(content)
|
||||||
# Force all tags to lowercase
|
# Force all tags to lowercase
|
||||||
html = re.sub(r'(</?)\W*(\w+)([ >])',
|
html = re.sub(r'(</?)\W*(\w+)([ >])',
|
||||||
|
|
Loading…
Reference in New Issue