From 8c8a9c4b92b7901d2871e3a50decca9689c0c96e Mon Sep 17 00:00:00 2001 From: Olivier Dony Date: Fri, 2 Jul 2010 15:44:45 +0200 Subject: [PATCH] [IMP] mail_gateway: another round of encoding handling improvements. WARNING: this version requires to use the updated openerp_mailgate.py script to benefit from all improvements bzr revid: odo@openerp.com-20100702134445-eoeu987j6smszk02 --- addons/mail_gateway/mail_gateway.py | 97 ++++++++++--------- .../openerp_mailgate/openerp_mailgate.py | 4 +- 2 files changed, 55 insertions(+), 46 deletions(-) diff --git a/addons/mail_gateway/mail_gateway.py b/addons/mail_gateway/mail_gateway.py index 087403ba460..e893e3648d5 100644 --- a/addons/mail_gateway/mail_gateway.py +++ b/addons/mail_gateway/mail_gateway.py @@ -29,6 +29,7 @@ import base64 import re from tools.translate import _ import logging +import xmlrpclib _logger = logging.getLogger('mailgate') @@ -173,21 +174,11 @@ class mailgate_tool(osv.osv_memory): _name = 'email.server.tools' _description = "Email Server Tools" - def _to_decode(self, s, charsets): - if not s: - return s - for charset in charsets: - if charset: - try: - return s.decode(charset) - except UnicodeError: - pass - return s.decode('latin1') - def _decode_header(self, text): + """Returns unicode() string conversion of the the given encoded smtp header""" if text: - text = decode_header(text.replace('\r', '')) - return ''.join(map(lambda x:self._to_decode(x[0], [x[1]]), text or [])) + text = decode_header(text.replace('\r', '')) + return ''.join([tools.ustr(x[0], x[1]) for x in text]) def to_email(self,text): return re.findall(r'([^ ,<@]+@[^> ,]+)',text) @@ -219,7 +210,7 @@ class mailgate_tool(osv.osv_memory): 'message_id': msg.get('message-id'), 'references': msg.get('references'), 'res_id': res_id, - 'user_id': uid, + 'user_id': uid, 'attachment_ids': [(6, 0, attach)] } msg_pool.create(cr, uid, msg_data, context=context) @@ -236,9 +227,10 @@ class mailgate_tool(osv.osv_memory): for res in model_pool.browse(cr, uid, res_ids, context=context): message_followers = model_pool.message_followers(cr, uid, [res.id])[res.id] message_followers_emails = self.to_email(','.join(message_followers)) - message_recipients = self.to_email(','.join([self._decode_header(msg['from']), + message_recipients = self.to_email(','.join(filter(None, + [self._decode_header(msg['from']), self._decode_header(msg['to']), - self._decode_header(msg['cc'])])) + self._decode_header(msg['cc'])]))) message_forward = [i for i in message_followers_emails if (i and (i not in message_recipients))] if message_forward: @@ -261,13 +253,22 @@ class mailgate_tool(osv.osv_memory): @param cr: the current row, from the database cursor, @param uid: the current user’s ID for security checks, @param model: OpenObject Model - @param message: Email details + @param message: Email details, passed as a string or an xmlrpclib.Binary @param attach: Email attachments @param context: A standard dictionary for contextual values""" - model_pool = self.pool.get(model) + + # extract message bytes, we are forced to pass the message as binary because + # we don't know its encoding until we parse its headers and hence can't + # convert it to utf-8 for transport between the mailgate script and here. + if isinstance(message, xmlrpclib.Binary): + message = str(message.data) + if not context: context = {} + + model_pool = self.pool.get(model) res_id = False + # Create New Record into particular model def create_record(msg): att_ids = [] @@ -301,14 +302,16 @@ class mailgate_tool(osv.osv_memory): # Warning: message_from_string doesn't always work correctly on unicode, # we must use utf-8 strings here :-( - msg_txt = email.message_from_string(tools.ustr(message).encode('utf-8')) - message_id = msg_txt.get('Message-ID', False) + if isinstance(message, unicode): + message = message.encode('utf-8') + msg_txt = email.message_from_string(message) + message_id = msg_txt.get('message-id', False) msg = {} if not message_id: # Very unusual situation, be we should be fault-tolerant here message_id = time.time() - msg_txt['Message-ID'] = message_id + msg_txt['message-id'] = message_id _logger.info('Message without message-id, generating a random one: %s', message_id) fields = msg_txt.keys() @@ -327,14 +330,14 @@ class mailgate_tool(osv.osv_memory): if 'Delivered-To' in fields: msg['to'] = self._decode_header(msg_txt.get('Delivered-To')) - if 'Cc' in fields: - msg['cc'] = self._decode_header(msg_txt.get('Cc')) + if 'CC' in fields: + msg['cc'] = self._decode_header(msg_txt.get('CC')) - if 'Reply-To' in fields: + if 'Reply-to' in fields: msg['reply'] = self._decode_header(msg_txt.get('Reply-To')) if 'Date' in fields: - msg['date'] = msg_txt.get('Date') + msg['date'] = self._decode_header(msg_txt.get('Date')) if 'Content-Transfer-Encoding' in fields: msg['encoding'] = msg_txt.get('Content-Transfer-Encoding') @@ -343,15 +346,15 @@ class mailgate_tool(osv.osv_memory): msg['references'] = msg_txt.get('References') if 'X-Priority' in fields: - msg['priority'] = msg_txt.get('X-priority', '3 (Normal)').split(' ')[0] + msg['priority'] = msg_txt.get('X-Priority', '3 (Normal)').split(' ')[0] - if not msg_txt.is_multipart() or 'text/plain' in msg.get('content-type', ''): + if not msg_txt.is_multipart() or 'text/plain' in msg.get('Content-Type', ''): encoding = msg_txt.get_content_charset() - msg['body'] = msg_txt.get_payload(decode=True) - if encoding: - msg['body'] = tools.ustr(msg['body']) + body = msg_txt.get_payload(decode=True) + msg['body'] = tools.ustr(body, encoding) attachments = {} + has_plain_text = False if msg_txt.is_multipart() or 'multipart/alternative' in msg.get('content-type', ''): body = "" for part in msg_txt.walk(): @@ -359,26 +362,28 @@ class mailgate_tool(osv.osv_memory): continue encoding = part.get_content_charset() - + filename = part.get_filename() if part.get_content_maintype()=='text': content = part.get_payload(decode=True) - filename = part.get_filename() - if filename : + if filename: attachments[filename] = content - else: - if encoding: - content = unicode(content, encoding) + elif not has_plain_text: + # main content parts should have 'text' maintype + # and no filename. we ignore the html part if + # there is already a plaintext part without filename, + # because presumably these are alternatives. + content = tools.ustr(content, encoding) if part.get_content_subtype() == 'html': body = tools.ustr(tools.html2plaintext(content)) elif part.get_content_subtype() == 'plain': body = content - elif part.get_content_maintype() in ('application', 'image', 'text'): - filename = part.get_filename(); + has_plain_text = True + elif part.get_content_maintype() in ('application', 'image'): if filename : attachments[filename] = part.get_payload(decode=True) else: res = part.get_payload(decode=True) - body += tools.ustr(res) + body += tools.ustr(res, encoding) msg['body'] = body msg['attachments'] = attachments @@ -402,20 +407,22 @@ class mailgate_tool(osv.osv_memory): res_id = res_id.group(1) if res_id: res_id = int(res_id) - res_ids.append(res_id) model_pool = self.pool.get(model) - - vals = {} - if hasattr(model_pool, 'message_update'): - model_pool.message_update(cr, uid, [res_id], vals, msg, context=context) + if model_pool.exists(cr, uid, res_id): + res_ids.append(res_id) + if hasattr(model_pool, 'message_update'): + model_pool.message_update(cr, uid, [res_id], {}, msg, context=context) + else: + raise NotImplementedError('model %s does not support updating records, mailgate API method message_update() is missing'%model) if not len(res_ids): new_res_id, attachment_ids = create_record(msg) res_ids = [new_res_id] + # Store messages context.update({'model' : model}) if hasattr(model_pool, '_history'): - model_pool._history(cr, uid, res_ids, _('Receive'), history=True, + model_pool._history(cr, uid, res_ids, _('receive'), history=True, subject = msg.get('subject'), email = msg.get('to'), details = msg.get('body'), diff --git a/addons/mail_gateway/scripts/openerp_mailgate/openerp_mailgate.py b/addons/mail_gateway/scripts/openerp_mailgate/openerp_mailgate.py index 166e1e8781f..affd03e03a0 100755 --- a/addons/mail_gateway/scripts/openerp_mailgate/openerp_mailgate.py +++ b/addons/mail_gateway/scripts/openerp_mailgate/openerp_mailgate.py @@ -51,7 +51,9 @@ class email_parser(object): def parse(self, message): try: - res_id = self.rpc('email.server.tools', 'process_email', self.model, message) + # pass message as bytes because we don't know its encoding until we parse its headers + # and hence can't convert it to utf-8 for transport + res_id = self.rpc('email.server.tools', 'process_email', self.model, xmlrpclib.Binary(message)) except Exception, e: logger = logging.getLogger('mail-gateway') logger.warning('Failed to process incoming email. Source of the failed mail is available at debug level.', exc_info=True)