From 7d3d3a6abafa4560ef7e1ad803a013252668ddaf Mon Sep 17 00:00:00 2001 From: Olivier Dony Date: Tue, 6 Sep 2011 19:31:40 +0200 Subject: [PATCH] [FIX] fields.function: type=binary: workaround for the low byte values (<=0x1f) unsupported in XML We have a workaround in place for fields.function of binary type that may return values that are invalid in XML documents, and thus in our XML-RPC protocol. But out workaround failed to care for the invalid XML codepoints (below 0x1f) that are well valid in UTF-8 encoding. Added a sanity check for that as well, using a terrible workaround for this last resort case: b64-encode the bytes, to avoid crashing the request. bzr revid: odo@openerp.com-20110906173140-vc4tl6wstzt8h06o --- openerp/osv/fields.py | 47 ++++++++++++++++++++++++++----------------- 1 file changed, 28 insertions(+), 19 deletions(-) diff --git a/openerp/osv/fields.py b/openerp/osv/fields.py index fb973d2e521..7a8f460f5cb 100644 --- a/openerp/osv/fields.py +++ b/openerp/osv/fields.py @@ -32,7 +32,9 @@ * size """ +import base64 import datetime as DT +import re import string import sys import warnings @@ -727,34 +729,41 @@ def get_nice_size(value): size = len(value) return tools.human_size(size) +# See http://www.w3.org/TR/2000/REC-xml-20001006#NT-Char +# and http://bugs.python.org/issue10066 +invalid_xml_low_bytes = re.compile(r'[\x00-\x08\x0b-\x0c\x0e-\x1f]') + def sanitize_binary_value(value): # binary fields should be 7-bit ASCII base64-encoded data, # but we do additional sanity checks to make sure the values - # are not something else that won't pass via xmlrpc + # are not something else that won't pass via XML-RPC if isinstance(value, (xmlrpclib.Binary, tuple, list, dict)): # these builtin types are meant to pass untouched return value - # For all other cases, handle the value as a binary string: - # it could be a 7-bit ASCII string (e.g base64 data), but also - # any 8-bit content from files, with byte values that cannot - # be passed inside XML! - # See for more info: + # Handle invalid bytes values that will cause problems + # for XML-RPC. See for more info: # - http://bugs.python.org/issue10066 # - http://www.w3.org/TR/2000/REC-xml-20001006#NT-Char - # - # One solution is to convert the byte-string to unicode, - # so it gets serialized as utf-8 encoded data (always valid XML) - # If invalid XML byte values were present, tools.ustr() uses - # the Latin-1 codec as fallback, which converts any 8-bit - # byte value, resulting in valid utf-8-encoded bytes - # in the end: - # >>> unicode('\xe1','latin1').encode('utf8') == '\xc3\xa1' - # Note: when this happens, decoding on the other endpoint - # is not likely to produce the expected output, but this is - # just a safety mechanism (in these cases base64 data or - # xmlrpc.Binary values should be used instead) - return tools.ustr(value) + + # Coercing to unicode would normally allow it to properly pass via + # XML-RPC, transparently encoded as UTF-8 by xmlrpclib. + # (this works for _any_ byte values, thanks to the fallback + # to latin-1 passthrough encoding when decoding to unicode) + value = tools.ustr(value) + + # Due to Python bug #10066 this could still yield invalid XML + # bytes, specifically in the low byte range, that will crash + # the decoding side: [\x00-\x08\x0b-\x0c\x0e-\x1f] + # So check for low bytes values, and if any, perform + # base64 encoding - not very smart or useful, but this is + # our last resort to avoid crashing the request. + if invalid_xml_low_bytes.search(value): + # b64-encode after restoring the pure bytes with latin-1 + # passthrough encoding + value = base64.b64encode(value.encode('latin-1')) + + return value # ---------------------------------------------------------