[IMP] tools.ustr: allow specific error-handling flags a la `unicode`

Also cleaned up docstrings

bzr revid: odo@openerp.com-20120316144002-og07it5u7x2azhu4
This commit is contained in:
Olivier Dony 2012-03-16 15:40:02 +01:00
parent 4af38cc64a
commit 7f8965a00a
1 changed files with 20 additions and 15 deletions

View File

@ -121,17 +121,22 @@ def get_encodings(hint_encoding='utf-8'):
if prefenc: if prefenc:
yield prefenc yield prefenc
def ustr(value, hint_encoding='utf-8'): def ustr(value, hint_encoding='utf-8', errors='strict'):
"""This method is similar to the builtin `str` method, except """This method is similar to the builtin `unicode`, except
it will return unicode() string. that it may try multiple encodings to find one that works
for decoding `value`, and defaults to 'utf-8' first.
@param value: the value to convert :param: value: the value to convert
@param hint_encoding: an optional encoding that was detected :param: hint_encoding: an optional encoding that was detected
upstream and should be tried first to upstream and should be tried first to decode ``value``.
decode ``value``. :param str errors: optional `errors` flag to pass to the unicode
built-in to indicate how illegal character values should be
@rtype: unicode treated: 'strict', 'ignore' or 'replace'. Passing anything
@return: unicode string other than 'strict' means that the first encoding tried will
succeed, even if it's not the correct one to use, so be
careful!
:rtype: unicode
:raise: UnicodeError if value cannot be coerced to unicode
""" """
if isinstance(value, Exception): if isinstance(value, Exception):
return exception_to_unicode(value) return exception_to_unicode(value)
@ -141,25 +146,25 @@ def ustr(value, hint_encoding='utf-8'):
if not isinstance(value, basestring): if not isinstance(value, basestring):
try: try:
return unicode(value) return unicode(value, errors=errors)
except Exception: except Exception:
raise UnicodeError('unable to convert %r' % (value,)) raise UnicodeError('unable to convert %r' % (value,))
for ln in get_encodings(hint_encoding): for ln in get_encodings(hint_encoding):
try: try:
return unicode(value, ln) return unicode(value, ln, errors=errors)
except Exception: except Exception:
pass pass
raise UnicodeError('unable to convert %r' % (value,)) raise UnicodeError('unable to convert %r' % (value,))
def exception_to_unicode(e): def exception_to_unicode(e, errors='strict'):
if (sys.version_info[:2] < (2,6)) and hasattr(e, 'message'): if (sys.version_info[:2] < (2,6)) and hasattr(e, 'message'):
return ustr(e.message) return ustr(e.message)
if hasattr(e, 'args'): if hasattr(e, 'args'):
return "\n".join((ustr(a) for a in e.args)) return "\n".join((ustr(a, errors=errors) for a in e.args))
try: try:
return unicode(e) return unicode(e, errors=errors)
except Exception: except Exception:
return u"Unknown message" return u"Unknown message"