[IMP] tools.ustr: allow specific error-handling flags a la `unicode`
Also cleaned up docstrings.
bzr revid: odo@openerp.com-20120316144002-og07it5u7x2azhu4
This commit is contained in:
parent
4af38cc64a
commit
7f8965a00a
|
@ -121,17 +121,22 @@ def get_encodings(hint_encoding='utf-8'):
|
||||||
if prefenc:
|
if prefenc:
|
||||||
yield prefenc
|
yield prefenc
|
||||||
|
|
||||||
def ustr(value, hint_encoding='utf-8'):
|
def ustr(value, hint_encoding='utf-8', errors='strict'):
|
||||||
"""This method is similar to the builtin `str` method, except
|
"""This method is similar to the builtin `unicode`, except
|
||||||
it will return unicode() string.
|
that it may try multiple encodings to find one that works
|
||||||
|
for decoding `value`, and defaults to 'utf-8' first.
|
||||||
|
|
||||||
@param value: the value to convert
|
:param: value: the value to convert
|
||||||
@param hint_encoding: an optional encoding that was detected
|
:param: hint_encoding: an optional encoding that was detected
|
||||||
upstream and should be tried first to
|
upstream and should be tried first to decode ``value``.
|
||||||
decode ``value``.
|
:param str errors: optional `errors` flag to pass to the unicode
|
||||||
|
built-in to indicate how illegal character values should be
|
||||||
@rtype: unicode
|
treated: 'strict', 'ignore' or 'replace'. Passing anything
|
||||||
@return: unicode string
|
other than 'strict' means that the first encoding tried will
|
||||||
|
succeed, even if it's not the correct one to use, so be
|
||||||
|
careful!
|
||||||
|
:rtype: unicode
|
||||||
|
:raise: UnicodeError if value cannot be coerced to unicode
|
||||||
"""
|
"""
|
||||||
if isinstance(value, Exception):
|
if isinstance(value, Exception):
|
||||||
return exception_to_unicode(value)
|
return exception_to_unicode(value)
|
||||||
|
@ -141,25 +146,25 @@ def ustr(value, hint_encoding='utf-8'):
|
||||||
|
|
||||||
if not isinstance(value, basestring):
|
if not isinstance(value, basestring):
|
||||||
try:
|
try:
|
||||||
return unicode(value)
|
return unicode(value, errors=errors)
|
||||||
except Exception:
|
except Exception:
|
||||||
raise UnicodeError('unable to convert %r' % (value,))
|
raise UnicodeError('unable to convert %r' % (value,))
|
||||||
|
|
||||||
for ln in get_encodings(hint_encoding):
|
for ln in get_encodings(hint_encoding):
|
||||||
try:
|
try:
|
||||||
return unicode(value, ln)
|
return unicode(value, ln, errors=errors)
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
raise UnicodeError('unable to convert %r' % (value,))
|
raise UnicodeError('unable to convert %r' % (value,))
|
||||||
|
|
||||||
|
|
||||||
def exception_to_unicode(e):
|
def exception_to_unicode(e, errors='strict'):
|
||||||
if (sys.version_info[:2] < (2,6)) and hasattr(e, 'message'):
|
if (sys.version_info[:2] < (2,6)) and hasattr(e, 'message'):
|
||||||
return ustr(e.message)
|
return ustr(e.message)
|
||||||
if hasattr(e, 'args'):
|
if hasattr(e, 'args'):
|
||||||
return "\n".join((ustr(a) for a in e.args))
|
return "\n".join((ustr(a, errors=errors) for a in e.args))
|
||||||
try:
|
try:
|
||||||
return unicode(e)
|
return unicode(e, errors=errors)
|
||||||
except Exception:
|
except Exception:
|
||||||
return u"Unknown message"
|
return u"Unknown message"
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue