summary refs log tree commit diff stats
path: root/webapp/django/utils/encoding.py
diff options
context:
space:
mode:
Diffstat (limited to 'webapp/django/utils/encoding.py')
-rw-r--r-- webapp/django/utils/encoding.py 100
1 files changed, 100 insertions, 0 deletions
diff --git a/webapp/django/utils/encoding.py b/webapp/django/utils/encoding.py
new file mode 100644
index 0000000000..c54e67610f
--- /dev/null
+++ b/webapp/django/utils/encoding.py
@@ -0,0 +1,100 @@
+import types
+import urllib
+import datetime
+from django.utils.functional import Promise
+
class DjangoUnicodeDecodeError(UnicodeDecodeError):
    """
    A UnicodeDecodeError that also remembers the object whose conversion
    failed, so that the error message can show what was passed in.
    """
    def __init__(self, obj, *args):
        # 'obj' is the offending input. The remaining positional arguments
        # are the standard UnicodeDecodeError ones and are handed to the
        # base class unchanged.
        self.obj = obj
        UnicodeDecodeError.__init__(self, *args)

    def __reduce__(self):
        # The base class initialiser stores only its own arguments in
        # self.args, so the default reduction would call the constructor
        # without 'obj' and fail. Put 'obj' back in front so that copying
        # and pickling round-trip correctly.
        return (self.__class__, (self.obj,) + tuple(self.args), self.__dict__)

    def __str__(self):
        # Standard decode-error text, followed by the repr and type of the
        # object that could not be decoded.
        original = UnicodeDecodeError.__str__(self)
        return '%s. You passed in %r (%s)' % (original, self.obj,
                type(self.obj))
+
class StrAndUnicode(object):
    """
    Mix-in that derives __str__ from __unicode__.

    The subclass supplies __unicode__; __str__ then returns that text
    encoded as a UTF-8 bytestring.
    """
    def __str__(self):
        text = self.__unicode__()
        return text.encode('utf-8')
+
def smart_unicode(s, encoding='utf-8', strings_only=False, errors='strict'):
    """
    Convert 's' to a unicode object, decoding bytestrings with the
    'encoding' codec.

    Promise instances (the result of a gettext_lazy() call) are handed back
    untouched; everything else is delegated to force_unicode().

    If strings_only is True, don't convert (some) non-string-like objects.
    """
    if not isinstance(s, Promise):
        return force_unicode(s, encoding, strings_only, errors)
    # Lazy translation object: keep it lazy.
    return s
+
+def force_unicode(s, encoding='utf-8', strings_only=False, errors='strict'):
+ """
+ Similar to smart_unicode, except that lazy instances are resolved to
+ strings, rather than kept as lazy objects.
+
+ If strings_only is True, don't convert (some) non-string-like objects.
+ """
+ if strings_only and isinstance(s, (types.NoneType, int, long, datetime.datetime, datetime.date, datetime.time, float)):
+ return s
+ try:
+ if not isinstance(s, basestring,):
+ if hasattr(s, '__unicode__'):
+ s = unicode(s)
+ else:
+ s = unicode(str(s), encoding, errors)
+ elif not isinstance(s, unicode):
+ # Note: We use .decode() here, instead of unicode(s, encoding,
+ # errors), so that if s is a SafeString, it ends up being a
+ # SafeUnicode at the end.
+ s = s.decode(encoding, errors)
+ except UnicodeDecodeError, e:
+ raise DjangoUnicodeDecodeError(s, *e.args)
+ return s
+
def smart_str(s, encoding='utf-8', strings_only=False, errors='strict'):
    """
    Produce a bytestring for 's', encoded with the 'encoding' codec.

    Bytestrings passed in are taken to be UTF-8 and are only re-encoded
    when a different target encoding is requested.

    If strings_only is True, don't convert (some) non-string-like objects.
    """
    if strings_only and isinstance(s, (types.NoneType, int)):
        return s
    if isinstance(s, Promise):
        # Resolve the lazy object before encoding it.
        return unicode(s).encode(encoding, errors)
    if isinstance(s, unicode):
        return s.encode(encoding, errors)
    if isinstance(s, basestring):
        # Already a bytestring: transcode only if it is non-empty and the
        # requested encoding is not UTF-8.
        if s and encoding != 'utf-8':
            decoded = s.decode('utf-8', errors)
            return decoded.encode(encoding, errors)
        return s
    # Any other object: try the plain str() conversion first and fall back
    # to going through unicode if that cannot be encoded.
    try:
        return str(s)
    except UnicodeEncodeError:
        return unicode(s).encode(encoding, errors)
+
def iri_to_uri(iri):
    """
    Turn an Internationalized Resource Identifier (IRI) portion into a URI
    portion that can be placed in a URL.

    This follows the algorithm in section 3.1 of RFC 3987, simplified
    because the input is assumed to be UTF-8 or unicode already.

    None is passed through unchanged; otherwise the result is an ASCII
    string containing the encoded value.
    """
    if iri is not None:
        # The safe characters are the printable ASCII characters that are
        # not explicitly excluded by the list at the end of section 3.1 of
        # RFC 3987.
        utf8_bytes = smart_str(iri)
        iri = urllib.quote(utf8_bytes, safe='/#%[]=:;$&()+,!?*')
    return iri
+