1 files changed, 41 insertions, 23 deletions
diff --git a/src/corelib/codecs/qtextcodec.cpp b/src/corelib/codecs/qtextcodec.cpp
index fedd39e104..466c575c3e 100644
--- a/src/corelib/codecs/qtextcodec.cpp
+++ b/src/corelib/codecs/qtextcodec.cpp
@@ -39,14 +39,14 @@
 ****************************************************************************/
 
 #include "qplatformdefs.h"
+
 #include "qtextcodec.h"
 #include "qtextcodec_p.h"
 
-#ifndef QT_NO_TEXTCODEC
-
 #include "qbytearraymatcher.h"
-#include "qlist.h"
+#include "qendian.h"
 #include "qfile.h"
+#include "qlist.h"
 #include "qstringlist.h"
 #include "qvarlengtharray.h"
 #if !defined(QT_BOOTSTRAPPED)
@@ -1164,41 +1164,50 @@ QTextCodec *QTextCodec::codecForHtml(const QByteArray &ba)
 
     Tries to detect the encoding of the provided snippet \a ba by
     using the BOM (Byte Order Mark) and returns a QTextCodec instance
-    that is capable of decoding the text to unicode. If the codec
-    cannot be detected from the content provided, \a defaultCodec is
-    returned.
+    that is capable of decoding the text to unicode. This function can
+    detect one of the following codecs:
+
+    \list
+      \li UTF-32 Little Endian
+      \li UTF-32 Big Endian
+      \li UTF-16 Little Endian
+      \li UTF-16 Big Endian
+      \li UTF-8
+    \endlist
+
+    If the codec cannot be detected from the content provided, \a defaultCodec
+    is returned.
 
     \sa codecForHtml()
 */
 QTextCodec *QTextCodec::codecForUtfText(const QByteArray &ba, QTextCodec *defaultCodec)
 {
     const int arraySize = ba.size();
+    const uchar *buf = reinterpret_cast<const uchar *>(ba.constData());
+    const uint bom = 0xfeff;
 
     if (arraySize > 3) {
-        if ((uchar)ba[0] == 0x00
-            && (uchar)ba[1] == 0x00
-            && (uchar)ba[2] == 0xFE
-            && (uchar)ba[3] == 0xFF)
+        uint uc = qFromUnaligned<uint>(buf);
+        if (uc == qToBigEndian(bom))
             return QTextCodec::codecForMib(1018); // utf-32 be
-        else if ((uchar)ba[0] == 0xFF
-                 && (uchar)ba[1] == 0xFE
-                 && (uchar)ba[2] == 0x00
-                 && (uchar)ba[3] == 0x00)
+        else if (uc == qToLittleEndian(bom))
             return QTextCodec::codecForMib(1019); // utf-32 le
     }
 
     if (arraySize < 2)
         return defaultCodec;
-    if ((uchar)ba[0] == 0xfe && (uchar)ba[1] == 0xff)
+
+    ushort uc = qFromUnaligned<ushort>(buf);
+    if (uc == qToBigEndian(ushort(bom)))
         return QTextCodec::codecForMib(1013); // utf16 be
-    else if ((uchar)ba[0] == 0xff && (uchar)ba[1] == 0xfe)
+    else if (uc == qToLittleEndian(ushort(bom)))
         return QTextCodec::codecForMib(1014); // utf16 le
 
     if (arraySize < 3)
         return defaultCodec;
-    if ((uchar)ba[0] == 0xef
-        && (uchar)ba[1] == 0xbb
-        && (uchar)ba[2] == 0xbf)
+
+    static const char utf8bom[] = "\xef\xbb\xbf";
+    if (memcmp(buf, utf8bom, sizeof(utf8bom) - 1) == 0)
         return QTextCodec::codecForMib(106); // utf-8
 
     return defaultCodec;
@@ -1209,8 +1218,19 @@ QTextCodec *QTextCodec::codecForUtfText(const QByteArray &ba, QTextCodec *defaul
 
     Tries to detect the encoding of the provided snippet \a ba by
     using the BOM (Byte Order Mark) and returns a QTextCodec instance
-    that is capable of decoding the text to unicode. If the codec
-    cannot be detected, this overload returns a Latin-1 QTextCodec.
+    that is capable of decoding the text to unicode. This function can
+    detect one of the following codecs:
+
+    \list
+      \li UTF-32 Little Endian
+      \li UTF-32 Big Endian
+      \li UTF-16 Little Endian
+      \li UTF-16 Big Endian
+      \li UTF-8
+    \endlist
+
+    If the codec cannot be detected from the content provided, this overload
+    returns a Latin-1 QTextCodec.
 
     \sa codecForHtml()
 */
@@ -1253,5 +1273,3 @@ bool QTextDecoder::needsMoreData() const
 }
 
 QT_END_NAMESPACE
-
-#endif // QT_NO_TEXTCODEC