summaryrefslogtreecommitdiffstats
path: root/src/corelib/codecs/qtextcodec.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/corelib/codecs/qtextcodec.cpp')
-rw-r--r--src/corelib/codecs/qtextcodec.cpp98
1 files changed, 58 insertions, 40 deletions
diff --git a/src/corelib/codecs/qtextcodec.cpp b/src/corelib/codecs/qtextcodec.cpp
index adb84a1856..602b97e32e 100644
--- a/src/corelib/codecs/qtextcodec.cpp
+++ b/src/corelib/codecs/qtextcodec.cpp
@@ -39,14 +39,14 @@
****************************************************************************/
#include "qplatformdefs.h"
+
#include "qtextcodec.h"
#include "qtextcodec_p.h"
-#ifndef QT_NO_TEXTCODEC
-
#include "qbytearraymatcher.h"
-#include "qlist.h"
+#include "qendian.h"
#include "qfile.h"
+#include "qlist.h"
#include "qstringlist.h"
#include "qvarlengtharray.h"
#if !defined(QT_BOOTSTRAPPED)
@@ -58,8 +58,10 @@
#include "qlatincodec_p.h"
#if !defined(QT_BOOTSTRAPPED)
+#if QT_CONFIG(codecs)
# include "qtsciicodec_p.h"
# include "qisciicodec_p.h"
+#endif
#if QT_CONFIG(icu)
#include "qicucodec_p.h"
#else
@@ -70,7 +72,7 @@
# include "qwindowscodec_p.h"
#endif
# include "qsimplecodec_p.h"
-#if !defined(QT_NO_BIG_CODECS)
+#if QT_CONFIG(big_codecs)
# ifndef Q_OS_INTEGRITY
# include "qgb18030codec_p.h"
# include "qeucjpcodec_p.h"
@@ -79,7 +81,7 @@
# include "qeuckrcodec_p.h"
# include "qbig5codec_p.h"
# endif // !Q_OS_INTEGRITY
-#endif // !QT_NO_BIG_CODECS
+#endif // big_codecs
#endif // icu
#endif // QT_BOOTSTRAPPED
@@ -269,14 +271,14 @@ static void setup()
return;
initialized = true;
-#if !defined(QT_NO_CODECS) && !defined(QT_BOOTSTRAPPED)
+#if QT_CONFIG(codecs) && !defined(QT_BOOTSTRAPPED)
(void)new QTsciiCodec;
for (int i = 0; i < 9; ++i)
(void)new QIsciiCodec(i);
for (int i = 0; i < QSimpleTextCodec::numSimpleCodecs; ++i)
(void)new QSimpleTextCodec(i);
-# if !defined(QT_NO_BIG_CODECS) && !defined(Q_OS_INTEGRITY)
+# if QT_CONFIG(big_codecs) && !defined(Q_OS_INTEGRITY)
(void)new QGb18030Codec;
(void)new QGbkCodec;
(void)new QGb2312Codec;
@@ -287,14 +289,14 @@ static void setup()
(void)new QCP949Codec;
(void)new QBig5Codec;
(void)new QBig5hkscsCodec;
-# endif // !QT_NO_BIG_CODECS && !Q_OS_INTEGRITY
+# endif // big_codecs && !Q_OS_INTEGRITY
#if QT_CONFIG(iconv)
(void) new QIconvCodec;
#endif
#if defined(Q_OS_WIN32)
(void) new QWindowsLocalCodec;
#endif // Q_OS_WIN32
-#endif // !QT_NO_CODECS && !QT_BOOTSTRAPPED
+#endif // codecs && !QT_BOOTSTRAPPED
(void)new QUtf16Codec;
(void)new QUtf16BECodec;
@@ -503,9 +505,9 @@ QTextCodec::~QTextCodec()
globalData->allCodecs.removeOne(this);
- auto it = globalData->codecCache.cbegin();
+ auto it = globalData->codecCache.begin();
- while (it != globalData->codecCache.cend()) {
+ while (it != globalData->codecCache.end()) {
if (it.value() == this)
it = globalData->codecCache.erase(it);
else
@@ -693,11 +695,9 @@ void QTextCodec::setCodecForLocale(QTextCodec *c)
\threadsafe
Returns a pointer to the codec most suitable for this locale.
- On Windows, the codec will be based on a system locale. On Unix
- systems, the codec will might fall back to using the \e iconv
- library if no builtin codec for the locale can be found.
-
- Note that in these cases the codec's name will be "System".
+ The codec will be retrieved from ICU where that backend is in use, otherwise
+ it may be obtained from an OS-specific API. In the latter case, the codec's
+ name may be "System".
*/
QTextCodec* QTextCodec::codecForLocale()
@@ -764,7 +764,7 @@ QList<QByteArray> QTextCodec::aliases() const
encoding of the subclass to Unicode, and returns the result in a
QString.
- \a state can be 0, in which case the conversion is stateless and
+ \a state can be \nullptr, in which case the conversion is stateless and
default conversion rules should be used. If state is not 0, the
codec should save the state after the conversion in \a state, and
adjust the \c remainingChars and \c invalidChars members of the struct.
@@ -780,7 +780,7 @@ QList<QByteArray> QTextCodec::aliases() const
from Unicode to the encoding of the subclass, and returns the result
in a QByteArray.
- \a state can be 0 in which case the conversion is stateless and
+ \a state can be \nullptr in which case the conversion is stateless and
default conversion rules should be used. If state is not 0, the
codec should save the state after the conversion in \a state, and
adjust the \c remainingChars and \c invalidChars members of the struct.
@@ -1061,7 +1061,7 @@ QString QTextDecoder::toUnicode(const char *chars, int len)
}
// in qstring.cpp:
-void qt_from_latin1(ushort *dst, const char *str, size_t size) Q_DECL_NOTHROW;
+void qt_from_latin1(ushort *dst, const char *str, size_t size) noexcept;
/*! \overload
@@ -1162,41 +1162,50 @@ QTextCodec *QTextCodec::codecForHtml(const QByteArray &ba)
Tries to detect the encoding of the provided snippet \a ba by
using the BOM (Byte Order Mark) and returns a QTextCodec instance
- that is capable of decoding the text to unicode. If the codec
- cannot be detected from the content provided, \a defaultCodec is
- returned.
+ that is capable of decoding the text to unicode. This function can
+ detect one of the following codecs:
+
+ \list
+ \li UTF-32 Little Endian
+ \li UTF-32 Big Endian
+ \li UTF-16 Little Endian
+ \li UTF-16 Big Endian
+ \li UTF-8
+ \endlist
+
+ If the codec cannot be detected from the content provided, \a defaultCodec
+ is returned.
\sa codecForHtml()
*/
QTextCodec *QTextCodec::codecForUtfText(const QByteArray &ba, QTextCodec *defaultCodec)
{
const int arraySize = ba.size();
+ const uchar *buf = reinterpret_cast<const uchar *>(ba.constData());
+ const uint bom = 0xfeff;
if (arraySize > 3) {
- if ((uchar)ba[0] == 0x00
- && (uchar)ba[1] == 0x00
- && (uchar)ba[2] == 0xFE
- && (uchar)ba[3] == 0xFF)
+ uint uc = qFromUnaligned<uint>(buf);
+ if (uc == qToBigEndian(bom))
return QTextCodec::codecForMib(1018); // utf-32 be
- else if ((uchar)ba[0] == 0xFF
- && (uchar)ba[1] == 0xFE
- && (uchar)ba[2] == 0x00
- && (uchar)ba[3] == 0x00)
+ else if (uc == qToLittleEndian(bom))
return QTextCodec::codecForMib(1019); // utf-32 le
}
if (arraySize < 2)
return defaultCodec;
- if ((uchar)ba[0] == 0xfe && (uchar)ba[1] == 0xff)
+
+ ushort uc = qFromUnaligned<ushort>(buf);
+ if (uc == qToBigEndian(ushort(bom)))
return QTextCodec::codecForMib(1013); // utf16 be
- else if ((uchar)ba[0] == 0xff && (uchar)ba[1] == 0xfe)
+ else if (uc == qToLittleEndian(ushort(bom)))
return QTextCodec::codecForMib(1014); // utf16 le
if (arraySize < 3)
return defaultCodec;
- if ((uchar)ba[0] == 0xef
- && (uchar)ba[1] == 0xbb
- && (uchar)ba[2] == 0xbf)
+
+ static const char utf8bom[] = "\xef\xbb\xbf";
+ if (memcmp(buf, utf8bom, sizeof(utf8bom) - 1) == 0)
return QTextCodec::codecForMib(106); // utf-8
return defaultCodec;
@@ -1207,8 +1216,19 @@ QTextCodec *QTextCodec::codecForUtfText(const QByteArray &ba, QTextCodec *defaul
Tries to detect the encoding of the provided snippet \a ba by
using the BOM (Byte Order Mark) and returns a QTextCodec instance
- that is capable of decoding the text to unicode. If the codec
- cannot be detected, this overload returns a Latin-1 QTextCodec.
+ that is capable of decoding the text to unicode. This function can
+ detect one of the following codecs:
+
+ \list
+ \li UTF-32 Little Endian
+ \li UTF-32 Big Endian
+ \li UTF-16 Little Endian
+ \li UTF-16 Big Endian
+ \li UTF-8
+ \endlist
+
+ If the codec cannot be detected from the content provided, this overload
+ returns a Latin-1 QTextCodec.
\sa codecForHtml()
*/
@@ -1222,7 +1242,7 @@ QTextCodec *QTextCodec::codecForUtfText(const QByteArray &ba)
\obsolete
Returns the codec used by QObject::tr() on its argument. If this
- function returns 0 (the default), tr() assumes Latin-1.
+ function returns \nullptr (the default), tr() assumes Latin-1.
*/
/*!
@@ -1251,5 +1271,3 @@ bool QTextDecoder::needsMoreData() const
}
QT_END_NAMESPACE
-
-#endif // QT_NO_TEXTCODEC