summary | refs | log | tree | commit | diff | stats
path: root/src/corelib/codecs
diff options
context:
space:
mode:
Diffstat (limited to 'src/corelib/codecs')
-rw-r--r-- src/corelib/codecs/qeucjpcodec_p.h 10
-rw-r--r-- src/corelib/codecs/qeuckrcodec_p.h 20
-rw-r--r-- src/corelib/codecs/qgb18030codec_p.h 28
-rw-r--r-- src/corelib/codecs/qiconvcodec_p.h 8
-rw-r--r-- src/corelib/codecs/qjiscodec_p.h 10
-rw-r--r-- src/corelib/codecs/qjpunicode.cpp 54
-rw-r--r-- src/corelib/codecs/qsjiscodec_p.h 10
-rw-r--r-- src/corelib/codecs/qt_attribution.json 7
-rw-r--r-- src/corelib/codecs/qtextcodec.cpp 93
-rw-r--r-- src/corelib/codecs/qtextcodec.h 3
-rw-r--r-- src/corelib/codecs/qtextcodec_p.h 2
-rw-r--r-- src/corelib/codecs/qutfcodec.cpp 108
-rw-r--r-- src/corelib/codecs/qutfcodec_p.h 4
-rw-r--r-- src/corelib/codecs/qwindowscodec_p.h 8
14 files changed, 238 insertions, 127 deletions
diff --git a/src/corelib/codecs/qeucjpcodec_p.h b/src/corelib/codecs/qeucjpcodec_p.h
index 535e830ad0..d79e2435e3 100644
--- a/src/corelib/codecs/qeucjpcodec_p.h
+++ b/src/corelib/codecs/qeucjpcodec_p.h
@@ -94,12 +94,12 @@ public:
static QList<QByteArray> _aliases() { return QList<QByteArray>(); }
static int _mibEnum();
- QByteArray name() const { return _name(); }
- QList<QByteArray> aliases() const { return _aliases(); }
- int mibEnum() const { return _mibEnum(); }
+ QByteArray name() const override { return _name(); }
+ QList<QByteArray> aliases() const override { return _aliases(); }
+ int mibEnum() const override { return _mibEnum(); }
- QString convertToUnicode(const char *, int, ConverterState *) const;
- QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const;
+ QString convertToUnicode(const char *, int, ConverterState *) const override;
+ QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const override;
QEucJpCodec();
~QEucJpCodec();
diff --git a/src/corelib/codecs/qeuckrcodec_p.h b/src/corelib/codecs/qeuckrcodec_p.h
index 9e85ca8c20..6180548aab 100644
--- a/src/corelib/codecs/qeuckrcodec_p.h
+++ b/src/corelib/codecs/qeuckrcodec_p.h
@@ -90,12 +90,12 @@ public:
static QList<QByteArray> _aliases() { return QList<QByteArray>(); }
static int _mibEnum();
- QByteArray name() const { return _name(); }
- QList<QByteArray> aliases() const { return _aliases(); }
- int mibEnum() const { return _mibEnum(); }
+ QByteArray name() const override { return _name(); }
+ QList<QByteArray> aliases() const override { return _aliases(); }
+ int mibEnum() const override { return _mibEnum(); }
- QString convertToUnicode(const char *, int, ConverterState *) const;
- QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const;
+ QString convertToUnicode(const char *, int, ConverterState *) const override;
+ QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const override;
};
class QCP949Codec : public QTextCodec {
@@ -104,12 +104,12 @@ public:
static QList<QByteArray> _aliases();
static int _mibEnum();
- QByteArray name() const { return _name(); }
- QList<QByteArray> aliases() const { return _aliases(); }
- int mibEnum() const { return _mibEnum(); }
+ QByteArray name() const override { return _name(); }
+ QList<QByteArray> aliases() const override { return _aliases(); }
+ int mibEnum() const override { return _mibEnum(); }
- QString convertToUnicode(const char *, int, ConverterState *) const;
- QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const;
+ QString convertToUnicode(const char *, int, ConverterState *) const override;
+ QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const override;
};
QT_END_NAMESPACE
diff --git a/src/corelib/codecs/qgb18030codec_p.h b/src/corelib/codecs/qgb18030codec_p.h
index 2fea21b5b3..c263309357 100644
--- a/src/corelib/codecs/qgb18030codec_p.h
+++ b/src/corelib/codecs/qgb18030codec_p.h
@@ -69,12 +69,12 @@ public:
static QList<QByteArray> _aliases() { return QList<QByteArray>(); }
static int _mibEnum() { return 114; }
- QByteArray name() const { return _name(); }
- QList<QByteArray> aliases() const { return _aliases(); }
- int mibEnum() const { return _mibEnum(); }
+ QByteArray name() const override { return _name(); }
+ QList<QByteArray> aliases() const override { return _aliases(); }
+ int mibEnum() const override { return _mibEnum(); }
- QString convertToUnicode(const char *, int, ConverterState *) const;
- QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const;
+ QString convertToUnicode(const char *, int, ConverterState *) const override;
+ QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const override;
};
class QGbkCodec : public QGb18030Codec {
@@ -85,12 +85,12 @@ public:
static QList<QByteArray> _aliases();
static int _mibEnum();
- QByteArray name() const { return _name(); }
- QList<QByteArray> aliases() const { return _aliases(); }
- int mibEnum() const { return _mibEnum(); }
+ QByteArray name() const override { return _name(); }
+ QList<QByteArray> aliases() const override { return _aliases(); }
+ int mibEnum() const override { return _mibEnum(); }
- QString convertToUnicode(const char *, int, ConverterState *) const;
- QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const;
+ QString convertToUnicode(const char *, int, ConverterState *) const override;
+ QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const override;
};
class QGb2312Codec : public QGb18030Codec {
@@ -101,11 +101,11 @@ public:
static QList<QByteArray> _aliases() { return QList<QByteArray>(); }
static int _mibEnum();
- QByteArray name() const { return _name(); }
- int mibEnum() const { return _mibEnum(); }
+ QByteArray name() const override { return _name(); }
+ int mibEnum() const override { return _mibEnum(); }
- QString convertToUnicode(const char *, int, ConverterState *) const;
- QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const;
+ QString convertToUnicode(const char *, int, ConverterState *) const override;
+ QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const override;
};
QT_END_NAMESPACE
diff --git a/src/corelib/codecs/qiconvcodec_p.h b/src/corelib/codecs/qiconvcodec_p.h
index 9b8500538b..7d192232d7 100644
--- a/src/corelib/codecs/qiconvcodec_p.h
+++ b/src/corelib/codecs/qiconvcodec_p.h
@@ -69,11 +69,11 @@ public:
QIconvCodec();
~QIconvCodec();
- QString convertToUnicode(const char *, int, ConverterState *) const;
- QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const;
+ QString convertToUnicode(const char *, int, ConverterState *) const override;
+ QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const override;
- QByteArray name() const;
- int mibEnum() const;
+ QByteArray name() const override;
+ int mibEnum() const override;
void init() const;
iconv_t createIconv_t(const char *to, const char *from) const;
diff --git a/src/corelib/codecs/qjiscodec_p.h b/src/corelib/codecs/qjiscodec_p.h
index c3eda25bdf..41195af108 100644
--- a/src/corelib/codecs/qjiscodec_p.h
+++ b/src/corelib/codecs/qjiscodec_p.h
@@ -94,12 +94,12 @@ public:
static QList<QByteArray> _aliases();
static int _mibEnum();
- QByteArray name() const { return _name(); }
- QList<QByteArray> aliases() const { return _aliases(); }
- int mibEnum() const { return _mibEnum(); }
+ QByteArray name() const override { return _name(); }
+ QList<QByteArray> aliases() const override { return _aliases(); }
+ int mibEnum() const override { return _mibEnum(); }
- QString convertToUnicode(const char *, int, ConverterState *) const;
- QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const;
+ QString convertToUnicode(const char *, int, ConverterState *) const override;
+ QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const override;
QJisCodec();
~QJisCodec();
diff --git a/src/corelib/codecs/qjpunicode.cpp b/src/corelib/codecs/qjpunicode.cpp
index a3f3448bb6..61f6eac93a 100644
--- a/src/corelib/codecs/qjpunicode.cpp
+++ b/src/corelib/codecs/qjpunicode.cpp
@@ -347,15 +347,15 @@ public:
// uint Jisx0201ToUnicode(uint h, uint l) const;
// uint Jisx0201LatinToUnicode(uint h, uint l) const;
// uint Jisx0201KanaToUnicode(uint h, uint l) const;
- uint jisx0208ToUnicode(uint h, uint l) const;
- uint jisx0212ToUnicode(uint h, uint l) const;
+ uint jisx0208ToUnicode(uint h, uint l) const override;
+ uint jisx0212ToUnicode(uint h, uint l) const override;
// uint UnicodeToAscii(uint h, uint l) const;
// uint UnicodeToJisx0201(uint h, uint l) const;
// uint UnicodeToJisx0201Latin(uint h, uint l) const;
// uint UnicodeToJisx0201Kana(uint h, uint l) const;
- uint unicodeToJisx0208(uint h, uint l) const;
- uint unicodeToJisx0212(uint h, uint l) const;
+ uint unicodeToJisx0208(uint h, uint l) const override;
+ uint unicodeToJisx0212(uint h, uint l) const override;
};
uint QJpUnicodeConv_Unicode_ASCII::jisx0208ToUnicode(uint h, uint l) const
@@ -404,18 +404,18 @@ class QJpUnicodeConv_JISX0221_JISX0201 : public QJpUnicodeConv {
public:
QJpUnicodeConv_JISX0221_JISX0201(int r) : QJpUnicodeConv(r) {}
- uint asciiToUnicode(uint h, uint l) const;
+ uint asciiToUnicode(uint h, uint l) const override;
// uint Jisx0201ToUnicode(uint h, uint l) const;
// uint Jisx0201LatinToUnicode(uint h, uint l) const;
// uint Jisx0201KanaToUnicode(uint h, uint l) const;
- uint jisx0208ToUnicode(uint h, uint l) const;
+ uint jisx0208ToUnicode(uint h, uint l) const override;
// uint Jisx0212ToUnicode(uint h, uint l) const;
- uint unicodeToAscii(uint h, uint l) const;
+ uint unicodeToAscii(uint h, uint l) const override;
// uint UnicodeToJisx0201(uint h, uint l) const;
// uint UnicodeToJisx0201Latin(uint h, uint l) const;
// uint UnicodeToJisx0201Kana(uint h, uint l) const;
- uint unicodeToJisx0208(uint h, uint l) const;
+ uint unicodeToJisx0208(uint h, uint l) const override;
// uint UnicodeToJisx0212(uint h, uint l) const;
};
@@ -460,17 +460,17 @@ public:
// uint AsciiToUnicode(uint h, uint l) const;
// uint Jisx0201ToUnicode(uint h, uint l) const;
- uint jisx0201LatinToUnicode(uint h, uint l) const;
+ uint jisx0201LatinToUnicode(uint h, uint l) const override;
// uint Jisx0201KanaToUnicode(uint h, uint l) const;
- uint jisx0208ToUnicode(uint h, uint l) const;
- uint jisx0212ToUnicode(uint h, uint l) const;
+ uint jisx0208ToUnicode(uint h, uint l) const override;
+ uint jisx0212ToUnicode(uint h, uint l) const override;
// uint UnicodeToAscii(uint h, uint l) const;
// uint UnicodeToJisx0201(uint h, uint l) const;
- uint unicodeToJisx0201Latin(uint h, uint l) const;
+ uint unicodeToJisx0201Latin(uint h, uint l) const override;
// uint UnicodeToJisx0201Kana(uint h, uint l) const;
- uint unicodeToJisx0208(uint h, uint l) const;
- uint unicodeToJisx0212(uint h, uint l) const;
+ uint unicodeToJisx0208(uint h, uint l) const override;
+ uint unicodeToJisx0212(uint h, uint l) const override;
};
uint QJpUnicodeConv_JISX0221_ASCII::jisx0201LatinToUnicode(uint h, uint l) const
@@ -556,17 +556,17 @@ public:
// uint AsciiToUnicode(uint h, uint l) const;
// uint Jisx0201ToUnicode(uint h, uint l) const;
- uint jisx0201LatinToUnicode(uint h, uint l) const;
+ uint jisx0201LatinToUnicode(uint h, uint l) const override;
// uint Jisx0201KanaToUnicode(uint h, uint l) const;
- uint jisx0208ToUnicode(uint h, uint l) const;
- uint jisx0212ToUnicode(uint h, uint l) const;
+ uint jisx0208ToUnicode(uint h, uint l) const override;
+ uint jisx0212ToUnicode(uint h, uint l) const override;
- uint unicodeToAscii(uint h, uint l) const;
+ uint unicodeToAscii(uint h, uint l) const override;
// uint UnicodeToJisx0201(uint h, uint l) const;
- uint unicodeToJisx0201Latin(uint h, uint l) const;
+ uint unicodeToJisx0201Latin(uint h, uint l) const override;
// uint UnicodeToJisx0201Kana(uint h, uint l) const;
- uint unicodeToJisx0208(uint h, uint l) const;
- uint unicodeToJisx0212(uint h, uint l) const;
+ uint unicodeToJisx0208(uint h, uint l) const override;
+ uint unicodeToJisx0212(uint h, uint l) const override;
};
uint QJpUnicodeConv_Sun::jisx0201LatinToUnicode(uint h, uint l) const
@@ -645,17 +645,17 @@ public:
// uint AsciiToUnicode(uint h, uint l) const;
// uint Jisx0201ToUnicode(uint h, uint l) const;
- uint jisx0201LatinToUnicode(uint h, uint l) const;
+ uint jisx0201LatinToUnicode(uint h, uint l) const override;
// uint Jisx0201KanaToUnicode(uint h, uint l) const;
- uint jisx0208ToUnicode(uint h, uint l) const;
- uint jisx0212ToUnicode(uint h, uint l) const;
+ uint jisx0208ToUnicode(uint h, uint l) const override;
+ uint jisx0212ToUnicode(uint h, uint l) const override;
// uint UnicodeToAscii(uint h, uint l) const;
// uint UnicodeToJisx0201(uint h, uint l) const;
- uint unicodeToJisx0201Latin(uint h, uint l) const;
+ uint unicodeToJisx0201Latin(uint h, uint l) const override;
// uint UnicodeToJisx0201Kana(uint h, uint l) const;
- uint unicodeToJisx0208(uint h, uint l) const;
- uint unicodeToJisx0212(uint h, uint l) const;
+ uint unicodeToJisx0208(uint h, uint l) const override;
+ uint unicodeToJisx0212(uint h, uint l) const override;
};
uint QJpUnicodeConv_Microsoft::jisx0201LatinToUnicode(uint h, uint l) const
diff --git a/src/corelib/codecs/qsjiscodec_p.h b/src/corelib/codecs/qsjiscodec_p.h
index 9d7ea5d6d2..1e5cd44f26 100644
--- a/src/corelib/codecs/qsjiscodec_p.h
+++ b/src/corelib/codecs/qsjiscodec_p.h
@@ -94,12 +94,12 @@ public:
static QList<QByteArray> _aliases();
static int _mibEnum();
- QByteArray name() const { return _name(); }
- QList<QByteArray> aliases() const { return _aliases(); }
- int mibEnum() const { return _mibEnum(); }
+ QByteArray name() const override { return _name(); }
+ QList<QByteArray> aliases() const override { return _aliases(); }
+ int mibEnum() const override { return _mibEnum(); }
- QString convertToUnicode(const char *, int, ConverterState *) const;
- QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const;
+ QString convertToUnicode(const char *, int, ConverterState *) const override;
+ QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const override;
QSjisCodec();
~QSjisCodec();
diff --git a/src/corelib/codecs/qt_attribution.json b/src/corelib/codecs/qt_attribution.json
index 41f644a030..0815074675 100644
--- a/src/corelib/codecs/qt_attribution.json
+++ b/src/corelib/codecs/qt_attribution.json
@@ -6,6 +6,7 @@
"QtUsage": "Used in Qt Core if ICU is not used. Configure with -icu to avoid.",
"Path": "qbig5codec.cpp",
+ "Description": "Treat as final version; no upstream known",
"Description": "The Big5 codecs (QBig5Codec, QBig5hkscsCodec)
provide conversion to and from the Big5 encodings.",
"License": "BSD 2-clause \"Simplified\" License",
@@ -23,6 +24,7 @@ Copyright (C) 2001, 2002 Anthony Fok, ThizLinux Laboratory Ltd."
"QtUsage": "Used in Qt Core if ICU is not used. Configure with -icu to avoid.",
"Path": "qeucjpcodec.cpp",
+ "Description": "Treat as final version; no upstream known",
"Description": "The EUC-JP text codec provides conversion to and from EUC-JP,
the main legacy encoding for Unix machines in Japan.",
"License": "BSD 2-clause \"Simplified\" License",
@@ -37,6 +39,7 @@ the main legacy encoding for Unix machines in Japan.",
"QtUsage": "Used in Qt Core if ICU is not used. Configure with -icu to avoid.",
"Path": "qeuckrcodec.cpp",
+ "Description": "Treat as final version; no upstream known",
"Description": "The EUC-KR text codec provides conversion to and from EUC-KR, KR,
the main legacy encoding for Unix machines in Korea.",
"License": "BSD 2-clause \"Simplified\" License",
@@ -51,6 +54,7 @@ the main legacy encoding for Unix machines in Korea.",
"QtUsage": "Used in Qt Core if ICU is not used. Configure with -icu to avoid.",
"Path": "qjiscodec.cpp",
+ "Description": "Treat as final version; no upstream known",
"Description": "The ISO 2022-JP (JIS) text codec provides conversion to and from ISO 2022-JP.",
"License": "BSD 2-clause \"Simplified\" License",
"LicenseId": "BSD-2-Clause",
@@ -64,6 +68,7 @@ the main legacy encoding for Unix machines in Korea.",
"QtUsage": "Used in Qt Core if ICU is not used. Configure with -icu to avoid.",
"Path": "qsjiscodec.cpp",
+ "Description": "Treat as final version; no upstream known",
"Description": "The Shift-JIS text codec provides conversion to and from Shift-JIS.",
"License": "BSD 2-clause \"Simplified\" License",
"LicenseId": "BSD-2-Clause",
@@ -77,6 +82,7 @@ the main legacy encoding for Unix machines in Korea.",
"QtUsage": "Used in Qt Core.",
"Path": "qtsciicodec.cpp",
+ "Description": "Treat as final version; no upstream known",
"Description": "The TSCII text codec provides conversion to and from the Tamil TSCII
encoding.",
"License": "BSD 2-clause \"Simplified\" License",
@@ -91,6 +97,7 @@ encoding.",
"QtUsage": "Used in Qt Core if ICU is not used. Configure with -icu to avoid.",
"Path": "qgb18030codec.cpp",
+ "Description": "Treat as final version; no upstream known",
"Description": "The GBK codec provides conversion to and from the Chinese
GB18030/GBK/GB2312 encoding.",
"License": "BSD 2-clause \"Simplified\" License",
diff --git a/src/corelib/codecs/qtextcodec.cpp b/src/corelib/codecs/qtextcodec.cpp
index 1ec443cd73..466c575c3e 100644
--- a/src/corelib/codecs/qtextcodec.cpp
+++ b/src/corelib/codecs/qtextcodec.cpp
@@ -1,6 +1,7 @@
/****************************************************************************
**
-** Copyright (C) 2016 The Qt Company Ltd.
+** Copyright (C) 2018 The Qt Company Ltd.
+** Copyright (C) 2018 Intel Corporation.
** Contact: https://www.qt.io/licensing/
**
** This file is part of the QtCore module of the Qt Toolkit.
@@ -43,8 +44,9 @@
#include "qtextcodec_p.h"
#include "qbytearraymatcher.h"
-#include "qlist.h"
+#include "qendian.h"
#include "qfile.h"
+#include "qlist.h"
#include "qstringlist.h"
#include "qvarlengtharray.h"
#if !defined(QT_BOOTSTRAPPED)
@@ -493,6 +495,24 @@ QTextCodec::QTextCodec()
*/
QTextCodec::~QTextCodec()
{
+ QCoreGlobalData *globalData = QCoreGlobalData::instance();
+ if (!globalData)
+ return;
+
+ globalData->codecForLocale.testAndSetRelaxed(this, nullptr);
+
+ QMutexLocker locker(textCodecsMutex());
+
+ globalData->allCodecs.removeOne(this);
+
+ auto it = globalData->codecCache.cbegin();
+
+ while (it != globalData->codecCache.cend()) {
+ if (it.value() == this)
+ it = globalData->codecCache.erase(it);
+ else
+ ++it;
+ }
}
/*!
@@ -1144,41 +1164,50 @@ QTextCodec *QTextCodec::codecForHtml(const QByteArray &ba)
Tries to detect the encoding of the provided snippet \a ba by
using the BOM (Byte Order Mark) and returns a QTextCodec instance
- that is capable of decoding the text to unicode. If the codec
- cannot be detected from the content provided, \a defaultCodec is
- returned.
+ that is capable of decoding the text to unicode. This function can
+ detect one of the following codecs:
+
+ \list
+ \li UTF-32 Little Endian
+ \li UTF-32 Big Endian
+ \li UTF-16 Little Endian
+ \li UTF-16 Big Endian
+ \li UTF-8
+ \endlist
+
+ If the codec cannot be detected from the content provided, \a defaultCodec
+ is returned.
\sa codecForHtml()
*/
QTextCodec *QTextCodec::codecForUtfText(const QByteArray &ba, QTextCodec *defaultCodec)
{
const int arraySize = ba.size();
+ const uchar *buf = reinterpret_cast<const uchar *>(ba.constData());
+ const uint bom = 0xfeff;
if (arraySize > 3) {
- if ((uchar)ba[0] == 0x00
- && (uchar)ba[1] == 0x00
- && (uchar)ba[2] == 0xFE
- && (uchar)ba[3] == 0xFF)
+ uint uc = qFromUnaligned<uint>(buf);
+ if (uc == qToBigEndian(bom))
return QTextCodec::codecForMib(1018); // utf-32 be
- else if ((uchar)ba[0] == 0xFF
- && (uchar)ba[1] == 0xFE
- && (uchar)ba[2] == 0x00
- && (uchar)ba[3] == 0x00)
+ else if (uc == qToLittleEndian(bom))
return QTextCodec::codecForMib(1019); // utf-32 le
}
if (arraySize < 2)
return defaultCodec;
- if ((uchar)ba[0] == 0xfe && (uchar)ba[1] == 0xff)
+
+ ushort uc = qFromUnaligned<ushort>(buf);
+ if (uc == qToBigEndian(ushort(bom)))
return QTextCodec::codecForMib(1013); // utf16 be
- else if ((uchar)ba[0] == 0xff && (uchar)ba[1] == 0xfe)
+ else if (uc == qToLittleEndian(ushort(bom)))
return QTextCodec::codecForMib(1014); // utf16 le
if (arraySize < 3)
return defaultCodec;
- if ((uchar)ba[0] == 0xef
- && (uchar)ba[1] == 0xbb
- && (uchar)ba[2] == 0xbf)
+
+ static const char utf8bom[] = "\xef\xbb\xbf";
+ if (memcmp(buf, utf8bom, sizeof(utf8bom) - 1) == 0)
return QTextCodec::codecForMib(106); // utf-8
return defaultCodec;
@@ -1189,8 +1218,19 @@ QTextCodec *QTextCodec::codecForUtfText(const QByteArray &ba, QTextCodec *defaul
Tries to detect the encoding of the provided snippet \a ba by
using the BOM (Byte Order Mark) and returns a QTextCodec instance
- that is capable of decoding the text to unicode. If the codec
- cannot be detected, this overload returns a Latin-1 QTextCodec.
+ that is capable of decoding the text to unicode. This function can
+ detect one of the following codecs:
+
+ \list
+ \li UTF-32 Little Endian
+ \li UTF-32 Big Endian
+ \li UTF-16 Little Endian
+ \li UTF-16 Big Endian
+ \li UTF-8
+ \endlist
+
+ If the codec cannot be detected from the content provided, this overload
+ returns a Latin-1 QTextCodec.
\sa codecForHtml()
*/
@@ -1219,4 +1259,17 @@ bool QTextDecoder::hasFailure() const
return state.invalidChars != 0;
}
+/*!
+ \internal
+ \since 5.12
+
+ Determines whether the decoder needs more bytes to continue decoding. That
+ is, this signifies that the input string ended in the middle of a
+ multi-byte sequence. Note that it's possible some codecs do not report this.
+ */
+bool QTextDecoder::needsMoreData() const
+{
+ return state.remainingChars;
+}
+
QT_END_NAMESPACE
diff --git a/src/corelib/codecs/qtextcodec.h b/src/corelib/codecs/qtextcodec.h
index c0261b7aa2..3010a2714e 100644
--- a/src/corelib/codecs/qtextcodec.h
+++ b/src/corelib/codecs/qtextcodec.h
@@ -1,6 +1,6 @@
/****************************************************************************
**
-** Copyright (C) 2016 The Qt Company Ltd.
+** Copyright (C) 2018 The Qt Company Ltd.
** Contact: https://www.qt.io/licensing/
**
** This file is part of the QtCore module of the Qt Toolkit.
@@ -161,6 +161,7 @@ public:
QString toUnicode(const QByteArray &ba);
void toUnicode(QString *target, const char *chars, int len);
bool hasFailure() const;
+ bool needsMoreData() const;
private:
const QTextCodec *c;
QTextCodec::ConverterState state;
diff --git a/src/corelib/codecs/qtextcodec_p.h b/src/corelib/codecs/qtextcodec_p.h
index 6e19d1d30e..0e449d994c 100644
--- a/src/corelib/codecs/qtextcodec_p.h
+++ b/src/corelib/codecs/qtextcodec_p.h
@@ -60,7 +60,7 @@ QT_BEGIN_NAMESPACE
#include "qtextcodec.h"
-#if defined(Q_OS_MAC) || defined(Q_OS_ANDROID) || defined(Q_OS_QNX)
+#if defined(Q_OS_MAC) || defined(Q_OS_ANDROID) || defined(Q_OS_QNX) || defined(Q_OS_WASM)
#define QT_LOCALE_IS_UTF8
#endif
diff --git a/src/corelib/codecs/qutfcodec.cpp b/src/corelib/codecs/qutfcodec.cpp
index 0dffdd723e..643c8ee475 100644
--- a/src/corelib/codecs/qutfcodec.cpp
+++ b/src/corelib/codecs/qutfcodec.cpp
@@ -70,9 +70,14 @@ static inline bool simdEncodeAscii(uchar *&dst, const ushort *&nextAscii, const
{
// do sixteen characters at a time
for ( ; end - src >= 16; src += 16, dst += 16) {
+# ifdef __AVX2__
+ __m256i data = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(src));
+ __m128i data1 = _mm256_castsi256_si128(data);
+ __m128i data2 = _mm256_extracti128_si256(data, 1);
+# else
__m128i data1 = _mm_loadu_si128((const __m128i*)src);
__m128i data2 = _mm_loadu_si128(1+(const __m128i*)src);
-
+# endif
// check if everything is ASCII
// the highest ASCII value is U+007F
@@ -102,6 +107,26 @@ static inline bool simdEncodeAscii(uchar *&dst, const ushort *&nextAscii, const
return false;
}
}
+
+ if (end - src >= 8) {
+ // do eight characters at a time
+ __m128i data = _mm_loadu_si128(reinterpret_cast<const __m128i *>(src));
+ __m128i packed = _mm_packus_epi16(data, data);
+ __m128i nonAscii = _mm_cmpgt_epi8(packed, _mm_setzero_si128());
+
+ // store even non-ASCII
+ _mm_storel_epi64(reinterpret_cast<__m128i *>(dst), packed);
+
+ uchar n = ~_mm_movemask_epi8(nonAscii);
+ if (n) {
+ nextAscii = src + qBitScanReverse(n) + 1;
+ n = qCountTrailingZeroBits(n);
+ dst += n;
+ src += n;
+ return false;
+ }
+ }
+
return src == end;
}
@@ -150,11 +175,52 @@ static inline bool simdDecodeAscii(ushort *&dst, const uchar *&nextAscii, const
return false;
}
+
+ if (end - src >= 8) {
+ __m128i data = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(src));
+ uint n = _mm_movemask_epi8(data) & 0xff;
+ if (!n) {
+ // unpack and store
+ _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), _mm_unpacklo_epi8(data, _mm_setzero_si128()));
+ } else {
+ while (!(n & 1)) {
+ *dst++ = *src++;
+ n >>= 1;
+ }
+
+ n = qBitScanReverse(n);
+ nextAscii = src + n + 1;
+ return false;
+ }
+ }
+
return src == end;
}
static inline const uchar *simdFindNonAscii(const uchar *src, const uchar *end, const uchar *&nextAscii)
{
+#ifdef __AVX2__
+ // do 32 characters at a time
+ // (this is similar to simdTestMask in qstring.cpp)
+ const __m256i mask = _mm256_set1_epi8(0x80);
+ for ( ; end - src >= 32; src += 32) {
+ __m256i data = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(src));
+ if (_mm256_testz_si256(mask, data))
+ continue;
+
+ uint n = _mm256_movemask_epi8(data);
+ Q_ASSUME(n);
+
+ // find the next probable ASCII character
+ // we don't want to load 32 bytes again in this loop if we know there are non-ASCII
+ // characters still coming
+ nextAscii = src + qBitScanReverse(n) + 1;
+
+ // return the non-ASCII character
+ return src + qCountTrailingZeroBits(n);
+ }
+#endif
+
// do sixteen characters at a time
for ( ; end - src >= 16; src += 16) {
__m128i data = _mm_loadu_si128(reinterpret_cast<const __m128i*>(src));
@@ -695,26 +761,16 @@ QByteArray QUtf16::convertFromUnicode(const QChar *uc, int len, QTextCodec::Conv
char *data = d.data();
if (!state || !(state->flags & QTextCodec::IgnoreHeader)) {
QChar bom(QChar::ByteOrderMark);
- if (endian == BigEndianness) {
- data[0] = bom.row();
- data[1] = bom.cell();
- } else {
- data[0] = bom.cell();
- data[1] = bom.row();
- }
+ if (endian == BigEndianness)
+ qToBigEndian(bom.unicode(), data);
+ else
+ qToLittleEndian(bom.unicode(), data);
data += 2;
}
- if (endian == BigEndianness) {
- for (int i = 0; i < len; ++i) {
- *(data++) = uc[i].row();
- *(data++) = uc[i].cell();
- }
- } else {
- for (int i = 0; i < len; ++i) {
- *(data++) = uc[i].cell();
- *(data++) = uc[i].row();
- }
- }
+ if (endian == BigEndianness)
+ qToBigEndian<ushort>(uc, len, data);
+ else
+ qToLittleEndian<ushort>(uc, len, data);
if (state) {
state->remainingChars = 0;
@@ -830,20 +886,14 @@ QByteArray QUtf32::convertFromUnicode(const QChar *uc, int len, QTextCodec::Conv
if (endian == BigEndianness) {
while (i.hasNext()) {
uint cp = i.next();
-
- *(data++) = cp >> 24;
- *(data++) = (cp >> 16) & 0xff;
- *(data++) = (cp >> 8) & 0xff;
- *(data++) = cp & 0xff;
+ qToBigEndian(cp, data);
+ data += 4;
}
} else {
while (i.hasNext()) {
uint cp = i.next();
-
- *(data++) = cp & 0xff;
- *(data++) = (cp >> 8) & 0xff;
- *(data++) = (cp >> 16) & 0xff;
- *(data++) = cp >> 24;
+ qToLittleEndian(cp, data);
+ data += 4;
}
}
diff --git a/src/corelib/codecs/qutfcodec_p.h b/src/corelib/codecs/qutfcodec_p.h
index d7743753af..b24283ac5e 100644
--- a/src/corelib/codecs/qutfcodec_p.h
+++ b/src/corelib/codecs/qutfcodec_p.h
@@ -1,7 +1,7 @@
/****************************************************************************
**
-** Copyright (C) 2016 The Qt Company Ltd.
-** Copyright (C) 2016 Intel Corporation.
+** Copyright (C) 2018 The Qt Company Ltd.
+** Copyright (C) 2018 Intel Corporation.
** Contact: https://www.qt.io/licensing/
**
** This file is part of the QtCore module of the Qt Toolkit.
diff --git a/src/corelib/codecs/qwindowscodec_p.h b/src/corelib/codecs/qwindowscodec_p.h
index 1ca6d5567e..5bcab0ce66 100644
--- a/src/corelib/codecs/qwindowscodec_p.h
+++ b/src/corelib/codecs/qwindowscodec_p.h
@@ -63,12 +63,12 @@ public:
QWindowsLocalCodec();
~QWindowsLocalCodec();
- QString convertToUnicode(const char *, int, ConverterState *) const;
- QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const;
+ QString convertToUnicode(const char *, int, ConverterState *) const override;
+ QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const override;
QString convertToUnicodeCharByChar(const char *chars, int length, ConverterState *state) const;
- QByteArray name() const;
- int mibEnum() const;
+ QByteArray name() const override;
+ int mibEnum() const override;
};