diff options
Diffstat (limited to 'src/corelib/codecs')
-rw-r--r-- | src/corelib/codecs/qeucjpcodec_p.h | 10 | ||||
-rw-r--r-- | src/corelib/codecs/qeuckrcodec_p.h | 20 | ||||
-rw-r--r-- | src/corelib/codecs/qgb18030codec_p.h | 28 | ||||
-rw-r--r-- | src/corelib/codecs/qiconvcodec_p.h | 8 | ||||
-rw-r--r-- | src/corelib/codecs/qjiscodec_p.h | 10 | ||||
-rw-r--r-- | src/corelib/codecs/qjpunicode.cpp | 54 | ||||
-rw-r--r-- | src/corelib/codecs/qsjiscodec_p.h | 10 | ||||
-rw-r--r-- | src/corelib/codecs/qt_attribution.json | 7 | ||||
-rw-r--r-- | src/corelib/codecs/qtextcodec.cpp | 93 | ||||
-rw-r--r-- | src/corelib/codecs/qtextcodec.h | 3 | ||||
-rw-r--r-- | src/corelib/codecs/qtextcodec_p.h | 2 | ||||
-rw-r--r-- | src/corelib/codecs/qutfcodec.cpp | 108 | ||||
-rw-r--r-- | src/corelib/codecs/qutfcodec_p.h | 4 | ||||
-rw-r--r-- | src/corelib/codecs/qwindowscodec_p.h | 8 |
14 files changed, 238 insertions, 127 deletions
diff --git a/src/corelib/codecs/qeucjpcodec_p.h b/src/corelib/codecs/qeucjpcodec_p.h index 535e830ad0..d79e2435e3 100644 --- a/src/corelib/codecs/qeucjpcodec_p.h +++ b/src/corelib/codecs/qeucjpcodec_p.h @@ -94,12 +94,12 @@ public: static QList<QByteArray> _aliases() { return QList<QByteArray>(); } static int _mibEnum(); - QByteArray name() const { return _name(); } - QList<QByteArray> aliases() const { return _aliases(); } - int mibEnum() const { return _mibEnum(); } + QByteArray name() const override { return _name(); } + QList<QByteArray> aliases() const override { return _aliases(); } + int mibEnum() const override { return _mibEnum(); } - QString convertToUnicode(const char *, int, ConverterState *) const; - QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const; + QString convertToUnicode(const char *, int, ConverterState *) const override; + QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const override; QEucJpCodec(); ~QEucJpCodec(); diff --git a/src/corelib/codecs/qeuckrcodec_p.h b/src/corelib/codecs/qeuckrcodec_p.h index 9e85ca8c20..6180548aab 100644 --- a/src/corelib/codecs/qeuckrcodec_p.h +++ b/src/corelib/codecs/qeuckrcodec_p.h @@ -90,12 +90,12 @@ public: static QList<QByteArray> _aliases() { return QList<QByteArray>(); } static int _mibEnum(); - QByteArray name() const { return _name(); } - QList<QByteArray> aliases() const { return _aliases(); } - int mibEnum() const { return _mibEnum(); } + QByteArray name() const override { return _name(); } + QList<QByteArray> aliases() const override { return _aliases(); } + int mibEnum() const override { return _mibEnum(); } - QString convertToUnicode(const char *, int, ConverterState *) const; - QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const; + QString convertToUnicode(const char *, int, ConverterState *) const override; + QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const override; }; class QCP949Codec : public QTextCodec { @@ -104,12 +104,12 @@ public: static QList<QByteArray> _aliases(); static int _mibEnum(); - QByteArray name() const { return _name(); } - QList<QByteArray> aliases() const { return _aliases(); } - int mibEnum() const { return _mibEnum(); } + QByteArray name() const override { return _name(); } + QList<QByteArray> aliases() const override { return _aliases(); } + int mibEnum() const override { return _mibEnum(); } - QString convertToUnicode(const char *, int, ConverterState *) const; - QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const; + QString convertToUnicode(const char *, int, ConverterState *) const override; + QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const override; }; QT_END_NAMESPACE diff --git a/src/corelib/codecs/qgb18030codec_p.h b/src/corelib/codecs/qgb18030codec_p.h index 2fea21b5b3..c263309357 100644 --- a/src/corelib/codecs/qgb18030codec_p.h +++ b/src/corelib/codecs/qgb18030codec_p.h @@ -69,12 +69,12 @@ public: static QList<QByteArray> _aliases() { return QList<QByteArray>(); } static int _mibEnum() { return 114; } - QByteArray name() const { return _name(); } - QList<QByteArray> aliases() const { return _aliases(); } - int mibEnum() const { return _mibEnum(); } + QByteArray name() const override { return _name(); } + QList<QByteArray> aliases() const override { return _aliases(); } + int mibEnum() const override { return _mibEnum(); } - QString convertToUnicode(const char *, int, ConverterState *) const; - QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const; + QString convertToUnicode(const char *, int, ConverterState *) const override; + QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const override; }; class QGbkCodec : public QGb18030Codec { @@ -85,12 +85,12 @@ public: static QList<QByteArray> _aliases(); static int _mibEnum(); - QByteArray name() const { return _name(); } - QList<QByteArray> aliases() const { return _aliases(); } - int mibEnum() const { return _mibEnum(); } + QByteArray name() const override { return _name(); } + QList<QByteArray> aliases() const override { return _aliases(); } + int mibEnum() const override { return _mibEnum(); } - QString convertToUnicode(const char *, int, ConverterState *) const; - QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const; + QString convertToUnicode(const char *, int, ConverterState *) const override; + QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const override; }; class QGb2312Codec : public QGb18030Codec { @@ -101,11 +101,11 @@ public: static QList<QByteArray> _aliases() { return QList<QByteArray>(); } static int _mibEnum(); - QByteArray name() const { return _name(); } - int mibEnum() const { return _mibEnum(); } + QByteArray name() const override { return _name(); } + int mibEnum() const override { return _mibEnum(); } - QString convertToUnicode(const char *, int, ConverterState *) const; - QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const; + QString convertToUnicode(const char *, int, ConverterState *) const override; + QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const override; }; QT_END_NAMESPACE diff --git a/src/corelib/codecs/qiconvcodec_p.h b/src/corelib/codecs/qiconvcodec_p.h index 9b8500538b..7d192232d7 100644 --- a/src/corelib/codecs/qiconvcodec_p.h +++ b/src/corelib/codecs/qiconvcodec_p.h @@ -69,11 +69,11 @@ public: QIconvCodec(); ~QIconvCodec(); - QString convertToUnicode(const char *, int, ConverterState *) const; - QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const; + QString convertToUnicode(const char *, int, ConverterState *) const override; + QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const override; - QByteArray name() const; - int mibEnum() const; + QByteArray name() const override; + int mibEnum() const override; void init() const; iconv_t createIconv_t(const char *to, const char *from) const; diff --git a/src/corelib/codecs/qjiscodec_p.h b/src/corelib/codecs/qjiscodec_p.h index c3eda25bdf..41195af108 100644 --- a/src/corelib/codecs/qjiscodec_p.h +++ b/src/corelib/codecs/qjiscodec_p.h @@ -94,12 +94,12 @@ public: static QList<QByteArray> _aliases(); static int _mibEnum(); - QByteArray name() const { return _name(); } - QList<QByteArray> aliases() const { return _aliases(); } - int mibEnum() const { return _mibEnum(); } + QByteArray name() const override { return _name(); } + QList<QByteArray> aliases() const override { return _aliases(); } + int mibEnum() const override { return _mibEnum(); } - QString convertToUnicode(const char *, int, ConverterState *) const; - QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const; + QString convertToUnicode(const char *, int, ConverterState *) const override; + QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const override; QJisCodec(); ~QJisCodec(); diff --git a/src/corelib/codecs/qjpunicode.cpp b/src/corelib/codecs/qjpunicode.cpp index a3f3448bb6..61f6eac93a 100644 --- a/src/corelib/codecs/qjpunicode.cpp +++ b/src/corelib/codecs/qjpunicode.cpp @@ -347,15 +347,15 @@ public: // uint Jisx0201ToUnicode(uint h, uint l) const; // uint Jisx0201LatinToUnicode(uint h, uint l) const; // uint Jisx0201KanaToUnicode(uint h, uint l) const; - uint jisx0208ToUnicode(uint h, uint l) const; - uint jisx0212ToUnicode(uint h, uint l) const; + uint jisx0208ToUnicode(uint h, uint l) const override; + uint jisx0212ToUnicode(uint h, uint l) const override; // uint UnicodeToAscii(uint h, uint l) const; // uint UnicodeToJisx0201(uint h, uint l) const; // uint UnicodeToJisx0201Latin(uint h, uint l) const; // uint UnicodeToJisx0201Kana(uint h, uint l) const; - uint unicodeToJisx0208(uint h, uint l) const; - uint unicodeToJisx0212(uint h, uint l) const; + uint unicodeToJisx0208(uint h, uint l) const override; + uint unicodeToJisx0212(uint h, uint l) const override; }; uint QJpUnicodeConv_Unicode_ASCII::jisx0208ToUnicode(uint h, uint l) const @@ -404,18 +404,18 @@ class QJpUnicodeConv_JISX0221_JISX0201 : public QJpUnicodeConv { public: QJpUnicodeConv_JISX0221_JISX0201(int r) : QJpUnicodeConv(r) {} - uint asciiToUnicode(uint h, uint l) const; + uint asciiToUnicode(uint h, uint l) const override; // uint Jisx0201ToUnicode(uint h, uint l) const; // uint Jisx0201LatinToUnicode(uint h, uint l) const; // uint Jisx0201KanaToUnicode(uint h, uint l) const; - uint jisx0208ToUnicode(uint h, uint l) const; + uint jisx0208ToUnicode(uint h, uint l) const override; // uint Jisx0212ToUnicode(uint h, uint l) const; - uint unicodeToAscii(uint h, uint l) const; + uint unicodeToAscii(uint h, uint l) const override; // uint UnicodeToJisx0201(uint h, uint l) const; // uint UnicodeToJisx0201Latin(uint h, uint l) const; // uint UnicodeToJisx0201Kana(uint h, uint l) const; - uint unicodeToJisx0208(uint h, uint l) const; + uint unicodeToJisx0208(uint h, uint l) const override; // uint UnicodeToJisx0212(uint h, uint l) const; }; @@ -460,17 +460,17 @@ public: // uint AsciiToUnicode(uint h, uint l) const; // uint Jisx0201ToUnicode(uint h, uint l) const; - uint jisx0201LatinToUnicode(uint h, uint l) const; + uint jisx0201LatinToUnicode(uint h, uint l) const override; // uint Jisx0201KanaToUnicode(uint h, uint l) const; - uint jisx0208ToUnicode(uint h, uint l) const; - uint jisx0212ToUnicode(uint h, uint l) const; + uint jisx0208ToUnicode(uint h, uint l) const override; + uint jisx0212ToUnicode(uint h, uint l) const override; // uint UnicodeToAscii(uint h, uint l) const; // uint UnicodeToJisx0201(uint h, uint l) const; - uint unicodeToJisx0201Latin(uint h, uint l) const; + uint unicodeToJisx0201Latin(uint h, uint l) const override; // uint UnicodeToJisx0201Kana(uint h, uint l) const; - uint unicodeToJisx0208(uint h, uint l) const; - uint unicodeToJisx0212(uint h, uint l) const; + uint unicodeToJisx0208(uint h, uint l) const override; + uint unicodeToJisx0212(uint h, uint l) const override; }; uint QJpUnicodeConv_JISX0221_ASCII::jisx0201LatinToUnicode(uint h, uint l) const @@ -556,17 +556,17 @@ public: // uint AsciiToUnicode(uint h, uint l) const; // uint Jisx0201ToUnicode(uint h, uint l) const; - uint jisx0201LatinToUnicode(uint h, uint l) const; + uint jisx0201LatinToUnicode(uint h, uint l) const override; // uint Jisx0201KanaToUnicode(uint h, uint l) const; - uint jisx0208ToUnicode(uint h, uint l) const; - uint jisx0212ToUnicode(uint h, uint l) const; + uint jisx0208ToUnicode(uint h, uint l) const override; + uint jisx0212ToUnicode(uint h, uint l) const override; - uint unicodeToAscii(uint h, uint l) const; + uint unicodeToAscii(uint h, uint l) const override; // uint UnicodeToJisx0201(uint h, uint l) const; - uint unicodeToJisx0201Latin(uint h, uint l) const; + uint unicodeToJisx0201Latin(uint h, uint l) const override; // uint UnicodeToJisx0201Kana(uint h, uint l) const; - uint unicodeToJisx0208(uint h, uint l) const; - uint unicodeToJisx0212(uint h, uint l) const; + uint unicodeToJisx0208(uint h, uint l) const override; + uint unicodeToJisx0212(uint h, uint l) const override; }; uint QJpUnicodeConv_Sun::jisx0201LatinToUnicode(uint h, uint l) const @@ -645,17 +645,17 @@ public: // uint AsciiToUnicode(uint h, uint l) const; // uint Jisx0201ToUnicode(uint h, uint l) const; - uint jisx0201LatinToUnicode(uint h, uint l) const; + uint jisx0201LatinToUnicode(uint h, uint l) const override; // uint Jisx0201KanaToUnicode(uint h, uint l) const; - uint jisx0208ToUnicode(uint h, uint l) const; - uint jisx0212ToUnicode(uint h, uint l) const; + uint jisx0208ToUnicode(uint h, uint l) const override; + uint jisx0212ToUnicode(uint h, uint l) const override; // uint UnicodeToAscii(uint h, uint l) const; // uint UnicodeToJisx0201(uint h, uint l) const; - uint unicodeToJisx0201Latin(uint h, uint l) const; + uint unicodeToJisx0201Latin(uint h, uint l) const override; // uint UnicodeToJisx0201Kana(uint h, uint l) const; - uint unicodeToJisx0208(uint h, uint l) const; - uint unicodeToJisx0212(uint h, uint l) const; + uint unicodeToJisx0208(uint h, uint l) const override; + uint unicodeToJisx0212(uint h, uint l) const override; }; uint QJpUnicodeConv_Microsoft::jisx0201LatinToUnicode(uint h, uint l) const diff --git a/src/corelib/codecs/qsjiscodec_p.h b/src/corelib/codecs/qsjiscodec_p.h index 9d7ea5d6d2..1e5cd44f26 100644 --- a/src/corelib/codecs/qsjiscodec_p.h +++ b/src/corelib/codecs/qsjiscodec_p.h @@ -94,12 +94,12 @@ public: static QList<QByteArray> _aliases(); static int _mibEnum(); - QByteArray name() const { return _name(); } - QList<QByteArray> aliases() const { return _aliases(); } - int mibEnum() const { return _mibEnum(); } + QByteArray name() const override { return _name(); } + QList<QByteArray> aliases() const override { return _aliases(); } + int mibEnum() const override { return _mibEnum(); } - QString convertToUnicode(const char *, int, ConverterState *) const; - QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const; + QString convertToUnicode(const char *, int, ConverterState *) const override; + QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const override; QSjisCodec(); ~QSjisCodec(); diff --git a/src/corelib/codecs/qt_attribution.json b/src/corelib/codecs/qt_attribution.json index 41f644a030..0815074675 100644 --- a/src/corelib/codecs/qt_attribution.json +++ b/src/corelib/codecs/qt_attribution.json @@ -6,6 +6,7 @@ "QtUsage": "Used in Qt Core if ICU is not used. Configure with -icu to avoid.", "Path": "qbig5codec.cpp", + "Description": "Treat as final version; no upstream known", "Description": "The Big5 codecs (QBig5Codec, QBig5hkscsCodec) provide conversion to and from the Big5 encodings.", "License": "BSD 2-clause \"Simplified\" License", @@ -23,6 +24,7 @@ Copyright (C) 2001, 2002 Anthony Fok, ThizLinux Laboratory Ltd." "QtUsage": "Used in Qt Core if ICU is not used. Configure with -icu to avoid.", "Path": "qeucjpcodec.cpp", + "Description": "Treat as final version; no upstream known", "Description": "The EUC-JP text codec provides conversion to and from EUC-JP, the main legacy encoding for Unix machines in Japan.", "License": "BSD 2-clause \"Simplified\" License", @@ -37,6 +39,7 @@ the main legacy encoding for Unix machines in Japan.", "QtUsage": "Used in Qt Core if ICU is not used. Configure with -icu to avoid.", "Path": "qeuckrcodec.cpp", + "Description": "Treat as final version; no upstream known", "Description": "The EUC-KR text codec provides conversion to and from EUC-KR, KR, the main legacy encoding for Unix machines in Korea.", "License": "BSD 2-clause \"Simplified\" License", @@ -51,6 +54,7 @@ the main legacy encoding for Unix machines in Korea.", "QtUsage": "Used in Qt Core if ICU is not used. Configure with -icu to avoid.", "Path": "qjiscodec.cpp", + "Description": "Treat as final version; no upstream known", "Description": "The ISO 2022-JP (JIS) text codec provides conversion to and from ISO 2022-JP.", "License": "BSD 2-clause \"Simplified\" License", "LicenseId": "BSD-2-Clause", @@ -64,6 +68,7 @@ the main legacy encoding for Unix machines in Korea.", "QtUsage": "Used in Qt Core if ICU is not used. Configure with -icu to avoid.", "Path": "qsjiscodec.cpp", + "Description": "Treat as final version; no upstream known", "Description": "The Shift-JIS text codec provides conversion to and from Shift-JIS.", "License": "BSD 2-clause \"Simplified\" License", "LicenseId": "BSD-2-Clause", @@ -77,6 +82,7 @@ the main legacy encoding for Unix machines in Korea.", "QtUsage": "Used in Qt Core.", "Path": "qtsciicodec.cpp", + "Description": "Treat as final version; no upstream known", "Description": "The TSCII text codec provides conversion to and from the Tamil TSCII encoding.", "License": "BSD 2-clause \"Simplified\" License", @@ -91,6 +97,7 @@ encoding.", "QtUsage": "Used in Qt Core if ICU is not used. Configure with -icu to avoid.", "Path": "qgb18030codec.cpp", + "Description": "Treat as final version; no upstream known", "Description": "The GBK codec provides conversion to and from the Chinese GB18030/GBK/GB2312 encoding.", "License": "BSD 2-clause \"Simplified\" License", diff --git a/src/corelib/codecs/qtextcodec.cpp b/src/corelib/codecs/qtextcodec.cpp index 1ec443cd73..466c575c3e 100644 --- a/src/corelib/codecs/qtextcodec.cpp +++ b/src/corelib/codecs/qtextcodec.cpp @@ -1,6 +1,7 @@ /**************************************************************************** ** -** Copyright (C) 2016 The Qt Company Ltd. +** Copyright (C) 2018 The Qt Company Ltd. +** Copyright (C) 2018 Intel Corporation. ** Contact: https://www.qt.io/licensing/ ** ** This file is part of the QtCore module of the Qt Toolkit. @@ -43,8 +44,9 @@ #include "qtextcodec_p.h" #include "qbytearraymatcher.h" -#include "qlist.h" +#include "qendian.h" #include "qfile.h" +#include "qlist.h" #include "qstringlist.h" #include "qvarlengtharray.h" #if !defined(QT_BOOTSTRAPPED) @@ -493,6 +495,24 @@ QTextCodec::QTextCodec() */ QTextCodec::~QTextCodec() { + QCoreGlobalData *globalData = QCoreGlobalData::instance(); + if (!globalData) + return; + + globalData->codecForLocale.testAndSetRelaxed(this, nullptr); + + QMutexLocker locker(textCodecsMutex()); + + globalData->allCodecs.removeOne(this); + + auto it = globalData->codecCache.cbegin(); + + while (it != globalData->codecCache.cend()) { + if (it.value() == this) + it = globalData->codecCache.erase(it); + else + ++it; + } } /*! @@ -1144,41 +1164,50 @@ QTextCodec *QTextCodec::codecForHtml(const QByteArray &ba) Tries to detect the encoding of the provided snippet \a ba by using the BOM (Byte Order Mark) and returns a QTextCodec instance - that is capable of decoding the text to unicode. If the codec - cannot be detected from the content provided, \a defaultCodec is - returned. + that is capable of decoding the text to unicode. This function can + detect one of the following codecs: + + \list + \li UTF-32 Little Endian + \li UTF-32 Big Endian + \li UTF-16 Little Endian + \li UTF-16 Big Endian + \li UTF-8 + \endlist + + If the codec cannot be detected from the content provided, \a defaultCodec + is returned. \sa codecForHtml() */ QTextCodec *QTextCodec::codecForUtfText(const QByteArray &ba, QTextCodec *defaultCodec) { const int arraySize = ba.size(); + const uchar *buf = reinterpret_cast<const uchar *>(ba.constData()); + const uint bom = 0xfeff; if (arraySize > 3) { - if ((uchar)ba[0] == 0x00 - && (uchar)ba[1] == 0x00 - && (uchar)ba[2] == 0xFE - && (uchar)ba[3] == 0xFF) + uint uc = qFromUnaligned<uint>(buf); + if (uc == qToBigEndian(bom)) return QTextCodec::codecForMib(1018); // utf-32 be - else if ((uchar)ba[0] == 0xFF - && (uchar)ba[1] == 0xFE - && (uchar)ba[2] == 0x00 - && (uchar)ba[3] == 0x00) + else if (uc == qToLittleEndian(bom)) return QTextCodec::codecForMib(1019); // utf-32 le } if (arraySize < 2) return defaultCodec; - if ((uchar)ba[0] == 0xfe && (uchar)ba[1] == 0xff) + + ushort uc = qFromUnaligned<ushort>(buf); + if (uc == qToBigEndian(ushort(bom))) return QTextCodec::codecForMib(1013); // utf16 be - else if ((uchar)ba[0] == 0xff && (uchar)ba[1] == 0xfe) + else if (uc == qToLittleEndian(ushort(bom))) return QTextCodec::codecForMib(1014); // utf16 le if (arraySize < 3) return defaultCodec; - if ((uchar)ba[0] == 0xef - && (uchar)ba[1] == 0xbb - && (uchar)ba[2] == 0xbf) + + static const char utf8bom[] = "\xef\xbb\xbf"; + if (memcmp(buf, utf8bom, sizeof(utf8bom) - 1) == 0) return QTextCodec::codecForMib(106); // utf-8 return defaultCodec; @@ -1189,8 +1218,19 @@ QTextCodec *QTextCodec::codecForUtfText(const QByteArray &ba, QTextCodec *defaul Tries to detect the encoding of the provided snippet \a ba by using the BOM (Byte Order Mark) and returns a QTextCodec instance - that is capable of decoding the text to unicode. If the codec - cannot be detected, this overload returns a Latin-1 QTextCodec. + that is capable of decoding the text to unicode. This function can + detect one of the following codecs: + + \list + \li UTF-32 Little Endian + \li UTF-32 Big Endian + \li UTF-16 Little Endian + \li UTF-16 Big Endian + \li UTF-8 + \endlist + + If the codec cannot be detected from the content provided, this overload + returns a Latin-1 QTextCodec. \sa codecForHtml() */ @@ -1219,4 +1259,17 @@ bool QTextDecoder::hasFailure() const return state.invalidChars != 0; } +/*! + \internal + \since 5.12 + + Determines whether the decoder needs more bytes to continue decoding. That + is, this signifies that the input string ended in the middle of a + multi-byte sequence. Note that it's possible some codecs do not report this. + */ +bool QTextDecoder::needsMoreData() const +{ + return state.remainingChars; +} + QT_END_NAMESPACE diff --git a/src/corelib/codecs/qtextcodec.h b/src/corelib/codecs/qtextcodec.h index c0261b7aa2..3010a2714e 100644 --- a/src/corelib/codecs/qtextcodec.h +++ b/src/corelib/codecs/qtextcodec.h @@ -1,6 +1,6 @@ /**************************************************************************** ** -** Copyright (C) 2016 The Qt Company Ltd. +** Copyright (C) 2018 The Qt Company Ltd. ** Contact: https://www.qt.io/licensing/ ** ** This file is part of the QtCore module of the Qt Toolkit. @@ -161,6 +161,7 @@ public: QString toUnicode(const QByteArray &ba); void toUnicode(QString *target, const char *chars, int len); bool hasFailure() const; + bool needsMoreData() const; private: const QTextCodec *c; QTextCodec::ConverterState state; diff --git a/src/corelib/codecs/qtextcodec_p.h b/src/corelib/codecs/qtextcodec_p.h index 6e19d1d30e..0e449d994c 100644 --- a/src/corelib/codecs/qtextcodec_p.h +++ b/src/corelib/codecs/qtextcodec_p.h @@ -60,7 +60,7 @@ QT_BEGIN_NAMESPACE #include "qtextcodec.h" -#if defined(Q_OS_MAC) || defined(Q_OS_ANDROID) || defined(Q_OS_QNX) +#if defined(Q_OS_MAC) || defined(Q_OS_ANDROID) || defined(Q_OS_QNX) || defined(Q_OS_WASM) #define QT_LOCALE_IS_UTF8 #endif diff --git a/src/corelib/codecs/qutfcodec.cpp b/src/corelib/codecs/qutfcodec.cpp index 0dffdd723e..643c8ee475 100644 --- a/src/corelib/codecs/qutfcodec.cpp +++ b/src/corelib/codecs/qutfcodec.cpp @@ -70,9 +70,14 @@ static inline bool simdEncodeAscii(uchar *&dst, const ushort *&nextAscii, const { // do sixteen characters at a time for ( ; end - src >= 16; src += 16, dst += 16) { +# ifdef __AVX2__ + __m256i data = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(src)); + __m128i data1 = _mm256_castsi256_si128(data); + __m128i data2 = _mm256_extracti128_si256(data, 1); +# else __m128i data1 = _mm_loadu_si128((const __m128i*)src); __m128i data2 = _mm_loadu_si128(1+(const __m128i*)src); - +# endif // check if everything is ASCII // the highest ASCII value is U+007F @@ -102,6 +107,26 @@ static inline bool simdEncodeAscii(uchar *&dst, const ushort *&nextAscii, const return false; } } + + if (end - src >= 8) { + // do eight characters at a time + __m128i data = _mm_loadu_si128(reinterpret_cast<const __m128i *>(src)); + __m128i packed = _mm_packus_epi16(data, data); + __m128i nonAscii = _mm_cmpgt_epi8(packed, _mm_setzero_si128()); + + // store even non-ASCII + _mm_storel_epi64(reinterpret_cast<__m128i *>(dst), packed); + + uchar n = ~_mm_movemask_epi8(nonAscii); + if (n) { + nextAscii = src + qBitScanReverse(n) + 1; + n = qCountTrailingZeroBits(n); + dst += n; + src += n; + return false; + } + } + return src == end; } @@ -150,11 +175,52 @@ static inline bool simdDecodeAscii(ushort *&dst, const uchar *&nextAscii, const return false; } + + if (end - src >= 8) { + __m128i data = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(src)); + uint n = _mm_movemask_epi8(data) & 0xff; + if (!n) { + // unpack and store + _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), _mm_unpacklo_epi8(data, _mm_setzero_si128())); + } else { + while (!(n & 1)) { + *dst++ = *src++; + n >>= 1; + } + + n = qBitScanReverse(n); + nextAscii = src + n + 1; + return false; + } + } + return src == end; } static inline const uchar *simdFindNonAscii(const uchar *src, const uchar *end, const uchar *&nextAscii) { +#ifdef __AVX2__ + // do 32 characters at a time + // (this is similar to simdTestMask in qstring.cpp) + const __m256i mask = _mm256_set1_epi8(0x80); + for ( ; end - src >= 32; src += 32) { + __m256i data = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(src)); + if (_mm256_testz_si256(mask, data)) + continue; + + uint n = _mm256_movemask_epi8(data); + Q_ASSUME(n); + + // find the next probable ASCII character + // we don't want to load 32 bytes again in this loop if we know there are non-ASCII + // characters still coming + nextAscii = src + qBitScanReverse(n) + 1; + + // return the non-ASCII character + return src + qCountTrailingZeroBits(n); + } +#endif + // do sixteen characters at a time for ( ; end - src >= 16; src += 16) { __m128i data = _mm_loadu_si128(reinterpret_cast<const __m128i*>(src)); @@ -695,26 +761,16 @@ QByteArray QUtf16::convertFromUnicode(const QChar *uc, int len, QTextCodec::Conv char *data = d.data(); if (!state || !(state->flags & QTextCodec::IgnoreHeader)) { QChar bom(QChar::ByteOrderMark); - if (endian == BigEndianness) { - data[0] = bom.row(); - data[1] = bom.cell(); - } else { - data[0] = bom.cell(); - data[1] = bom.row(); - } + if (endian == BigEndianness) + qToBigEndian(bom.unicode(), data); + else + qToLittleEndian(bom.unicode(), data); data += 2; } - if (endian == BigEndianness) { - for (int i = 0; i < len; ++i) { - *(data++) = uc[i].row(); - *(data++) = uc[i].cell(); - } - } else { - for (int i = 0; i < len; ++i) { - *(data++) = uc[i].cell(); - *(data++) = uc[i].row(); - } - } + if (endian == BigEndianness) + qToBigEndian<ushort>(uc, len, data); + else + qToLittleEndian<ushort>(uc, len, data); if (state) { state->remainingChars = 0; @@ -830,20 +886,14 @@ QByteArray QUtf32::convertFromUnicode(const QChar *uc, int len, QTextCodec::Conv if (endian == BigEndianness) { while (i.hasNext()) { uint cp = i.next(); - - *(data++) = cp >> 24; - *(data++) = (cp >> 16) & 0xff; - *(data++) = (cp >> 8) & 0xff; - *(data++) = cp & 0xff; + qToBigEndian(cp, data); + data += 4; } } else { while (i.hasNext()) { uint cp = i.next(); - - *(data++) = cp & 0xff; - *(data++) = (cp >> 8) & 0xff; - *(data++) = (cp >> 16) & 0xff; - *(data++) = cp >> 24; + qToLittleEndian(cp, data); + data += 4; } } diff --git a/src/corelib/codecs/qutfcodec_p.h b/src/corelib/codecs/qutfcodec_p.h index d7743753af..b24283ac5e 100644 --- a/src/corelib/codecs/qutfcodec_p.h +++ b/src/corelib/codecs/qutfcodec_p.h @@ -1,7 +1,7 @@ /**************************************************************************** ** -** Copyright (C) 2016 The Qt Company Ltd. -** Copyright (C) 2016 Intel Corporation. +** Copyright (C) 2018 The Qt Company Ltd. +** Copyright (C) 2018 Intel Corporation. ** Contact: https://www.qt.io/licensing/ ** ** This file is part of the QtCore module of the Qt Toolkit. diff --git a/src/corelib/codecs/qwindowscodec_p.h b/src/corelib/codecs/qwindowscodec_p.h index 1ca6d5567e..5bcab0ce66 100644 --- a/src/corelib/codecs/qwindowscodec_p.h +++ b/src/corelib/codecs/qwindowscodec_p.h @@ -63,12 +63,12 @@ public: QWindowsLocalCodec(); ~QWindowsLocalCodec(); - QString convertToUnicode(const char *, int, ConverterState *) const; - QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const; + QString convertToUnicode(const char *, int, ConverterState *) const override; + QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const override; QString convertToUnicodeCharByChar(const char *chars, int length, ConverterState *state) const; - QByteArray name() const; - int mibEnum() const; + QByteArray name() const override; + int mibEnum() const override; }; |