diff options
author | Thiago Macieira <thiago.macieira@intel.com> | 2022-05-19 10:17:32 -0700 |
---|---|---|
committer | Edward Welbourne <edward.welbourne@qt.io> | 2023-01-27 15:24:22 +0100 |
commit | 5ed684e79e71e5e11159d8df998a4576113981e0 (patch) | |
tree | 4a2f0b89290b04c469c397ae575f979ace1e72aa | |
parent | 3eb6b13c0b30263c6c4d17e1b8c4e5a2cbc73b9e (diff) |
QStringConverter: use the QUtf8 codec when Windows is using UTF-8
The QLocal8Bit implementation assumes that there's at most one
continuation byte -- that is, that all codecs are either Single or
Double Byte Character Sets (SBCS or DBCS). It appears to be the case for
all Windows default codepages, except for CP_UTF8, which is an opt-in
anyway.
Instead of fixing our codec, let's just use the optimized UTF-8
implementation.
[ChangeLog][Windows] Fixed support for using Qt applications with UTF-8
as the system codepage or by enabling that in the application's
manifest.
Discussed-on: https://lists.qt-project.org/pipermail/interest/2022-May/038241.html
Change-Id: I77c8221eb2824c369feffffd16f0912550a98049
Reviewed-by: Lars Knoll <lars.knoll@qt.io>
(cherry picked from commit 9bad4be21482d36bff76357a000e008755b60361)
Reviewed-by: Mårten Nordheim <marten.nordheim@qt.io>
-rw-r--r-- | src/corelib/text/qstringconverter.cpp | 9 | ||||
-rw-r--r-- | src/corelib/text/qstringconverter_p.h | 27 | ||||
-rw-r--r-- | tests/auto/corelib/text/qstringconverter/tst_qstringconverter.cpp | 64 |
3 files changed, 82 insertions, 18 deletions
diff --git a/src/corelib/text/qstringconverter.cpp b/src/corelib/text/qstringconverter.cpp index c419caddbc..802f747f9f 100644 --- a/src/corelib/text/qstringconverter.cpp +++ b/src/corelib/text/qstringconverter.cpp @@ -1204,6 +1204,11 @@ QChar *QUtf32::convertToUnicode(QChar *out, QByteArrayView in, QStringConverter: } #if defined(Q_OS_WIN) && !defined(QT_BOOTSTRAPPED) +int QLocal8Bit::checkUtf8() +{ + return GetACP() == CP_UTF8 ? 1 : -1; +} + static QString convertToUnicodeCharByChar(QByteArrayView in, QStringConverter::State *state) { qsizetype length = in.size(); @@ -1257,7 +1262,7 @@ static QString convertToUnicodeCharByChar(QByteArrayView in, QStringConverter::S } -QString QLocal8Bit::convertToUnicode(QByteArrayView in, QStringConverter::State *state) +QString QLocal8Bit::convertToUnicode_sys(QByteArrayView in, QStringConverter::State *state) { qsizetype length = in.size(); @@ -1345,7 +1350,7 @@ QString QLocal8Bit::convertToUnicode(QByteArrayView in, QStringConverter::State return s; } -QByteArray QLocal8Bit::convertFromUnicode(QStringView in, QStringConverter::State *state) +QByteArray QLocal8Bit::convertFromUnicode_sys(QStringView in, QStringConverter::State *state) { const QChar *ch = in.data(); qsizetype uclen = in.size(); diff --git a/src/corelib/text/qstringconverter_p.h b/src/corelib/text/qstringconverter_p.h index da309e1f34..682e5c2179 100644 --- a/src/corelib/text/qstringconverter_p.h +++ b/src/corelib/text/qstringconverter_p.h @@ -368,8 +368,31 @@ struct Q_CORE_EXPORT QLocal8Bit static QByteArray convertFromUnicode(QStringView in, QStringConverter::State *state) { return QUtf8::convertFromUnicode(in, state); } #else - static QString convertToUnicode(QByteArrayView, QStringConverter::State *); - static QByteArray convertFromUnicode(QStringView, QStringConverter::State *); + static int checkUtf8(); + static bool isUtf8() + { + static QBasicAtomicInteger<qint8> result = { 0 }; + int r = result.loadRelaxed(); + if (r == 0) { + r = checkUtf8(); + 
result.storeRelaxed(r); + } + return r > 0; + } + static QString convertToUnicode_sys(QByteArrayView, QStringConverter::State *); + static QString convertToUnicode(QByteArrayView in, QStringConverter::State *state) + { + if (isUtf8()) + return QUtf8::convertToUnicode(in, state); + return convertToUnicode_sys(in, state); + } + static QByteArray convertFromUnicode_sys(QStringView, QStringConverter::State *); + static QByteArray convertFromUnicode(QStringView in, QStringConverter::State *state) + { + if (isUtf8()) + return QUtf8::convertFromUnicode(in, state); + return convertFromUnicode_sys(in, state); + } #endif }; diff --git a/tests/auto/corelib/text/qstringconverter/tst_qstringconverter.cpp b/tests/auto/corelib/text/qstringconverter/tst_qstringconverter.cpp index 23d8fd429c..198fb59dfb 100644 --- a/tests/auto/corelib/text/qstringconverter/tst_qstringconverter.cpp +++ b/tests/auto/corelib/text/qstringconverter/tst_qstringconverter.cpp @@ -32,11 +32,52 @@ #include <qstringconverter.h> #include <qthreadpool.h> +#include <array> + +enum CodecLimitation { + AsciiOnly, + Latin1Only, + FullUnicode +}; + +#ifdef Q_OS_WIN +# include <qt_windows.h> +static bool localeIsUtf8() +{ + return GetACP() == CP_UTF8; +} +#else +static constexpr bool localeIsUtf8() +{ + return true; +} +#endif + +struct Codec +{ + const char name[12]; + QStringConverter::Encoding code; + CodecLimitation limitation = FullUnicode; +}; +static const std::array codes = { + Codec{ "UTF-8", QStringConverter::Utf8 }, + Codec{ "UTF-16", QStringConverter::Utf16 }, + Codec{ "UTF-16-le", QStringConverter::Utf16LE }, + Codec{ "UTF-16-be", QStringConverter::Utf16BE }, + Codec{ "UTF-32", QStringConverter::Utf32 }, + Codec{ "UTF-32-le", QStringConverter::Utf32LE }, + Codec{ "UTF-32-be", QStringConverter::Utf32BE }, + Codec{ "Latin-1", QStringConverter::Latin1, Latin1Only }, + Codec{ "System", QStringConverter::System, localeIsUtf8() ? 
FullUnicode : AsciiOnly } +}; + class tst_QStringConverter : public QObject { Q_OBJECT private slots: + void initTestCase(); + void threadSafety(); void constructByName(); @@ -155,24 +196,11 @@ void tst_QStringConverter::roundtrip_data() QTest::addColumn<QString>("utf16"); QTest::addColumn<QStringConverter::Encoding>("code"); - const struct { - QStringConverter::Encoding code; - const char *name; - } codes[] = { - { QStringConverter::Utf8, "UTF-8" }, - { QStringConverter::Utf16, "UTF-16" }, - { QStringConverter::Utf16LE, "UTF-16-le" }, - { QStringConverter::Utf16BE, "UTF-16-be" }, - { QStringConverter::Utf32, "UTF-32" }, - { QStringConverter::Utf32LE, "UTF-32-le" }, - { QStringConverter::Utf32BE, "UTF-32-be" }, - // Latin1, System: not guaranteed to be able to represent arbitrary Unicode. - }; // TODO: include flag variations, too. for (const auto code : codes) { QTest::addRow("empty-%s", code.name) << u""_qs << code.code; - { + if (code.limitation == FullUnicode) { const char32_t zeroVal = 0x11136; // Unicode's representation of Chakma zero const QChar data[] = { QChar::highSurrogate(zeroVal), QChar::lowSurrogate(zeroVal), @@ -1893,6 +1921,14 @@ public: } }; +void tst_QStringConverter::initTestCase() +{ + if (localeIsUtf8()) + qInfo("System locale is UTF-8"); + else + qInfo("System locale is not UTF-8"); +} + void tst_QStringConverter::threadSafety() { QThreadPool::globalInstance()->setMaxThreadCount(12); |