From 9dd8e655cdd26eeaae30645b7fe013d9a696547f Mon Sep 17 00:00:00 2001 From: Edward Welbourne Date: Fri, 29 May 2020 13:12:28 +0200 Subject: Limit QByteArray's 8-bit support to ASCII Previously it handled Latin-1, which made it incompatible with UTF-8, which is now our preferred 8-bit encoding. For Qt6 it is limited to ASCII. Adjusted tests to match. QLatin1String::compare() turned out to be relying on qstrnicmp()'s Latin-1 handling. Removed some spurious Q_UNLIKELY()s and tidied up code a little in the process. [ChangeLog][QtCore][Important Behavior Changes] Encoding-dependent features of QByteArray are now limited to ASCII, where previously they worked for the whole of Latin-1. This affects case-insensitive comparison, notably including qstricmp() and qstrnicmp(), and case-transforming functions. Fixes: QTBUG-84323 Change-Id: I2925d9908f8654599195a2860847b17083911b41 Reviewed-by: Lars Knoll Reviewed-by: Qt CI Bot --- .../corelib/text/qbytearray/tst_qbytearray.cpp | 44 ++++++++++++---------- .../qstringapisymmetry/tst_qstringapisymmetry.cpp | 23 ++++++----- 2 files changed, 37 insertions(+), 30 deletions(-) (limited to 'tests/auto/corelib/text') diff --git a/tests/auto/corelib/text/qbytearray/tst_qbytearray.cpp b/tests/auto/corelib/text/qbytearray/tst_qbytearray.cpp index e90f4ff14e..16e28cc1d6 100644 --- a/tests/auto/corelib/text/qbytearray/tst_qbytearray.cpp +++ b/tests/auto/corelib/text/qbytearray/tst_qbytearray.cpp @@ -176,9 +176,9 @@ QByteArray verifyZeroTermination(const QByteArray &ba) int baSize = ba.size(); char baTerminator = ba.constData()[baSize]; if ('\0' != baTerminator) - return QString::fromLatin1( - "*** Result ('%1') not null-terminated: 0x%2 ***").arg(QString::fromLatin1(ba)) - .arg(baTerminator, 2, 16, QChar('0')).toLatin1(); + return QString::fromUtf8( + "*** Result ('%1') not null-terminated: 0x%2 ***").arg(QString::fromUtf8(ba)) + .arg(baTerminator, 2, 16, QChar('0')).toUtf8(); // Skip mutating checks on shared strings if 
(baDataPtr->isShared()) @@ -934,30 +934,30 @@ void tst_QByteArray::qstricmp() QFETCH(QString, str1); QFETCH(QString, str2); - int expected = strcmp(str1.toUpper().toLatin1(), - str2.toUpper().toLatin1()); + int expected = strcmp(str1.toUpper().toUtf8(), + str2.toUpper().toUtf8()); if ( expected != 0 ) { expected = (expected < 0 ? -1 : 1); } - int actual = ::qstricmp(str1.toLatin1(), str2.toLatin1()); + int actual = ::qstricmp(str1.toUtf8(), str2.toUtf8()); if ( actual != 0 ) { actual = (actual < 0 ? -1 : 1); } QCOMPARE(actual, expected); - actual = ::qstricmp("012345679abcd" + str1.toLatin1(), "012345679AbCd" + str2.toLatin1()); + actual = ::qstricmp("012345679abcd" + str1.toUtf8(), "012345679AbCd" + str2.toUtf8()); if ( actual != 0 ) { actual = (actual < 0 ? -1 : 1); } QCOMPARE(actual, expected); - actual = str1.toLatin1().compare(str2.toLatin1(), Qt::CaseInsensitive); + actual = str1.toUtf8().compare(str2.toUtf8(), Qt::CaseInsensitive); if ( actual != 0 ) { actual = (actual < 0 ? -1 : 1); } QCOMPARE(actual, expected); - actual = str1.toLatin1().compare(str2.toLatin1().constData(), Qt::CaseInsensitive); + actual = str1.toUtf8().compare(str2.toUtf8().constData(), Qt::CaseInsensitive); if ( actual != 0 ) { actual = (actual < 0 ? 
-1 : 1); } @@ -1468,7 +1468,7 @@ void tst_QByteArray::toULong_data() QTest::addColumn("ok"); ulong LongMaxPlusOne = (ulong)LONG_MAX + 1; - QTest::newRow("LONG_MAX+1") << QString::number(LongMaxPlusOne).toLatin1() << 10 << LongMaxPlusOne << true; + QTest::newRow("LONG_MAX+1") << QString::number(LongMaxPlusOne).toUtf8() << 10 << LongMaxPlusOne << true; QTest::newRow("default") << QByteArray() << 10 << 0UL << false; QTest::newRow("empty") << QByteArray("") << 10 << 0UL << false; QTest::newRow("ulong1") << QByteArray("3234567890") << 10 << 3234567890UL << true; @@ -1990,7 +1990,7 @@ void tst_QByteArray::compareCharStar() const bool isEqual = result == 0; const bool isLess = result < 0; const bool isGreater = result > 0; - QByteArray qba = string2.toLatin1(); + QByteArray qba = string2.toUtf8(); const char *str2 = qba.constData(); if (string2.isNull()) str2 = 0; @@ -2297,6 +2297,14 @@ void tst_QByteArray::toUpperLower_data() QTest::addColumn("upper"); QTest::addColumn("lower"); + { + QByteArray nonAscii(128, Qt::Uninitialized); + char *data = nonAscii.data(); + for (unsigned char i = 0; i < 128; ++i) + data[i] = i + 128; + QTest::newRow("non-ASCII") << nonAscii << nonAscii << nonAscii; + } + QTest::newRow("empty") << QByteArray() << QByteArray() << QByteArray(); QTest::newRow("literal") << QByteArrayLiteral("Hello World") << QByteArrayLiteral("HELLO WORLD") @@ -2304,9 +2312,6 @@ void tst_QByteArray::toUpperLower_data() QTest::newRow("ascii") << QByteArray("Hello World, this is a STRING") << QByteArray("HELLO WORLD, THIS IS A STRING") << QByteArray("hello world, this is a string"); - QTest::newRow("latin1") << QByteArray("R\311sum\351") - << QByteArray("R\311SUM\311") - << QByteArray("r\351sum\351"); QTest::newRow("nul") << QByteArray("a\0B", 3) << QByteArray("A\0B", 3) << QByteArray("a\0b", 3); } @@ -2350,9 +2355,9 @@ void tst_QByteArray::isUpper() QVERIFY(!QByteArray().isUpper()); QVERIFY(!QByteArray("").isUpper()); QVERIFY(QByteArray("TEXT").isUpper()); - 
QVERIFY(QByteArray("\xD0\xDE").isUpper()); - QVERIFY(!QByteArray("\xD7").isUpper()); // multiplication sign is not upper - QVERIFY(!QByteArray("\xDF").isUpper()); // sz ligature is not upper + QVERIFY(!QByteArray("\xD0\xDE").isUpper()); // non-ASCII is neither upper nor lower + QVERIFY(!QByteArray("\xD7").isUpper()); + QVERIFY(!QByteArray("\xDF").isUpper()); QVERIFY(!QByteArray("text").isUpper()); QVERIFY(!QByteArray("Text").isUpper()); QVERIFY(!QByteArray("tExt").isUpper()); @@ -2373,8 +2378,8 @@ void tst_QByteArray::isLower() QVERIFY(!QByteArray().isLower()); QVERIFY(!QByteArray("").isLower()); QVERIFY(QByteArray("text").isLower()); - QVERIFY(QByteArray("\xE0\xFF").isLower()); - QVERIFY(!QByteArray("\xF7").isLower()); // division sign is not lower + QVERIFY(!QByteArray("\xE0\xFF").isLower()); // non-ASCII is neither upper nor lower + QVERIFY(!QByteArray("\xF7").isLower()); QVERIFY(!QByteArray("Text").isLower()); QVERIFY(!QByteArray("tExt").isLower()); QVERIFY(!QByteArray("teXt").isLower()); @@ -2416,7 +2421,6 @@ void tst_QByteArray::stdString() QVERIFY(l1str.length() < utf8str.length()); } - const char globalChar = '1'; QTEST_MAIN(tst_QByteArray) diff --git a/tests/auto/corelib/text/qstringapisymmetry/tst_qstringapisymmetry.cpp b/tests/auto/corelib/text/qstringapisymmetry/tst_qstringapisymmetry.cpp index 37cc7db841..f3a7e93be2 100644 --- a/tests/auto/corelib/text/qstringapisymmetry/tst_qstringapisymmetry.cpp +++ b/tests/auto/corelib/text/qstringapisymmetry/tst_qstringapisymmetry.cpp @@ -972,7 +972,7 @@ void tst_QStringApiSymmetry::compare_data(bool hasConceptOfNullAndEmpty) << 0 << 0; } -#define ROW(lhs, rhs) \ +#define ROW(lhs, rhs, caseless) \ do { \ static const QString pinned[] = { \ QString(QLatin1String(lhs)), \ @@ -981,16 +981,19 @@ void tst_QStringApiSymmetry::compare_data(bool hasConceptOfNullAndEmpty) QTest::newRow(qUtf8Printable(QLatin1String("'" lhs "' <> '" rhs "': "))) \ << QStringRef(&pinned[0]) << QLatin1String(lhs) \ << QStringRef(&pinned[1]) << 
QLatin1String(rhs) \ - << sign(qstrcmp(lhs, rhs)) << sign(qstricmp(lhs, rhs)); \ + << sign(qstrcmp(lhs, rhs)) << caseless; \ } while (false) - ROW("", "0"); - ROW("0", ""); - ROW("0", "1"); - ROW("0", "0"); - ROW("10", "0"); - ROW("01", "1"); - ROW("\xE4", "\xE4"); // ä <> ä - ROW("\xE4", "\xC4"); // ä <> Ä +#define ASCIIROW(lhs, rhs) ROW(lhs, rhs, sign(qstricmp(lhs, rhs))) + ASCIIROW("", "0"); + ASCIIROW("0", ""); + ASCIIROW("0", "1"); + ASCIIROW("0", "0"); + ASCIIROW("10", "0"); + ASCIIROW("01", "1"); + ASCIIROW("e", "e"); + ASCIIROW("e", "E"); + ROW("\xE4", "\xE4", 0); // ä <> ä + ROW("\xE4", "\xC4", 0); // ä <> Ä #undef ROW } -- cgit v1.2.3