diff options
-rw-r--r-- | src/corelib/serialization/qcborvalue.cpp | 1 | ||||
-rw-r--r-- | src/corelib/text/qstring.cpp | 55 | ||||
-rw-r--r-- | src/corelib/text/qstringalgorithms.h | 1 | ||||
-rw-r--r-- | src/corelib/text/qstringconverter.cpp | 2 | ||||
-rw-r--r-- | src/corelib/text/qstringconverter_p.h | 2 | ||||
-rw-r--r-- | tests/auto/corelib/text/qstring/tst_qstring.cpp | 30 |
6 files changed, 86 insertions, 5 deletions
diff --git a/src/corelib/serialization/qcborvalue.cpp b/src/corelib/serialization/qcborvalue.cpp index 7e2d8003c7..ea2d092a1f 100644 --- a/src/corelib/serialization/qcborvalue.cpp +++ b/src/corelib/serialization/qcborvalue.cpp @@ -1168,6 +1168,7 @@ static int compareElementRecursive(const QCborContainerPrivate *c1, const Elemen if (!(e1.flags & Element::StringIsAscii) || !(e2.flags & Element::StringIsAscii)) { // Case 2: one of them is UTF-8 and the other is UTF-16, so lengths // are NOT comparable. We need to convert to UTF-16 first... + // (we can't use QUtf8::compareUtf8 because we need to compare lengths) auto string = [](const Element &e, const ByteData *b) { return e.flags & Element::StringIsUtf16 ? b->asQStringRaw() : b->toUtf8String(); }; diff --git a/src/corelib/text/qstring.cpp b/src/corelib/text/qstring.cpp index 82cdeec1a3..f068c5e94a 100644 --- a/src/corelib/text/qstring.cpp +++ b/src/corelib/text/qstring.cpp @@ -1,7 +1,7 @@ /**************************************************************************** ** ** Copyright (C) 2020 The Qt Company Ltd. -** Copyright (C) 2018 Intel Corporation. +** Copyright (C) 2020 Intel Corporation. ** Copyright (C) 2019 Mail.ru Group. 
** Contact: https://www.qt.io/licensing/ ** @@ -871,6 +871,35 @@ static int ucstricmp(const QChar *a, const QChar *ae, const char *b, const char return 1; } +// Case-insensitive comparison between a Unicode string and a UTF-8 string +static int ucstricmp8(const char *utf8, const char *utf8end, const QChar *utf16, const QChar *utf16end) +{ + auto src1 = reinterpret_cast<const uchar *>(utf8); + auto end1 = reinterpret_cast<const uchar *>(utf8end); + QStringIterator src2(utf16, utf16end); + + while (src1 < end1 && src2.hasNext()) { + uint uc1; + uint *output = &uc1; + uchar b = *src1++; + int res = QUtf8Functions::fromUtf8<QUtf8BaseTraits>(b, output, src1, end1); + if (res < 0) { + // decoding error + uc1 = QChar::ReplacementCharacter; + } else { + uc1 = QChar::toCaseFolded(uc1); + } + + uint uc2 = QChar::toCaseFolded(src2.next()); + int diff = uc1 - uc2; // can't underflow + if (diff) + return diff; + } + + // the shorter string sorts first + return (end1 > src1) - int(src2.hasNext()); +} + #if defined(__mips_dsp) // From qstring_mips_dsp_asm.S extern "C" int qt_ucstrncmp_mips_dsp_asm(const char16_t *a, @@ -1334,6 +1363,30 @@ int QtPrivate::compareStrings(QLatin1String lhs, QLatin1String rhs, Qt::CaseSens return qt_compare_strings(lhs, rhs, cs); } +/*! + \relates QStringView + \internal + \since 6.0 + \overload + + Returns an integer that compares to 0 as the UTF-8 string of \a u8len bytes starting at \a u8str compares to \a rhs. + + If \a cs is Qt::CaseSensitive (the default), the comparison is case-sensitive; + otherwise the comparison is case-insensitive. + + Case-sensitive comparison is based exclusively on the numeric values of the + decoded Unicode code points and is very fast, but is not what a human would + expect. Consider sorting user-visible strings with + QString::localeAwareCompare(). 
+*/ +int QtPrivate::compareStringsUtf8(const char *u8str, qsizetype u8len, QStringView rhs, Qt::CaseSensitivity cs) noexcept +{ + if (cs == Qt::CaseSensitive) + return QUtf8::compareUtf8(u8str, u8len, rhs.data(), rhs.size()); + else + return ucstricmp8(u8str, u8str + u8len, rhs.begin(), rhs.end()); +} + #define REHASH(a) \ if (sl_minus_1 < sizeof(std::size_t) * CHAR_BIT) \ hashHaystack -= std::size_t(a) << sl_minus_1; \ diff --git a/src/corelib/text/qstringalgorithms.h b/src/corelib/text/qstringalgorithms.h index 4a0f7dce9a..c407c54268 100644 --- a/src/corelib/text/qstringalgorithms.h +++ b/src/corelib/text/qstringalgorithms.h @@ -62,6 +62,7 @@ Q_REQUIRED_RESULT Q_CORE_EXPORT Q_DECL_PURE_FUNCTION int compareStrings(QStringV Q_REQUIRED_RESULT Q_CORE_EXPORT Q_DECL_PURE_FUNCTION int compareStrings(QStringView lhs, QLatin1String rhs, Qt::CaseSensitivity cs = Qt::CaseSensitive) noexcept; Q_REQUIRED_RESULT Q_CORE_EXPORT Q_DECL_PURE_FUNCTION int compareStrings(QLatin1String lhs, QStringView rhs, Qt::CaseSensitivity cs = Qt::CaseSensitive) noexcept; Q_REQUIRED_RESULT Q_CORE_EXPORT Q_DECL_PURE_FUNCTION int compareStrings(QLatin1String lhs, QLatin1String rhs, Qt::CaseSensitivity cs = Qt::CaseSensitive) noexcept; +Q_REQUIRED_RESULT Q_CORE_EXPORT Q_DECL_PURE_FUNCTION int compareStringsUtf8(const char *, qsizetype, QStringView rhs, Qt::CaseSensitivity cs = Qt::CaseSensitive) noexcept; Q_REQUIRED_RESULT Q_CORE_EXPORT Q_DECL_PURE_FUNCTION bool startsWith(QStringView haystack, QStringView needle, Qt::CaseSensitivity cs = Qt::CaseSensitive) noexcept; diff --git a/src/corelib/text/qstringconverter.cpp b/src/corelib/text/qstringconverter.cpp index 8e314ffd34..2111d22b2f 100644 --- a/src/corelib/text/qstringconverter.cpp +++ b/src/corelib/text/qstringconverter.cpp @@ -713,7 +713,7 @@ QUtf8::ValidUtf8Result QUtf8::isValidUtf8(const char *chars, qsizetype len) return { true, isValidAscii }; } -int QUtf8::compareUtf8(const char *utf8, qsizetype u8len, const QChar *utf16, qsizetype 
u16len) +int QUtf8::compareUtf8(const char *utf8, qsizetype u8len, const QChar *utf16, qsizetype u16len) noexcept { uint uc1, uc2; auto src1 = reinterpret_cast<const uchar *>(utf8); diff --git a/src/corelib/text/qstringconverter_p.h b/src/corelib/text/qstringconverter_p.h index 4e1efd3731..3a99a4191a 100644 --- a/src/corelib/text/qstringconverter_p.h +++ b/src/corelib/text/qstringconverter_p.h @@ -338,7 +338,7 @@ struct QUtf8 bool isValidAscii; }; static ValidUtf8Result isValidUtf8(const char *, qsizetype); - static int compareUtf8(const char *, qsizetype, const QChar *, qsizetype); + static int compareUtf8(const char *, qsizetype, const QChar *, qsizetype) noexcept; static int compareUtf8(const char *, qsizetype, QLatin1String s); }; diff --git a/tests/auto/corelib/text/qstring/tst_qstring.cpp b/tests/auto/corelib/text/qstring/tst_qstring.cpp index 700284f043..321e9ba32e 100644 --- a/tests/auto/corelib/text/qstring/tst_qstring.cpp +++ b/tests/auto/corelib/text/qstring/tst_qstring.cpp @@ -1,7 +1,7 @@ /**************************************************************************** ** -** Copyright (C) 2016 The Qt Company Ltd. -** Copyright (C) 2016 Intel Corporation. +** Copyright (C) 2020 The Qt Company Ltd. +** Copyright (C) 2020 Intel Corporation. ** Contact: https://www.qt.io/licensing/ ** ** This file is part of the test suite of the Qt Toolkit. 
@@ -6181,6 +6181,23 @@ void tst_QString::compare_data() in2[i] = 'b'; QTest::addRow("all-same-except-char-%d", i) << in1 << in2 << -1 << -1; } + + // some non-US-ASCII comparisons + QChar smallA = u'a'; + QChar smallAWithAcute = u'á'; + QChar capitalAWithAcute = u'Á'; + QChar nbsp = u'\u00a0'; + for (int i = 1; i <= 65; ++i) { + QString padding(i - 1, ' '); + QTest::addRow("ascii-nonascii-%d", i) + << (padding + smallA) << (padding + smallAWithAcute) << -1 << -1; + QTest::addRow("nonascii-nonascii-equal-%d", i) + << (padding + smallAWithAcute) << (padding + smallAWithAcute) << 0 << 0; + QTest::addRow("nonascii-nonascii-caseequal-%d", i) + << (padding + capitalAWithAcute) << (padding + smallAWithAcute) << -1 << 0; + QTest::addRow("nonascii-nonascii-notequal-%d", i) + << (padding + nbsp) << (padding + smallAWithAcute) << -1 << -1; + } } static bool isLatin(const QString &s) @@ -6200,7 +6217,10 @@ void tst_QString::compare() QStringRef r1(&s1, 0, s1.length()); QStringRef r2(&s2, 0, s2.length()); + QByteArray s1_8 = s1.toUtf8(); + QByteArray s2_8 = s2.toUtf8(); + const QStringView v1(s1); const QStringView v2(s2); QCOMPARE(sign(QString::compare(s1, s2)), csr); @@ -6218,6 +6238,10 @@ void tst_QString::compare() QCOMPARE(sign(r1.compare(r2, Qt::CaseInsensitive)), cir); QCOMPARE(sign(s1.compare(v2, Qt::CaseSensitive)), csr); QCOMPARE(sign(s1.compare(v2, Qt::CaseInsensitive)), cir); + QCOMPARE(sign(QtPrivate::compareStringsUtf8(s1_8, s1_8.size(), v2, Qt::CaseSensitive)), csr); + QCOMPARE(sign(QtPrivate::compareStringsUtf8(s1_8, s1_8.size(), v2, Qt::CaseInsensitive)), cir); + QCOMPARE(sign(QtPrivate::compareStringsUtf8(s2_8, s2_8.size(), v1, Qt::CaseSensitive)), -csr); + QCOMPARE(sign(QtPrivate::compareStringsUtf8(s2_8, s2_8.size(), v1, Qt::CaseInsensitive)), -cir); QCOMPARE(sign(QString::compare(s1, s2, Qt::CaseSensitive)), csr); QCOMPARE(sign(QString::compare(s1, s2, Qt::CaseInsensitive)), cir); @@ -6238,6 +6262,7 @@ void tst_QString::compare() } if (isLatin(s2)) { + 
QVERIFY(QtPrivate::isLatin1(s2)); QCOMPARE(sign(QString::compare(s1, QLatin1String(s2.toLatin1()))), csr); QCOMPARE(sign(QString::compare(s1, QLatin1String(s2.toLatin1()), Qt::CaseInsensitive)), cir); QCOMPARE(sign(QStringRef::compare(r1, QLatin1String(s2.toLatin1()))), csr); @@ -6252,6 +6277,7 @@ void tst_QString::compare() } if (isLatin(s1)) { + QVERIFY(QtPrivate::isLatin1(s1)); QCOMPARE(sign(QString::compare(QLatin1String(s1.toLatin1()), s2)), csr); QCOMPARE(sign(QString::compare(QLatin1String(s1.toLatin1()), s2, Qt::CaseInsensitive)), cir); } |