summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- src/corelib/serialization/qcborvalue.cpp 1
-rw-r--r-- src/corelib/text/qstring.cpp 55
-rw-r--r-- src/corelib/text/qstringalgorithms.h 1
-rw-r--r-- src/corelib/text/qstringconverter.cpp 2
-rw-r--r-- src/corelib/text/qstringconverter_p.h 2
-rw-r--r-- tests/auto/corelib/text/qstring/tst_qstring.cpp 30
6 files changed, 86 insertions, 5 deletions
diff --git a/src/corelib/serialization/qcborvalue.cpp b/src/corelib/serialization/qcborvalue.cpp
index 7e2d8003c7..ea2d092a1f 100644
--- a/src/corelib/serialization/qcborvalue.cpp
+++ b/src/corelib/serialization/qcborvalue.cpp
@@ -1168,6 +1168,7 @@ static int compareElementRecursive(const QCborContainerPrivate *c1, const Elemen
if (!(e1.flags & Element::StringIsAscii) || !(e2.flags & Element::StringIsAscii)) {
// Case 2: one of them is UTF-8 and the other is UTF-16, so lengths
// are NOT comparable. We need to convert to UTF-16 first...
+ // (we can't use QUtf8::compareUtf8 because we need to compare lengths)
auto string = [](const Element &e, const ByteData *b) {
return e.flags & Element::StringIsUtf16 ? b->asQStringRaw() : b->toUtf8String();
};
diff --git a/src/corelib/text/qstring.cpp b/src/corelib/text/qstring.cpp
index 82cdeec1a3..f068c5e94a 100644
--- a/src/corelib/text/qstring.cpp
+++ b/src/corelib/text/qstring.cpp
@@ -1,7 +1,7 @@
/****************************************************************************
**
** Copyright (C) 2020 The Qt Company Ltd.
-** Copyright (C) 2018 Intel Corporation.
+** Copyright (C) 2020 Intel Corporation.
** Copyright (C) 2019 Mail.ru Group.
** Contact: https://www.qt.io/licensing/
**
@@ -871,6 +871,35 @@ static int ucstricmp(const QChar *a, const QChar *ae, const char *b, const char
return 1;
}
+// Case-insensitive comparison of a UTF-8 string (left operand) with a UTF-16 string; returns <0, 0 or >0
+static int ucstricmp8(const char *utf8, const char *utf8end, const QChar *utf16, const QChar *utf16end)
+{
+ auto src1 = reinterpret_cast<const uchar *>(utf8);
+ auto end1 = reinterpret_cast<const uchar *>(utf8end);
+ QStringIterator src2(utf16, utf16end);
+
+ while (src1 < end1 && src2.hasNext()) {
+ uint uc1;
+ uint *output = &uc1; // destination handed to fromUtf8(); uc1 is read back below
+ uchar b = *src1++; // first byte of the next sequence, passed separately from the rest of the input
+ int res = QUtf8Functions::fromUtf8<QUtf8BaseTraits>(b, output, src1, end1);
+ if (res < 0) {
+ // decoding error: substitute the replacement character (it is not case-folded)
+ uc1 = QChar::ReplacementCharacter;
+ } else {
+ uc1 = QChar::toCaseFolded(uc1);
+ }
+
+ uint uc2 = QChar::toCaseFolded(src2.next()); // next item from the UTF-16 iterator, case-folded
+ int diff = uc1 - uc2; // can't underflow
+ if (diff)
+ return diff;
+ }
+
+ // everything compared so far matched: the shorter string sorts first (1, -1, or 0 if both are exhausted)
+ return (end1 > src1) - int(src2.hasNext());
+}
+
#if defined(__mips_dsp)
// From qstring_mips_dsp_asm.S
extern "C" int qt_ucstrncmp_mips_dsp_asm(const char16_t *a,
@@ -1334,6 +1363,30 @@ int QtPrivate::compareStrings(QLatin1String lhs, QLatin1String rhs, Qt::CaseSens
return qt_compare_strings(lhs, rhs, cs);
}
+/*!
+ \relates QStringView
+ \internal
+ \since 6.0
+ \overload
+
+ Returns an integer that compares to 0 as the UTF-8 encoded string starting at
+ \a u8str and spanning \a u8len bytes compares to \a rhs.
+
+ If \a cs is Qt::CaseSensitive (the default), the comparison is case-sensitive;
+ otherwise the comparison is case-insensitive.
+
+ Case-sensitive comparison is based exclusively on the numeric values of the
+ decoded Unicode code points and is very fast, but is not what a human would
+ expect. Consider sorting user-visible strings with QString::localeAwareCompare().
+*/
+int QtPrivate::compareStringsUtf8(const char *u8str, qsizetype u8len, QStringView rhs, Qt::CaseSensitivity cs) noexcept
+{
+ if (cs == Qt::CaseSensitive)
+ return QUtf8::compareUtf8(u8str, u8len, rhs.data(), rhs.size()); // case-sensitive path is delegated to QUtf8
+ else
+ return ucstricmp8(u8str, u8str + u8len, rhs.begin(), rhs.end()); // case-insensitive path uses the file-local helper
+}
+
#define REHASH(a) \
if (sl_minus_1 < sizeof(std::size_t) * CHAR_BIT) \
hashHaystack -= std::size_t(a) << sl_minus_1; \
diff --git a/src/corelib/text/qstringalgorithms.h b/src/corelib/text/qstringalgorithms.h
index 4a0f7dce9a..c407c54268 100644
--- a/src/corelib/text/qstringalgorithms.h
+++ b/src/corelib/text/qstringalgorithms.h
@@ -62,6 +62,7 @@ Q_REQUIRED_RESULT Q_CORE_EXPORT Q_DECL_PURE_FUNCTION int compareStrings(QStringV
Q_REQUIRED_RESULT Q_CORE_EXPORT Q_DECL_PURE_FUNCTION int compareStrings(QStringView lhs, QLatin1String rhs, Qt::CaseSensitivity cs = Qt::CaseSensitive) noexcept;
Q_REQUIRED_RESULT Q_CORE_EXPORT Q_DECL_PURE_FUNCTION int compareStrings(QLatin1String lhs, QStringView rhs, Qt::CaseSensitivity cs = Qt::CaseSensitive) noexcept;
Q_REQUIRED_RESULT Q_CORE_EXPORT Q_DECL_PURE_FUNCTION int compareStrings(QLatin1String lhs, QLatin1String rhs, Qt::CaseSensitivity cs = Qt::CaseSensitive) noexcept;
+Q_REQUIRED_RESULT Q_CORE_EXPORT Q_DECL_PURE_FUNCTION int compareStringsUtf8(const char *, qsizetype, QStringView rhs, Qt::CaseSensitivity cs = Qt::CaseSensitive) noexcept;
Q_REQUIRED_RESULT Q_CORE_EXPORT Q_DECL_PURE_FUNCTION bool startsWith(QStringView haystack, QStringView needle, Qt::CaseSensitivity cs = Qt::CaseSensitive) noexcept;
diff --git a/src/corelib/text/qstringconverter.cpp b/src/corelib/text/qstringconverter.cpp
index 8e314ffd34..2111d22b2f 100644
--- a/src/corelib/text/qstringconverter.cpp
+++ b/src/corelib/text/qstringconverter.cpp
@@ -713,7 +713,7 @@ QUtf8::ValidUtf8Result QUtf8::isValidUtf8(const char *chars, qsizetype len)
return { true, isValidAscii };
}
-int QUtf8::compareUtf8(const char *utf8, qsizetype u8len, const QChar *utf16, qsizetype u16len)
+int QUtf8::compareUtf8(const char *utf8, qsizetype u8len, const QChar *utf16, qsizetype u16len) noexcept
{
uint uc1, uc2;
auto src1 = reinterpret_cast<const uchar *>(utf8);
diff --git a/src/corelib/text/qstringconverter_p.h b/src/corelib/text/qstringconverter_p.h
index 4e1efd3731..3a99a4191a 100644
--- a/src/corelib/text/qstringconverter_p.h
+++ b/src/corelib/text/qstringconverter_p.h
@@ -338,7 +338,7 @@ struct QUtf8
bool isValidAscii;
};
static ValidUtf8Result isValidUtf8(const char *, qsizetype);
- static int compareUtf8(const char *, qsizetype, const QChar *, qsizetype);
+ static int compareUtf8(const char *, qsizetype, const QChar *, qsizetype) noexcept;
static int compareUtf8(const char *, qsizetype, QLatin1String s);
};
diff --git a/tests/auto/corelib/text/qstring/tst_qstring.cpp b/tests/auto/corelib/text/qstring/tst_qstring.cpp
index 700284f043..321e9ba32e 100644
--- a/tests/auto/corelib/text/qstring/tst_qstring.cpp
+++ b/tests/auto/corelib/text/qstring/tst_qstring.cpp
@@ -1,7 +1,7 @@
/****************************************************************************
**
-** Copyright (C) 2016 The Qt Company Ltd.
-** Copyright (C) 2016 Intel Corporation.
+** Copyright (C) 2020 The Qt Company Ltd.
+** Copyright (C) 2020 Intel Corporation.
** Contact: https://www.qt.io/licensing/
**
** This file is part of the test suite of the Qt Toolkit.
@@ -6181,6 +6181,23 @@ void tst_QString::compare_data()
in2[i] = 'b';
QTest::addRow("all-same-except-char-%d", i) << in1 << in2 << -1 << -1;
}
+
+ // some non-US-ASCII comparisons
+ QChar smallA = u'a';
+ QChar smallAWithAcute = u'á';
+ QChar capitalAWithAcute = u'Á';
+ QChar nbsp = u'\u00a0';
+ for (int i = 1; i <= 65; ++i) {
+ QString padding(i - 1, ' ');
+ QTest::addRow("ascii-nonascii-%d", i)
+ << (padding + smallA) << (padding + smallAWithAcute) << -1 << -1;
+ QTest::addRow("nonascii-nonascii-equal-%d", i)
+ << (padding + smallAWithAcute) << (padding + smallAWithAcute) << 0 << 0;
+ QTest::addRow("nonascii-nonascii-caseequal-%d", i)
+ << (padding + capitalAWithAcute) << (padding + smallAWithAcute) << -1 << 0;
+ QTest::addRow("nonascii-nonascii-notequal-%d", i)
+ << (padding + nbsp) << (padding + smallAWithAcute) << -1 << -1;
+ }
}
static bool isLatin(const QString &s)
@@ -6200,7 +6217,10 @@ void tst_QString::compare()
QStringRef r1(&s1, 0, s1.length());
QStringRef r2(&s2, 0, s2.length());
+ QByteArray s1_8 = s1.toUtf8();
+ QByteArray s2_8 = s2.toUtf8();
+ const QStringView v1(s1);
const QStringView v2(s2);
QCOMPARE(sign(QString::compare(s1, s2)), csr);
@@ -6218,6 +6238,10 @@ void tst_QString::compare()
QCOMPARE(sign(r1.compare(r2, Qt::CaseInsensitive)), cir);
QCOMPARE(sign(s1.compare(v2, Qt::CaseSensitive)), csr);
QCOMPARE(sign(s1.compare(v2, Qt::CaseInsensitive)), cir);
+ QCOMPARE(sign(QtPrivate::compareStringsUtf8(s1_8, s1_8.size(), v2, Qt::CaseSensitive)), csr);
+ QCOMPARE(sign(QtPrivate::compareStringsUtf8(s1_8, s1_8.size(), v2, Qt::CaseInsensitive)), cir);
+ QCOMPARE(sign(QtPrivate::compareStringsUtf8(s2_8, s2_8.size(), v1, Qt::CaseSensitive)), -csr);
+ QCOMPARE(sign(QtPrivate::compareStringsUtf8(s2_8, s2_8.size(), v1, Qt::CaseInsensitive)), -cir);
QCOMPARE(sign(QString::compare(s1, s2, Qt::CaseSensitive)), csr);
QCOMPARE(sign(QString::compare(s1, s2, Qt::CaseInsensitive)), cir);
@@ -6238,6 +6262,7 @@ void tst_QString::compare()
}
if (isLatin(s2)) {
+ QVERIFY(QtPrivate::isLatin1(s2));
QCOMPARE(sign(QString::compare(s1, QLatin1String(s2.toLatin1()))), csr);
QCOMPARE(sign(QString::compare(s1, QLatin1String(s2.toLatin1()), Qt::CaseInsensitive)), cir);
QCOMPARE(sign(QStringRef::compare(r1, QLatin1String(s2.toLatin1()))), csr);
@@ -6252,6 +6277,7 @@ void tst_QString::compare()
}
if (isLatin(s1)) {
+ QVERIFY(QtPrivate::isLatin1(s1));
QCOMPARE(sign(QString::compare(QLatin1String(s1.toLatin1()), s2)), csr);
QCOMPARE(sign(QString::compare(QLatin1String(s1.toLatin1()), s2, Qt::CaseInsensitive)), cir);
}