diff options
author | Thiago Macieira <thiago.macieira@intel.com> | 2024-03-26 09:23:53 -0700 |
---|---|---|
committer | Thiago Macieira <thiago.macieira@intel.com> | 2024-04-08 13:31:21 -0700 |
commit | 394788c68efacdec2676988b4b4ff207b20557f2 (patch) | |
tree | 37f1bb819f235e34e0c14f241211435ae5e1ffd3 | |
parent | 085d5db90dd4c148fe5dea6a3435166f6bdeedac (diff) |
QCborValue: fix sorting of UTF8-to-UTF16 strings
CBOR sorts text strings by the length of their UTF-8 encoding before comparing their contents, so we must compare the strings' lengths in UTF-8, not in UTF-16.
[ChangeLog][QtCore][QCborValue] Fixed a bug that caused comparisons of
certain non-US-ASCII strings to produce results that did not conform to
the CBOR specification.
Pick-to: 6.5 6.6 6.7
Change-Id: I5f663c2f9f4149af84fefffd17c05d1c0f1bbc3a
Reviewed-by: Ivan Solovev <ivan.solovev@qt.io>
Reviewed-by: Ahmad Samir <a.samirh78@gmail.com>
-rw-r--r-- | src/corelib/serialization/qcborvalue.cpp | 14 |
-rw-r--r-- | tests/auto/corelib/serialization/qcborvalue/tst_qcborvalue.cpp | 36 |
2 files changed, 44 insertions, 6 deletions
diff --git a/src/corelib/serialization/qcborvalue.cpp b/src/corelib/serialization/qcborvalue.cpp
index 123d62168b..07150e5d8c 100644
--- a/src/corelib/serialization/qcborvalue.cpp
+++ b/src/corelib/serialization/qcborvalue.cpp
@@ -1190,16 +1190,18 @@ static int compareElementRecursive(const QCborContainerPrivate *c1, const Elemen
     if (!(e1.flags & Element::StringIsAscii) || !(e2.flags & Element::StringIsAscii)) {
         // Case 2: one of them is UTF-8 and the other is UTF-16, so lengths
-        // are NOT comparable. We need to convert to UTF-16 first...
+        // are NOT comparable. We need to convert to UTF-8 first...
         // (we can't use QUtf8::compareUtf8 because we need to compare lengths)
-        auto string = [](const Element &e, const ByteData *b) {
-            return e.flags & Element::StringIsUtf16 ? b->asQStringRaw() : b->toUtf8String();
+        auto string = [](const Element &e, const ByteData *b) -> QByteArray {
+            if (e.flags & Element::StringIsUtf16)
+                return b->asStringView().toUtf8();
+            return b->asByteArrayView(); // actually a QByteArray::fromRaw
         };
-        QString s1 = string(e1, b1);
-        QString s2 = string(e2, b2);
+        QByteArray s1 = string(e1, b1);
+        QByteArray s2 = string(e2, b2);
         if (s1.size() == s2.size())
-            return s1.compare(s2);
+            return memcmp(s1.constData(), s2.constData(), s1.size());
         return s1.size() < s2.size() ? -1 : 1;
     }
diff --git a/tests/auto/corelib/serialization/qcborvalue/tst_qcborvalue.cpp b/tests/auto/corelib/serialization/qcborvalue/tst_qcborvalue.cpp
index 37f1569bbc..2e06251ac4 100644
--- a/tests/auto/corelib/serialization/qcborvalue/tst_qcborvalue.cpp
+++ b/tests/auto/corelib/serialization/qcborvalue/tst_qcborvalue.cpp
@@ -16,6 +16,8 @@ Q_DECLARE_METATYPE(QCborKnownTags)
 Q_DECLARE_METATYPE(QCborValue)
 Q_DECLARE_METATYPE(QCborValue::EncodingOptions)
+using namespace Qt::StringLiterals;
+
 class tst_QCborValue : public QObject
 {
     Q_OBJECT
@@ -1895,6 +1897,40 @@ void tst_QCborValue::sorting()
     // which shows all doubles sorted after integrals
     QT_TEST_ALL_COMPARISON_OPS(vint2, vdouble1, Qt::strong_ordering::less);
     QVERIFY(vint2.toInteger() > vdouble1.toDouble());
+
+    // Add some non-US-ASCII strings. In the current implementation, QCborValue
+    // can store a string as either US-ASCII, UTF-8, or UTF-16, so let's exercise
+    // those comparisons.
+
+    // we don't have a QUtf8StringView constructor, so work around it
+    auto utf8string = [](QByteArray str) {
+        Q_ASSERT(str.size() < 24);
+        str.prepend(char(QCborValue::String) + str.size());
+        return QCborValue::fromCbor(str);
+    };
+    // 5 code units in UTF-8
+    QCborValue vs4_utf16(u"Mørk"_s);
+    QCborValue vs4_utf8 = utf8string("Mørk");
+    QT_TEST_ALL_COMPARISON_OPS(vs4_utf8, vs4_utf8, Qt::strong_ordering::equal);
+    QT_TEST_ALL_COMPARISON_OPS(vs4_utf16, vs4_utf16, Qt::strong_ordering::equal);
+    QT_TEST_ALL_COMPARISON_OPS(vs4_utf16, vs4_utf8, Qt::strong_ordering::equal);
+
+    // 5 code units in UTF-16
+    QCborValue vs5_utf16(u"Først"_s);
+    QCborValue vs5_utf8 = utf8string("Først");
+    QT_TEST_ALL_COMPARISON_OPS(vs5_utf8, vs5_utf8, Qt::strong_ordering::equal);
+    QT_TEST_ALL_COMPARISON_OPS(vs5_utf16, vs5_utf16, Qt::strong_ordering::equal);
+    QT_TEST_ALL_COMPARISON_OPS(vs5_utf16, vs5_utf8, Qt::strong_ordering::equal);
+
+    // sorted by UTF-8 length first, so "Mørk" < "World" < "Først" (!!)
+    QT_TEST_ALL_COMPARISON_OPS(vs3, vs4_utf8, Qt::strong_ordering::greater);
+    QT_TEST_ALL_COMPARISON_OPS(vs3, vs4_utf16, Qt::strong_ordering::greater);
+    QT_TEST_ALL_COMPARISON_OPS(vs3, vs5_utf8, Qt::strong_ordering::less);
+    QT_TEST_ALL_COMPARISON_OPS(vs3, vs5_utf16, Qt::strong_ordering::less);
+    QT_TEST_ALL_COMPARISON_OPS(vs4_utf8, vs5_utf8, Qt::strong_ordering::less);
+    QT_TEST_ALL_COMPARISON_OPS(vs4_utf8, vs5_utf16, Qt::strong_ordering::less);
+    QT_TEST_ALL_COMPARISON_OPS(vs4_utf16, vs5_utf8, Qt::strong_ordering::less);
+    QT_TEST_ALL_COMPARISON_OPS(vs4_utf16, vs5_utf16, Qt::strong_ordering::less);
 }
 void tst_QCborValue::comparison_data()