summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSimon Hausmann <simon.hausmann@qt.io>2018-06-29 16:09:16 +0200
committerSimon Hausmann <simon.hausmann@qt.io>2018-07-06 20:49:29 +0000
commit42cc42acae8b4b5d8aa79e0d4f079b4322588ce7 (patch)
tree39f7ed2684689fd32432c1a38580436910184c80
parent13337096f9c2090b97e839102cefaacfaafdd870 (diff)
Fix QString::localeAwareCompare with composed/decomposed strings on macOS
Similar to commit cd64a96b31f57e522ab8d29c8357acf384012ebe we also need to normalize the strings before comparison in order to be compliant with the ECMAScript test suite. This patch also adds the remaining test cases from built-ins/String/prototype/localeCompare/15.5.4.9_CE. Since the same tests are also failing with strcoll/qt_compare_strings, this simplifies the code to always normalize except when using ICU (which gets it right by default). Change-Id: I16b32da7fc70dc7e6725c49f66fe9941d0bf3a47 Reviewed-by: Thiago Macieira <thiago.macieira@intel.com> Reviewed-by: Konstantin Ritt <ritt.ks@gmail.com>
-rw-r--r--src/corelib/tools/qstring.cpp21
-rw-r--r--tests/auto/corelib/tools/qstring/tst_qstring.cpp37
2 files changed, 44 insertions, 14 deletions
diff --git a/src/corelib/tools/qstring.cpp b/src/corelib/tools/qstring.cpp
index 650c3bdb32..da4066a1e3 100644
--- a/src/corelib/tools/qstring.cpp
+++ b/src/corelib/tools/qstring.cpp
@@ -6392,10 +6392,11 @@ int QString::localeAwareCompare_helper(const QChar *data1, int length1,
return qt_compare_strings(QStringView(data1, length1), QStringView(data2, length2),
Qt::CaseSensitive);
+#if !QT_CONFIG(icu)
+ const QString lhs = QString::fromRawData(data1, length1).normalized(QString::NormalizationForm_C);
+ const QString rhs = QString::fromRawData(data2, length2).normalized(QString::NormalizationForm_C);
+#endif
#if defined(Q_OS_WIN)
- QString lhs = QString::fromRawData(data1, length1).normalized(QString::NormalizationForm_C);
- QString rhs = QString::fromRawData(data2, length2).normalized(QString::NormalizationForm_C);
-
int res = CompareStringEx(LOCALE_NAME_USER_DEFAULT, 0, (LPWSTR)lhs.constData(), lhs.length(), (LPWSTR)rhs.constData(), rhs.length(), NULL, NULL, 0);
switch (res) {
@@ -6406,17 +6407,17 @@ int QString::localeAwareCompare_helper(const QChar *data1, int length1,
default:
return 0;
}
-#elif defined (Q_OS_MAC)
+#elif defined (Q_OS_DARWIN)
// Use CFStringCompare for comparing strings on Mac. This makes Qt order
// strings the same way as native applications do, and also respects
// the "Order for sorted lists" setting in the International preferences
// panel.
const CFStringRef thisString =
CFStringCreateWithCharactersNoCopy(kCFAllocatorDefault,
- reinterpret_cast<const UniChar *>(data1), length1, kCFAllocatorNull);
+ reinterpret_cast<const UniChar *>(lhs.constData()), lhs.length(), kCFAllocatorNull);
const CFStringRef otherString =
CFStringCreateWithCharactersNoCopy(kCFAllocatorDefault,
- reinterpret_cast<const UniChar *>(data2), length2, kCFAllocatorNull);
+ reinterpret_cast<const UniChar *>(rhs.constData()), rhs.length(), kCFAllocatorNull);
const int result = CFStringCompare(thisString, otherString, kCFCompareLocalized);
CFRelease(thisString);
@@ -6428,14 +6429,12 @@ int QString::localeAwareCompare_helper(const QChar *data1, int length1,
return defaultCollator()->localData().compare(data1, length1, data2, length2);
#elif defined(Q_OS_UNIX)
// declared in <string.h>
- int delta = strcoll(toLocal8Bit_helper(data1, length1).constData(), toLocal8Bit_helper(data2, length2).constData());
+ int delta = strcoll(lhs.toLocal8Bit().constData(), rhs.toLocal8Bit().constData());
if (delta == 0)
- delta = qt_compare_strings(QStringView(data1, length1), QStringView(data2, length2),
- Qt::CaseSensitive);
+ delta = qt_compare_strings(lhs, rhs, Qt::CaseSensitive);
return delta;
#else
- return qt_compare_strings(QStringView(data1, length1), QStringView(data2, length2),
- Qt::CaseSensitive);
+ return qt_compare_strings(lhs, rhs, Qt::CaseSensitive);
#endif
}
diff --git a/tests/auto/corelib/tools/qstring/tst_qstring.cpp b/tests/auto/corelib/tools/qstring/tst_qstring.cpp
index cdabd51d43..e6dfe81ca9 100644
--- a/tests/auto/corelib/tools/qstring/tst_qstring.cpp
+++ b/tests/auto/corelib/tools/qstring/tst_qstring.cpp
@@ -5517,9 +5517,40 @@ void tst_QString::localeAwareCompare_data()
// Compare decomposed and composed form
{
- // From ES6 test262 test suite (built-ins/String/prototype/localeCompare/15.5.4.9_CE.js), addressing from Unicode 5.0, chapter 3.12. Boils
- // down to this one-liner: console.log("\u1111\u1171\u11B6".localeCompare("\ud4db")
- QTest::newRow("normalize") << QString() << QString::fromUtf8("\xED\x93\x9B") << QString::fromUtf8("\xE1\x84\x91\xE1\x85\xB1\xE1\x86\xB6") << 0;
+ // From ES6 test262 test suite (built-ins/String/prototype/localeCompare/15.5.4.9_CE.js). The test cases boil down to code like this:
+ // console.log("\u1111\u1171\u11B6".localeCompare("\ud4db")
+
+ // example from Unicode 5.0, section 3.7, definition D70
+ QTest::newRow("normalize1") << QString() << QString::fromUtf8("o\xCC\x88") << QString::fromUtf8("\xC3\xB6") << 0;
+ // examples from Unicode 5.0, chapter 3.11
+ QTest::newRow("normalize2") << QString() << QString::fromUtf8("\xC3\xA4\xCC\xA3") << QString::fromUtf8("a\xCC\xA3\xCC\x88") << 0;
+ QTest::newRow("normalize3") << QString() << QString::fromUtf8("a\xCC\x88\xCC\xA3") << QString::fromUtf8("a\xCC\xA3\xCC\x88") << 0;
+ QTest::newRow("normalize4") << QString() << QString::fromUtf8("\xE1\xBA\xA1\xCC\x88") << QString::fromUtf8("a\xCC\xA3\xCC\x88") << 0;
+ QTest::newRow("normalize5") << QString() << QString::fromUtf8("\xC3\xA4\xCC\x86") << QString::fromUtf8("a\xCC\x88\xCC\x86") << 0;
+ QTest::newRow("normalize6") << QString() << QString::fromUtf8("\xC4\x83\xCC\x88") << QString::fromUtf8("a\xCC\x86\xCC\x88") << 0;
+ // example from Unicode 5.0, chapter 3.12
+ QTest::newRow("normalize7") << QString() << QString::fromUtf8("\xE1\x84\x91\xE1\x85\xB1\xE1\x86\xB6") << QString::fromUtf8("\xED\x93\x9B") << 0;
+ // examples from UTS 10, Unicode Collation Algorithm
+ QTest::newRow("normalize8") << QString() << QString::fromUtf8("\xE2\x84\xAB") << QString::fromUtf8("\xC3\x85") << 0;
+ QTest::newRow("normalize9") << QString() << QString::fromUtf8("\xE2\x84\xAB") << QString::fromUtf8("A\xCC\x8A") << 0;
+ QTest::newRow("normalize10") << QString() << QString::fromUtf8("x\xCC\x9B\xCC\xA3") << QString::fromUtf8("x\xCC\xA3\xCC\x9B") << 0;
+ QTest::newRow("normalize11") << QString() << QString::fromUtf8("\xE1\xBB\xB1") << QString::fromUtf8("\xE1\xBB\xA5\xCC\x9B") << 0;
+ QTest::newRow("normalize12") << QString() << QString::fromUtf8("\xE1\xBB\xB1") << QString::fromUtf8("u\xCC\x9B\xCC\xA3") << 0;
+ QTest::newRow("normalize13") << QString() << QString::fromUtf8("\xE1\xBB\xB1") << QString::fromUtf8("\xC6\xB0\xCC\xA3") << 0;
+ QTest::newRow("normalize14") << QString() << QString::fromUtf8("\xE1\xBB\xB1") << QString::fromUtf8("u\xCC\xA3\xCC\x9B") << 0;
+ // examples from UAX 15, Unicode Normalization Forms
+ QTest::newRow("normalize15") << QString() << QString::fromUtf8("\xC3\x87") << QString::fromUtf8("C\xCC\xA7") << 0;
+ QTest::newRow("normalize16") << QString() << QString::fromUtf8("q\xCC\x87\xCC\xA3") << QString::fromUtf8("q\xCC\xA3\xCC\x87") << 0;
+ QTest::newRow("normalize17") << QString() << QString::fromUtf8("\xEA\xB0\x80") << QString::fromUtf8("\xE1\x84\x80\xE1\x85\xA1") << 0;
+ QTest::newRow("normalize18") << QString() << QString::fromUtf8("\xE2\x84\xAB") << QString::fromUtf8("A\xCC\x8A") << 0;
+ QTest::newRow("normalize19") << QString() << QString::fromUtf8("\xE2\x84\xA6") << QString::fromUtf8("\xCE\xA9") << 0;
+ QTest::newRow("normalize20") << QString() << QString::fromUtf8("\xC3\x85") << QString::fromUtf8("A\xCC\x8A") << 0;
+ QTest::newRow("normalize21") << QString() << QString::fromUtf8("\xC3\xB4") << QString::fromUtf8("o\xCC\x82") << 0;
+ QTest::newRow("normalize22") << QString() << QString::fromUtf8("\xE1\xB9\xA9") << QString::fromUtf8("s\xCC\xA3\xCC\x87") << 0;
+ QTest::newRow("normalize23") << QString() << QString::fromUtf8("\xE1\xB8\x8B\xCC\xA3") << QString::fromUtf8("d\xCC\xA3\xCC\x87") << 0;
+ QTest::newRow("normalize24") << QString() << QString::fromUtf8("\xE1\xB8\x8B\xCC\xA3") << QString::fromUtf8("\xE1\xB8\x8D\xCC\x87") << 0;
+ QTest::newRow("normalize25") << QString() << QString::fromUtf8("q\xCC\x87\xCC\xA3") << QString::fromUtf8("q\xCC\xA3\xCC\x87") << 0;
+
}
#if !defined(Q_OS_WIN)