diff options
-rw-r--r-- | src/corelib/tools/qcollator_icu.cpp | 11 | ||||
-rw-r--r-- | src/corelib/tools/qcollator_macx.cpp | 9 | ||||
-rw-r--r-- | tests/auto/corelib/tools/qcollator/tst_qcollator.cpp | 90 |
3 files changed, 105 insertions, 5 deletions
diff --git a/src/corelib/tools/qcollator_icu.cpp b/src/corelib/tools/qcollator_icu.cpp index 407a493d25..23e88b5015 100644 --- a/src/corelib/tools/qcollator_icu.cpp +++ b/src/corelib/tools/qcollator_icu.cpp @@ -75,10 +75,17 @@ void QCollator::setCaseSensitivity(Qt::CaseSensitivity cs) { detach(); - UColAttributeValue val = (cs == Qt::CaseSensitive) ? UCOL_UPPER_FIRST : UCOL_OFF; + // The strength attribute in ICU is rather badly documented. Basically UCOL_PRIMARY + // ignores differences between base characters and accented characters as well as case. + // So A and A-umlaut would compare equal. + // UCOL_SECONDARY ignores case differences. UCOL_TERTIARY is the default in most languages + // and does case sensitive comparison. + // UCOL_QUATERNARY is used as default in a few languages such as Japanese to take care of some + // additional differences in those languages. + UColAttributeValue val = (cs == Qt::CaseSensitive) ? UCOL_DEFAULT_STRENGTH : UCOL_SECONDARY; UErrorCode status = U_ZERO_ERROR; - ucol_setAttribute(d->collator, UCOL_CASE_FIRST, val, &status); + ucol_setAttribute(d->collator, UCOL_STRENGTH, val, &status); if (U_FAILURE(status)) qWarning("ucol_setAttribute: Case First failed: %d", status); } diff --git a/src/corelib/tools/qcollator_macx.cpp b/src/corelib/tools/qcollator_macx.cpp index 8985cd4eba..877510489a 100644 --- a/src/corelib/tools/qcollator_macx.cpp +++ b/src/corelib/tools/qcollator_macx.cpp @@ -128,12 +128,15 @@ bool QCollator::ignorePunctuation() const int QCollator::compare(const QChar *s1, int len1, const QChar *s2, int len2) const { SInt32 result; - return UCCompareText(d->collator.collator, + Boolean equivalent; + UCCompareText(d->collator.collator, reinterpret_cast<const UniChar *>(s1), len1, reinterpret_cast<const UniChar *>(s2), len2, - NULL, + &equivalent, &result); - return result; + if (equivalent) + return 0; + return result < 0 ? -1 : 1; } int QCollator::compare(const QString &str1, const QString &str2) const { diff --git a/tests/auto/corelib/tools/qcollator/tst_qcollator.cpp b/tests/auto/corelib/tools/qcollator/tst_qcollator.cpp index 3df8422a34..9ed27a8742 100644 --- a/tests/auto/corelib/tools/qcollator/tst_qcollator.cpp +++ b/tests/auto/corelib/tools/qcollator/tst_qcollator.cpp @@ -52,6 +52,9 @@ class tst_QCollator : public QObject private Q_SLOTS: void moveSemantics(); + + void compare_data(); + void compare(); }; #ifdef Q_COMPILER_RVALUE_REFS @@ -87,6 +90,93 @@ void tst_QCollator::moveSemantics() #endif } + +void tst_QCollator::compare_data() +{ + QTest::addColumn<QString>("locale"); + QTest::addColumn<QString>("s1"); + QTest::addColumn<QString>("s2"); + QTest::addColumn<int>("result"); + QTest::addColumn<int>("caseInsensitiveResult"); + + /* + A few tests below are commented out on the mac. It's unclear why they fail, + as it looks like the collator for the locale is created correctly. + */ + + /* + It's hard to test English, because it's treated differently + on different platforms. For example, on Linux, it uses the + iso14651_t1 template file, which happens to provide good + defaults for Swedish. Mac OS X seems to do a pure bytewise + comparison of Latin-1 values, although I'm not sure. So I + just test digits to make sure that it's not totally broken. + */ + QTest::newRow("english1") << QString("en_US") << QString("5") << QString("4") << 1 << 1; + QTest::newRow("english2") << QString("en_US") << QString("4") << QString("6") << -1 << -1; + QTest::newRow("english3") << QString("en_US") << QString("5") << QString("6") << -1 << -1; + QTest::newRow("english4") << QString("en_US") << QString("a") << QString("b") << -1 << -1; + /* + In Swedish, a with ring above (E5) comes before a with + diaresis (E4), which comes before o diaresis (F6), which + all come after z. + */ + QTest::newRow("swedish1") << QString("sv_SE") << QString::fromLatin1("\xe5") << QString::fromLatin1("\xe4") << -1 << -1; + QTest::newRow("swedish2") << QString("sv_SE") << QString::fromLatin1("\xe4") << QString::fromLatin1("\xf6") << -1 << -1; + QTest::newRow("swedish3") << QString("sv_SE") << QString::fromLatin1("\xe5") << QString::fromLatin1("\xf6") << -1 << -1; +#ifndef Q_OS_MAC + QTest::newRow("swedish4") << QString("sv_SE") << QString::fromLatin1("z") << QString::fromLatin1("\xe5") << -1 << -1; +#endif + + /* + In Norwegian, ae (E6) comes before o with stroke (D8), which + comes before a with ring above (E5). + */ + QTest::newRow("norwegian1") << QString("no_NO") << QString::fromLatin1("\xe6") << QString::fromLatin1("\xd8") << -1 << -1; +#ifndef Q_OS_MAC + QTest::newRow("norwegian2") << QString("no_NO") << QString::fromLatin1("\xd8") << QString::fromLatin1("\xe5") << -1 << -1; +#endif + QTest::newRow("norwegian3") << QString("no_NO") << QString::fromLatin1("\xe6") << QString::fromLatin1("\xe5") << -1 << -1; + + /* + In German, z comes *after* a with diaresis (E4), + which comes before o diaresis (F6). + */ + QTest::newRow("german1") << QString("de_DE") << QString::fromLatin1("a") << QString::fromLatin1("\xe4") << -1 << -1; + QTest::newRow("german2") << QString("de_DE") << QString::fromLatin1("b") << QString::fromLatin1("\xe4") << 1 << 1; + QTest::newRow("german3") << QString("de_DE") << QString::fromLatin1("z") << QString::fromLatin1("\xe4") << 1 << 1; + QTest::newRow("german4") << QString("de_DE") << QString::fromLatin1("\xe4") << QString::fromLatin1("\xf6") << -1 << -1; + QTest::newRow("german5") << QString("de_DE") << QString::fromLatin1("z") << QString::fromLatin1("\xf6") << 1 << 1; + QTest::newRow("german6") << QString("de_DE") << QString::fromLatin1("\xc0") << QString::fromLatin1("\xe0") << 1 << 0; + QTest::newRow("german7") << QString("de_DE") << QString::fromLatin1("\xd6") << QString::fromLatin1("\xf6") << 1 << 0; + QTest::newRow("german8") << QString("de_DE") << QString::fromLatin1("oe") << QString::fromLatin1("\xf6") << 1 << 1; + QTest::newRow("german9") << QString("de_DE") << QString("A") << QString("a") << 1 << 0; + + /* + French sorting of e and e with accent + */ + QTest::newRow("french1") << QString("fr_FR") << QString::fromLatin1("\xe9") << QString::fromLatin1("e") << 1 << 1; + QTest::newRow("french2") << QString("fr_FR") << QString::fromLatin1("\xe9t") << QString::fromLatin1("et") << 1 << 1; + QTest::newRow("french3") << QString("fr_FR") << QString::fromLatin1("\xe9") << QString::fromLatin1("d") << 1 << 1; + QTest::newRow("french4") << QString("fr_FR") << QString::fromLatin1("\xe9") << QString::fromLatin1("f") << -1 << -1; + +} + + +void tst_QCollator::compare() +{ + QFETCH(QString, locale); + QFETCH(QString, s1); + QFETCH(QString, s2); + QFETCH(int, result); + QFETCH(int, caseInsensitiveResult); + + QCollator collator(locale); + QCOMPARE(collator.compare(s1, s2), result); + collator.setCaseSensitivity(Qt::CaseInsensitive); + QCOMPARE(collator.compare(s1, s2), caseInsensitiveResult); +} + QTEST_APPLESS_MAIN(tst_QCollator) #include "tst_qcollator.moc" |