3 files changed, 105 insertions, 5 deletions
diff --git a/src/corelib/tools/qcollator_icu.cpp b/src/corelib/tools/qcollator_icu.cpp
index 407a493d25..23e88b5015 100644
--- a/src/corelib/tools/qcollator_icu.cpp
+++ b/src/corelib/tools/qcollator_icu.cpp
@@ -75,10 +75,17 @@ void QCollator::setCaseSensitivity(Qt::CaseSensitivity cs)
 {
     detach();
 
-    UColAttributeValue val = (cs == Qt::CaseSensitive) ? UCOL_UPPER_FIRST : UCOL_OFF;
+    // The strength attribute in ICU is rather badly documented. Basically UCOL_PRIMARY
+    // ignores differences between base characters and accented characters as well as case.
+    // So A and A-umlaut would compare equal.
+    // UCOL_SECONDARY ignores case differences. UCOL_TERTIARY is the default in most languages
+    // and does case sensitive comparison.
+    // UCOL_QUATERNARY is used as default in a few languages such as Japanese to take care of some
+    // additional differences in those languages.
+    UColAttributeValue val = (cs == Qt::CaseSensitive) ? UCOL_DEFAULT_STRENGTH : UCOL_SECONDARY;
 
     UErrorCode status = U_ZERO_ERROR;
-    ucol_setAttribute(d->collator, UCOL_CASE_FIRST, val, &status);
+    ucol_setAttribute(d->collator, UCOL_STRENGTH, val, &status);
     if (U_FAILURE(status))
         qWarning("ucol_setAttribute: Case First failed: %d", status);
 }
diff --git a/src/corelib/tools/qcollator_macx.cpp b/src/corelib/tools/qcollator_macx.cpp
index 8985cd4eba..877510489a 100644
--- a/src/corelib/tools/qcollator_macx.cpp
+++ b/src/corelib/tools/qcollator_macx.cpp
@@ -128,12 +128,15 @@ bool QCollator::ignorePunctuation() const
 int QCollator::compare(const QChar *s1, int len1, const QChar *s2, int len2) const
 {
     SInt32 result;
-    return UCCompareText(d->collator.collator,
+    Boolean equivalent;
+    UCCompareText(d->collator.collator,
                          reinterpret_cast<const UniChar *>(s1), len1,
                          reinterpret_cast<const UniChar *>(s2), len2,
-                         NULL,
+                         &equivalent,
                          &result);
-    return result;
+    if (equivalent)
+        return 0;
+    return result < 0 ? -1 : 1;
 }
 int QCollator::compare(const QString &str1, const QString &str2) const
 {
diff --git a/tests/auto/corelib/tools/qcollator/tst_qcollator.cpp b/tests/auto/corelib/tools/qcollator/tst_qcollator.cpp
index 3df8422a34..9ed27a8742 100644
--- a/tests/auto/corelib/tools/qcollator/tst_qcollator.cpp
+++ b/tests/auto/corelib/tools/qcollator/tst_qcollator.cpp
@@ -52,6 +52,9 @@ class tst_QCollator : public QObject
 
 private Q_SLOTS:
     void moveSemantics();
+
+    void compare_data();
+    void compare();
 };
 
 #ifdef Q_COMPILER_RVALUE_REFS
@@ -87,6 +90,93 @@ void tst_QCollator::moveSemantics()
 #endif
 }
 
+
+void tst_QCollator::compare_data()
+{
+    QTest::addColumn<QString>("locale");
+    QTest::addColumn<QString>("s1");
+    QTest::addColumn<QString>("s2");
+    QTest::addColumn<int>("result");
+    QTest::addColumn<int>("caseInsensitiveResult");
+
+    /*
+        A few tests below are disabled on the Mac (#ifndef Q_OS_MAC). It's unclear
+        why they fail, as it looks like the collator for the locale is created correctly.
+    */
+
+    /*
+        It's hard to test English, because it's treated differently
+        on different platforms. For example, on Linux, it uses the
+        iso14651_t1 template file, which happens to provide good
+        defaults for Swedish. Mac OS X seems to do a pure bytewise
+        comparison of Latin-1 values, although I'm not sure. So I
+        just test digits to make sure that it's not totally broken.
+    */
+    QTest::newRow("english1") << QString("en_US") << QString("5") << QString("4") << 1 << 1;
+    QTest::newRow("english2") << QString("en_US") << QString("4") << QString("6") << -1 << -1;
+    QTest::newRow("english3") << QString("en_US") << QString("5") << QString("6") << -1 << -1;
+    QTest::newRow("english4") << QString("en_US") << QString("a") << QString("b") << -1 << -1;
+    /*
+        In Swedish, a with ring above (E5) comes before a with
+        diaeresis (E4), which comes before o with diaeresis (F6);
+        all three come after z.
+    */
+    QTest::newRow("swedish1") << QString("sv_SE") << QString::fromLatin1("\xe5") << QString::fromLatin1("\xe4") << -1 << -1;
+    QTest::newRow("swedish2") << QString("sv_SE") << QString::fromLatin1("\xe4") << QString::fromLatin1("\xf6") << -1 << -1;
+    QTest::newRow("swedish3") << QString("sv_SE") << QString::fromLatin1("\xe5") << QString::fromLatin1("\xf6") << -1 << -1;
+#ifndef Q_OS_MAC
+    QTest::newRow("swedish4") << QString("sv_SE") << QString::fromLatin1("z") << QString::fromLatin1("\xe5") << -1 << -1;
+#endif
+
+    /*
+        In Norwegian, ae (E6) comes before o with stroke (D8), which
+        comes before a with ring above (E5).
+    */
+    QTest::newRow("norwegian1") << QString("no_NO") << QString::fromLatin1("\xe6") << QString::fromLatin1("\xd8") << -1 << -1;
+#ifndef Q_OS_MAC
+    QTest::newRow("norwegian2") << QString("no_NO") << QString::fromLatin1("\xd8") << QString::fromLatin1("\xe5") << -1 << -1;
+#endif
+    QTest::newRow("norwegian3") << QString("no_NO") << QString::fromLatin1("\xe6") << QString::fromLatin1("\xe5") << -1 << -1;
+
+    /*
+        In German, z comes *after* a with diaeresis (E4),
+        which comes before o with diaeresis (F6).
+    */
+    QTest::newRow("german1") << QString("de_DE") << QString::fromLatin1("a") << QString::fromLatin1("\xe4") << -1 << -1;
+    QTest::newRow("german2") << QString("de_DE") << QString::fromLatin1("b") << QString::fromLatin1("\xe4") << 1 << 1;
+    QTest::newRow("german3") << QString("de_DE") << QString::fromLatin1("z") << QString::fromLatin1("\xe4") << 1 << 1;
+    QTest::newRow("german4") << QString("de_DE") << QString::fromLatin1("\xe4") << QString::fromLatin1("\xf6") << -1 << -1;
+    QTest::newRow("german5") << QString("de_DE") << QString::fromLatin1("z") << QString::fromLatin1("\xf6") << 1 << 1;
+    QTest::newRow("german6") << QString("de_DE") << QString::fromLatin1("\xc0") << QString::fromLatin1("\xe0") << 1 << 0;
+    QTest::newRow("german7") << QString("de_DE") << QString::fromLatin1("\xd6") << QString::fromLatin1("\xf6") << 1 << 0;
+    QTest::newRow("german8") << QString("de_DE") << QString::fromLatin1("oe") << QString::fromLatin1("\xf6") << 1 << 1;
+    QTest::newRow("german9") << QString("de_DE") << QString("A") << QString("a") << 1 << 0;
+
+    /*
+        French sorting of e and e with acute accent (E9)
+    */
+    QTest::newRow("french1") << QString("fr_FR") << QString::fromLatin1("\xe9") << QString::fromLatin1("e") << 1 << 1;
+    QTest::newRow("french2") << QString("fr_FR") << QString::fromLatin1("\xe9t") << QString::fromLatin1("et") << 1 << 1;
+    QTest::newRow("french3") << QString("fr_FR") << QString::fromLatin1("\xe9") << QString::fromLatin1("d") << 1 << 1;
+    QTest::newRow("french4") << QString("fr_FR") << QString::fromLatin1("\xe9") << QString::fromLatin1("f") << -1 << -1;
+
+}
+
+
+void tst_QCollator::compare()
+{
+    QFETCH(QString, locale);
+    QFETCH(QString, s1);
+    QFETCH(QString, s2);
+    QFETCH(int, result);
+    QFETCH(int, caseInsensitiveResult);
+
+    QCollator collator(locale);
+    QCOMPARE(collator.compare(s1, s2), result);
+    collator.setCaseSensitivity(Qt::CaseInsensitive);
+    QCOMPARE(collator.compare(s1, s2), caseInsensitiveResult);
+}
+
 QTEST_APPLESS_MAIN(tst_QCollator)
 
 #include "tst_qcollator.moc"