summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLars Knoll <lars.knoll@nokia.com>2012-06-01 23:09:31 +0200
committerQt by Nokia <qt-info@nokia.com>2012-06-10 19:08:56 +0200
commit1e9be1327bed93542816abb1aef6bcee25216ce0 (patch)
treec09fd0eb6674f76a4c3dc6232291c81df5c1b909
parent97e177e58d195f78ac103b3dd0b8ecedb1e07d4c (diff)
Add proper collation support to Qt
QString::localeAwareCompare() has always been a broken way to support collation. The current implementation is not even thread safe. This adds a proper collation class that fixes the problems and finally allows Qt to sort properly according to locale rules. The class is private for now, but is intended to be made public with 5.1. Change-Id: Idb4e75ff68a398c9813af622af884a90898d2be9 Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
-rw-r--r--src/corelib/tools/qcollator.cpp585
-rw-r--r--src/corelib/tools/qcollator_p.h124
-rw-r--r--src/corelib/tools/qstring.h1
-rw-r--r--src/corelib/tools/tools.pri2
4 files changed, 712 insertions, 0 deletions
diff --git a/src/corelib/tools/qcollator.cpp b/src/corelib/tools/qcollator.cpp
new file mode 100644
index 0000000000..58d9a0b920
--- /dev/null
+++ b/src/corelib/tools/qcollator.cpp
@@ -0,0 +1,585 @@
+/****************************************************************************
+**
+** Copyright (C) 2012 Nokia Corporation and/or its subsidiary(-ies).
+** Contact: http://www.qt-project.org/
+**
+** This file is part of the QtCore module of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** GNU Lesser General Public License Usage
+** This file may be used under the terms of the GNU Lesser General Public
+** License version 2.1 as published by the Free Software Foundation and
+** appearing in the file LICENSE.LGPL included in the packaging of this
+** file. Please review the following information to ensure the GNU Lesser
+** General Public License version 2.1 requirements will be met:
+** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain additional
+** rights. These rights are described in the Nokia Qt LGPL Exception
+** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
+**
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU General
+** Public License version 3.0 as published by the Free Software Foundation
+** and appearing in the file LICENSE.GPL included in the packaging of this
+** file. Please review the following information to ensure the GNU General
+** Public License version 3.0 requirements will be met:
+** http://www.gnu.org/copyleft/gpl.html.
+**
+** Other Usage
+** Alternatively, this file may be used in accordance with the terms and
+** conditions contained in a signed written agreement between you and Nokia.
+**
+**
+**
+**
+**
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#include "qcollator_p.h"
+#include "qstringlist.h"
+#include "qstring.h"
+
+#ifdef QT_USE_ICU
+#include <unicode/utypes.h>
+#include <unicode/ucol.h>
+#include <unicode/ustring.h>
+#include <unicode/ures.h>
+#endif
+
+#include "qdebug.h"
+
+QT_BEGIN_NAMESPACE
+
+
+/*
+    Reference-counted data shared between copies of a QCollator: the locale,
+    the selected collation, the native collator handle (an ICU UCollator when
+    QT_USE_ICU is defined, otherwise an unused pointer) and the cached list of
+    index characters.
+*/
+class QCollatorPrivate
+{
+public:
+    // Starts with a reference count of one, the default collation and no
+    // native collator.
+    QCollatorPrivate()
+        : collation(QCollator::Default),
+          collator(0)
+    {
+        ref.store(1);
+    }
+
+    ~QCollatorPrivate() { clear(); }
+
+    // Closes the native collator (if there is one) and forgets the cached
+    // index characters. Locale and collation are left untouched.
+    void clear()
+    {
+#ifdef QT_USE_ICU
+        if (collator != 0)
+            ucol_close(collator);
+#endif
+        collator = 0;
+        indexCharacters.clear();
+    }
+
+    QAtomicInt ref;
+    QLocale locale;
+    QCollator::Collation collation;
+
+#ifdef QT_USE_ICU
+    UCollator *collator;
+#else
+    void *collator;
+#endif
+
+    QStringList indexCharacters;
+
+private:
+    Q_DISABLE_COPY(QCollatorPrivate)
+};
+
+static const int collationStringsCount = 13; // number of entries in collationStrings
+static const char * const collationStrings[collationStringsCount] = { // indexed by (int)QCollator::Collation; entry order follows that enum
+ "default", // QCollator::Default
+ "big5han", // QCollator::Big5Han
+ "dictionary", // QCollator::Dictionary
+ "direct", // QCollator::Direct
+ "gb2312han", // QCollator::GB2312Han
+ "phonebook", // QCollator::PhoneBook
+ "pinyin", // QCollator::Pinyin
+ "phonetic", // QCollator::Phonetic
+ "reformed", // QCollator::Reformed
+ "standard", // QCollator::Standard
+ "stroke", // QCollator::Stroke
+ "traditional", // QCollator::Traditional
+ "unihan" // QCollator::UniHan
+};
+
+/*!
+ \class QCollator
+ \brief The QCollator class compares strings according to a localized collation algorithm.
+
+ \reentrant
+ \ingroup i18n
+ \ingroup string-processing
+ \ingroup shared
+
+ QCollator is initialized with a QLocale and an optional collation strategy. It tries to
+ initialize the collator with the specified values. The collator can then be used to compare
+ and sort strings in a locale dependent fashion.
+
+ A QCollator object can be used together with template based sorting algorithms such as qSort
+ to sort a list of QStrings.
+
+ In addition to the locale and collation strategy, several optional flags can be set that influence
+ the result of the collation.
+*/
+
+/*!
+    Constructs a QCollator for \a locale that uses the collation algorithm
+    \a collation.
+
+    When \a collation is omitted, QCollator::Default (the default algorithm
+    of the locale) is used; when \a locale is omitted, a default constructed
+    QLocale is used. A \a collation value outside the range of the
+    QCollator::Collation enum is ignored and the default algorithm is kept.
+
+    \sa setLocale(), setCollation()
+ */
+QCollator::QCollator(const QLocale &locale, QCollator::Collation collation)
+    : d(new QCollatorPrivate)
+{
+    const int requested = (int)collation;
+    const bool knownCollation = requested >= 0 && requested < collationStringsCount;
+
+    d->locale = locale;
+    if (knownCollation)
+        d->collation = collation;
+
+    init();
+}
+
+/*!
+    Constructs a collator that shares its data with \a other. Only the
+    reference count of the shared data is increased.
+ */
+QCollator::QCollator(const QCollator &other)
+    : d(other.d)
+{
+    other.d->ref.ref();
+}
+
+/*!
+    Destroys the collator. The shared data is deleted when this was the
+    last collator referencing it.
+ */
+QCollator::~QCollator()
+{
+    const bool stillReferenced = d->ref.deref();
+    if (stillReferenced)
+        return;
+    delete d;
+}
+
+/*!
+    Makes this collator share the data of \a other and returns a reference
+    to this collator. Assigning a collator to itself does nothing.
+ */
+QCollator &QCollator::operator=(const QCollator &other)
+{
+    if (this == &other)
+        return *this;
+
+    // Let go of the data held so far, deleting it if nobody else uses it.
+    QCollatorPrivate *previous = d;
+    if (!previous->ref.deref())
+        delete previous;
+
+    d = other.d;
+    d->ref.ref();
+    return *this;
+}
+
+
+/*!
+    \internal
+
+    Creates the native collator for the current locale and collation.
+
+    With ICU the collator is opened for the locale's BCP 47 name (with '-'
+    replaced by '_') followed by "@collation=" and the keyword of the current
+    collation, and normalization is switched on. A warning is printed when
+    the collator cannot be opened or normalization cannot be enabled. Without
+    ICU nothing is created.
+ */
+void QCollator::init()
+{
+    Q_ASSERT((int)d->collation >= 0 && (int)d->collation < collationStringsCount);
+#ifdef QT_USE_ICU
+    const char *collationString = collationStrings[(int)d->collation];
+
+    QString localeId = d->locale.bcp47Name();
+    localeId.replace(QLatin1Char('-'), QLatin1Char('_'));
+    const QByteArray name =
+        (localeId + QLatin1String("@collation=") + QLatin1String(collationString)).toLatin1();
+
+    UErrorCode status = U_ZERO_ERROR;
+    d->collator = ucol_open(name.constData(), &status);
+    if (U_FAILURE(status)) {
+        qWarning("Could not create collator: %d", status);
+        // There is no usable collator to configure.
+        return;
+    }
+
+    // enable normalization by default; start from a clean status so that a
+    // failure reported below really belongs to this call
+    status = U_ZERO_ERROR;
+    ucol_setAttribute(d->collator, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
+    if (U_FAILURE(status))
+        qWarning("ucol_setAttribute: Normalization mode failed: %d", status);
+#endif
+}
+
+/*!
+    \internal
+
+    Gives this collator a private copy of its data when the data is shared
+    with other collators; does nothing when the data is not shared.
+
+    The copy takes over the locale, the collation and the cached index
+    characters. With ICU the native collator is cloned as well, so that the
+    copy is immediately usable and keeps the attributes (case preference,
+    numeric mode, punctuation handling) that were set on the shared collator.
+ */
+void QCollator::detach()
+{
+    if (d->ref.load() == 1)
+        return;
+
+    // The constructor already sets the reference count to one.
+    QCollatorPrivate *x = new QCollatorPrivate;
+    x->locale = d->locale;
+    x->collation = d->collation;
+    // The index characters only depend on locale and collation.
+    x->indexCharacters = d->indexCharacters;
+
+#ifdef QT_USE_ICU
+    if (d->collator) {
+        UErrorCode status = U_ZERO_ERROR;
+        // No stack buffer is supplied, so ICU allocates the clone itself.
+        int32_t bufferSize = U_COL_SAFECLONE_BUFFERSIZE;
+        x->collator = ucol_safeClone(d->collator, 0, &bufferSize, &status);
+        if (U_FAILURE(status))
+            qWarning("Could not clone collator: %d", status);
+    }
+#endif
+
+    if (!d->ref.deref())
+        delete d;
+    d = x;
+}
+
+
+/*!
+    Sets the locale of the collator to \a locale and recreates the native
+    collator for it. The cached index characters are discarded.
+ */
+void QCollator::setLocale(const QLocale &locale)
+{
+    const bool shared = d->ref.load() != 1;
+    if (shared)
+        detach();
+
+    d->clear();
+    d->locale = locale;
+    init();
+}
+
+/*!
+    Returns the locale this collator was set up with.
+
+    \sa setLocale()
+ */
+QLocale QCollator::locale() const
+{
+    const QCollatorPrivate *data = d;
+    return data->locale;
+}
+
+/*!
+ \enum QCollator::Collation
+
+ This enum can be used to specify an alternate collation algorithm to be used instead
+ of the default algorithm for the locale.
+
+ Possible values are:
+
+ \value Default Use the default algorithm for the locale
+ \value Big5Han
+ \value Dictionary
+ \value Direct
+ \value GB2312Han
+ \value PhoneBook
+ \value Pinyin
+ \value Phonetic
+ \value Reformed
+ \value Standard
+ \value Stroke
+ \value Traditional
+ \value UniHan
+*/
+
+/*!
+    Sets the collation algorithm to \a collation and recreates the native
+    collator for it. The cached index characters are discarded.
+
+    A value outside the range of the QCollator::Collation enum is ignored.
+
+    \sa QCollator::Collation
+ */
+void QCollator::setCollation(QCollator::Collation collation)
+{
+    const int index = (int)collation;
+    const bool knownCollation = index >= 0 && index < collationStringsCount;
+    if (!knownCollation)
+        return;
+
+    const bool shared = d->ref.load() != 1;
+    if (shared)
+        detach();
+
+    d->clear();
+    d->collation = collation;
+    init();
+}
+/*!
+    Returns the collation algorithm this collator currently uses.
+
+    \sa setCollation(), QCollator::Collation
+ */
+QCollator::Collation QCollator::collation() const
+{
+    const QCollatorPrivate *data = d;
+    return data->collation;
+}
+
+/*!
+    Returns an identifier for this collation object.
+
+    The identifier is the BCP 47 name of the locale, followed by
+    "@collation=" and the keyword of the collation when the collation is not
+    QCollator::Default. Every '-' is replaced by '_'.
+
+    This method is helpful to save and restore defined collation objects.
+
+    \sa fromIdentifier()
+ */
+QString QCollator::identifier() const
+{
+    QString result = d->locale.bcp47Name();
+
+    const bool customCollation = d->collation != QCollator::Default;
+    if (customCollation) {
+        result += QLatin1String("@collation=");
+        result += QLatin1String(collationStrings[d->collation]);
+    }
+
+    // ICU expects '_' rather than '-' as separator
+    result.replace(QLatin1Char('-'), QLatin1Char('_'));
+    return result;
+}
+
+/*!
+    Creates a QCollator from \a identifier and returns it.
+
+    The part of \a identifier in front of the first '@' (or all of it when
+    there is no '@') names the locale. When the '@' starts the keyword
+    "@collation=", the text after the keyword selects the collation. An
+    unknown collation name, or a suffix that is not the collation keyword,
+    results in QCollator::Default.
+
+    \sa identifier()
+ */
+QCollator QCollator::fromIdentifier(const QString &identifier)
+{
+    static const char keyword[] = "@collation=";
+    const int keywordLength = int(sizeof(keyword)) - 1; // without the terminating '\0'
+
+    QString localeString = identifier;
+    QString collationString;
+    const int at = identifier.indexOf(QLatin1Char('@'));
+    if (at >= 0) {
+        localeString = identifier.left(at);
+        // Only treat the suffix as a collation name when it really starts
+        // with the collation keyword, instead of blindly skipping as many
+        // characters as the keyword is long.
+        const QString suffix = identifier.mid(at);
+        if (suffix.startsWith(QLatin1String(keyword)))
+            collationString = suffix.mid(keywordLength);
+    }
+
+    QLocale locale(localeString);
+    Collation collation = Default;
+    if (!collationString.isEmpty()) {
+        for (int i = 0; i < collationStringsCount; ++i) {
+            if (QLatin1String(collationStrings[i]) == collationString) {
+                collation = Collation(i);
+                break;
+            }
+        }
+    }
+    return QCollator(locale, collation);
+}
+
+/*!
+ \enum QCollator::CasePreference
+
+ This enum can be used to tailor the case preference during collation.
+
+ \value CasePreferenceOff No case preference, use what is the standard for the locale
+ \value CasePreferenceUpper Sort upper case characters before lower case
+ \value CasePreferenceLower Sort lower case characters before upper case
+*/
+
+/*!
+    Sets the case preference of the collator to \a c.
+
+    Without ICU support this function has no effect on the sort order.
+
+    \sa QCollator::CasePreference
+ */
+void QCollator::setCasePreference(CasePreference c)
+{
+    if (d->ref.load() != 1)
+        detach();
+
+#ifdef QT_USE_ICU
+    // Map the Qt enum onto the matching ICU attribute value; anything that
+    // is neither "upper" nor "lower" switches the preference off.
+    UColAttributeValue caseFirst;
+    switch (c) {
+    case QCollator::CasePreferenceUpper:
+        caseFirst = UCOL_UPPER_FIRST;
+        break;
+    case QCollator::CasePreferenceLower:
+        caseFirst = UCOL_LOWER_FIRST;
+        break;
+    default:
+        caseFirst = UCOL_OFF;
+        break;
+    }
+
+    UErrorCode status = U_ZERO_ERROR;
+    ucol_setAttribute(d->collator, UCOL_CASE_FIRST, caseFirst, &status);
+    if (U_FAILURE(status))
+        qWarning("ucol_setAttribute: Case First failed: %d", status);
+#else
+    Q_UNUSED(c);
+#endif
+}
+
+/*!
+    Returns the case preference of the collator.
+
+    QCollator::CasePreferenceOff is returned when neither upper case first
+    nor lower case first is active, and always when Qt is built without ICU.
+
+    \sa setCasePreference(), QCollator::CasePreference
+ */
+QCollator::CasePreference QCollator::casePreference() const
+{
+#ifdef QT_USE_ICU
+    UErrorCode status = U_ZERO_ERROR;
+    const UColAttributeValue caseFirst = ucol_getAttribute(d->collator, UCOL_CASE_FIRST, &status);
+    if (caseFirst == UCOL_UPPER_FIRST)
+        return QCollator::CasePreferenceUpper;
+    if (caseFirst == UCOL_LOWER_FIRST)
+        return QCollator::CasePreferenceLower;
+#endif
+    return QCollator::CasePreferenceOff;
+}
+
+/*!
+    Enables numeric sorting mode when \a on is true and disables it otherwise.
+
+    Numeric sorting orders sequences of digits by their numeric value, so
+    that e.g. 100 sorts after 99.
+
+    By default this mode is off. Without ICU support this function has no
+    effect.
+
+    \sa numericMode()
+ */
+void QCollator::setNumericMode(bool on)
+{
+    if (d->ref.load() != 1)
+        detach();
+
+#ifdef QT_USE_ICU
+    const UColAttributeValue numeric = on ? UCOL_ON : UCOL_OFF;
+    UErrorCode status = U_ZERO_ERROR;
+    ucol_setAttribute(d->collator, UCOL_NUMERIC_COLLATION, numeric, &status);
+    if (U_FAILURE(status))
+        qWarning("ucol_setAttribute: numeric collation failed: %d", status);
+#else
+    Q_UNUSED(on);
+#endif
+}
+
+/*!
+    Returns true if numeric sorting is enabled, false otherwise. Always
+    returns false when Qt is built without ICU.
+
+    \sa setNumericMode()
+ */
+bool QCollator::numericMode() const
+{
+#ifdef QT_USE_ICU
+    // The status has to start out as "no error": ICU functions return
+    // without doing anything when the status passed in signals a failure.
+    UErrorCode status = U_ZERO_ERROR;
+    if (ucol_getAttribute(d->collator, UCOL_NUMERIC_COLLATION, &status) == UCOL_ON)
+        return true;
+#endif
+    return false;
+}
+
+/*!
+    If \a on is true, punctuation characters and symbols are ignored when
+    determining sort order; if it is false they are taken into account.
+
+    The default is locale dependent. Without ICU support this function has
+    no effect.
+
+    \sa ignorePunctuation()
+ */
+void QCollator::setIgnorePunctuation(bool on)
+{
+    if (d->ref.load() != 1)
+        detach();
+
+#ifdef QT_USE_ICU
+    // The status has to start out as "no error": ICU functions return
+    // without doing anything when the status passed in signals a failure,
+    // and the check below would otherwise read an indeterminate value.
+    UErrorCode status = U_ZERO_ERROR;
+    ucol_setAttribute(d->collator, UCOL_ALTERNATE_HANDLING, on ? UCOL_SHIFTED : UCOL_NON_IGNORABLE, &status);
+    if (U_FAILURE(status))
+        qWarning("ucol_setAttribute: Alternate handling failed: %d", status);
+#else
+    Q_UNUSED(on);
+#endif
+}
+
+/*!
+    Returns true if punctuation characters and symbols are ignored when
+    determining sort order. Always returns false when Qt is built without
+    ICU.
+
+    \sa setIgnorePunctuation()
+ */
+bool QCollator::ignorePunctuation() const
+{
+#ifdef QT_USE_ICU
+    // The status has to start out as "no error": ICU functions return
+    // without doing anything when the status passed in signals a failure.
+    UErrorCode status = U_ZERO_ERROR;
+    if (ucol_getAttribute(d->collator, UCOL_ALTERNATE_HANDLING, &status) == UCOL_SHIFTED)
+        return true;
+#endif
+    return false;
+}
+
+/*!
+    Compares \a s1 with \a s2 by forwarding their character data and sizes
+    to the QChar based overload, and returns that overload's result.
+ */
+int QCollator::compare(const QString &s1, const QString &s2) const
+{
+    const QChar *lhs = s1.constData();
+    const QChar *rhs = s2.constData();
+    return compare(lhs, s1.size(), rhs, s2.size());
+}
+
+/*!
+    \overload
+
+    Compares \a s1 with \a s2 by forwarding their character data and sizes
+    to the QChar based overload, and returns that overload's result.
+ */
+int QCollator::compare(const QStringRef &s1, const QStringRef &s2) const
+{
+    const QChar *lhs = s1.constData();
+    const QChar *rhs = s2.constData();
+    return compare(lhs, s1.size(), rhs, s2.size());
+}
+
+/*!
+    \overload
+
+    Compares \a s1 with \a s2. \a len1 and \a len2 specify the length of the
+    QChar arrays pointed to by \a s1 and \a s2.
+
+    Returns -1, 0 or 1 depending on whether \a s1 is smaller, equal or larger
+    than \a s2.
+
+    With ICU the comparison is done by the native collator. Without ICU, or
+    when no native collator could be created, a case insensitive string
+    comparison is used instead.
+ */
+int QCollator::compare(const QChar *s1, int len1, const QChar *s2, int len2) const
+{
+#ifdef QT_USE_ICU
+    // The collator is null when creating it failed; do not hand that to ICU.
+    if (d->collator) {
+        const UCollationResult result =
+            ucol_strcoll(d->collator, (const UChar *)s1, len1, (const UChar *)s2, len2);
+        return result;
+    }
+#endif
+    // The helper only promises the sign of its result; reduce it to the
+    // documented -1, 0 or 1.
+    const int diff = QString::compare_helper(s1, len1, s2, len2, Qt::CaseInsensitive);
+    if (diff < 0)
+        return -1;
+    return diff > 0 ? 1 : 0;
+}
+
+/*!
+    Returns a sort key for \a string. The sort key can be used as a
+    placeholder for the string that can then be sorted using regular strcmp
+    based sorting.
+
+    Creating the sort key is usually somewhat slower than using the compare()
+    methods directly. But if the string is compared repeatedly (e.g. when
+    sorting a whole list of strings), it's usually faster to create the sort
+    keys for each string and then sort using the keys.
+
+    Without ICU the key is the UTF-8 encoding of the lower-cased string.
+ */
+QByteArray QCollator::sortKey(const QString &string) const
+{
+#ifdef QT_USE_ICU
+    const UChar *text = (const UChar *)string.constData();
+    const int textLength = string.size();
+
+    // Start with a guess for the key size; ICU reports the size it needs.
+    QByteArray key(16 + textLength + (textLength >> 2), Qt::Uninitialized);
+    int needed = ucol_getSortKey(d->collator, text, textLength,
+                                 (uint8_t *)key.data(), key.size());
+    if (needed > key.size()) {
+        // The guess was too small: grow the buffer and ask again.
+        key.resize(needed);
+        needed = ucol_getSortKey(d->collator, text, textLength,
+                                 (uint8_t *)key.data(), key.size());
+    }
+    key.truncate(needed);
+    return key;
+#else
+    return string.toLower().toUtf8();
+#endif
+}
+
+// Returns the upper case letters A to Z, one list entry per letter.
+static QStringList englishIndexCharacters()
+{
+    const QString alphabet = QString::fromLatin1("A B C D E F G H I J K L M N O P Q R S T U V W X Y Z");
+    const QStringList letters = alphabet.split(QLatin1Char(' '), QString::SkipEmptyParts);
+    return letters;
+}
+
+/*!
+    Returns a string list of primary index characters. This is useful when
+    presenting the sorted list in a user interface with section headers.
+
+    The list is computed on first use and cached. With ICU it is read from
+    the "ExemplarCharactersIndex" entry of the resource bundle opened for
+    identifier(); if the bundle or the entry is not available, and always
+    without ICU, the letters A to Z are returned.
+*/
+QStringList QCollator::indexCharacters() const
+{
+    if (d->indexCharacters.isEmpty()) {
+#ifdef QT_USE_ICU
+        QByteArray id = identifier().toLatin1();
+
+        UErrorCode status = U_ZERO_ERROR;
+        UResourceBundle *bundle = ures_open(NULL, id, &status);
+
+        bool found = false;
+        if (!U_FAILURE(status)) {
+            qint32 length = 0;
+            status = U_ZERO_ERROR;
+            const UChar *exemplars = ures_getStringByKey(bundle, "ExemplarCharactersIndex", &length, &status);
+            if (!U_FAILURE(status)) {
+                // Strip the set syntax and split at the blanks.
+                QString chars = QString::fromUtf16(exemplars, length);
+                chars.remove('[').remove(']').remove('{').remove('}');
+                d->indexCharacters = chars.split(QLatin1Char(' '), QString::SkipEmptyParts);
+                found = true;
+            }
+        }
+        if (!found)
+            d->indexCharacters = englishIndexCharacters();
+
+        ures_close(bundle);
+#else
+        d->indexCharacters = englishIndexCharacters();
+#endif
+    }
+
+    return d->indexCharacters;
+}
+
+QT_END_NAMESPACE
diff --git a/src/corelib/tools/qcollator_p.h b/src/corelib/tools/qcollator_p.h
new file mode 100644
index 0000000000..7677180424
--- /dev/null
+++ b/src/corelib/tools/qcollator_p.h
@@ -0,0 +1,124 @@
+/****************************************************************************
+**
+** Copyright (C) 2012 Nokia Corporation and/or its subsidiary(-ies).
+** Contact: http://www.qt-project.org/
+**
+** This file is part of the QtCore module of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** GNU Lesser General Public License Usage
+** This file may be used under the terms of the GNU Lesser General Public
+** License version 2.1 as published by the Free Software Foundation and
+** appearing in the file LICENSE.LGPL included in the packaging of this
+** file. Please review the following information to ensure the GNU Lesser
+** General Public License version 2.1 requirements will be met:
+** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain additional
+** rights. These rights are described in the Nokia Qt LGPL Exception
+** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
+**
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU General
+** Public License version 3.0 as published by the Free Software Foundation
+** and appearing in the file LICENSE.GPL included in the packaging of this
+** file. Please review the following information to ensure the GNU General
+** Public License version 3.0 requirements will be met:
+** http://www.gnu.org/copyleft/gpl.html.
+**
+** Other Usage
+** Alternatively, this file may be used in accordance with the terms and
+** conditions contained in a signed written agreement between you and Nokia.
+**
+**
+**
+**
+**
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#ifndef QCOLLATOR_H
+#define QCOLLATOR_H
+
+#include <QString>
+#include <QLocale>
+
+QT_BEGIN_HEADER
+
+QT_BEGIN_NAMESPACE
+
+class QCollatorPrivate;
+
+class Q_CORE_EXPORT QCollator // compares and sorts strings for a locale; implemented in qcollator.cpp
+{
+public:
+ enum Collation { // values index the collationStrings table in qcollator.cpp, so the order must match it
+ Default,
+ Big5Han,
+ Dictionary,
+ Direct,
+ GB2312Han,
+ PhoneBook,
+ Pinyin,
+ Phonetic,
+ Reformed,
+ Standard,
+ Stroke,
+ Traditional,
+ UniHan
+ };
+ // Construction, copying and destruction; copies share the private data
+ QCollator(const QLocale &locale = QLocale(), QCollator::Collation collation = QCollator::Default);
+ QCollator(const QCollator &);
+ ~QCollator();
+ QCollator &operator=(const QCollator &);
+ // Locale used for collation
+ void setLocale(const QLocale &locale);
+ QLocale locale() const;
+ // Collation algorithm used for the locale
+ void setCollation(Collation collation);
+ Collation collation() const;
+ // Textual form of locale and collation, and the way back from it
+ QString identifier() const;
+ static QCollator fromIdentifier(const QString &identifier);
+ // Whether upper or lower case characters sort first
+ enum CasePreference {
+ CasePreferenceOff = 0x0,
+ CasePreferenceUpper = 0x1,
+ CasePreferenceLower = 0x2
+ };
+
+ CasePreference casePreference() const;
+ void setCasePreference(CasePreference c);
+ // Numeric sorting of digit sequences
+ void setNumericMode(bool on);
+ bool numericMode() const;
+ // Ignoring punctuation and symbols
+ void setIgnorePunctuation(bool on);
+ bool ignorePunctuation() const;
+ // Comparison; the QString and QStringRef overloads forward to the QChar one
+ int compare(const QString &s1, const QString &s2) const;
+ int compare(const QStringRef &s1, const QStringRef &s2) const;
+ int compare(const QChar *s1, int len1, const QChar *s2, int len2) const;
+ // Returns true when s1 compares smaller than s2
+ bool operator()(const QString &s1, const QString &s2) const
+ { return compare(s1, s2) < 0; }
+ // Byte array key for a string, as an alternative to calling compare()
+ QByteArray sortKey(const QString &string) const;
+ // Index characters, e.g. for section headers of a sorted list
+ QStringList indexCharacters() const;
+
+private:
+ QCollatorPrivate *d; // shared data; reference counted by copy constructor, assignment and destructor
+
+ void detach(); // gives this object its own private data when the data is shared
+ void init(); // creates the native collator for the current locale and collation
+};
+
+QT_END_NAMESPACE
+
+QT_END_HEADER
+
+#endif // QCOLLATOR_H
diff --git a/src/corelib/tools/qstring.h b/src/corelib/tools/qstring.h
index 77bfacc6a4..375dfbb1a4 100644
--- a/src/corelib/tools/qstring.h
+++ b/src/corelib/tools/qstring.h
@@ -728,6 +728,7 @@ private:
friend class QTextCodec;
friend class QStringRef;
friend class QByteArray;
+ friend class QCollator;
friend struct QAbstractConcatenable;
public:
diff --git a/src/corelib/tools/tools.pri b/src/corelib/tools/tools.pri
index 9db459ab68..0a0cf3733c 100644
--- a/src/corelib/tools/tools.pri
+++ b/src/corelib/tools/tools.pri
@@ -11,6 +11,7 @@ HEADERS += \
tools/qbytedata_p.h \
tools/qcache.h \
tools/qchar.h \
+ tools/qcollator_p.h \
tools/qcontainerfwd.h \
tools/qcryptographichash.h \
tools/qdatetime.h \
@@ -65,6 +66,7 @@ SOURCES += \
tools/qbitarray.cpp \
tools/qbytearray.cpp \
tools/qbytearraymatcher.cpp \
+ tools/qcollator.cpp \
tools/qcryptographichash.cpp \
tools/qdatetime.cpp \
tools/qeasingcurve.cpp \