summaryrefslogtreecommitdiffstats
path: root/src/corelib/tools/qcollator.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/corelib/tools/qcollator.cpp')
-rw-r--r--src/corelib/tools/qcollator.cpp585
1 files changed, 585 insertions, 0 deletions
diff --git a/src/corelib/tools/qcollator.cpp b/src/corelib/tools/qcollator.cpp
new file mode 100644
index 0000000000..58d9a0b920
--- /dev/null
+++ b/src/corelib/tools/qcollator.cpp
@@ -0,0 +1,585 @@
+/****************************************************************************
+**
+** Copyright (C) 2012 Nokia Corporation and/or its subsidiary(-ies).
+** Contact: http://www.qt-project.org/
+**
+** This file is part of the QtCore module of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** GNU Lesser General Public License Usage
+** This file may be used under the terms of the GNU Lesser General Public
+** License version 2.1 as published by the Free Software Foundation and
+** appearing in the file LICENSE.LGPL included in the packaging of this
+** file. Please review the following information to ensure the GNU Lesser
+** General Public License version 2.1 requirements will be met:
+** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain additional
+** rights. These rights are described in the Nokia Qt LGPL Exception
+** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
+**
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU General
+** Public License version 3.0 as published by the Free Software Foundation
+** and appearing in the file LICENSE.GPL included in the packaging of this
+** file. Please review the following information to ensure the GNU General
+** Public License version 3.0 requirements will be met:
+** http://www.gnu.org/copyleft/gpl.html.
+**
+** Other Usage
+** Alternatively, this file may be used in accordance with the terms and
+** conditions contained in a signed written agreement between you and Nokia.
+**
+**
+**
+**
+**
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#include "qcollator_p.h"
+#include "qstringlist.h"
+#include "qstring.h"
+
+#ifdef QT_USE_ICU
+#include <unicode/utypes.h>
+#include <unicode/ucol.h>
+#include <unicode/ustring.h>
+#include <unicode/ures.h>
+#endif
+
+#include "qdebug.h"
+
+QT_BEGIN_NAMESPACE
+
+
+class QCollatorPrivate
+{
+public:
+ QAtomicInt ref;
+ QLocale locale;
+ QCollator::Collation collation;
+
+#ifdef QT_USE_ICU
+ UCollator *collator;
+#else
+ void *collator;
+#endif
+
+ QStringList indexCharacters;
+
+ void clear() {
+#ifdef QT_USE_ICU
+ if (collator)
+ ucol_close(collator);
+#endif
+ collator = 0;
+ indexCharacters.clear();
+ }
+
+ QCollatorPrivate()
+ : collation(QCollator::Default),
+ collator(0)
+ { ref.store(1); }
+ ~QCollatorPrivate();
+
+private:
+ Q_DISABLE_COPY(QCollatorPrivate)
+};
+
+
+QCollatorPrivate::~QCollatorPrivate()
+{
+ clear();
+}
+
+static const int collationStringsCount = 13;
+static const char * const collationStrings[collationStringsCount] = {
+ "default",
+ "big5han",
+ "dictionary",
+ "direct",
+ "gb2312han",
+ "phonebook",
+ "pinyin",
+ "phonetic",
+ "reformed",
+ "standard",
+ "stroke",
+ "traditional",
+ "unihan"
+};
+
+/*!
+ \class QCollator
+ \brief The QCollator class compares strings according to a localized collation algorithm.
+
+ \reentrant
+ \ingroup i18n
+ \ingroup string-processing
+ \ingroup shared
+
+ QCollator is initialized with a QLocale and an optional collation strategy. It tries to
+ initialize the collator with the specified values. The collator can then be used to compare
+ and sort strings in a locale dependent fashion.
+
+ A QCollator object can be used together with template based sorting algorithms such as qSort
+ to sort a list of QStrings.
+
+ In addition to the locale and collation strategy, several optional flags can be set that influence
+ the result of the collation.
+*/
+
+/*!
+ Constructs a QCollator from \a locale and \a collation. If \a collation is not
+ specified the default collation algorithm for the locale is being used. If
+ \a locale is not specified QLocale::default() is being used.
+
+ \sa setLocale setCollation setOptions
+ */
+QCollator::QCollator(const QLocale &locale, QCollator::Collation collation)
+ : d(new QCollatorPrivate)
+{
+ d->locale = locale;
+ if ((int)collation >= 0 && (int)collation < collationStringsCount)
+ d->collation = collation;
+
+ init();
+}
+
+/*!
+ Creates a copy of \a other.
+ */
+QCollator::QCollator(const QCollator &other)
+ : d(other.d)
+{
+ d->ref.ref();
+}
+
+/*!
+ Destroys the collator.
+ */
+QCollator::~QCollator()
+{
+ if (!d->ref.deref())
+ delete d;
+}
+
+/*!
+ Assigns \a other to this collator.
+ */
+QCollator &QCollator::operator=(const QCollator &other)
+{
+ if (this != &other) {
+ if (!d->ref.deref())
+ delete d;
+ d = other.d;
+ d->ref.ref();
+ }
+ return *this;
+}
+
+
+/*!
+ \internal
+ */
+void QCollator::init()
+{
+ Q_ASSERT((int)d->collation < collationStringsCount);
+#ifdef QT_USE_ICU
+ const char *collationString = collationStrings[(int)d->collation];
+ UErrorCode status = U_ZERO_ERROR;
+ QByteArray name = (d->locale.bcp47Name().replace(QLatin1Char('-'), QLatin1Char('_')) + QLatin1String("@collation=") + QLatin1String(collationString)).toLatin1();
+ d->collator = ucol_open(name.constData(), &status);
+ if (U_FAILURE(status))
+ qWarning("Could not create collator: %d", status);
+
+ // enable normalization by default
+ ucol_setAttribute(d->collator, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
+#endif
+}
+
+/*!
+ \internal
+ */
+void QCollator::detach()
+{
+ if (d->ref.load() != 1) {
+ QCollatorPrivate *x = new QCollatorPrivate;
+ x->ref.store(1);
+ x->locale = d->locale;
+ x->collation = d->collation;
+ x->collator = 0;
+ if (!d->ref.deref())
+ delete d;
+ d = x;
+ }
+}
+
+
+/*!
+ Sets the locale of the collator to \a locale.
+ */
+void QCollator::setLocale(const QLocale &locale)
+{
+ if (d->ref.load() != 1)
+ detach();
+ d->clear();
+ d->locale = locale;
+
+ init();
+}
+
+/*!
+ Returns the locale of the collator.
+ */
+QLocale QCollator::locale() const
+{
+ return d->locale;
+}
+
+/*!
+ \enum QCollator::collation
+
+ This enum can be used to specify an alternate collation algorithm to be used instead
+ of the default algorithm for the locale.
+
+ Possible values are:
+
+ \value Default Use the default algorithm for the locale
+ \value Big5Han
+ \value Dictionary
+ \value Direct
+ \value GB2312Han
+ \value PhoneBook
+ \value Pinyin
+ \value Phonetic
+ \value Reformed
+ \value Standard
+ \value Stroke
+ \value Traditional
+ \value UniHan
+*/
+
+/*!
+ Sets the collation algorithm to be used.
+
+ \sa QCollator::Collation
+ */
+void QCollator::setCollation(QCollator::Collation collation)
+{
+ if ((int)collation < 0 || (int)collation >= collationStringsCount)
+ return;
+
+ if (d->ref.load() != 1)
+ detach();
+ d->clear();
+ d->collation = collation;
+
+ init();
+}
+/*!
+ Returns the currently used collation algorithm.
+
+ \sa QCollator::Collation
+ */
+QCollator::Collation QCollator::collation() const
+{
+ return d->collation;
+}
+
+/*!
+ Returns a unique identifer for this collation object.
+
+ This method is helpful to save and restore defined collation
+ objects.
+
+ \sa fromIdentifier
+ */
+QString QCollator::identifier() const
+{
+ QString id = d->locale.bcp47Name();
+ if (d->collation != QCollator::Default) {
+ id += QLatin1String("@collation=");
+ id += QLatin1String(collationStrings[d->collation]);
+ }
+ // this ensures the ID is compatible with ICU
+ id.replace('-', '_');
+ return id;
+}
+
+/*!
+ Creates a QCollator from a unique identifier and returns it.
+
+ \sa identifier
+ */
+QCollator QCollator::fromIdentifier(const QString &identifier)
+{
+ QString localeString = identifier;
+ QString collationString;
+ int at = identifier.indexOf(QLatin1Char('@'));
+ if (at >= 0) {
+ localeString = identifier.left(at);
+ collationString = identifier.mid(at + strlen("@collation="));
+ }
+
+ QLocale locale(localeString);
+ Collation collation = Default;
+ if (!collationString.isEmpty()) {
+ for (int i = 0; i < collationStringsCount; ++i) {
+ if (QLatin1String(collationStrings[i]) == collationString) {
+ collation = Collation(i);
+ break;
+ }
+ }
+ }
+ return QCollator(locale, collation);
+}
+
+/*!
+ \enum QCollator::CasePreference
+
+ This enum can be used to tailor the case preference during collation.
+
+ \value CasePreferenceOff No case preference, use what is the standard for the locale
+ \value CasePreferenceUpper Sort upper case characters before lower case
+ \value CasePreferenceLower Sort lower case characters before upper case
+*/
+
+/*!
+ Sets the case preference of the collator.
+
+ \sa QCollator::CasePreference
+ */
+void QCollator::setCasePreference(CasePreference c)
+{
+ if (d->ref.load() != 1)
+ detach();
+
+#ifdef QT_USE_ICU
+ UColAttributeValue val = UCOL_OFF;
+ if (c == QCollator::CasePreferenceUpper)
+ val = UCOL_UPPER_FIRST;
+ else if (c == QCollator::CasePreferenceLower)
+ val = UCOL_LOWER_FIRST;
+
+ UErrorCode status = U_ZERO_ERROR;
+ ucol_setAttribute(d->collator, UCOL_CASE_FIRST, val, &status);
+ if (U_FAILURE(status))
+ qWarning("ucol_setAttribute: Case First failed: %d", status);
+#else
+ Q_UNUSED(c);
+#endif
+}
+
+/*!
+ Returns case preference of the collator.
+
+ \sa QCollator::CasePreference
+ */
+QCollator::CasePreference QCollator::casePreference() const
+{
+#ifdef QT_USE_ICU
+ UErrorCode status = U_ZERO_ERROR;
+ switch (ucol_getAttribute(d->collator, UCOL_CASE_FIRST, &status)) {
+ case UCOL_UPPER_FIRST:
+ return QCollator::CasePreferenceUpper;
+ case UCOL_LOWER_FIRST:
+ return QCollator::CasePreferenceLower;
+ case UCOL_OFF:
+ default:
+ break;
+ }
+#endif
+ return QCollator::CasePreferenceOff;
+}
+
+/*!
+ Enables numeric sorting mode when \a on is set to true.
+
+ This will enable proper sorting of numeric digits, so that e.g. 100 sorts after 99.
+
+ By default this mode is off.
+ */
+void QCollator::setNumericMode(bool on)
+{
+ if (d->ref.load() != 1)
+ detach();
+
+#ifdef QT_USE_ICU
+ UErrorCode status = U_ZERO_ERROR;
+ ucol_setAttribute(d->collator, UCOL_NUMERIC_COLLATION, on ? UCOL_ON : UCOL_OFF, &status);
+ if (U_FAILURE(status))
+ qWarning("ucol_setAttribute: numeric collation failed: %d", status);
+#else
+ Q_UNUSED(on);
+#endif
+}
+
+/*!
+ Returns true if numeric sorting is enabled, false otherwise.
+
+ \sa setNumericMode
+ */
+bool QCollator::numericMode() const
+{
+#ifdef QT_USE_ICU
+ UErrorCode status;
+ if (ucol_getAttribute(d->collator, UCOL_NUMERIC_COLLATION, &status) == UCOL_ON)
+ return true;
+#endif
+ return false;
+}
+
+/*!
+ If set to true, punctuation characters and symbols are ignored when determining sort order.
+
+ The default is locale dependent.
+ */
+void QCollator::setIgnorePunctuation(bool on)
+{
+ if (d->ref.load() != 1)
+ detach();
+
+#ifdef QT_USE_ICU
+ UErrorCode status;
+ ucol_setAttribute(d->collator, UCOL_ALTERNATE_HANDLING, on ? UCOL_SHIFTED : UCOL_NON_IGNORABLE, &status);
+ if (U_FAILURE(status))
+ qWarning("ucol_setAttribute: Alternate handling failed: %d", status);
+#else
+ Q_UNUSED(on);
+#endif
+}
+
+/*!
+ Returns true if punctuation characters and symbols are ignored when determining sort order.
+
+ \sa setIgnorePunctuation
+ */
+bool QCollator::ignorePunctuation() const
+{
+#ifdef QT_USE_ICU
+ UErrorCode status;
+ if (ucol_getAttribute(d->collator, UCOL_ALTERNATE_HANDLING, &status) == UCOL_SHIFTED)
+ return true;
+#endif
+ return false;
+}
+
+/*!
+ Compares \a s1 with \a s2. Returns -1, 0 or 1 depending on whether \a s1 is
+ smaller, equal or larger than \a s2.
+ */
+int QCollator::compare(const QString &s1, const QString &s2) const
+{
+ return compare(s1.constData(), s1.size(), s2.constData(), s2.size());
+}
+
+/*!
+ \overload
+
+ Compares \a s1 with \a s2. Returns -1, 0 or 1 depending on whether \a s1 is
+ smaller, equal or larger than \a s2.
+ */
+int QCollator::compare(const QStringRef &s1, const QStringRef &s2) const
+{
+ return compare(s1.constData(), s1.size(), s2.constData(), s2.size());
+}
+
+/*!
+ \overload
+
+ Compares \a s1 with \a s2. \a len1 and \a len2 specify the length of the
+ QChar arrays pointer to by \a s1 and \a s2.
+
+ Returns -1, 0 or 1 depending on whether \a s1 is smaller, equal or larger than \a s2.
+ */
+int QCollator::compare(const QChar *s1, int len1, const QChar *s2, int len2) const
+{
+#ifdef QT_USE_ICU
+ const UCollationResult result =
+ ucol_strcoll(d->collator, (const UChar *)s1, len1, (const UChar *)s2, len2);
+ return result;
+#else
+ return QString::compare_helper((const QChar *)s1, len1, (const QChar *)s2, len2, Qt::CaseInsensitive);
+#endif
+}
+
+/*!
+ Returns a sortKey for \a string. The sortkey can be used as a placeholder
+ for the string that can be then sorted using regular strcmp based sorting.
+
+ Creating the sort key is usually somewhat slower, then using the compare()
+ methods directly. But if the string is compared repeatedly (e.g. when sorting
+ a whole list of strings), it's usually faster to create the sort keys for each
+ string and then sort using the keys.
+ */
+QByteArray QCollator::sortKey(const QString &string) const
+{
+#ifdef QT_USE_ICU
+ QByteArray result(16 + string.size() + (string.size() >> 2), Qt::Uninitialized);
+ int size = ucol_getSortKey(d->collator, (const UChar *)string.constData(),
+ string.size(), (uint8_t *)result.data(), result.size());
+ if (size > result.size()) {
+ result.resize(size);
+ size = ucol_getSortKey(d->collator, (const UChar *)string.constData(),
+ string.size(), (uint8_t *)result.data(), result.size());
+ }
+ result.truncate(size);
+ return result;
+#else
+ return string.toLower().toUtf8();
+#endif
+}
+
+static QStringList englishIndexCharacters()
+{
+ QString chars = QString::fromLatin1("A B C D E F G H I J K L M N O P Q R S T U V W X Y Z");
+ return chars.split(QLatin1Char(' '), QString::SkipEmptyParts);
+}
+
+/*!
+ Returns a string list of primary index characters. This is useful when presenting the
+ sorted list in a user interface with section headers.
+*/
+QStringList QCollator::indexCharacters() const
+{
+ if (!d->indexCharacters.isEmpty())
+ return d->indexCharacters;
+
+#ifdef QT_USE_ICU
+ QByteArray id = identifier().toLatin1();
+
+ UErrorCode status = U_ZERO_ERROR;
+ UResourceBundle *res = ures_open(NULL, id, &status);
+
+ if (U_FAILURE(status)) {
+ d->indexCharacters = englishIndexCharacters();
+ } else {
+
+ qint32 len = 0;
+ status = U_ZERO_ERROR;
+ const UChar *val = ures_getStringByKey(res, "ExemplarCharactersIndex", &len, &status);
+ if (U_FAILURE(status)) {
+ d->indexCharacters = englishIndexCharacters();
+ } else {
+ QString chars = QString::fromUtf16(val, len);
+ chars.remove('[');
+ chars.remove(']');
+ chars.remove('{');
+ chars.remove('}');
+ d->indexCharacters = chars.split(QLatin1Char(' '), QString::SkipEmptyParts);
+ }
+ }
+
+ ures_close(res);
+#else
+ d->indexCharacters = englishIndexCharacters();
+#endif
+
+ return d->indexCharacters;
+}
+
+QT_END_NAMESPACE