diff options
Diffstat (limited to 'src/corelib/tools/qcollator.cpp')
-rw-r--r-- | src/corelib/tools/qcollator.cpp | 585 |
1 files changed, 585 insertions, 0 deletions
diff --git a/src/corelib/tools/qcollator.cpp b/src/corelib/tools/qcollator.cpp new file mode 100644 index 0000000000..58d9a0b920 --- /dev/null +++ b/src/corelib/tools/qcollator.cpp @@ -0,0 +1,585 @@ +/**************************************************************************** +** +** Copyright (C) 2012 Nokia Corporation and/or its subsidiary(-ies). +** Contact: http://www.qt-project.org/ +** +** This file is part of the QtCore module of the Qt Toolkit. +** +** $QT_BEGIN_LICENSE:LGPL$ +** GNU Lesser General Public License Usage +** This file may be used under the terms of the GNU Lesser General Public +** License version 2.1 as published by the Free Software Foundation and +** appearing in the file LICENSE.LGPL included in the packaging of this +** file. Please review the following information to ensure the GNU Lesser +** General Public License version 2.1 requirements will be met: +** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. +** +** In addition, as a special exception, Nokia gives you certain additional +** rights. These rights are described in the Nokia Qt LGPL Exception +** version 1.1, included in the file LGPL_EXCEPTION.txt in this package. +** +** GNU General Public License Usage +** Alternatively, this file may be used under the terms of the GNU General +** Public License version 3.0 as published by the Free Software Foundation +** and appearing in the file LICENSE.GPL included in the packaging of this +** file. Please review the following information to ensure the GNU General +** Public License version 3.0 requirements will be met: +** http://www.gnu.org/copyleft/gpl.html. +** +** Other Usage +** Alternatively, this file may be used in accordance with the terms and +** conditions contained in a signed written agreement between you and Nokia. +** +** +** +** +** +** +** $QT_END_LICENSE$ +** +****************************************************************************/ + +#include "qcollator_p.h" +#include "qstringlist.h" +#include "qstring.h" + +#ifdef QT_USE_ICU +#include <unicode/utypes.h> +#include <unicode/ucol.h> +#include <unicode/ustring.h> +#include <unicode/ures.h> +#endif + +#include "qdebug.h" + +QT_BEGIN_NAMESPACE + + +class QCollatorPrivate +{ +public: + QAtomicInt ref; + QLocale locale; + QCollator::Collation collation; + +#ifdef QT_USE_ICU + UCollator *collator; +#else + void *collator; +#endif + + QStringList indexCharacters; + + void clear() { +#ifdef QT_USE_ICU + if (collator) + ucol_close(collator); +#endif + collator = 0; + indexCharacters.clear(); + } + + QCollatorPrivate() + : collation(QCollator::Default), + collator(0) + { ref.store(1); } + ~QCollatorPrivate(); + +private: + Q_DISABLE_COPY(QCollatorPrivate) +}; + + +QCollatorPrivate::~QCollatorPrivate() +{ + clear(); +} + +static const int collationStringsCount = 13; +static const char * const collationStrings[collationStringsCount] = { + "default", + "big5han", + "dictionary", + "direct", + "gb2312han", + "phonebook", + "pinyin", + "phonetic", + "reformed", + "standard", + "stroke", + "traditional", + "unihan" +}; + +/*! + \class QCollator + \brief The QCollator class compares strings according to a localized collation algorithm. + + \reentrant + \ingroup i18n + \ingroup string-processing + \ingroup shared + + QCollator is initialized with a QLocale and an optional collation strategy. It tries to + initialize the collator with the specified values. The collator can then be used to compare + and sort strings in a locale dependent fashion. + + A QCollator object can be used together with template based sorting algorithms such as qSort + to sort a list of QStrings. + + In addition to the locale and collation strategy, several optional flags can be set that influence + the result of the collation. +*/ + +/*! + Constructs a QCollator from \a locale and \a collation. If \a collation is not + specified the default collation algorithm for the locale is being used. If + \a locale is not specified QLocale::default() is being used. + + \sa setLocale setCollation setOptions + */ +QCollator::QCollator(const QLocale &locale, QCollator::Collation collation) + : d(new QCollatorPrivate) +{ + d->locale = locale; + if ((int)collation >= 0 && (int)collation < collationStringsCount) + d->collation = collation; + + init(); +} + +/*! + Creates a copy of \a other. + */ +QCollator::QCollator(const QCollator &other) + : d(other.d) +{ + d->ref.ref(); +} + +/*! + Destroys the collator. + */ +QCollator::~QCollator() +{ + if (!d->ref.deref()) + delete d; +} + +/*! + Assigns \a other to this collator. + */ +QCollator &QCollator::operator=(const QCollator &other) +{ + if (this != &other) { + if (!d->ref.deref()) + delete d; + d = other.d; + d->ref.ref(); + } + return *this; +} + + +/*! + \internal + */ +void QCollator::init() +{ + Q_ASSERT((int)d->collation < collationStringsCount); +#ifdef QT_USE_ICU + const char *collationString = collationStrings[(int)d->collation]; + UErrorCode status = U_ZERO_ERROR; + QByteArray name = (d->locale.bcp47Name().replace(QLatin1Char('-'), QLatin1Char('_')) + QLatin1String("@collation=") + QLatin1String(collationString)).toLatin1(); + d->collator = ucol_open(name.constData(), &status); + if (U_FAILURE(status)) + qWarning("Could not create collator: %d", status); + + // enable normalization by default + ucol_setAttribute(d->collator, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); +#endif +} + +/*! + \internal + */ +void QCollator::detach() +{ + if (d->ref.load() != 1) { + QCollatorPrivate *x = new QCollatorPrivate; + x->ref.store(1); + x->locale = d->locale; + x->collation = d->collation; + x->collator = 0; + if (!d->ref.deref()) + delete d; + d = x; + } +} + + +/*! + Sets the locale of the collator to \a locale. + */ +void QCollator::setLocale(const QLocale &locale) +{ + if (d->ref.load() != 1) + detach(); + d->clear(); + d->locale = locale; + + init(); +} + +/*! + Returns the locale of the collator. + */ +QLocale QCollator::locale() const +{ + return d->locale; +} + +/*! + \enum QCollator::collation + + This enum can be used to specify an alternate collation algorithm to be used instead + of the default algorithm for the locale. + + Possible values are: + + \value Default Use the default algorithm for the locale + \value Big5Han + \value Dictionary + \value Direct + \value GB2312Han + \value PhoneBook + \value Pinyin + \value Phonetic + \value Reformed + \value Standard + \value Stroke + \value Traditional + \value UniHan +*/ + +/*! + Sets the collation algorithm to be used. + + \sa QCollator::Collation + */ +void QCollator::setCollation(QCollator::Collation collation) +{ + if ((int)collation < 0 || (int)collation >= collationStringsCount) + return; + + if (d->ref.load() != 1) + detach(); + d->clear(); + d->collation = collation; + + init(); +} +/*! + Returns the currently used collation algorithm. + + \sa QCollator::Collation + */ +QCollator::Collation QCollator::collation() const +{ + return d->collation; +} + +/*! + Returns a unique identifer for this collation object. + + This method is helpful to save and restore defined collation + objects. + + \sa fromIdentifier + */ +QString QCollator::identifier() const +{ + QString id = d->locale.bcp47Name(); + if (d->collation != QCollator::Default) { + id += QLatin1String("@collation="); + id += QLatin1String(collationStrings[d->collation]); + } + // this ensures the ID is compatible with ICU + id.replace('-', '_'); + return id; +} + +/*! + Creates a QCollator from a unique identifier and returns it. + + \sa identifier + */ +QCollator QCollator::fromIdentifier(const QString &identifier) +{ + QString localeString = identifier; + QString collationString; + int at = identifier.indexOf(QLatin1Char('@')); + if (at >= 0) { + localeString = identifier.left(at); + collationString = identifier.mid(at + strlen("@collation=")); + } + + QLocale locale(localeString); + Collation collation = Default; + if (!collationString.isEmpty()) { + for (int i = 0; i < collationStringsCount; ++i) { + if (QLatin1String(collationStrings[i]) == collationString) { + collation = Collation(i); + break; + } + } + } + return QCollator(locale, collation); +} + +/*! + \enum QCollator::CasePreference + + This enum can be used to tailor the case preference during collation. + + \value CasePreferenceOff No case preference, use what is the standard for the locale + \value CasePreferenceUpper Sort upper case characters before lower case + \value CasePreferenceLower Sort lower case characters before upper case +*/ + +/*! + Sets the case preference of the collator. + + \sa QCollator::CasePreference + */ +void QCollator::setCasePreference(CasePreference c) +{ + if (d->ref.load() != 1) + detach(); + +#ifdef QT_USE_ICU + UColAttributeValue val = UCOL_OFF; + if (c == QCollator::CasePreferenceUpper) + val = UCOL_UPPER_FIRST; + else if (c == QCollator::CasePreferenceLower) + val = UCOL_LOWER_FIRST; + + UErrorCode status = U_ZERO_ERROR; + ucol_setAttribute(d->collator, UCOL_CASE_FIRST, val, &status); + if (U_FAILURE(status)) + qWarning("ucol_setAttribute: Case First failed: %d", status); +#else + Q_UNUSED(c); +#endif +} + +/*! + Returns case preference of the collator. + + \sa QCollator::CasePreference + */ +QCollator::CasePreference QCollator::casePreference() const +{ +#ifdef QT_USE_ICU + UErrorCode status = U_ZERO_ERROR; + switch (ucol_getAttribute(d->collator, UCOL_CASE_FIRST, &status)) { + case UCOL_UPPER_FIRST: + return QCollator::CasePreferenceUpper; + case UCOL_LOWER_FIRST: + return QCollator::CasePreferenceLower; + case UCOL_OFF: + default: + break; + } +#endif + return QCollator::CasePreferenceOff; +} + +/*! + Enables numeric sorting mode when \a on is set to true. + + This will enable proper sorting of numeric digits, so that e.g. 100 sorts after 99. + + By default this mode is off. + */ +void QCollator::setNumericMode(bool on) +{ + if (d->ref.load() != 1) + detach(); + +#ifdef QT_USE_ICU + UErrorCode status = U_ZERO_ERROR; + ucol_setAttribute(d->collator, UCOL_NUMERIC_COLLATION, on ? UCOL_ON : UCOL_OFF, &status); + if (U_FAILURE(status)) + qWarning("ucol_setAttribute: numeric collation failed: %d", status); +#else + Q_UNUSED(on); +#endif +} + +/*! + Returns true if numeric sorting is enabled, false otherwise. + + \sa setNumericMode + */ +bool QCollator::numericMode() const +{ +#ifdef QT_USE_ICU + UErrorCode status; + if (ucol_getAttribute(d->collator, UCOL_NUMERIC_COLLATION, &status) == UCOL_ON) + return true; +#endif + return false; +} + +/*! + If set to true, punctuation characters and symbols are ignored when determining sort order. + + The default is locale dependent. + */ +void QCollator::setIgnorePunctuation(bool on) +{ + if (d->ref.load() != 1) + detach(); + +#ifdef QT_USE_ICU + UErrorCode status; + ucol_setAttribute(d->collator, UCOL_ALTERNATE_HANDLING, on ? UCOL_SHIFTED : UCOL_NON_IGNORABLE, &status); + if (U_FAILURE(status)) + qWarning("ucol_setAttribute: Alternate handling failed: %d", status); +#else + Q_UNUSED(on); +#endif +} + +/*! + Returns true if punctuation characters and symbols are ignored when determining sort order. + + \sa setIgnorePunctuation + */ +bool QCollator::ignorePunctuation() const +{ +#ifdef QT_USE_ICU + UErrorCode status; + if (ucol_getAttribute(d->collator, UCOL_ALTERNATE_HANDLING, &status) == UCOL_SHIFTED) + return true; +#endif + return false; +} + +/*! + Compares \a s1 with \a s2. Returns -1, 0 or 1 depending on whether \a s1 is + smaller, equal or larger than \a s2. + */ +int QCollator::compare(const QString &s1, const QString &s2) const +{ + return compare(s1.constData(), s1.size(), s2.constData(), s2.size()); +} + +/*! + \overload + + Compares \a s1 with \a s2. Returns -1, 0 or 1 depending on whether \a s1 is + smaller, equal or larger than \a s2. + */ +int QCollator::compare(const QStringRef &s1, const QStringRef &s2) const +{ + return compare(s1.constData(), s1.size(), s2.constData(), s2.size()); +} + +/*! + \overload + + Compares \a s1 with \a s2. \a len1 and \a len2 specify the length of the + QChar arrays pointer to by \a s1 and \a s2. + + Returns -1, 0 or 1 depending on whether \a s1 is smaller, equal or larger than \a s2. + */ +int QCollator::compare(const QChar *s1, int len1, const QChar *s2, int len2) const +{ +#ifdef QT_USE_ICU + const UCollationResult result = + ucol_strcoll(d->collator, (const UChar *)s1, len1, (const UChar *)s2, len2); + return result; +#else + return QString::compare_helper((const QChar *)s1, len1, (const QChar *)s2, len2, Qt::CaseInsensitive); +#endif +} + +/*! + Returns a sortKey for \a string. The sortkey can be used as a placeholder + for the string that can be then sorted using regular strcmp based sorting. + + Creating the sort key is usually somewhat slower, then using the compare() + methods directly. But if the string is compared repeatedly (e.g. when sorting + a whole list of strings), it's usually faster to create the sort keys for each + string and then sort using the keys. + */ +QByteArray QCollator::sortKey(const QString &string) const +{ +#ifdef QT_USE_ICU + QByteArray result(16 + string.size() + (string.size() >> 2), Qt::Uninitialized); + int size = ucol_getSortKey(d->collator, (const UChar *)string.constData(), + string.size(), (uint8_t *)result.data(), result.size()); + if (size > result.size()) { + result.resize(size); + size = ucol_getSortKey(d->collator, (const UChar *)string.constData(), + string.size(), (uint8_t *)result.data(), result.size()); + } + result.truncate(size); + return result; +#else + return string.toLower().toUtf8(); +#endif +} + +static QStringList englishIndexCharacters() +{ + QString chars = QString::fromLatin1("A B C D E F G H I J K L M N O P Q R S T U V W X Y Z"); + return chars.split(QLatin1Char(' '), QString::SkipEmptyParts); +} + +/*! + Returns a string list of primary index characters. This is useful when presenting the + sorted list in a user interface with section headers. +*/ +QStringList QCollator::indexCharacters() const +{ + if (!d->indexCharacters.isEmpty()) + return d->indexCharacters; + +#ifdef QT_USE_ICU + QByteArray id = identifier().toLatin1(); + + UErrorCode status = U_ZERO_ERROR; + UResourceBundle *res = ures_open(NULL, id, &status); + + if (U_FAILURE(status)) { + d->indexCharacters = englishIndexCharacters(); + } else { + + qint32 len = 0; + status = U_ZERO_ERROR; + const UChar *val = ures_getStringByKey(res, "ExemplarCharactersIndex", &len, &status); + if (U_FAILURE(status)) { + d->indexCharacters = englishIndexCharacters(); + } else { + QString chars = QString::fromUtf16(val, len); + chars.remove('['); + chars.remove(']'); + chars.remove('{'); + chars.remove('}'); + d->indexCharacters = chars.split(QLatin1Char(' '), QString::SkipEmptyParts); + } + } + + ures_close(res); +#else + d->indexCharacters = englishIndexCharacters(); +#endif + + return d->indexCharacters; +} + +QT_END_NAMESPACE |