summaryrefslogtreecommitdiffstats
path: root/src/corelib/text/qcollator_icu.cpp
blob: 84f9c515374c3267d311ca633fd417455bbf7b3c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
// Copyright (C) 2020 The Qt Company Ltd.
// Copyright (C) 2013 Aleix Pol Gonzalez <aleixpol@kde.org>
// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only

#include "qcollator_p.h"
#include "qlocale_p.h"
#include "qstringlist.h"
#include "qstring.h"

#include <unicode/utypes.h>
#include <unicode/ucol.h>
#include <unicode/ustring.h>
#include <unicode/ures.h>

#include "qdebug.h"

QT_BEGIN_NAMESPACE

void QCollatorPrivate::init()
{
    cleanup();
    if (isC())
        return;

    UErrorCode status = U_ZERO_ERROR;
    QByteArray name = QLocalePrivate::get(locale)->bcp47Name('_');
    collator = ucol_open(name.constData(), &status);
    if (U_FAILURE(status)) {
        qWarning("Could not create collator: %d", status);
        collator = nullptr;
        dirty = false;
        return;
    }

    // enable normalization by default
    ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);

    // The strength attribute in ICU is rather badly documented. Basically UCOL_PRIMARY
    // ignores differences between base characters and accented characters as well as case.
    // So A and A-umlaut would compare equal.
    // UCOL_SECONDARY ignores case differences. UCOL_TERTIARY is the default in most languages
    // and does case sensitive comparison.
    // UCOL_QUATERNARY is used as default in a few languages such as Japanese to take care of some
    // additional differences in those languages.
    UColAttributeValue val = (caseSensitivity == Qt::CaseSensitive)
        ? UCOL_DEFAULT_STRENGTH : UCOL_SECONDARY;

    status = U_ZERO_ERROR;
    ucol_setAttribute(collator, UCOL_STRENGTH, val, &status);
    if (U_FAILURE(status))
        qWarning("ucol_setAttribute: Case First failed: %d", status);

    status = U_ZERO_ERROR;
    ucol_setAttribute(collator, UCOL_NUMERIC_COLLATION, numericMode ? UCOL_ON : UCOL_OFF, &status);
    if (U_FAILURE(status))
        qWarning("ucol_setAttribute: numeric collation failed: %d", status);

    status = U_ZERO_ERROR;
    ucol_setAttribute(collator, UCOL_ALTERNATE_HANDLING,
                      ignorePunctuation ? UCOL_SHIFTED : UCOL_NON_IGNORABLE, &status);
    if (U_FAILURE(status))
        qWarning("ucol_setAttribute: Alternate handling failed: %d", status);

    dirty = false;
}

void QCollatorPrivate::cleanup()
{
    if (collator)
        ucol_close(collator);
    collator = nullptr;
}

int QCollator::compare(QStringView s1, QStringView s2) const
{
    if (!s1.size())
        return s2.size() ? -1 : 0;
    if (!s2.size())
        return +1;

    d->ensureInitialized();

    if (d->collator) {
        // truncating sizes (QTBUG-105038)
        return ucol_strcoll(d->collator,
                            reinterpret_cast<const UChar *>(s1.data()), s1.size(),
                            reinterpret_cast<const UChar *>(s2.data()), s2.size());
    }

    return QtPrivate::compareStrings(s1, s2, d->caseSensitivity);
}

QCollatorSortKey QCollator::sortKey(const QString &string) const
{
    d->ensureInitialized();

    if (d->isC())
        return QCollatorSortKey(new QCollatorSortKeyPrivate(string.toUtf8()));

    if (d->collator) {
        QByteArray result(16 + string.size() + (string.size() >> 2), Qt::Uninitialized);
        // truncating sizes (QTBUG-105038)
        int size = ucol_getSortKey(d->collator, (const UChar *)string.constData(),
                                   string.size(), (uint8_t *)result.data(), result.size());
        if (size > result.size()) {
            result.resize(size);
            size = ucol_getSortKey(d->collator, (const UChar *)string.constData(),
                                   string.size(), (uint8_t *)result.data(), result.size());
        }
        result.truncate(size);
        return QCollatorSortKey(new QCollatorSortKeyPrivate(std::move(result)));
    }

    return QCollatorSortKey(new QCollatorSortKeyPrivate(QByteArray()));
}

int QCollatorSortKey::compare(const QCollatorSortKey &otherKey) const
{
    return qstrcmp(d->m_key, otherKey.d->m_key);
}

QT_END_NAMESPACE