1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
|
// Copyright (C) 2020 The Qt Company Ltd.
// Copyright (C) 2013 Aleix Pol Gonzalez <aleixpol@kde.org>
// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
#include "qcollator_p.h"
#include "qlocale_p.h"
#include "qstringlist.h"
#include "qstring.h"
#include <unicode/utypes.h>
#include <unicode/ucol.h>
#include <unicode/ustring.h>
#include <unicode/ures.h>
#include "qdebug.h"
QT_BEGIN_NAMESPACE
void QCollatorPrivate::init()
{
cleanup();
if (isC())
return;
UErrorCode status = U_ZERO_ERROR;
QByteArray name = QLocalePrivate::get(locale)->bcp47Name('_');
collator = ucol_open(name.constData(), &status);
if (U_FAILURE(status)) {
qWarning("Could not create collator: %d", status);
collator = nullptr;
dirty = false;
return;
}
// enable normalization by default
ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
// The strength attribute in ICU is rather badly documented. Basically UCOL_PRIMARY
// ignores differences between base characters and accented characters as well as case.
// So A and A-umlaut would compare equal.
// UCOL_SECONDARY ignores case differences. UCOL_TERTIARY is the default in most languages
// and does case sensitive comparison.
// UCOL_QUATERNARY is used as default in a few languages such as Japanese to take care of some
// additional differences in those languages.
UColAttributeValue val = (caseSensitivity == Qt::CaseSensitive)
? UCOL_DEFAULT_STRENGTH : UCOL_SECONDARY;
status = U_ZERO_ERROR;
ucol_setAttribute(collator, UCOL_STRENGTH, val, &status);
if (U_FAILURE(status))
qWarning("ucol_setAttribute: Case First failed: %d", status);
status = U_ZERO_ERROR;
ucol_setAttribute(collator, UCOL_NUMERIC_COLLATION, numericMode ? UCOL_ON : UCOL_OFF, &status);
if (U_FAILURE(status))
qWarning("ucol_setAttribute: numeric collation failed: %d", status);
status = U_ZERO_ERROR;
ucol_setAttribute(collator, UCOL_ALTERNATE_HANDLING,
ignorePunctuation ? UCOL_SHIFTED : UCOL_NON_IGNORABLE, &status);
if (U_FAILURE(status))
qWarning("ucol_setAttribute: Alternate handling failed: %d", status);
dirty = false;
}
// Releases the ICU collator, if one is open, and resets the handle to null.
// Calling this when no collator is open is a no-op.
void QCollatorPrivate::cleanup()
{
    if (!collator)
        return; // nothing open; the handle is already null

    ucol_close(collator);
    collator = nullptr;
}
// Compares s1 with s2 and returns a negative value, zero or a positive value
// when s1 sorts before, equal to or after s2.
//
// Empty strings are handled up front without involving the collator: an empty
// string sorts before any non-empty one and equal to another empty one.
// Otherwise the ICU collator is used when one is available; without one the
// comparison falls back to QtPrivate::compareStrings() with the configured
// case sensitivity.
int QCollator::compare(QStringView s1, QStringView s2) const
{
    if (!s1.size())
        return s2.size() ? -1 : 0;
    if (!s2.size())
        return +1;

    d->ensureInitialized();

    if (d->collator) {
        // ICU takes int32_t lengths, while the views carry qsizetype ones
        // (QTBUG-105038). Implicit narrowing could hand ICU a wrapped value:
        // a negative length (which ICU treats as "NUL-terminated") or a
        // wrong, smaller one. Clamp instead, so that over-long strings are
        // compared by their first INT32_MAX code units; nothing changes for
        // strings that fit.
        const auto icuLength = [](qsizetype length) -> int32_t {
            return length > qsizetype(INT32_MAX) ? int32_t(INT32_MAX) : int32_t(length);
        };
        return ucol_strcoll(d->collator,
                            reinterpret_cast<const UChar *>(s1.data()), icuLength(s1.size()),
                            reinterpret_cast<const UChar *>(s2.data()), icuLength(s2.size()));
    }

    return QtPrivate::compareStrings(s1, s2, d->caseSensitivity);
}
// Returns the sort key for string.
//
// For the C locale the key is the UTF-8 encoding of the string. With an ICU
// collator the key is the byte sequence produced by ucol_getSortKey(). If no
// collator is available the key is empty.
QCollatorSortKey QCollator::sortKey(const QString &string) const
{
    d->ensureInitialized();

    if (d->isC())
        return QCollatorSortKey(new QCollatorSortKeyPrivate(string.toUtf8()));

    if (!d->collator)
        return QCollatorSortKey(new QCollatorSortKeyPrivate(QByteArray()));

    // ICU takes int32_t lengths, while QString and QByteArray use qsizetype
    // (QTBUG-105038). Clamp both the text length and the buffer capacity so
    // that ICU never receives a wrapped or negative value; nothing changes
    // for strings that fit.
    const auto *text = reinterpret_cast<const UChar *>(string.constData());
    const int32_t textLength = string.size() > qsizetype(INT32_MAX)
            ? int32_t(INT32_MAX)
            : int32_t(string.size());

    // First guess for the key size; ICU reports the required size if this
    // turns out to be too small.
    qsizetype capacity = 16 + qsizetype(textLength) + (textLength >> 2);
    if (capacity > qsizetype(INT32_MAX))
        capacity = INT32_MAX;

    QByteArray key(capacity, Qt::Uninitialized);
    int32_t needed = ucol_getSortKey(d->collator, text, textLength,
                                     reinterpret_cast<uint8_t *>(key.data()),
                                     int32_t(key.size()));
    if (needed > key.size()) {
        // Buffer was too small: grow it to the reported size and retry.
        key.resize(needed);
        needed = ucol_getSortKey(d->collator, text, textLength,
                                 reinterpret_cast<uint8_t *>(key.data()),
                                 int32_t(key.size()));
    }
    key.truncate(needed);
    return QCollatorSortKey(new QCollatorSortKeyPrivate(std::move(key)));
}
// Compares this sort key with otherKey by handing both stored keys to
// qstrcmp() and returning its result unchanged.
int QCollatorSortKey::compare(const QCollatorSortKey &otherKey) const
{
    const auto &mine = d->m_key;
    const auto &theirs = otherKey.d->m_key;
    return qstrcmp(mine, theirs);
}
QT_END_NAMESPACE
|