summary | refs | log | tree | commit | diff | stats
path: root/src/corelib
diff options
context:
space:
mode:
author: Øystein Heskestad <oystein.heskestad@qt.io> 2022-11-22 17:48:45 +0100
committer: Øystein Heskestad <oystein.heskestad@qt.io> 2022-12-02 11:35:49 +0100
commit: b977ae371a753a82e1d0bb32c5b62099da663721 (patch)
tree: 895d517f35c5a961781c156cf8c23857d688de6c /src/corelib
parent: 724329b79ea8bf00cc4b393fa33d91d477b35497 (diff)
Add In-place utf-8 case-insensitive comparisons
Also add optimizations for more string comparisons and add tests and
benchmarks.

[ChangeLog][QtCore][QString] Added utf-8 case-insensitive comparisons

Fixes: QTBUG-100235
Change-Id: I7c0809c6d80c00e9a5d0e8ac3ebb045cf7004a30
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
Diffstat (limited to 'src/corelib')
-rw-r--r-- src/corelib/text/qstring.cpp | 13
-rw-r--r-- src/corelib/text/qstring.h | 13
-rw-r--r-- src/corelib/text/qstringconverter.cpp | 65
-rw-r--r-- src/corelib/text/qstringconverter_p.h | 8
-rw-r--r-- src/corelib/text/qstringview.h | 16
-rw-r--r-- src/corelib/text/qutf8stringview.h | 11
6 files changed, 111 insertions, 15 deletions
diff --git a/src/corelib/text/qstring.cpp b/src/corelib/text/qstring.cpp
index ca949b9b32..3ae6404ed4 100644
--- a/src/corelib/text/qstring.cpp
+++ b/src/corelib/text/qstring.cpp
@@ -1482,8 +1482,7 @@ bool QtPrivate::equalStrings(QStringView lhs, QBasicUtf8StringView<false> rhs) n
bool QtPrivate::equalStrings(QLatin1StringView lhs, QBasicUtf8StringView<false> rhs) noexcept
{
- QString r = rhs.toString();
- return QtPrivate::equalStrings(lhs, r); // ### optimize!
+ return QUtf8::compareUtf8(QByteArrayView(rhs), lhs) == 0;
}
bool QtPrivate::equalStrings(QBasicUtf8StringView<false> lhs, QLatin1StringView rhs) noexcept
@@ -1612,7 +1611,7 @@ int QtPrivate::compareStrings(QLatin1StringView lhs, QLatin1StringView rhs, Qt::
*/
int QtPrivate::compareStrings(QLatin1StringView lhs, QBasicUtf8StringView<false> rhs, Qt::CaseSensitivity cs) noexcept
{
- return compareStrings(lhs, rhs.toString(), cs); // ### optimize!
+ return -QUtf8::compareUtf8(QByteArrayView(rhs), lhs, cs);
}
/*!
@@ -1647,13 +1646,7 @@ int QtPrivate::compareStrings(QBasicUtf8StringView<false> lhs, QLatin1StringView
*/
int QtPrivate::compareStrings(QBasicUtf8StringView<false> lhs, QBasicUtf8StringView<false> rhs, Qt::CaseSensitivity cs) noexcept
{
- if (lhs.isEmpty())
- return lencmp(0, rhs.size());
- if (cs == Qt::CaseInsensitive)
- return compareStrings(lhs.toString(), rhs.toString(), cs); // ### optimize!
- const auto l = std::min(lhs.size(), rhs.size());
- int r = memcmp(lhs.data(), rhs.data(), l);
- return r ? r : lencmp(lhs.size(), rhs.size());
+ return QUtf8::compareUtf8(QByteArrayView(lhs), QByteArrayView(rhs), cs);
}
int QAnyStringView::compare(QAnyStringView lhs, QAnyStringView rhs, Qt::CaseSensitivity cs) noexcept
diff --git a/src/corelib/text/qstring.h b/src/corelib/text/qstring.h
index d8d8cd3425..32847e30f6 100644
--- a/src/corelib/text/qstring.h
+++ b/src/corelib/text/qstring.h
@@ -119,6 +119,12 @@ public:
{ return isEmpty() ? -1 : front() == c ? int(size() > 1) : uchar(m_data[0]) - c.unicode(); }
[[nodiscard]] int compare(QChar c, Qt::CaseSensitivity cs) const noexcept
{ return QtPrivate::compareStrings(*this, QStringView(&c, 1), cs); }
+ template<bool UseChar8T>
+ [[nodiscard]] int compare(QBasicUtf8StringView<UseChar8T> other,
+ Qt::CaseSensitivity cs = Qt::CaseSensitive) const noexcept
+ {
+ return QtPrivate::compareStrings(*this, other, cs);
+ }
[[nodiscard]] bool startsWith(QStringView s, Qt::CaseSensitivity cs = Qt::CaseSensitive) const noexcept
{ return QtPrivate::startsWith(*this, s, cs); }
@@ -1230,6 +1236,13 @@ QString QBasicUtf8StringView<UseChar8T>::toString() const
return QString::fromUtf8(data(), size());
}
+template<bool UseChar8T>
+[[nodiscard]] int QBasicUtf8StringView<UseChar8T>::compare(QLatin1StringView other,
+ Qt::CaseSensitivity cs) const noexcept
+{
+ return QtPrivate::compareStrings(*this, other, cs);
+}
+
//
// QAnyStringView inline members that require QString:
//
diff --git a/src/corelib/text/qstringconverter.cpp b/src/corelib/text/qstringconverter.cpp
index bdba20de3a..04bd8e9733 100644
--- a/src/corelib/text/qstringconverter.cpp
+++ b/src/corelib/text/qstringconverter.cpp
@@ -820,7 +820,7 @@ QUtf8::ValidUtf8Result QUtf8::isValidUtf8(QByteArrayView in)
return { true, isValidAscii };
}
-int QUtf8::compareUtf8(QByteArrayView utf8, QStringView utf16) noexcept
+int QUtf8::compareUtf8(QByteArrayView utf8, QStringView utf16, Qt::CaseSensitivity cs) noexcept
{
auto src1 = reinterpret_cast<const qchar8_t *>(utf8.data());
auto end1 = src1 + utf8.size();
@@ -847,7 +847,10 @@ int QUtf8::compareUtf8(QByteArrayView utf8, QStringView utf16) noexcept
if (QChar::isHighSurrogate(uc2) && src2 < end2 && QChar::isLowSurrogate(*src2))
uc2 = QChar::surrogateToUcs4(uc2, *src2++);
}
-
+ if (cs == Qt::CaseInsensitive) {
+ uc1 = QChar::toCaseFolded(uc1);
+ uc2 = QChar::toCaseFolded(uc2);
+ }
if (uc1 != uc2)
return int(uc1) - int(uc2);
}
@@ -857,7 +860,7 @@ int QUtf8::compareUtf8(QByteArrayView utf8, QStringView utf16) noexcept
return (end1 > src1) - int(end2 > src2);
}
-int QUtf8::compareUtf8(QByteArrayView utf8, QLatin1StringView s)
+int QUtf8::compareUtf8(QByteArrayView utf8, QLatin1StringView s, Qt::CaseSensitivity cs)
{
char32_t uc1 = QChar::Null;
auto src1 = reinterpret_cast<const uchar *>(utf8.data());
@@ -875,6 +878,62 @@ int QUtf8::compareUtf8(QByteArrayView utf8, QLatin1StringView s)
}
char32_t uc2 = *src2++;
+ if (cs == Qt::CaseInsensitive) {
+ uc1 = QChar::toCaseFolded(uc1);
+ uc2 = QChar::toCaseFolded(uc2);
+ }
+ if (uc1 != uc2)
+ return int(uc1) - int(uc2);
+ }
+
+ // the shorter string sorts first
+ return (end1 > src1) - (end2 > src2);
+}
+
+static inline int lencmp(qsizetype lhs, qsizetype rhs) noexcept
+{
+ return lhs == rhs ? 0 :
+ lhs > rhs ? 1 :
+ /* else */ -1 ;
+}
+
+int QUtf8::compareUtf8(QByteArrayView lhs, QByteArrayView rhs, Qt::CaseSensitivity cs) noexcept
+{
+ if (lhs.isEmpty())
+ return lencmp(0, rhs.size());
+
+ if (cs == Qt::CaseSensitive) {
+ const auto l = std::min(lhs.size(), rhs.size());
+ int r = memcmp(lhs.data(), rhs.data(), l);
+ return r ? r : lencmp(lhs.size(), rhs.size());
+ }
+
+ char32_t uc1 = QChar::Null;
+ auto src1 = reinterpret_cast<const uchar *>(lhs.data());
+ auto end1 = src1 + lhs.size();
+ char32_t uc2 = QChar::Null;
+ auto src2 = reinterpret_cast<const uchar *>(rhs.data());
+ auto end2 = src2 + rhs.size();
+
+ while (src1 < end1 && src2 < end2) {
+ uchar b = *src1++;
+ char32_t *output = &uc1;
+ int res = QUtf8Functions::fromUtf8<QUtf8BaseTraits>(b, output, src1, end1);
+ if (res < 0) {
+ // decoding error
+ uc1 = QChar::ReplacementCharacter;
+ }
+
+ b = *src2++;
+ output = &uc2;
+ res = QUtf8Functions::fromUtf8<QUtf8BaseTraits>(b, output, src2, end2);
+ if (res < 0) {
+ // decoding error
+ uc2 = QChar::ReplacementCharacter;
+ }
+
+ uc1 = QChar::toCaseFolded(uc1);
+ uc2 = QChar::toCaseFolded(uc2);
if (uc1 != uc2)
return int(uc1) - int(uc2);
}
diff --git a/src/corelib/text/qstringconverter_p.h b/src/corelib/text/qstringconverter_p.h
index 26be8b713c..95d1eea072 100644
--- a/src/corelib/text/qstringconverter_p.h
+++ b/src/corelib/text/qstringconverter_p.h
@@ -276,8 +276,12 @@ struct QUtf8
bool isValidAscii;
};
static ValidUtf8Result isValidUtf8(QByteArrayView in);
- static int compareUtf8(QByteArrayView utf8, QStringView utf16) noexcept;
- static int compareUtf8(QByteArrayView utf8, QLatin1StringView s);
+ static int compareUtf8(QByteArrayView utf8, QStringView utf16,
+ Qt::CaseSensitivity cs = Qt::CaseSensitive) noexcept;
+ static int compareUtf8(QByteArrayView utf8, QLatin1StringView s,
+ Qt::CaseSensitivity cs = Qt::CaseSensitive);
+ static int compareUtf8(QByteArrayView lhs, QByteArrayView rhs,
+ Qt::CaseSensitivity cs = Qt::CaseSensitive) noexcept;
};
struct QUtf16
diff --git a/src/corelib/text/qstringview.h b/src/corelib/text/qstringview.h
index 2015927f19..95b089d565 100644
--- a/src/corelib/text/qstringview.h
+++ b/src/corelib/text/qstringview.h
@@ -8,6 +8,7 @@
#include <QtCore/qbytearray.h>
#include <QtCore/qstringliteral.h>
#include <QtCore/qstringalgorithms.h>
+#include <QtCore/qutf8stringview.h>
#include <string>
@@ -254,6 +255,12 @@ public:
[[nodiscard]] int compare(QStringView other, Qt::CaseSensitivity cs = Qt::CaseSensitive) const noexcept
{ return QtPrivate::compareStrings(*this, other, cs); }
[[nodiscard]] inline int compare(QLatin1StringView other, Qt::CaseSensitivity cs = Qt::CaseSensitive) const noexcept;
+ template<bool UseChar8T>
+ [[nodiscard]] int compare(QBasicUtf8StringView<UseChar8T> other,
+ Qt::CaseSensitivity cs = Qt::CaseSensitive) const noexcept
+ {
+ return QtPrivate::compareStrings(*this, other, cs);
+ }
[[nodiscard]] constexpr int compare(QChar c) const noexcept
{ return size() >= 1 ? compare_single_char_helper(*utf16() - c.unicode()) : -1; }
[[nodiscard]] int compare(QChar c, Qt::CaseSensitivity cs) const noexcept
@@ -449,6 +456,15 @@ inline QStringView qToStringViewIgnoringNull(const QStringLike &s) noexcept
R{{char16_t(c), u'\0'}} ;
}
+// QBasicUtf8StringView functions:
+
+template<bool UseChar8T>
+[[nodiscard]] int QBasicUtf8StringView<UseChar8T>::compare(QStringView other,
+ Qt::CaseSensitivity cs) const noexcept
+{
+ return QtPrivate::compareStrings(*this, other, cs);
+}
+
QT_END_NAMESPACE
#endif /* QSTRINGVIEW_H */
diff --git a/src/corelib/text/qutf8stringview.h b/src/corelib/text/qutf8stringview.h
index 3baac9c885..14a8a16e62 100644
--- a/src/corelib/text/qutf8stringview.h
+++ b/src/corelib/text/qutf8stringview.h
@@ -280,6 +280,17 @@ public:
[[nodiscard]] constexpr qsizetype length() const noexcept
{ return size(); }
+ [[nodiscard]] int compare(QBasicUtf8StringView other,
+ Qt::CaseSensitivity cs = Qt::CaseSensitive) const noexcept
+ {
+ return QtPrivate::compareStrings(*this, other, cs);
+ }
+
+ [[nodiscard]] int compare(QStringView other,
+ Qt::CaseSensitivity cs = Qt::CaseSensitive) const noexcept;
+ [[nodiscard]] int compare(QLatin1StringView other,
+ Qt::CaseSensitivity cs = Qt::CaseSensitive) const noexcept;
+
private:
[[nodiscard]] static inline int compare(QBasicUtf8StringView lhs, QBasicUtf8StringView rhs) noexcept
{