summaryrefslogtreecommitdiffstats
path: root/src/corelib/text
diff options
context:
space:
mode:
author Thiago Macieira <thiago.macieira@intel.com> 2020-05-22 11:25:36 -0700
committer Thiago Macieira <thiago.macieira@intel.com> 2020-08-05 21:51:24 -0700
commit 45838673df6e64a6fd42570c4e8874c5181f7717 (patch)
tree 2ad8addd10efbb69191861d6e4960e20f7839ac3 /src/corelib/text
parent 9422b5ebc3592f4687650a84131e736219308b9f (diff)
Implement UTF-16 to UTF-8 case-insensitive compare and make public
Change-Id: Ied637aece2a7427b8a2dfffd16116cf3645c6359
Reviewed-by: Lars Knoll <lars.knoll@qt.io>
Diffstat (limited to 'src/corelib/text')
-rw-r--r-- src/corelib/text/qstring.cpp 55
-rw-r--r-- src/corelib/text/qstringalgorithms.h 1
-rw-r--r-- src/corelib/text/qstringconverter.cpp 2
-rw-r--r-- src/corelib/text/qstringconverter_p.h 2
4 files changed, 57 insertions, 3 deletions
diff --git a/src/corelib/text/qstring.cpp b/src/corelib/text/qstring.cpp
index 82cdeec1a3..f068c5e94a 100644
--- a/src/corelib/text/qstring.cpp
+++ b/src/corelib/text/qstring.cpp
@@ -1,7 +1,7 @@
/****************************************************************************
**
** Copyright (C) 2020 The Qt Company Ltd.
-** Copyright (C) 2018 Intel Corporation.
+** Copyright (C) 2020 Intel Corporation.
** Copyright (C) 2019 Mail.ru Group.
** Contact: https://www.qt.io/licensing/
**
@@ -871,6 +871,35 @@ static int ucstricmp(const QChar *a, const QChar *ae, const char *b, const char
return 1;
}
+// Case-insensitive comparison between a UTF-8 string [utf8, utf8end) and a UTF-16 string [utf16, utf16end). Returns <0, 0 or >0; negative means the UTF-8 side sorts first.
+static int ucstricmp8(const char *utf8, const char *utf8end, const QChar *utf16, const QChar *utf16end)
+{
+ auto src1 = reinterpret_cast<const uchar *>(utf8); // read UTF-8 as unsigned bytes
+ auto end1 = reinterpret_cast<const uchar *>(utf8end);
+ QStringIterator src2(utf16, utf16end); // cursor over the UTF-16 side; next() supplies one value per loop step
+
+ while (src1 < end1 && src2.hasNext()) { // stop as soon as either input is exhausted
+ uint uc1;
+ uint *output = &uc1; // fromUtf8 stores the decoded value through this pointer
+ uchar b = *src1++; // consume the lead byte; it is passed to fromUtf8 separately from the remaining input
+ int res = QUtf8Functions::fromUtf8<QUtf8BaseTraits>(b, output, src1, end1); // a negative result signals a decoding error
+ if (res < 0) {
+ // decoding error: compare as the replacement character (assigned as-is, not case-folded)
+ uc1 = QChar::ReplacementCharacter;
+ } else {
+ uc1 = QChar::toCaseFolded(uc1);
+ }
+
+ uint uc2 = QChar::toCaseFolded(src2.next());
+ int diff = uc1 - uc2; // can't underflow
+ if (diff)
+ return diff; // first differing case-folded value decides the ordering
+ }
+
+ // the shorter string sorts first: 1 if only the UTF-8 side has input left, -1 if only the UTF-16 side, 0 if both ended
+ return (end1 > src1) - int(src2.hasNext());
+}
+
#if defined(__mips_dsp)
// From qstring_mips_dsp_asm.S
extern "C" int qt_ucstrncmp_mips_dsp_asm(const char16_t *a,
@@ -1334,6 +1363,30 @@ int QtPrivate::compareStrings(QLatin1String lhs, QLatin1String rhs, Qt::CaseSens
return qt_compare_strings(lhs, rhs, cs);
}
+/*!
+ \relates QStringView
+ \internal
+ \since 6.0
+ \overload
+
+ Returns an integer that compares to 0 as the UTF-8 string \a u8str (of length \a u8len) compares to \a rhs.
+
+ If \a cs is Qt::CaseSensitive (the default), the comparison is case-sensitive;
+ otherwise the comparison is case-insensitive.
+
+ Case-sensitive comparison is based exclusively on the numeric values of the
+ decoded Unicode code points and is very fast, but is not what a human would
+ expect. Consider sorting user-visible strings with
+ QString::localeAwareCompare().
+*/
+int QtPrivate::compareStringsUtf8(const char *u8str, qsizetype u8len, QStringView rhs, Qt::CaseSensitivity cs) noexcept
+{
+ if (cs == Qt::CaseSensitive)
+ return QUtf8::compareUtf8(u8str, u8len, rhs.data(), rhs.size()); // case-sensitive path: delegate to the UTF-8 vs UTF-16 comparator
+ else
+ return ucstricmp8(u8str, u8str + u8len, rhs.begin(), rhs.end()); // case-insensitive path: the helper takes end pointers, so the length becomes u8str + u8len
+}
+
#define REHASH(a) \
if (sl_minus_1 < sizeof(std::size_t) * CHAR_BIT) \
hashHaystack -= std::size_t(a) << sl_minus_1; \
diff --git a/src/corelib/text/qstringalgorithms.h b/src/corelib/text/qstringalgorithms.h
index 4a0f7dce9a..c407c54268 100644
--- a/src/corelib/text/qstringalgorithms.h
+++ b/src/corelib/text/qstringalgorithms.h
@@ -62,6 +62,7 @@ Q_REQUIRED_RESULT Q_CORE_EXPORT Q_DECL_PURE_FUNCTION int compareStrings(QStringV
Q_REQUIRED_RESULT Q_CORE_EXPORT Q_DECL_PURE_FUNCTION int compareStrings(QStringView lhs, QLatin1String rhs, Qt::CaseSensitivity cs = Qt::CaseSensitive) noexcept;
Q_REQUIRED_RESULT Q_CORE_EXPORT Q_DECL_PURE_FUNCTION int compareStrings(QLatin1String lhs, QStringView rhs, Qt::CaseSensitivity cs = Qt::CaseSensitive) noexcept;
Q_REQUIRED_RESULT Q_CORE_EXPORT Q_DECL_PURE_FUNCTION int compareStrings(QLatin1String lhs, QLatin1String rhs, Qt::CaseSensitivity cs = Qt::CaseSensitive) noexcept;
+Q_REQUIRED_RESULT Q_CORE_EXPORT Q_DECL_PURE_FUNCTION int compareStringsUtf8(const char *, qsizetype, QStringView rhs, Qt::CaseSensitivity cs = Qt::CaseSensitive) noexcept;
Q_REQUIRED_RESULT Q_CORE_EXPORT Q_DECL_PURE_FUNCTION bool startsWith(QStringView haystack, QStringView needle, Qt::CaseSensitivity cs = Qt::CaseSensitive) noexcept;
diff --git a/src/corelib/text/qstringconverter.cpp b/src/corelib/text/qstringconverter.cpp
index 8e314ffd34..2111d22b2f 100644
--- a/src/corelib/text/qstringconverter.cpp
+++ b/src/corelib/text/qstringconverter.cpp
@@ -713,7 +713,7 @@ QUtf8::ValidUtf8Result QUtf8::isValidUtf8(const char *chars, qsizetype len)
return { true, isValidAscii };
}
-int QUtf8::compareUtf8(const char *utf8, qsizetype u8len, const QChar *utf16, qsizetype u16len)
+int QUtf8::compareUtf8(const char *utf8, qsizetype u8len, const QChar *utf16, qsizetype u16len) noexcept
{
uint uc1, uc2;
auto src1 = reinterpret_cast<const uchar *>(utf8);
diff --git a/src/corelib/text/qstringconverter_p.h b/src/corelib/text/qstringconverter_p.h
index 4e1efd3731..3a99a4191a 100644
--- a/src/corelib/text/qstringconverter_p.h
+++ b/src/corelib/text/qstringconverter_p.h
@@ -338,7 +338,7 @@ struct QUtf8
bool isValidAscii;
};
static ValidUtf8Result isValidUtf8(const char *, qsizetype);
- static int compareUtf8(const char *, qsizetype, const QChar *, qsizetype);
+ static int compareUtf8(const char *, qsizetype, const QChar *, qsizetype) noexcept;
static int compareUtf8(const char *, qsizetype, QLatin1String s);
};