summaryrefslogtreecommitdiffstats
path: root/src/corelib
diff options
context:
space:
mode:
authorMårten Nordheim <marten.nordheim@qt.io>2021-04-30 13:24:19 +0200
committerMårten Nordheim <marten.nordheim@qt.io>2021-06-11 23:30:08 +0200
commitb24e689cb561d81745ff47a5ce4595b809923914 (patch)
treecd1880b963222bffd350204ec62bee5bd39da511 /src/corelib
parentde2c3ccd49cb89e0c6912da3b03705a36ef03946 (diff)
QLatin1String: optimize indexOf
Some time ago I wanted to use QStringTokenizer with a QL1S haystack and needle to optimize a particular bottleneck. Only to realize the new bottleneck was more expensive (continuously converting both the strings to QString). Change-Id: Ica86db0043c839c2336a3c3886ffbe3875659b5b Reviewed-by: Edward Welbourne <edward.welbourne@qt.io>
Diffstat (limited to 'src/corelib')
-rw-r--r--src/corelib/.prev_configure.cmake26
-rw-r--r--src/corelib/configure.cmake26
-rw-r--r--src/corelib/configure.json23
-rw-r--r--src/corelib/global/qconfig-bootstrapped.h1
-rw-r--r--src/corelib/text/qstring.cpp111
5 files changed, 163 insertions, 24 deletions
diff --git a/src/corelib/.prev_configure.cmake b/src/corelib/.prev_configure.cmake
index 893acda953..18d9120aae 100644
--- a/src/corelib/.prev_configure.cmake
+++ b/src/corelib/.prev_configure.cmake
@@ -150,6 +150,28 @@ std::mt19937 mt(0);
}
")
+# cxx17_bm_searcher
+qt_config_compile_test(cxx17_bm_searcher
+ LABEL "C++17 boyer_moore_searcher"
+ CODE
+"#include <algorithm>
+#include <functional>
+
+int main(void)
+{
+ /* BEGIN TEST: */
+const char haystack[] = \"hello\";
+const char needle[] = \"e\";
+const auto it =
+std::search(haystack + 0, haystack + std::size(haystack),
+std::boyer_moore_searcher(needle + 0, needle + std::size(needle)));
+(void)it;
+ /* END TEST: */
+ return 0;
+}
+"# FIXME: qmake: CONFIG += c++17
+)
+
# cxx17_filesystem
qt_config_compile_test(cxx17_filesystem
LABEL "C++17 <filesystem>"
@@ -513,6 +535,10 @@ qt_feature("cxx11_future" PUBLIC
LABEL "C++11 <future>"
CONDITION TEST_cxx11_future
)
+qt_feature("cxx17_bm_searcher" PRIVATE
+ LABEL "C++17 boyer_moore_searcher"
+ CONDITION TEST_cxx17_bm_searcher
+)
qt_feature("cxx17_filesystem" PUBLIC
LABEL "C++17 <filesystem>"
CONDITION TEST_cxx17_filesystem
diff --git a/src/corelib/configure.cmake b/src/corelib/configure.cmake
index e82ac8bf93..e363c44b58 100644
--- a/src/corelib/configure.cmake
+++ b/src/corelib/configure.cmake
@@ -156,6 +156,28 @@ std::mt19937 mt(0);
}
")
+# cxx17_bm_searcher
+qt_config_compile_test(cxx17_bm_searcher
+ LABEL "C++17 boyer_moore_searcher"
+ CODE
+"#include <algorithm>
+#include <functional>
+
+int main(void)
+{
+ /* BEGIN TEST: */
+const char haystack[] = \"hello\";
+const char needle[] = \"e\";
+const auto it =
+std::search(haystack + 0, haystack + std::size(haystack),
+std::boyer_moore_searcher(needle + 0, needle + std::size(needle)));
+(void)it;
+ /* END TEST: */
+ return 0;
+}
+"# FIXME: qmake: CONFIG += c++17
+)
+
# cxx17_filesystem
qt_config_compile_test(cxx17_filesystem
LABEL "C++17 <filesystem>"
@@ -519,6 +541,10 @@ qt_feature("cxx11_future" PUBLIC
LABEL "C++11 <future>"
CONDITION TEST_cxx11_future
)
+qt_feature("cxx17_bm_searcher" PRIVATE
+ LABEL "C++17 boyer_moore_searcher"
+ CONDITION TEST_cxx17_bm_searcher
+)
qt_feature("cxx17_filesystem" PUBLIC
LABEL "C++17 <filesystem>"
CONDITION TEST_cxx17_filesystem
diff --git a/src/corelib/configure.json b/src/corelib/configure.json
index 4643531135..c68c0cf4bf 100644
--- a/src/corelib/configure.json
+++ b/src/corelib/configure.json
@@ -282,6 +282,22 @@
"main": "std::mt19937 mt(0);"
}
},
+ "cxx17_bm_searcher": {
+ "label": "C++17 boyer_moore_searcher",
+ "type": "compile",
+ "test": {
+ "include": [ "algorithm", "functional" ],
+ "main": [
+ "const char haystack[] = \"hello\";",
+ "const char needle[] = \"e\";",
+ "const auto it =",
+ "std::search(haystack + 0, haystack + std::size(haystack),",
+ "std::boyer_moore_searcher(needle + 0, needle + std::size(needle)));",
+ "(void)it;"
+ ],
+ "qmake": "CONFIG += c++17"
+ }
+ },
"cxx17_filesystem": {
"label": "C++17 <filesystem>",
"type": "compile",
@@ -543,6 +559,13 @@
"condition": "tests.cxx11_future",
"output": [ "publicFeature" ]
},
+ "cxx17_bm_searcher": {
+ "label": "C++17 boyer_moore_searcher",
+ "condition": "tests.cxx17_bm_searcher",
+ "output": [
+ "privateFeature"
+ ]
+ },
"cxx17_filesystem": {
"label": "C++17 <filesystem>",
"condition": "tests.cxx17_filesystem",
diff --git a/src/corelib/global/qconfig-bootstrapped.h b/src/corelib/global/qconfig-bootstrapped.h
index 42d38d0d04..4d3f88e071 100644
--- a/src/corelib/global/qconfig-bootstrapped.h
+++ b/src/corelib/global/qconfig-bootstrapped.h
@@ -78,6 +78,7 @@
#define QT_FEATURE_cborstreamwriter 1
#define QT_CRYPTOGRAPHICHASH_ONLY_SHA1
#define QT_FEATURE_cxx11_random (__has_include(<random>) ? 1 : -1)
+#define QT_FEATURE_cxx17_bm_searcher -1
#define QT_FEATURE_cxx17_filesystem -1
#define QT_NO_DATASTREAM
#define QT_FEATURE_datestring 1
diff --git a/src/corelib/text/qstring.cpp b/src/corelib/text/qstring.cpp
index 8f3bde843e..89abbcbff3 100644
--- a/src/corelib/text/qstring.cpp
+++ b/src/corelib/text/qstring.cpp
@@ -78,6 +78,11 @@
#include "qstringalgorithms_p.h"
#include "qthreadstorage.h"
+#include "qbytearraymatcher.h" // Helper for comparison of QLatin1String
+
+#include <algorithm>
+#include <functional>
+
#ifdef Q_OS_WIN
# include <qt_windows.h>
#endif
@@ -1204,27 +1209,27 @@ static int ucstrcmp(const QChar *a, size_t alen, const char *b, size_t blen)
return cmp ? cmp : lencmp(alen, blen);
}
+static constexpr uchar latin1Lower[256] = {
+ 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,
+ 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,
+ 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f,
+ 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f,
+ 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f,
+ 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x5b,0x5c,0x5d,0x5e,0x5f,
+ 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f,
+ 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f,
+ 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f,
+ 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f,
+ 0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf,
+ 0xb0,0xb1,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0xbf,
+ // 0xd7 (multiplication sign) and 0xdf (sz ligature) complicate life
+ 0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,0xee,0xef,
+ 0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xd7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xdf,
+ 0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,0xee,0xef,
+ 0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff
+};
static int latin1nicmp(const char *lhsChar, qsizetype lSize, const char *rhsChar, qsizetype rSize)
{
- constexpr uchar latin1Lower[256] = {
- 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,
- 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,
- 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f,
- 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f,
- 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f,
- 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x5b,0x5c,0x5d,0x5e,0x5f,
- 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f,
- 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f,
- 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f,
- 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f,
- 0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf,
- 0xb0,0xb1,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0xbf,
- // 0xd7 (multiplication sign) and 0xdf (sz ligature) complicate life
- 0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,0xee,0xef,
- 0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xd7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xdf,
- 0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,0xee,0xef,
- 0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff
- };
// We're called with QLatin1String's .data() and .size():
Q_ASSERT(lSize >= 0 && rSize >= 0);
if (!lSize)
@@ -10362,15 +10367,73 @@ qsizetype QtPrivate::findString(QLatin1String haystack, qsizetype from, QStringV
qsizetype QtPrivate::findString(QLatin1String haystack, qsizetype from, QLatin1String needle, Qt::CaseSensitivity cs) noexcept
{
- if (haystack.size() < needle.size())
+ if (from < 0)
+ from += haystack.size();
+ if (from < 0)
+ return -1;
+ qsizetype adjustedSize = haystack.size() - from;
+ if (adjustedSize < needle.size())
return -1;
+ if (needle.size() == 0)
+ return from;
- QVarLengthArray<char16_t> h(haystack.size());
- qt_from_latin1(h.data(), haystack.latin1(), haystack.size());
+ if (cs == Qt::CaseSensitive) {
+ const QByteArrayMatcher matcher(needle.data(), needle.size());
+ return matcher.indexIn(haystack.data(), haystack.size(), from);
+ }
+
+ // If the needle is sufficiently small we simply iteratively search through
+ // the haystack. When the needle is too long we use a boyer-moore searcher
+ // from the standard library, if available. If it is not available then the
+ // QLatin1Strings are converted to QString and compared as such. Though
+ // initialization is slower the boyer-moore search it employs still makes up
+ // for it when haystack and needle are sufficiently long.
+ // The needle size was chosen by testing various lengths using the
+ // qstringtokenizer benchmark with the
+ // "tokenize_qlatin1string_qlatin1string" test.
+#ifdef Q_CC_MSVC
+ const qsizetype threshold = 1;
+#else
+ const qsizetype threshold = 13;
+#endif
+ if (needle.size() <= threshold) {
+ const auto begin = haystack.begin();
+ const auto end = haystack.end() - needle.size() + 1;
+ const uchar needle1 = latin1Lower[uchar(needle[0].toLatin1())];
+ auto ciMatch = [needle1](const char ch) {
+ return latin1Lower[uchar(ch)] == needle1;
+ };
+ const qsizetype nlen1 = needle.size() - 1;
+ for (auto it = std::find_if(begin + from, end, ciMatch); it < end;
+ it = std::find_if(it + 1, end, ciMatch)) {
+ // In this comparison we skip the first character because we know it's a match
+ if (!nlen1 || QLatin1String(it + 1, nlen1).compare(needle.sliced(1), cs) == 0)
+ return std::distance(begin, it);
+ }
+ return -1;
+ }
+
+#if QT_CONFIG(cxx17_bm_searcher)
+ const auto ciHasher = [](char a) { return latin1Lower[uchar(a)]; };
+ const auto ciEqual = [](char a, char b) {
+ return latin1Lower[uchar(a)] == latin1Lower[uchar(b)];
+ };
+ const auto it =
+ std::search(haystack.begin() + from, haystack.end(),
+ std::boyer_moore_searcher(needle.begin(), needle.end(), ciHasher, ciEqual));
+ return it == haystack.end() ? -1 : std::distance(haystack.begin(), it);
+#else
+ QVarLengthArray<char16_t> h(adjustedSize);
+ const auto begin = haystack.end() - adjustedSize;
+ qt_from_latin1(h.data(), begin, adjustedSize);
QVarLengthArray<char16_t> n(needle.size());
qt_from_latin1(n.data(), needle.latin1(), needle.size());
- return QtPrivate::findString(QStringView(reinterpret_cast<const QChar*>(h.constData()), h.size()), from,
- QStringView(reinterpret_cast<const QChar*>(n.constData()), n.size()), cs);
+ qsizetype res = QtPrivate::findString(QStringView(h.constData(), h.size()), 0,
+ QStringView(n.constData(), n.size()), cs);
+ if (res == -1)
+ return -1;
+ return res + std::distance(haystack.begin(), begin);
+#endif
}
qsizetype QtPrivate::lastIndexOf(QStringView haystack, qsizetype from, QStringView needle, Qt::CaseSensitivity cs) noexcept