From d8387fc538e31d3aef9ae046b327ddf42eca3aa5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=98ystein=20Heskestad?= Date: Tue, 20 Sep 2022 17:48:11 +0200 Subject: Reduce foldCasing of the needle in Boyer-Moore QString searches Before searching, foldCase up to the first 256 characters, and use this buffer to compare against the haystack. If the needle is larger than the buffer, compare the rest of the needle against the rest of the haystack for every potential match. The buffer is placed on the stack and must be refolded for each search, but this change does not break the API. This is faster than the old implementation, except if the needle is long and it is found near the beginning of the haystack, or if the needle is long and it is not found in a short haystack where few comparisons are done and hence few case foldings were needed in the old implementation. Benchmarking using tst_bench_qstringtokenizer tokenize_qstring_qstring shows an improvement for the total testcase and usually for each individual test. 
Fixes: QTBUG-100239 Change-Id: Ie61342eb5c19f32de3c1ba0a51dbb0db503bdf3a Reviewed-by: Thiago Macieira --- .../text/qstringmatcher/tst_qstringmatcher.cpp | 25 +++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) (limited to 'tests/auto/corelib') diff --git a/tests/auto/corelib/text/qstringmatcher/tst_qstringmatcher.cpp b/tests/auto/corelib/text/qstringmatcher/tst_qstringmatcher.cpp index 3da3e4b177..6378ed8f5a 100644 --- a/tests/auto/corelib/text/qstringmatcher/tst_qstringmatcher.cpp +++ b/tests/auto/corelib/text/qstringmatcher/tst_qstringmatcher.cpp @@ -92,7 +92,30 @@ void tst_QStringMatcher::setCaseSensitivity_data() QTest::newRow("overshot") << QString("foo") << QString("baFooz foo bar") << 14 << -1 << (int) Qt::CaseSensitive; QTest::newRow("sensitive") << QString("foo") << QString("baFooz foo bar") << 1 << 7 << (int) Qt::CaseSensitive; - QTest::newRow("insensitive") << QString("foo") << QString("baFooz foo bar") << 1 << 2 << (int) Qt::CaseInsensitive; + QTest::newRow("insensitive-1") + << QString("foo") << QString("baFooz foo bar") << 0 << 2 << (int)Qt::CaseInsensitive; + QTest::newRow("insensitive-2") + << QString("foo") << QString("baFooz foo bar") << 1 << 2 << (int)Qt::CaseInsensitive; + QTest::newRow("insensitive-3") + << QString("foo") << QString("baFooz foo bar") << 4 << 7 << (int)Qt::CaseInsensitive; + QTest::newRow("insensitive-4") + << QString("foogabooga") << QString("baFooGaBooga foogabooga bar") << 1 << 2 + << (int)Qt::CaseInsensitive; + QTest::newRow("insensitive-5") + << QString("foogabooga") << QString("baFooGaBooga foogabooga bar") << 3 << 13 + << (int)Qt::CaseInsensitive; + QTest::newRow("insensitive-6") << QString("foogabooga") << QString("GaBoogaFoogaBooga bar") << 0 + << 7 << (int)Qt::CaseInsensitive; + QTest::newRow("insensitive-7") << QString("foogabooga") << QString("FoGaBoogaFoogaBooga") << 9 + << 9 << (int)Qt::CaseInsensitive; + QTest::newRow("insensitive-8") << QString("foogaBooga") << QString("zzzzaazzffoogaBooga") << 
0 + << 9 << (int)Qt::CaseInsensitive; + QString stringOf32("abcdefghijklmnopqrstuvwxyz123456"); + Q_ASSERT(stringOf32.size() == 32); + QString stringOf128 = stringOf32 + stringOf32 + stringOf32 + stringOf32; + QString needle = stringOf128 + stringOf128 + "CAse"; + QString haystack = stringOf128 + stringOf128 + "caSE"; + QTest::newRow("insensitive-9") << needle << haystack << 0 << 0 << (int)Qt::CaseInsensitive; } void tst_QStringMatcher::setCaseSensitivity() -- cgit v1.2.3