diff options
author | Øystein Heskestad <oystein.heskestad@qt.io> | 2022-10-04 17:24:17 +0200 |
---|---|---|
committer | Øystein Heskestad <oystein.heskestad@qt.io> | 2022-12-08 17:56:47 +0100 |
commit | 3fedcd4e4aeb79e432276aaac128910f8de05f25 (patch) | |
tree | a6b7e9dfaa44df39bdc171e516024ad64de23364 /tests/auto/corelib/text/qlatin1stringmatcher | |
parent | bb5d4094e040359d2f74c75b8a25732c1ea89b87 (diff) |
Add Boyer-Moore Latin-1 string searcher with optional case sensitivity
[ChangeLog][QtCore][QString] Added Boyer-Moore Latin-1 string searcher with optional case sensitivity
Task-number: QTBUG-100236
Change-Id: I200a0dac7c8012add1ee02511dba791d233115e0
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
Diffstat (limited to 'tests/auto/corelib/text/qlatin1stringmatcher')
-rw-r--r-- | tests/auto/corelib/text/qlatin1stringmatcher/CMakeLists.txt | 14 | ||||
-rw-r--r-- | tests/auto/corelib/text/qlatin1stringmatcher/tst_qlatin1stringmatcher.cpp | 306 |
2 files changed, 320 insertions, 0 deletions
diff --git a/tests/auto/corelib/text/qlatin1stringmatcher/CMakeLists.txt b/tests/auto/corelib/text/qlatin1stringmatcher/CMakeLists.txt new file mode 100644 index 0000000000..fb4ef9fd8b --- /dev/null +++ b/tests/auto/corelib/text/qlatin1stringmatcher/CMakeLists.txt @@ -0,0 +1,14 @@ +# Copyright (C) 2022 The Qt Company Ltd. +# SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0 + +##################################################################### +## tst_qlatin1sgtringmatcher Test: +##################################################################### + +qt_internal_add_test(tst_qlatin1stringmatcher + SOURCES + tst_qlatin1stringmatcher.cpp +) + +## Scopes: +##################################################################### diff --git a/tests/auto/corelib/text/qlatin1stringmatcher/tst_qlatin1stringmatcher.cpp b/tests/auto/corelib/text/qlatin1stringmatcher/tst_qlatin1stringmatcher.cpp new file mode 100644 index 0000000000..9572466a7b --- /dev/null +++ b/tests/auto/corelib/text/qlatin1stringmatcher/tst_qlatin1stringmatcher.cpp @@ -0,0 +1,306 @@ +// Copyright (C) 2022 The Qt Company Ltd. +// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0 + +#include <QTest> + +#include <QtCore/QLatin1StringMatcher> + +#include <numeric> +#include <string> + +#if QT_CONFIG(cxx11_future) +# include <thread> +#endif + +// COM interface +#if defined(interface) +# undef interface +#endif + +using namespace Qt::Literals::StringLiterals; + +class tst_QLatin1StringMatcher : public QObject +{ + Q_OBJECT + +private slots: + void overloads(); + void interface(); + void indexIn(); + void haystacksWithMoreThan4GiBWork(); +}; + +void tst_QLatin1StringMatcher::overloads() +{ + QLatin1StringView hello = "hello"_L1; + QByteArray hello2B = QByteArrayView(hello).toByteArray().repeated(2); + QLatin1StringView hello2(hello2B); + { + QLatin1StringMatcher m("hello"_L1, Qt::CaseSensitive); + QCOMPARE(m.pattern(), "hello"_L1); + QCOMPARE(m.indexIn("hello"_L1), 0); + QCOMPARE(m.indexIn("Hello"_L1), -1); + QCOMPARE(m.indexIn("Hellohello"_L1), 5); + QCOMPARE(m.indexIn("helloHello"_L1), 0); + QCOMPARE(m.indexIn("helloHello"_L1, 1), -1); + } + { + QLatin1StringMatcher m("Hello"_L1, Qt::CaseSensitive); + QCOMPARE(m.pattern(), "Hello"_L1); + QCOMPARE(m.indexIn("hello"_L1), -1); + QCOMPARE(m.indexIn("Hello"_L1), 0); + QCOMPARE(m.indexIn("Hellohello"_L1), 0); + QCOMPARE(m.indexIn("helloHello"_L1), 5); + QCOMPARE(m.indexIn("helloHello"_L1, 6), -1); + } + { + QLatin1StringMatcher m("hello"_L1, Qt::CaseInsensitive); + QCOMPARE(m.pattern(), "hello"_L1); + QCOMPARE(m.indexIn("hello"_L1), 0); + QCOMPARE(m.indexIn("Hello"_L1), 0); + QCOMPARE(m.indexIn("Hellohello"_L1), 0); + QCOMPARE(m.indexIn("helloHello"_L1), 0); + QCOMPARE(m.indexIn("helloHello"_L1, 1), 5); + QCOMPARE(m.indexIn("helloHello"_L1, 6), -1); + } + { + QLatin1StringMatcher m("Hello"_L1, Qt::CaseInsensitive); + QCOMPARE(m.pattern(), "Hello"_L1); + QCOMPARE(m.indexIn("hello"_L1), 0); + QCOMPARE(m.indexIn("Hello"_L1), 0); + QCOMPARE(m.indexIn("Hellohello"_L1), 0); + QCOMPARE(m.indexIn("helloHello"_L1), 0); + QCOMPARE(m.indexIn("helloHello"_L1, 1), 5); + QCOMPARE(m.indexIn("helloHello"_L1, 6), -1); + } + { + QLatin1StringMatcher m(hello, Qt::CaseSensitive); + QCOMPARE(m.pattern(), "hello"_L1); + QCOMPARE(m.indexIn(hello), 0); + QCOMPARE(m.indexIn(hello, 1), -1); + QCOMPARE(m.indexIn(hello2, 1), hello.size()); + QCOMPARE(m.indexIn(hello2, 6), -1); + } +} + +void tst_QLatin1StringMatcher::interface() +{ + QLatin1StringView needle = "abc123"_L1; + QByteArray haystackT(500, 'a'); + haystackT.insert(6, "123"); + haystackT.insert(31, "abc"); + haystackT.insert(42, "abc123"); + haystackT.insert(84, "abc123"); + QLatin1StringView haystack(haystackT); + + QLatin1StringMatcher matcher1; + + matcher1 = QLatin1StringMatcher(needle, Qt::CaseSensitive); + QLatin1StringMatcher matcher2; + matcher2.setPattern(needle); + + QLatin1StringMatcher matcher3 = QLatin1StringMatcher(needle, Qt::CaseSensitive); + QLatin1StringMatcher matcher4; + matcher4 = matcher3; + + QCOMPARE(matcher1.indexIn(haystack), 42); + QCOMPARE(matcher2.indexIn(haystack), 42); + QCOMPARE(matcher3.indexIn(haystack), 42); + QCOMPARE(matcher4.indexIn(haystack), 42); + + QCOMPARE(matcher1.indexIn(haystack, 43), 84); + QCOMPARE(matcher1.indexIn(haystack, 85), -1); + + QLatin1StringMatcher matcher5("123"_L1, Qt::CaseSensitive); + QCOMPARE(matcher5.indexIn(haystack), 6); + + matcher5 = QLatin1StringMatcher("abc"_L1, Qt::CaseSensitive); + QCOMPARE(matcher5.indexIn(haystack), 31); + + matcher5.setPattern(matcher4.pattern()); + QCOMPARE(matcher5.indexIn(haystack), 42); + + QLatin1StringMatcher matcher6 = matcher5; + QCOMPARE(matcher6.indexIn(haystack), 42); + + QLatin1StringMatcher matcher7 = std::move(matcher5); + QCOMPARE(matcher7.indexIn(haystack), 42); + + matcher1.setPattern("123"_L1); + matcher7 = std::move(matcher1); + QCOMPARE(matcher7.indexIn(haystack), 6); +} + +#define LONG_STRING__32 "abcdefghijklmnopqrstuvwxyz012345" +#define LONG_STRING__64 LONG_STRING__32 LONG_STRING__32 +#define LONG_STRING_128 LONG_STRING__64 LONG_STRING__64 +#define LONG_STRING_256 LONG_STRING_128 LONG_STRING_128 +#define LONG_STRING_512 LONG_STRING_256 LONG_STRING_256 + +void tst_QLatin1StringMatcher::indexIn() +{ + const char p_data[] = { 0x0, 0x0, 0x1 }; + QLatin1StringView pattern(p_data, sizeof(p_data)); + + QByteArray haystackT(8, '\0'); + haystackT[7] = 0x1; + QLatin1StringView haystack(haystackT); + + QLatin1StringMatcher matcher; + + matcher = QLatin1StringMatcher(pattern, Qt::CaseSensitive); + QCOMPARE(matcher.indexIn(haystack, 0), 5); + QCOMPARE(matcher.indexIn(haystack, 1), 5); + QCOMPARE(matcher.indexIn(haystack, 2), 5); + + matcher.setPattern(pattern); + QCOMPARE(matcher.indexIn(haystack, 0), 5); + QCOMPARE(matcher.indexIn(haystack, 1), 5); + QCOMPARE(matcher.indexIn(haystack, 2), 5); + + std::array<char, 256> allChars; + for (int i = 0; i < 256; ++i) + allChars[i] = char(i); + + matcher = QLatin1StringMatcher(QLatin1StringView(allChars), Qt::CaseSensitive); + haystackT = LONG_STRING__32 "x"; + haystackT += matcher.pattern(); + haystack = QLatin1StringView(haystackT); + QCOMPARE(matcher.indexIn(haystack, 0), 33); + QCOMPARE(matcher.indexIn(haystack, 1), 33); + QCOMPARE(matcher.indexIn(haystack, 2), 33); + QCOMPARE(matcher.indexIn(haystack, 33), 33); + QCOMPARE(matcher.indexIn(haystack, 34), -1); + + matcher = QLatin1StringMatcher(QLatin1StringView(LONG_STRING_256), Qt::CaseSensitive); + haystackT = QByteArray(LONG_STRING__32 "x"); + haystackT += matcher.pattern(); + haystackT += QByteArrayView("Just junk at the end"); + haystack = QLatin1StringView(haystackT); + QCOMPARE(matcher.indexIn(haystack, 0), 33); + QCOMPARE(matcher.indexIn(haystack, 1), 33); + QCOMPARE(matcher.indexIn(haystack, 2), 33); + QCOMPARE(matcher.indexIn(haystack, 33), 33); + QCOMPARE(matcher.indexIn(haystack, 34), -1); + matcher.setCaseSensitivity(Qt::CaseInsensitive); + QCOMPARE(matcher.indexIn(haystack, 0), 33); + QCOMPARE(matcher.indexIn(haystack, 1), 33); + QCOMPARE(matcher.indexIn(haystack, 2), 33); + QCOMPARE(matcher.indexIn(haystack, 33), 33); + QCOMPARE(matcher.indexIn(haystack, 34), -1); + + matcher = QLatin1StringMatcher(QLatin1StringView(LONG_STRING_512), Qt::CaseInsensitive); + haystackT = QByteArray(LONG_STRING__32 "x"); + haystackT += matcher.pattern(); + haystackT += QByteArrayView("Just junk at the end"); + haystack = QLatin1StringView(haystackT); + QCOMPARE(matcher.indexIn(haystack, 0), 33); + QCOMPARE(matcher.indexIn(haystack, 1), 33); + QCOMPARE(matcher.indexIn(haystack, 2), 33); + QCOMPARE(matcher.indexIn(haystack, 33), 33); + QCOMPARE(matcher.indexIn(haystack, 34), -1); + matcher.setCaseSensitivity(Qt::CaseSensitive); + QCOMPARE(matcher.indexIn(haystack, 0), 33); + QCOMPARE(matcher.indexIn(haystack, 1), 33); + QCOMPARE(matcher.indexIn(haystack, 2), 33); + QCOMPARE(matcher.indexIn(haystack, 33), 33); + QCOMPARE(matcher.indexIn(haystack, 34), -1); + + matcher = QLatin1StringMatcher(QLatin1StringView(""), Qt::CaseSensitive); + haystackT = QByteArray(LONG_STRING__32 "x"); + haystack = QLatin1StringView(haystackT); + QCOMPARE(matcher.indexIn(haystack, 0), 0); + QCOMPARE(matcher.indexIn(haystack, 1), 1); + QCOMPARE(matcher.indexIn(haystack, 2), 2); + QCOMPARE(matcher.indexIn(haystack, 33), 33); + + matcher = QLatin1StringMatcher(QLatin1StringView(""), Qt::CaseInsensitive); + haystackT = QByteArray(LONG_STRING__32 "x"); + haystack = QLatin1StringView(haystackT); + QCOMPARE(matcher.indexIn(haystack, 0), 0); + QCOMPARE(matcher.indexIn(haystack, 1), 1); + QCOMPARE(matcher.indexIn(haystack, 2), 2); + QCOMPARE(matcher.indexIn(haystack, 33), 33); + + matcher = QLatin1StringMatcher(QLatin1StringView("m\xF8"), Qt::CaseInsensitive); + haystackT = QByteArray("M\xF8m\xF8"); + haystack = QLatin1StringView(haystackT); + QCOMPARE(matcher.indexIn(haystack, 0), 0); + QCOMPARE(matcher.indexIn(haystack, 1), 2); + QCOMPARE(matcher.indexIn(haystack, 2), 2); + QCOMPARE(matcher.indexIn(haystack, 3), -1); + matcher.setCaseSensitivity(Qt::CaseSensitive); + QCOMPARE(matcher.indexIn(haystack, 0), 2); + QCOMPARE(matcher.indexIn(haystack, 1), 2); + QCOMPARE(matcher.indexIn(haystack, 2), 2); + QCOMPARE(matcher.indexIn(haystack, 3), -1); +} + +void tst_QLatin1StringMatcher::haystacksWithMoreThan4GiBWork() +{ +#if QT_POINTER_SIZE > 4 + // use a large needle to trigger long skips in the Boyer-Moore algorithm + // (to speed up the test) + constexpr std::string_view needle = LONG_STRING_256; + + // + // GIVEN: a haystack with more than 4 GiB of data + // + + // don't use QByteArray because freeSpaceAtEnd() may break reserve() + // semantics and a realloc is the last thing we need here + std::string large; + QElapsedTimer timer; + timer.start(); + constexpr size_t GiB = 1024 * 1024 * 1024; + constexpr size_t BaseSize = 4 * GiB + 1; + try { + large.reserve(BaseSize + needle.size()); + large.resize(BaseSize, '\0'); + large.append(needle); + } catch (const std::bad_alloc &) { + QSKIP("Could not allocate 4GiB plus a couple hundred bytes of RAM."); + } + QCOMPARE(large.size(), BaseSize + needle.size()); + qDebug("created dataset in %lld ms", timer.elapsed()); + +# if QT_CONFIG(cxx11_future) + using MaybeThread = std::thread; +# else + struct MaybeThread + { + std::function<void()> func; + void join() { func(); } + }; +# endif + + // + // WHEN: trying to match an occurrence past the 4GiB mark + // + + qsizetype dynamicResult; + + auto t = MaybeThread{ [&] { + QLatin1StringMatcher m(QLatin1StringView(needle), Qt::CaseSensitive); + dynamicResult = m.indexIn(QLatin1StringView(large)); + } }; + t.join(); + + // + // THEN: the result index is not trucated + // + + QCOMPARE(dynamicResult, qsizetype(BaseSize)); +#else + QSKIP("This test is 64-bit only."); +#endif +} + +#undef LONG_STRING_512 +#undef LONG_STRING_256 +#undef LONG_STRING_128 +#undef LONG_STRING__64 +#undef LONG_STRING__32 + +QTEST_APPLESS_MAIN(tst_QLatin1StringMatcher) +#include "tst_qlatin1stringmatcher.moc" |