diff options
author | Øystein Heskestad <oystein.heskestad@qt.io> | 2022-07-27 13:41:54 +0200 |
---|---|---|
committer | Øystein Heskestad <oystein.heskestad@qt.io> | 2022-08-04 23:12:39 +0200 |
commit | 15368fb31b22bbfc3b41ebac32a0d148d4babca1 (patch) | |
tree | a5d1d174f70094aa14b36d9587e7913b80647a75 /src/corelib/text/qstring.cpp | |
parent | 26a73e1b3127f5e41a73b90121110d3533f4ad60 (diff) |
Add Latin 1 case-insensitive Boyer-Moore searcher
The std::boyer_moore_searcher is buggy for older verions of Microsoft's
STL, and missing in AppleClang's libc++ with an inefficient fall back.
Fixes: QTBUG-100236
Change-Id: Ic3cc916946546d2ef78456cd15e1425d957b989d
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
Reviewed-by: Mårten Nordheim <marten.nordheim@qt.io>
Diffstat (limited to 'src/corelib/text/qstring.cpp')
-rw-r--r-- | src/corelib/text/qstring.cpp | 93 |
1 files changed, 71 insertions, 22 deletions
diff --git a/src/corelib/text/qstring.cpp b/src/corelib/text/qstring.cpp index 56d1bdf605..029e3b8325 100644 --- a/src/corelib/text/qstring.cpp +++ b/src/corelib/text/qstring.cpp @@ -1385,6 +1385,71 @@ static int latin1nicmp(const char *lhsChar, qsizetype lSize, const char *rhsChar } return lencmp(lSize, rSize); } + +namespace { + +template<Qt::CaseSensitivity cs> +inline uchar latin1_fold(const uchar c) +{ + return c; +} + +template<> +inline uchar latin1_fold<Qt::CaseSensitivity::CaseInsensitive>(const uchar c) +{ + return latin1Lower[c]; +} + +template<Qt::CaseSensitivity cs> +inline void bm_latin1_init_skiptable(const uchar *cc, qsizetype len, uchar *skiptable) +{ + int l = int(qMin(len, qsizetype(255))); + memset(skiptable, l, 256 * sizeof(uchar)); + cc += len - l; + while (l--) + skiptable[latin1_fold<cs>(*cc++)] = l; +} + +template<Qt::CaseSensitivity cs> +inline qsizetype bm_latin1_find(const uchar *cc, qsizetype l, qsizetype index, const uchar *puc, + qsizetype pl, const uchar *skiptable) +{ + if (pl == 0) + return index > l ? -1 : index; + const qsizetype pl_minus_one = pl - 1; + + const uchar *current = cc + index + pl_minus_one; + const uchar *end = cc + l; + + while (current < end) { + qsizetype skip = skiptable[latin1_fold<cs>(*current)]; + if (!skip) { + // possible match + while (skip < pl) { + if (latin1_fold<cs>(*(current - skip)) != latin1_fold<cs>(puc[pl_minus_one - skip])) + break; + skip++; + } + if (skip > pl_minus_one) // we have a match + return (current - cc) - skip + 1; + + // in case we don't have a match we are a bit inefficient as we only skip by one + // when we have the non matching char in the string. + if (skiptable[latin1_fold<cs>(*(current - skip))] == pl) + skip = pl - skip; + else + skip = 1; + } + if (current > end - skip) + break; + current += skip; + } + + return -1; // not found +} + +} + bool QtPrivate::equalStrings(QStringView lhs, QStringView rhs) noexcept { return ucstreq(lhs.utf16(), lhs.size(), rhs.utf16(), rhs.size()); @@ -10752,28 +10817,12 @@ qsizetype QtPrivate::findString(QLatin1StringView haystack, qsizetype from, QLat } return -1; } - -#ifdef __cpp_lib_boyer_moore_searcher - const auto ciHasher = [](char a) { return latin1Lower[uchar(a)]; }; - const auto ciEqual = [](char a, char b) { - return latin1Lower[uchar(a)] == latin1Lower[uchar(b)]; - }; - const auto it = - std::search(haystack.begin() + from, haystack.end(), - std::boyer_moore_searcher(needle.begin(), needle.end(), ciHasher, ciEqual)); - return it == haystack.end() ? -1 : std::distance(haystack.begin(), it); -#else - QVarLengthArray<char16_t> h(adjustedSize); - const auto begin = haystack.end() - adjustedSize; - qt_from_latin1(h.data(), begin, adjustedSize); - QVarLengthArray<char16_t> n(needle.size()); - qt_from_latin1(n.data(), needle.latin1(), needle.size()); - qsizetype res = QtPrivate::findString(QStringView(h.constData(), h.size()), 0, - QStringView(n.constData(), n.size()), cs); - if (res == -1) - return -1; - return res + std::distance(haystack.begin(), begin); -#endif + uchar skiptable[256]; + bm_latin1_init_skiptable<Qt::CaseSensitivity::CaseInsensitive>( + reinterpret_cast<const unsigned char *>(needle.begin()), needle.size(), skiptable); + return bm_latin1_find<Qt::CaseSensitivity::CaseInsensitive>( + reinterpret_cast<const unsigned char *>(haystack.begin()), haystack.size(), from, + reinterpret_cast<const unsigned char *>(needle.begin()), needle.size(), skiptable); } qsizetype QtPrivate::lastIndexOf(QStringView haystack, qsizetype from, QStringView needle, Qt::CaseSensitivity cs) noexcept |