summaryrefslogtreecommitdiffstats
path: root/src/corelib/text/qstaticlatin1stringmatcher.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/corelib/text/qstaticlatin1stringmatcher.h')
-rw-r--r--src/corelib/text/qstaticlatin1stringmatcher.h140
1 files changed, 140 insertions, 0 deletions
diff --git a/src/corelib/text/qstaticlatin1stringmatcher.h b/src/corelib/text/qstaticlatin1stringmatcher.h
new file mode 100644
index 0000000000..d80ebd8547
--- /dev/null
+++ b/src/corelib/text/qstaticlatin1stringmatcher.h
@@ -0,0 +1,140 @@
+// Copyright (C) 2023 The Qt Company Ltd.
+// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
+
+#ifndef QSTATICLATIN1STRINGMATCHER_H
+#define QSTATICLATIN1STRINGMATCHER_H
+
+#include <functional>
+#include <iterator>
+#include <limits>
+
+#include <QtCore/q20algorithm.h>
+#include <QtCore/qlatin1stringmatcher.h>
+#include <QtCore/qstring.h>
+
+QT_BEGIN_NAMESPACE
+
+#ifdef Q_CC_GHS
+# define QT_STATIC_BOYER_MOORE_NOT_SUPPORTED
+#else
+namespace QtPrivate {
+template <class RandomIt1,
+ class Hash = std::hash<typename std::iterator_traits<RandomIt1>::value_type>,
+ class BinaryPredicate = std::equal_to<>>
+class q_boyer_moore_searcher
+{
+public:
+ constexpr q_boyer_moore_searcher(RandomIt1 pat_first, RandomIt1 pat_last) : m_skiptable{}
+ {
+ const size_t n = std::distance(pat_first, pat_last);
+ constexpr auto uchar_max = (std::numeric_limits<uchar>::max)();
+ uchar max = n > uchar_max ? uchar_max : uchar(n);
+ q20::fill(std::begin(m_skiptable), std::end(m_skiptable), max);
+ Hash hf;
+ RandomIt1 pattern = std::next(pat_first, n - max);
+ while (max--)
+ m_skiptable[hf(*pattern++)] = max;
+ }
+
+ template <class RandomIt2>
+ constexpr auto operator()(RandomIt2 first, RandomIt2 last, RandomIt1 pat_first,
+ RandomIt1 pat_last) const
+ {
+ struct R
+ {
+ RandomIt2 begin, end;
+ };
+ Hash hf;
+ BinaryPredicate pred;
+ auto pat_length = std::distance(pat_first, pat_last);
+ if (pat_length == 0)
+ return R{ first, first };
+
+ auto haystack_length = std::distance(first, last);
+ if (haystack_length < pat_length)
+ return R{ last, last };
+
+ const qsizetype pl_minus_one = qsizetype(pat_length - 1);
+ RandomIt2 current = first + pl_minus_one;
+
+ qsizetype skip = 0;
+ while (current < last - skip) {
+ current += skip;
+ skip = m_skiptable[hf(*current)];
+ if (!skip) {
+ // possible match
+ while (skip < pat_length) {
+ if (!pred(hf(*(current - skip)), hf(pat_first[pl_minus_one - skip])))
+ break;
+ skip++;
+ }
+ if (skip > pl_minus_one) { // we have a match
+ auto match = current + 1 - skip;
+ return R{ match, match + pat_length };
+ }
+
+ // If we don't have a match we are a bit inefficient as we only skip by one
+ // when we have the non matching char in the string.
+ if (m_skiptable[hf(*(current - skip))] == pat_length)
+ skip = pat_length - skip;
+ else
+ skip = 1;
+ }
+ }
+
+ return R{ last, last };
+ }
+
+private:
+ alignas(16) uchar m_skiptable[256];
+};
+} // namespace QtPrivate
+
+template <Qt::CaseSensitivity CS, size_t N>
+class QStaticLatin1StringMatcher
+{
+ static_assert(N > 2,
+ "QStaticLatin1StringMatcher makes no sense for finding a single-char pattern");
+
+ QLatin1StringView m_pattern;
+ using Hasher = std::conditional_t<CS == Qt::CaseSensitive, QtPrivate::QCaseSensitiveLatin1Hash,
+ QtPrivate::QCaseInsensitiveLatin1Hash>;
+ QtPrivate::q_boyer_moore_searcher<const char *, Hasher> m_searcher;
+
+public:
+ explicit constexpr QStaticLatin1StringMatcher(QLatin1StringView patternToMatch) noexcept
+ : m_pattern(patternToMatch),
+ m_searcher(patternToMatch.begin(), patternToMatch.begin() + N - 1)
+ {
+ }
+
+ constexpr qsizetype indexIn(QLatin1StringView haystack, qsizetype from = 0) const noexcept
+ {
+ if (from >= haystack.size())
+ return -1;
+ const char *begin = haystack.begin() + from;
+ const char *end = haystack.end();
+ const auto r = m_searcher(begin, end, m_pattern.begin(), m_pattern.end());
+ return r.begin == end ? -1 : std::distance(haystack.begin(), r.begin);
+ }
+};
+
+template <size_t N>
+constexpr auto qMakeStaticCaseSensitiveLatin1StringMatcher(const char (&patternToMatch)[N]) noexcept
+{
+ return QStaticLatin1StringMatcher<Qt::CaseSensitive, N>(
+ QLatin1StringView(patternToMatch, qsizetype(N) - 1));
+}
+
+template <size_t N>
+constexpr auto
+qMakeStaticCaseInsensitiveLatin1StringMatcher(const char (&patternToMatch)[N]) noexcept
+{
+ return QStaticLatin1StringMatcher<Qt::CaseInsensitive, N>(
+ QLatin1StringView(patternToMatch, qsizetype(N) - 1));
+}
+#endif
+
+QT_END_NAMESPACE
+
+#endif // QSTATICLATIN1STRINGMATCHER_H