summaryrefslogtreecommitdiffstats
path: root/src/corelib/text/qstaticlatin1stringmatcher.h
blob: d80ebd8547d782ef1335fb8dc725f4330ef1a857 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
// Copyright (C) 2023 The Qt Company Ltd.
// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only

#ifndef QSTATICLATIN1STRINGMATCHER_H
#define QSTATICLATIN1STRINGMATCHER_H

#include <functional>
#include <iterator>
#include <limits>

#include <QtCore/q20algorithm.h>
#include <QtCore/qlatin1stringmatcher.h>
#include <QtCore/qstring.h>

QT_BEGIN_NAMESPACE

#ifdef Q_CC_GHS
#  define QT_STATIC_BOYER_MOORE_NOT_SUPPORTED
#else
namespace QtPrivate {
template <class RandomIt1,
          class Hash = std::hash<typename std::iterator_traits<RandomIt1>::value_type>,
          class BinaryPredicate = std::equal_to<>>
class q_boyer_moore_searcher
{
public:
    constexpr q_boyer_moore_searcher(RandomIt1 pat_first, RandomIt1 pat_last) : m_skiptable{}
    {
        const size_t n = std::distance(pat_first, pat_last);
        constexpr auto uchar_max = (std::numeric_limits<uchar>::max)();
        uchar max = n > uchar_max ? uchar_max : uchar(n);
        q20::fill(std::begin(m_skiptable), std::end(m_skiptable), max);
        Hash hf;
        RandomIt1 pattern = std::next(pat_first, n - max);
        while (max--)
            m_skiptable[hf(*pattern++)] = max;
    }

    template <class RandomIt2>
    constexpr auto operator()(RandomIt2 first, RandomIt2 last, RandomIt1 pat_first,
                              RandomIt1 pat_last) const
    {
        struct R
        {
            RandomIt2 begin, end;
        };
        Hash hf;
        BinaryPredicate pred;
        auto pat_length = std::distance(pat_first, pat_last);
        if (pat_length == 0)
            return R{ first, first };

        auto haystack_length = std::distance(first, last);
        if (haystack_length < pat_length)
            return R{ last, last };

        const qsizetype pl_minus_one = qsizetype(pat_length - 1);
        RandomIt2 current = first + pl_minus_one;

        qsizetype skip = 0;
        while (current < last - skip) {
            current += skip;
            skip = m_skiptable[hf(*current)];
            if (!skip) {
                // possible match
                while (skip < pat_length) {
                    if (!pred(hf(*(current - skip)), hf(pat_first[pl_minus_one - skip])))
                        break;
                    skip++;
                }
                if (skip > pl_minus_one) { // we have a match
                    auto match = current + 1 - skip;
                    return R{ match, match + pat_length };
                }

                // If we don't have a match we are a bit inefficient as we only skip by one
                // when we have the non matching char in the string.
                if (m_skiptable[hf(*(current - skip))] == pat_length)
                    skip = pat_length - skip;
                else
                    skip = 1;
            }
        }

        return R{ last, last };
    }

private:
    alignas(16) uchar m_skiptable[256];
};
} // namespace QtPrivate

template <Qt::CaseSensitivity CS, size_t N>
class QStaticLatin1StringMatcher
{
    static_assert(N > 2,
                  "QStaticLatin1StringMatcher makes no sense for finding a single-char pattern");

    QLatin1StringView m_pattern;
    using Hasher = std::conditional_t<CS == Qt::CaseSensitive, QtPrivate::QCaseSensitiveLatin1Hash,
                                      QtPrivate::QCaseInsensitiveLatin1Hash>;
    QtPrivate::q_boyer_moore_searcher<const char *, Hasher> m_searcher;

public:
    explicit constexpr QStaticLatin1StringMatcher(QLatin1StringView patternToMatch) noexcept
        : m_pattern(patternToMatch),
          m_searcher(patternToMatch.begin(), patternToMatch.begin() + N - 1)
    {
    }

    constexpr qsizetype indexIn(QLatin1StringView haystack, qsizetype from = 0) const noexcept
    {
        if (from >= haystack.size())
            return -1;
        const char *begin = haystack.begin() + from;
        const char *end = haystack.end();
        const auto r = m_searcher(begin, end, m_pattern.begin(), m_pattern.end());
        return r.begin == end ? -1 : std::distance(haystack.begin(), r.begin);
    }
};

template <size_t N>
constexpr auto qMakeStaticCaseSensitiveLatin1StringMatcher(const char (&patternToMatch)[N]) noexcept
{
    return QStaticLatin1StringMatcher<Qt::CaseSensitive, N>(
            QLatin1StringView(patternToMatch, qsizetype(N) - 1));
}

template <size_t N>
constexpr auto
qMakeStaticCaseInsensitiveLatin1StringMatcher(const char (&patternToMatch)[N]) noexcept
{
    return QStaticLatin1StringMatcher<Qt::CaseInsensitive, N>(
            QLatin1StringView(patternToMatch, qsizetype(N) - 1));
}
#endif

QT_END_NAMESPACE

#endif // QSTATICLATIN1STRINGMATCHER_H