diff options
Diffstat (limited to 'src/corelib/text/qlatin1stringmatcher.cpp')
-rw-r--r-- | src/corelib/text/qlatin1stringmatcher.cpp | 233 |
1 files changed, 233 insertions, 0 deletions
diff --git a/src/corelib/text/qlatin1stringmatcher.cpp b/src/corelib/text/qlatin1stringmatcher.cpp new file mode 100644 index 0000000000..9036048fff --- /dev/null +++ b/src/corelib/text/qlatin1stringmatcher.cpp @@ -0,0 +1,233 @@ +// Copyright (C) 2022 The Qt Company Ltd. +// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only + +#include "qlatin1stringmatcher.h" +#include <limits.h> + +QT_BEGIN_NAMESPACE + +/*! \class QLatin1StringMatcher + \inmodule QtCore + \brief Optimized search for substring in Latin-1 text. + + A QLatin1StringMatcher can search for one QLatin1StringView + as a substring of another, either ignoring case or taking it into + account. + + \since 6.5 + \ingroup tools + \ingroup string-processing + + This class is useful when you have a Latin-1 encoded string that + you want to repeatedly search for in some QLatin1StringViews + (perhaps in a loop), or when you want to search for all + instances of it in a given QLatin1StringView. Using a matcher + object and indexIn() is faster than matching a plain + QLatin1StringView with QLatin1StringView::indexOf() if repeated + matching takes place. This class offers no benefit if you are + doing one-off matches. The string to be searched for must not + be destroyed or changed before the matcher object is destroyed, + as the matcher accesses the string when searching for it. + + Create a QLatin1StringMatcher for the QLatin1StringView + you want to search for and the case sensitivity. Then call + indexIn() with the QLatin1StringView that you want to search + within. + + \sa QLatin1StringView, QStringMatcher, QByteArrayMatcher +*/ + +/*! + Construct an empty Latin-1 string matcher. + This will match at each position in any string. + \sa setPattern(), setCaseSensitivity(), indexIn() +*/ +QLatin1StringMatcher::QLatin1StringMatcher() noexcept + : m_pattern(), + m_cs(Qt::CaseSensitive), + m_caseSensitiveSearcher(m_pattern.data(), m_pattern.data()) +{ +} + +/*! + Constructs a Latin-1 string matcher that searches for the given \a pattern + with given case sensitivity \a cs. The \a pattern argument must + not be destroyed before this matcher object. Call indexIn() + to find the \a pattern in the given QLatin1StringView. +*/ +QLatin1StringMatcher::QLatin1StringMatcher(QLatin1StringView pattern, + Qt::CaseSensitivity cs) noexcept + : m_pattern(pattern), m_cs(cs) +{ + setSearcher(); +} + +/*! + Destroys the Latin-1 string matcher. +*/ +QLatin1StringMatcher::~QLatin1StringMatcher() noexcept +{ + freeSearcher(); +} + +/*! + \internal +*/ +void QLatin1StringMatcher::setSearcher() noexcept +{ + if (m_cs == Qt::CaseSensitive) { + new (&m_caseSensitiveSearcher) CaseSensitiveSearcher(m_pattern.data(), m_pattern.end()); + } else { + QtPrivate::QCaseInsensitiveLatin1Hash foldCase; + qsizetype bufferSize = std::min(m_pattern.size(), qsizetype(sizeof m_foldBuffer)); + for (qsizetype i = 0; i < bufferSize; ++i) + m_foldBuffer[i] = static_cast<char>(foldCase(m_pattern[i].toLatin1())); + + new (&m_caseInsensitiveSearcher) + CaseInsensitiveSearcher(m_foldBuffer, &m_foldBuffer[bufferSize]); + } +} + +/*! + \internal +*/ +void QLatin1StringMatcher::freeSearcher() noexcept +{ + if (m_cs == Qt::CaseSensitive) + m_caseSensitiveSearcher.~CaseSensitiveSearcher(); + else + m_caseInsensitiveSearcher.~CaseInsensitiveSearcher(); +} + +/*! + Sets the \a pattern to search for. The string pointed to by the + QLatin1StringView must not be destroyed before the matcher is + destroyed, unless it is set to point to a different \a pattern + with longer lifetime first. + + \sa pattern(), indexIn() +*/ +void QLatin1StringMatcher::setPattern(QLatin1StringView pattern) noexcept +{ + if (m_pattern.latin1() == pattern.latin1() && m_pattern.size() == pattern.size()) + return; // Same address and size + + freeSearcher(); + m_pattern = pattern; + setSearcher(); +} + +/*! + Returns the Latin-1 pattern that the matcher searches for. + + \sa setPattern(), indexIn() +*/ +QLatin1StringView QLatin1StringMatcher::pattern() const noexcept +{ + return m_pattern; +} + +/*! + Sets the case sensitivity to \a cs. + + \sa caseSensitivity(), indexIn() +*/ +void QLatin1StringMatcher::setCaseSensitivity(Qt::CaseSensitivity cs) noexcept +{ + if (m_cs == cs) + return; + + freeSearcher(); + m_cs = cs; + setSearcher(); +} + +/*! + Returns the case sensitivity the matcher uses. + + \sa setCaseSensitivity(), indexIn() +*/ +Qt::CaseSensitivity QLatin1StringMatcher::caseSensitivity() const noexcept +{ + return m_cs; +} + +/*! + Searches for the pattern in the given \a haystack starting from + \a from. + + \sa caseSensitivity(), pattern() +*/ +qsizetype QLatin1StringMatcher::indexIn(QLatin1StringView haystack, qsizetype from) const noexcept +{ + return indexIn_helper(haystack, from); +} + +/*! + \since 6.8 + \overload + + Searches for the pattern in the given \a haystack starting from index + position \a from. + + \sa caseSensitivity(), pattern() +*/ +qsizetype QLatin1StringMatcher::indexIn(QStringView haystack, qsizetype from) const noexcept +{ + return indexIn_helper(haystack, from); +} + +/*! + \internal +*/ +template <typename String> +qsizetype QLatin1StringMatcher::indexIn_helper(String haystack, qsizetype from) const noexcept +{ + static_assert(QtPrivate::isLatin1OrUtf16View<String>); + + if (m_pattern.isEmpty() && from == haystack.size()) + return from; + if (from < 0) // Historical behavior (see QString::indexOf and co.) + from += haystack.size(); + if (from >= haystack.size()) + return -1; + + const auto start = [haystack] { + if constexpr (std::is_same_v<String, QStringView>) + return haystack.utf16(); + else + return haystack.begin(); + }(); + + auto begin = start + from; + auto end = start + haystack.size(); + auto found = begin; + if (m_cs == Qt::CaseSensitive) { + found = m_caseSensitiveSearcher(begin, end, m_pattern.begin(), m_pattern.end()).begin; + if (found == end) + return -1; + } else { + const qsizetype bufferSize = std::min(m_pattern.size(), qsizetype(sizeof m_foldBuffer)); + const QLatin1StringView restNeedle = m_pattern.sliced(bufferSize); + const bool needleLongerThanBuffer = restNeedle.size() > 0; + String restHaystack = haystack; + do { + found = m_caseInsensitiveSearcher(found, end, m_foldBuffer, &m_foldBuffer[bufferSize]) + .begin; + if (found == end) { + return -1; + } else if (!needleLongerThanBuffer) { + break; + } + restHaystack = haystack.sliced( + qMin(haystack.size(), + bufferSize + qsizetype(std::distance(start, found)))); + if (restHaystack.startsWith(restNeedle, Qt::CaseInsensitive)) + break; + ++found; + } while (true); + } + return std::distance(start, found); +} + +QT_END_NAMESPACE |