diff options
Diffstat (limited to 'src/corelib/text/qbytearraymatcher.cpp')
-rw-r--r-- | src/corelib/text/qbytearraymatcher.cpp | 178 |
1 file changed, 71 insertions, 107 deletions
diff --git a/src/corelib/text/qbytearraymatcher.cpp b/src/corelib/text/qbytearraymatcher.cpp index 4292064de2..a332f035ef 100644 --- a/src/corelib/text/qbytearraymatcher.cpp +++ b/src/corelib/text/qbytearraymatcher.cpp @@ -1,41 +1,5 @@ -/**************************************************************************** -** -** Copyright (C) 2016 The Qt Company Ltd. -** Contact: https://www.qt.io/licensing/ -** -** This file is part of the QtCore module of the Qt Toolkit. -** -** $QT_BEGIN_LICENSE:LGPL$ -** Commercial License Usage -** Licensees holding valid commercial Qt licenses may use this file in -** accordance with the commercial license agreement provided with the -** Software or, alternatively, in accordance with the terms contained in -** a written agreement between you and The Qt Company. For licensing terms -** and conditions see https://www.qt.io/terms-conditions. For further -** information use the contact form at https://www.qt.io/contact-us. -** -** GNU Lesser General Public License Usage -** Alternatively, this file may be used under the terms of the GNU Lesser -** General Public License version 3 as published by the Free Software -** Foundation and appearing in the file LICENSE.LGPL3 included in the -** packaging of this file. Please review the following information to -** ensure the GNU Lesser General Public License version 3 requirements -** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. -** -** GNU General Public License Usage -** Alternatively, this file may be used under the terms of the GNU -** General Public License version 2.0 or (at your option) the GNU General -** Public license version 3 or any later version approved by the KDE Free -** Qt Foundation. The licenses are as published by the Free Software -** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 -** included in the packaging of this file. 
Please review the following -** information to ensure the GNU General Public License requirements will -** be met: https://www.gnu.org/licenses/gpl-2.0.html and -** https://www.gnu.org/licenses/gpl-3.0.html. -** -** $QT_END_LICENSE$ -** -****************************************************************************/ +// Copyright (C) 2016 The Qt Company Ltd. +// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only #include "qbytearraymatcher.h" @@ -124,14 +88,19 @@ QByteArrayMatcher::QByteArrayMatcher() } /*! - Constructs a byte array matcher from \a pattern. \a pattern - has the given \a length. \a pattern must remain in scope, but - the destructor does not delete \a pattern. - */ + Constructs a byte array matcher from \a pattern. \a pattern + has the given \a length. Call indexIn() to perform a search. + + \note the data that \a pattern is referencing must remain valid while this + object is used. +*/ QByteArrayMatcher::QByteArrayMatcher(const char *pattern, qsizetype length) : d(nullptr) { p.p = reinterpret_cast<const uchar *>(pattern); - p.l = length; + if (length < 0) + p.l = qstrlen(pattern); + else + p.l = length; bm_init_skiptable(p.p, p.l, p.q_skiptable); } @@ -148,6 +117,18 @@ QByteArrayMatcher::QByteArrayMatcher(const QByteArray &pattern) } /*! + \fn QByteArrayMatcher::QByteArrayMatcher(QByteArrayView pattern) + \since 6.3 + \overload + + Constructs a byte array matcher that will search for \a pattern. + Call indexIn() to perform a search. + + \note the data that \a pattern is referencing must remain valid while this + object is used. +*/ + +/*! Copies the \a other byte array matcher to this byte array matcher. */ QByteArrayMatcher::QByteArrayMatcher(const QByteArrayMatcher &other) @@ -189,32 +170,36 @@ void QByteArrayMatcher::setPattern(const QByteArray &pattern) } /*! - Searches the byte array \a ba, from byte position \a from (default - 0, i.e. 
from the first byte), for the byte array pattern() that - was set in the constructor or in the most recent call to - setPattern(). Returns the position where the pattern() matched in - \a ba, or -1 if no match was found. + Searches the char string \a str, which has length \a len, from + byte position \a from (default 0, i.e. from the first byte), for + the byte array pattern() that was set in the constructor or in the + most recent call to setPattern(). Returns the position where the + pattern() matched in \a str, or -1 if no match was found. */ -qsizetype QByteArrayMatcher::indexIn(const QByteArray &ba, qsizetype from) const +qsizetype QByteArrayMatcher::indexIn(const char *str, qsizetype len, qsizetype from) const { if (from < 0) from = 0; - return bm_find(reinterpret_cast<const uchar *>(ba.constData()), ba.size(), from, + return bm_find(reinterpret_cast<const uchar *>(str), len, from, p.p, p.l, p.q_skiptable); } /*! - Searches the char string \a str, which has length \a len, from - byte position \a from (default 0, i.e. from the first byte), for - the byte array pattern() that was set in the constructor or in the - most recent call to setPattern(). Returns the position where the - pattern() matched in \a str, or -1 if no match was found. + \fn qsizetype QByteArrayMatcher::indexIn(QByteArrayView data, qsizetype from) const + \since 6.3 + \overload + + Searches the byte array \a data, from byte position \a from (default + 0, i.e. from the first byte), for the byte array pattern() that + was set in the constructor or in the most recent call to + setPattern(). Returns the position where the pattern() matched in + \a data, or -1 if no match was found. 
*/ -qsizetype QByteArrayMatcher::indexIn(const char *str, qsizetype len, qsizetype from) const +qsizetype QByteArrayMatcher::indexIn(QByteArrayView data, qsizetype from) const { if (from < 0) from = 0; - return bm_find(reinterpret_cast<const uchar *>(str), len, from, + return bm_find(reinterpret_cast<const uchar *>(data.data()), data.size(), from, p.p, p.l, p.q_skiptable); } @@ -227,26 +212,10 @@ qsizetype QByteArrayMatcher::indexIn(const char *str, qsizetype len, qsizetype f \sa setPattern() */ - -static qsizetype findChar(const char *str, qsizetype len, char ch, qsizetype from) -{ - const uchar *s = (const uchar *)str; - uchar c = (uchar)ch; - if (from < 0) - from = qMax(from + len, qsizetype(0)); - if (from < len) { - const uchar *n = s + from - 1; - const uchar *e = s + len; - while (++n != e) - if (*n == c) - return n - s; - } - return -1; -} - /*! \internal */ +Q_NEVER_INLINE static qsizetype qFindByteArrayBoyerMoore( const char *haystack, qsizetype haystackLen, qsizetype haystackOffset, const char *needle, qsizetype needleLen) @@ -259,20 +228,19 @@ static qsizetype qFindByteArrayBoyerMoore( (const uchar *)needle, needleLen, skiptable); } -#define REHASH(a) \ - if (sl_minus_1 < sizeof(std::size_t) * CHAR_BIT) \ - hashHaystack -= std::size_t(a) << sl_minus_1; \ - hashHaystack <<= 1 - /*! 
\internal */ -qsizetype qFindByteArray( - const char *haystack0, qsizetype haystackLen, qsizetype from, - const char *needle, qsizetype needleLen) +static qsizetype qFindByteArray(const char *haystack0, qsizetype l, qsizetype from, + const char *needle, qsizetype sl); +qsizetype QtPrivate::findByteArray(QByteArrayView haystack, qsizetype from, QByteArrayView needle) noexcept { - const auto l = haystackLen; - const auto sl = needleLen; + const auto haystack0 = haystack.data(); + const auto l = haystack.size(); + const auto sl = needle.size(); + if (sl == 1) + return findByteArray(haystack, from, needle.front()); + if (from < 0) from += l; if (std::size_t(sl + from) > std::size_t(l)) @@ -282,27 +250,28 @@ qsizetype qFindByteArray( if (!l) return -1; - if (sl == 1) - return findChar(haystack0, haystackLen, needle[0], from); - /* We use the Boyer-Moore algorithm in cases where the overhead for the skip table should pay off, otherwise we use a simple hash function. */ if (l > 500 && sl > 5) - return qFindByteArrayBoyerMoore(haystack0, haystackLen, from, - needle, needleLen); + return qFindByteArrayBoyerMoore(haystack0, l, from, needle.data(), sl); + return qFindByteArray(haystack0, l, from, needle.data(), sl); +} +qsizetype qFindByteArray(const char *haystack0, qsizetype l, qsizetype from, + const char *needle, qsizetype sl) +{ /* We use some hashing for efficiency's sake. Instead of comparing strings, we compare the hash value of str with that - of a part of this QString. Only if that matches, we call memcmp(). + of a part of this QByteArray. Only if that matches, we call memcmp(). 
*/ const char *haystack = haystack0 + from; const char *end = haystack0 + (l - sl); - const auto sl_minus_1 = std::size_t(sl - 1); - std::size_t hashNeedle = 0, hashHaystack = 0; + const qregisteruint sl_minus_1 = sl - 1; + qregisteruint hashNeedle = 0, hashHaystack = 0; qsizetype idx; for (idx = 0; idx < sl; ++idx) { hashNeedle = ((hashNeedle<<1) + needle[idx]); @@ -316,7 +285,9 @@ qsizetype qFindByteArray( && memcmp(needle, haystack, sl) == 0) return haystack - haystack0; - REHASH(*haystack); + if (sl_minus_1 < sizeof(sl_minus_1) * CHAR_BIT) + hashHaystack -= qregisteruint(*haystack) << sl_minus_1; + hashHaystack <<= 1; ++haystack; } return -1; @@ -346,8 +317,7 @@ qsizetype qFindByteArray( QByteArray::indexOf(), in particular if repeated matching takes place. Unlike QByteArrayMatcher, this class calculates the internal - representation at \e{compile-time}, if your compiler supports - C++14-level \c{constexpr} (C++11 is not sufficient), so it can + representation at \e{compile-time}, so it can even benefit if you are doing one-off byte array matches. Create the QStaticByteArrayMatcher by calling qMakeStaticByteArrayMatcher(), @@ -363,16 +333,11 @@ qsizetype qFindByteArray( Since this class is designed to do all the up-front calculations at compile-time, it does not offer a setPattern() method. - \note Qt detects the necessary C++14 compiler support by way of the feature - test recommendations from - \l{https://isocpp.org/std/standing-documents/sd-6-sg10-feature-test-recommendations} - {C++ Committee's Standing Document 6}. - \sa QByteArrayMatcher, QStringMatcher */ /*! - \fn template <uint N> int QStaticByteArrayMatcher<N>::indexIn(const char *haystack, int hlen, int from = 0) const + \fn template <size_t N> qsizetype QStaticByteArrayMatcher<N>::indexIn(const char *haystack, qsizetype hlen, qsizetype from = 0) const Searches the char string \a haystack, which has length \a hlen, from byte position \a from (default 0, i.e. 
from the first byte), for @@ -382,7 +347,7 @@ qsizetype qFindByteArray( */ /*! - \fn template <uint N> int QStaticByteArrayMatcher<N>::indexIn(const QByteArray &haystack, int from = 0) const + \fn template <size_t N> qsizetype QStaticByteArrayMatcher<N>::indexIn(const QByteArray &haystack, qsizetype from = 0) const Searches the char string \a haystack, from byte position \a from (default 0, i.e. from the first byte), for the byte array pattern() @@ -392,7 +357,7 @@ qsizetype qFindByteArray( */ /*! - \fn template <uint N> QByteArray QStaticByteArrayMatcher<N>::pattern() const + \fn template <size_t N> QByteArray QStaticByteArrayMatcher<N>::pattern() const Returns the byte array pattern that this byte array matcher will search for. @@ -403,7 +368,7 @@ qsizetype qFindByteArray( /*! \internal */ -int QStaticByteArrayMatcherBase::indexOfIn(const char *needle, uint nlen, const char *haystack, int hlen, int from) const noexcept +qsizetype QStaticByteArrayMatcherBase::indexOfIn(const char *needle, size_t nlen, const char *haystack, qsizetype hlen, qsizetype from) const noexcept { if (from < 0) from = 0; @@ -412,12 +377,12 @@ int QStaticByteArrayMatcherBase::indexOfIn(const char *needle, uint nlen, const } /*! - \fn template <uint N> QStaticByteArrayMatcher<N>::QStaticByteArrayMatcher(const char (&pattern)[N]) + \fn template <size_t N> QStaticByteArrayMatcher<N>::QStaticByteArrayMatcher(const char (&pattern)[N]) \internal */ /*! - \fn template <uint N> QStaticByteArrayMatcher qMakeStaticByteArrayMatcher(const char (&pattern)[N]) + \fn template <size_t N> QStaticByteArrayMatcher qMakeStaticByteArrayMatcher(const char (&pattern)[N]) \since 5.9 \relates QStaticByteArrayMatcher @@ -430,5 +395,4 @@ int QStaticByteArrayMatcherBase::indexOfIn(const char *needle, uint nlen, const \snippet code/src_corelib_text_qbytearraymatcher.cpp 1 */ - QT_END_NAMESPACE |