diff options
author | Edward Welbourne <edward.welbourne@qt.io> | 2019-05-27 19:13:54 +0200 |
---|---|---|
committer | Edward Welbourne <edward.welbourne@qt.io> | 2019-07-10 17:05:30 +0200 |
commit | a9aa206b7b8ac4e69f8c46233b4080e00e845ff5 (patch) | |
tree | 0d19cb1f1a3b9d79d322e6e63f6f72160977ca67 /src/corelib/tools/qbytearraymatcher.cpp | |
parent | 85d3061c1cd4617ef09cb381320611c27da205a5 (diff) |
Move text-related code out of corelib/tools/ to corelib/text/
This includes byte array, string, char, unicode, locale, collation and
regular expressions.
Change-Id: I8b125fa52c8c513eb57a0f1298b91910e5a0d786
Reviewed-by: Volker Hilsheimer <volker.hilsheimer@qt.io>
Diffstat (limited to 'src/corelib/tools/qbytearraymatcher.cpp')
-rw-r--r-- | src/corelib/tools/qbytearraymatcher.cpp | 435 |
1 files changed, 0 insertions, 435 deletions
diff --git a/src/corelib/tools/qbytearraymatcher.cpp b/src/corelib/tools/qbytearraymatcher.cpp deleted file mode 100644 index 72e09226af..0000000000 --- a/src/corelib/tools/qbytearraymatcher.cpp +++ /dev/null @@ -1,435 +0,0 @@ -/**************************************************************************** -** -** Copyright (C) 2016 The Qt Company Ltd. -** Contact: https://www.qt.io/licensing/ -** -** This file is part of the QtCore module of the Qt Toolkit. -** -** $QT_BEGIN_LICENSE:LGPL$ -** Commercial License Usage -** Licensees holding valid commercial Qt licenses may use this file in -** accordance with the commercial license agreement provided with the -** Software or, alternatively, in accordance with the terms contained in -** a written agreement between you and The Qt Company. For licensing terms -** and conditions see https://www.qt.io/terms-conditions. For further -** information use the contact form at https://www.qt.io/contact-us. -** -** GNU Lesser General Public License Usage -** Alternatively, this file may be used under the terms of the GNU Lesser -** General Public License version 3 as published by the Free Software -** Foundation and appearing in the file LICENSE.LGPL3 included in the -** packaging of this file. Please review the following information to -** ensure the GNU Lesser General Public License version 3 requirements -** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. -** -** GNU General Public License Usage -** Alternatively, this file may be used under the terms of the GNU -** General Public License version 2.0 or (at your option) the GNU General -** Public license version 3 or any later version approved by the KDE Free -** Qt Foundation. The licenses are as published by the Free Software -** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 -** included in the packaging of this file. Please review the following -** information to ensure the GNU General Public License requirements will -** be met: https://www.gnu.org/licenses/gpl-2.0.html and -** https://www.gnu.org/licenses/gpl-3.0.html. -** -** $QT_END_LICENSE$ -** -****************************************************************************/ - -#include "qbytearraymatcher.h" - -#include <limits.h> - -QT_BEGIN_NAMESPACE - -static inline void bm_init_skiptable(const uchar *cc, int len, uchar *skiptable) -{ - int l = qMin(len, 255); - memset(skiptable, l, 256*sizeof(uchar)); - cc += len - l; - while (l--) - skiptable[*cc++] = l; -} - -static inline int bm_find(const uchar *cc, int l, int index, const uchar *puc, uint pl, - const uchar *skiptable) -{ - if (pl == 0) - return index > l ? -1 : index; - const uint pl_minus_one = pl - 1; - - const uchar *current = cc + index + pl_minus_one; - const uchar *end = cc + l; - while (current < end) { - uint skip = skiptable[*current]; - if (!skip) { - // possible match - while (skip < pl) { - if (*(current - skip) != puc[pl_minus_one - skip]) - break; - skip++; - } - if (skip > pl_minus_one) // we have a match - return (current - cc) - skip + 1; - - // in case we don't have a match we are a bit inefficient as we only skip by one - // when we have the non matching char in the string. - if (skiptable[*(current - skip)] == pl) - skip = pl - skip; - else - skip = 1; - } - if (current > end - skip) - break; - current += skip; - } - return -1; // not found -} - -/*! \class QByteArrayMatcher - \inmodule QtCore - \brief The QByteArrayMatcher class holds a sequence of bytes that - can be quickly matched in a byte array. - - \ingroup tools - \ingroup string-processing - - This class is useful when you have a sequence of bytes that you - want to repeatedly match against some byte arrays (perhaps in a - loop), or when you want to search for the same sequence of bytes - multiple times in the same byte array. Using a matcher object and - indexIn() is faster than matching a plain QByteArray with - QByteArray::indexOf() if repeated matching takes place. This - class offers no benefit if you are doing one-off byte array - matches. - - Create the QByteArrayMatcher with the QByteArray you want to - search for. Then call indexIn() on the QByteArray that you want to - search. - - \sa QByteArray, QStringMatcher -*/ - -/*! - Constructs an empty byte array matcher that won't match anything. - Call setPattern() to give it a pattern to match. -*/ -QByteArrayMatcher::QByteArrayMatcher() - : d(nullptr) -{ - p.p = nullptr; - p.l = 0; - memset(p.q_skiptable, 0, sizeof(p.q_skiptable)); -} - -/*! - Constructs a byte array matcher from \a pattern. \a pattern - has the given \a length. \a pattern must remain in scope, but - the destructor does not delete \a pattern. - */ -QByteArrayMatcher::QByteArrayMatcher(const char *pattern, int length) - : d(nullptr) -{ - p.p = reinterpret_cast<const uchar *>(pattern); - p.l = length; - bm_init_skiptable(p.p, p.l, p.q_skiptable); -} - -/*! - Constructs a byte array matcher that will search for \a pattern. - Call indexIn() to perform a search. -*/ -QByteArrayMatcher::QByteArrayMatcher(const QByteArray &pattern) - : d(nullptr), q_pattern(pattern) -{ - p.p = reinterpret_cast<const uchar *>(pattern.constData()); - p.l = pattern.size(); - bm_init_skiptable(p.p, p.l, p.q_skiptable); -} - -/*! - Copies the \a other byte array matcher to this byte array matcher. -*/ -QByteArrayMatcher::QByteArrayMatcher(const QByteArrayMatcher &other) - : d(nullptr) -{ - operator=(other); -} - -/*! - Destroys the byte array matcher. -*/ -QByteArrayMatcher::~QByteArrayMatcher() -{ - Q_UNUSED(d); -} - -/*! - Assigns the \a other byte array matcher to this byte array matcher. -*/ -QByteArrayMatcher &QByteArrayMatcher::operator=(const QByteArrayMatcher &other) -{ - q_pattern = other.q_pattern; - memcpy(&p, &other.p, sizeof(p)); - return *this; -} - -/*! - Sets the byte array that this byte array matcher will search for - to \a pattern. - - \sa pattern(), indexIn() -*/ -void QByteArrayMatcher::setPattern(const QByteArray &pattern) -{ - q_pattern = pattern; - p.p = reinterpret_cast<const uchar *>(pattern.constData()); - p.l = pattern.size(); - bm_init_skiptable(p.p, p.l, p.q_skiptable); -} - -/*! - Searches the byte array \a ba, from byte position \a from (default - 0, i.e. from the first byte), for the byte array pattern() that - was set in the constructor or in the most recent call to - setPattern(). Returns the position where the pattern() matched in - \a ba, or -1 if no match was found. -*/ -int QByteArrayMatcher::indexIn(const QByteArray &ba, int from) const -{ - if (from < 0) - from = 0; - return bm_find(reinterpret_cast<const uchar *>(ba.constData()), ba.size(), from, - p.p, p.l, p.q_skiptable); -} - -/*! - Searches the char string \a str, which has length \a len, from - byte position \a from (default 0, i.e. from the first byte), for - the byte array pattern() that was set in the constructor or in the - most recent call to setPattern(). Returns the position where the - pattern() matched in \a str, or -1 if no match was found. -*/ -int QByteArrayMatcher::indexIn(const char *str, int len, int from) const -{ - if (from < 0) - from = 0; - return bm_find(reinterpret_cast<const uchar *>(str), len, from, - p.p, p.l, p.q_skiptable); -} - -/*! - \fn QByteArray QByteArrayMatcher::pattern() const - - Returns the byte array pattern that this byte array matcher will - search for. - - \sa setPattern() -*/ - - -static int findChar(const char *str, int len, char ch, int from) -{ - const uchar *s = (const uchar *)str; - uchar c = (uchar)ch; - if (from < 0) - from = qMax(from + len, 0); - if (from < len) { - const uchar *n = s + from - 1; - const uchar *e = s + len; - while (++n != e) - if (*n == c) - return n - s; - } - return -1; -} - -/*! - \internal - */ -static int qFindByteArrayBoyerMoore( - const char *haystack, int haystackLen, int haystackOffset, - const char *needle, int needleLen) -{ - uchar skiptable[256]; - bm_init_skiptable((const uchar *)needle, needleLen, skiptable); - if (haystackOffset < 0) - haystackOffset = 0; - return bm_find((const uchar *)haystack, haystackLen, haystackOffset, - (const uchar *)needle, needleLen, skiptable); -} - -#define REHASH(a) \ - if (sl_minus_1 < sizeof(uint) * CHAR_BIT) \ - hashHaystack -= uint(a) << sl_minus_1; \ - hashHaystack <<= 1 - -/*! - \internal - */ -int qFindByteArray( - const char *haystack0, int haystackLen, int from, - const char *needle, int needleLen) -{ - const int l = haystackLen; - const int sl = needleLen; - if (from < 0) - from += l; - if (uint(sl + from) > (uint)l) - return -1; - if (!sl) - return from; - if (!l) - return -1; - - if (sl == 1) - return findChar(haystack0, haystackLen, needle[0], from); - - /* - We use the Boyer-Moore algorithm in cases where the overhead - for the skip table should pay off, otherwise we use a simple - hash function. - */ - if (l > 500 && sl > 5) - return qFindByteArrayBoyerMoore(haystack0, haystackLen, from, - needle, needleLen); - - /* - We use some hashing for efficiency's sake. Instead of - comparing strings, we compare the hash value of str with that - of a part of this QString. Only if that matches, we call memcmp(). - */ - const char *haystack = haystack0 + from; - const char *end = haystack0 + (l - sl); - const uint sl_minus_1 = sl - 1; - uint hashNeedle = 0, hashHaystack = 0; - int idx; - for (idx = 0; idx < sl; ++idx) { - hashNeedle = ((hashNeedle<<1) + needle[idx]); - hashHaystack = ((hashHaystack<<1) + haystack[idx]); - } - hashHaystack -= *(haystack + sl_minus_1); - - while (haystack <= end) { - hashHaystack += *(haystack + sl_minus_1); - if (hashHaystack == hashNeedle && *needle == *haystack - && memcmp(needle, haystack, sl) == 0) - return haystack - haystack0; - - REHASH(*haystack); - ++haystack; - } - return -1; -} - -/*! - \class QStaticByteArrayMatcherBase - \since 5.9 - \internal - \brief Non-template base class of QStaticByteArrayMatcher. -*/ - -/*! - \class QStaticByteArrayMatcher - \since 5.9 - \inmodule QtCore - \brief The QStaticByteArrayMatcher class is a compile-time version of QByteArrayMatcher. - - \ingroup tools - \ingroup string-processing - - This class is useful when you have a sequence of bytes that you - want to repeatedly match against some byte arrays (perhaps in a - loop), or when you want to search for the same sequence of bytes - multiple times in the same byte array. Using a matcher object and - indexIn() is faster than matching a plain QByteArray with - QByteArray::indexOf(), in particular if repeated matching takes place. - - Unlike QByteArrayMatcher, this class calculates the internal - representation at \e{compile-time}, if your compiler supports - C++14-level \c{constexpr} (C++11 is not sufficient), so it can - even benefit if you are doing one-off byte array matches. - - Create the QStaticByteArrayMatcher by calling qMakeStaticByteArrayMatcher(), - passing it the C string literal you want to search for. Store the return - value of that function in a \c{static const auto} variable, so you don't need - to pass the \c{N} template parameter explicitly: - - \snippet code/src_corelib_tools_qbytearraymatcher.cpp 0 - - Then call indexIn() on the QByteArray in which you want to search, just like - with QByteArrayMatcher. - - Since this class is designed to do all the up-front calculations at compile-time, - it does not offer a setPattern() method. - - \note Qt detects the necessary C++14 compiler support by way of the feature - test recommendations from - \l{https://isocpp.org/std/standing-documents/sd-6-sg10-feature-test-recommendations} - {C++ Committee's Standing Document 6}. - - \sa QByteArrayMatcher, QStringMatcher -*/ - -/*! - \fn template <uint N> int QStaticByteArrayMatcher<N>::indexIn(const char *haystack, int hlen, int from = 0) const - - Searches the char string \a haystack, which has length \a hlen, from - byte position \a from (default 0, i.e. from the first byte), for - the byte array pattern() that was set in the constructor. - - Returns the position where the pattern() matched in \a haystack, or -1 if no match was found. -*/ - -/*! - \fn template <uint N> int QStaticByteArrayMatcher<N>::indexIn(const QByteArray &haystack, int from = 0) const - - Searches the char string \a haystack, from byte position \a from - (default 0, i.e. from the first byte), for the byte array pattern() - that was set in the constructor. - - Returns the position where the pattern() matched in \a haystack, or -1 if no match was found. -*/ - -/*! - \fn template <uint N> QByteArray QStaticByteArrayMatcher<N>::pattern() const - - Returns the byte array pattern that this byte array matcher will - search for. - - \sa QByteArrayMatcher::setPattern() -*/ - -/*! - \internal -*/ -int QStaticByteArrayMatcherBase::indexOfIn(const char *needle, uint nlen, const char *haystack, int hlen, int from) const noexcept -{ - if (from < 0) - from = 0; - return bm_find(reinterpret_cast<const uchar *>(haystack), hlen, from, - reinterpret_cast<const uchar *>(needle), nlen, m_skiptable.data); -} - -/*! - \fn template <uint N> QStaticByteArrayMatcher<N>::QStaticByteArrayMatcher(const char (&pattern)[N]) - \internal -*/ - -/*! - \fn template <uint N> QStaticByteArrayMatcher qMakeStaticByteArrayMatcher(const char (&pattern)[N]) - \since 5.9 - \relates QStaticByteArrayMatcher - - Return a QStaticByteArrayMatcher with the correct \c{N} determined - automatically from the \a pattern passed. - - To take full advantage of this function, assign the result to an - \c{auto} variable: - - \snippet code/src_corelib_tools_qbytearraymatcher.cpp 1 -*/ - - -QT_END_NAMESPACE |