summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMarc Mutz <marc.mutz@kdab.com>2016-02-25 02:32:27 +0100
committerGiuseppe D'Angelo <giuseppe.dangelo@kdab.com>2017-01-19 21:37:46 +0000
commit3af5cab0547fc7c6a82d94e8a2104f1b052ccd7d (patch)
tree544ca6119abae93cb65ef7b67db5b4f984f608c4
parenta6cdfacf8d9c646333e81693e73e2c74173ad29a (diff)
Long live QStaticByteArrayMatcher!
This is a version of QByteArrayMatcher that calculates the Boyer-Moore skip table at compile-time instead of at run-time, making this class more generally applicable than QByteArray- Matcher itself, at least for statically-known strings. The compile-time part requires C++14 constexpr support, but the class should compile and work even in C++98 mode, just with runtime initialization of the skip-table. While touching tst_qbytearraymatcher, clean up the static global QByteArrayMatchers there and add tests with needles longer than 255 characters for QByteArrayMatcher, too. [ChangeLog][QtCore] Added QStaticByteArrayMatcher. Change-Id: I0662f262ab19b79ae4096f3ab384d5b3ada72347 Reviewed-by: David Faure <david.faure@kdab.com>
-rw-r--r--src/corelib/tools/qbytearraymatcher.cpp108
-rw-r--r--src/corelib/tools/qbytearraymatcher.h74
-rw-r--r--tests/auto/corelib/tools/qbytearraymatcher/qbytearraymatcher.pro1
-rw-r--r--tests/auto/corelib/tools/qbytearraymatcher/tst_qbytearraymatcher.cpp109
4 files changed, 288 insertions, 4 deletions
diff --git a/src/corelib/tools/qbytearraymatcher.cpp b/src/corelib/tools/qbytearraymatcher.cpp
index d06ca1292a..fc9d12aba5 100644
--- a/src/corelib/tools/qbytearraymatcher.cpp
+++ b/src/corelib/tools/qbytearraymatcher.cpp
@@ -323,4 +323,112 @@ int qFindByteArray(
return -1;
}
+/*!
+ \class QStaticByteArrayMatcherBase
+ \since 5.9
+ \internal
+ \brief Non-template base class of QStaticByteArrayMatcher.
+*/
+
+/*!
+ \class QStaticByteArrayMatcher
+ \since 5.9
+ \inmodule QtCore
+ \brief The QStaticByteArrayMatcher class is a compile-time version of QByteArrayMatcher
+
+ \ingroup tools
+ \ingroup string-processing
+
+ This class is useful when you have a sequence of bytes that you
+ want to repeatedly match against some byte arrays (perhaps in a
+ loop), or when you want to search for the same sequence of bytes
+ multiple times in the same byte array. Using a matcher object and
+ indexIn() is faster than matching a plain QByteArray with
+ QByteArray::indexOf(), in particular if repeated matching takes place.
+
+ Unlike QByteArrayMatcher, this class calculates the internal
+ representation at \e{compile-time}, if your compiler supports
+ C++14-level \c{constexpr} (C++11 is not sufficient), so it can
+ even benefit if you are doing one-off byte array matches.
+
+ Create the QStaticByteArrayMatcher by calling qMakeStaticByteArrayMatcher(),
+ passing it the C string literal you want to search for. Store the return
+ value of that function in a \c{static const auto} variable, so you don't need
+ to pass the \c{N} template parameter explicitly:
+
+ \code
+ static const auto matcher = qMakeStaticByteArrayMatcher("needle");
+ \endcode
+
+ Then call indexIn() on the QByteArray in which you want to search, just like
+ with QByteArrayMatcher.
+
+ Since this class is designed to do all the up-front calculations at compile-time,
+ it does not offer a setPattern() method.
+
+ \sa QByteArrayMatcher, QStringMatcher
+*/
+
+/*!
+ \fn QStaticByteArrayMatcher::indexIn(const char *haystack, int hlen, int from)
+
+ Searches the char string \a haystack, which has length \a hlen, from
+ byte position \a from (default 0, i.e. from the first byte), for
+ the byte array pattern() that was set in the constructor.
+
+ Returns the position where the pattern() matched in \a haystack, or -1 if no match was found.
+*/
+
+/*!
+ \fn QStaticByteArrayMatcher::indexIn(const QByteArray &haystack, int from)
+
+ Searches the char string \a haystack, from byte position \a from
+ (default 0, i.e. from the first byte), for the byte array pattern()
+ that was set in the constructor.
+
+ Returns the position where the pattern() matched in \a haystack, or -1 if no match was found.
+*/
+
+/*!
+ \fn QByteArray QStaticByteArrayMatcher::pattern() const
+
+ Returns the byte array pattern that this byte array matcher will
+ search for.
+
+ \sa setPattern()
+*/
+
+/*!
+ \internal
+*/
+int QStaticByteArrayMatcherBase::indexOfIn(const char *needle, uint nlen, const char *haystack, int hlen, int from) const Q_DECL_NOTHROW
+{
+ if (from < 0)
+ from = 0;
+ return bm_find(reinterpret_cast<const uchar *>(haystack), hlen, from,
+ reinterpret_cast<const uchar *>(needle), nlen, m_skiptable.data);
+}
+
+/*!
+ \fn QStaticByteArrayMatcher::QStaticByteArrayMatcher(const char (&pattern)[N])
+ \internal
+*/
+
+/*!
+ \fn qMakeStaticByteArrayMatcher(const char (&pattern)[N])
+ \since 5.9
+ \relates QStaticByteArrayMatcher
+
+ Return a QStaticByteArrayMatcher with the correct \c{N} determined
+ automatically from the \a pattern passed.
+
+ To take full advantage of this function, assign the result to an
+ \c{auto} variable:
+
+ \code
+ static const auto matcher = qMakeStaticByteArrayMatcher("needle");
+ \endcode
+*/
+
+
QT_END_NAMESPACE
diff --git a/src/corelib/tools/qbytearraymatcher.h b/src/corelib/tools/qbytearraymatcher.h
index aac62715a1..51e08ba4bf 100644
--- a/src/corelib/tools/qbytearraymatcher.h
+++ b/src/corelib/tools/qbytearraymatcher.h
@@ -83,6 +83,80 @@ private:
};
};
+class QStaticByteArrayMatcherBase {
+ struct Skiptable {
+ uchar data[256];
+ } m_skiptable;
+protected:
+ explicit Q_DECL_RELAXED_CONSTEXPR QStaticByteArrayMatcherBase(const char *pattern, uint n) Q_DECL_NOTHROW
+ : m_skiptable(generate(pattern, n)) {}
+ // compiler-generated copy/more ctors/assignment operators are ok!
+ // compiler-generated dtor is ok!
+
+ Q_CORE_EXPORT int indexOfIn(const char *needle, uint nlen, const char *haystack, int hlen, int from) const Q_DECL_NOTHROW;
+
+private:
+ static Q_DECL_RELAXED_CONSTEXPR Skiptable generate(const char *pattern, uint n) Q_DECL_NOTHROW
+ {
+ const auto uchar_max = (std::numeric_limits<uchar>::max)();
+ uchar max = n > uchar_max ? uchar_max : n;
+ Skiptable table = {
+ // this verbose initialization code aims to avoid some opaque error messages
+ // even on powerful compilers such as GCC 5.3. Even though for GCC a loop
+ // format can be found that v5.3 groks, it's probably better to go with this
+ // for the time being:
+ {
+ max, max, max, max, max, max, max, max, max, max, max, max, max, max, max, max,
+ max, max, max, max, max, max, max, max, max, max, max, max, max, max, max, max,
+ max, max, max, max, max, max, max, max, max, max, max, max, max, max, max, max,
+ max, max, max, max, max, max, max, max, max, max, max, max, max, max, max, max,
+ max, max, max, max, max, max, max, max, max, max, max, max, max, max, max, max,
+ max, max, max, max, max, max, max, max, max, max, max, max, max, max, max, max,
+ max, max, max, max, max, max, max, max, max, max, max, max, max, max, max, max,
+ max, max, max, max, max, max, max, max, max, max, max, max, max, max, max, max,
+
+ max, max, max, max, max, max, max, max, max, max, max, max, max, max, max, max,
+ max, max, max, max, max, max, max, max, max, max, max, max, max, max, max, max,
+ max, max, max, max, max, max, max, max, max, max, max, max, max, max, max, max,
+ max, max, max, max, max, max, max, max, max, max, max, max, max, max, max, max,
+ max, max, max, max, max, max, max, max, max, max, max, max, max, max, max, max,
+ max, max, max, max, max, max, max, max, max, max, max, max, max, max, max, max,
+ max, max, max, max, max, max, max, max, max, max, max, max, max, max, max, max,
+ max, max, max, max, max, max, max, max, max, max, max, max, max, max, max, max,
+ }
+ };
+ pattern += n - max;
+ while (max--)
+ table.data[uchar(*pattern++)] = max;
+ return table;
+ }
+};
+
+template <uint N>
+class QStaticByteArrayMatcher : QStaticByteArrayMatcherBase
+{
+ char m_pattern[N];
+ Q_STATIC_ASSERT_X(N > 2, "QStaticByteArrayMatcher makes no sense for finding a single-char pattern");
+public:
+ explicit Q_DECL_RELAXED_CONSTEXPR QStaticByteArrayMatcher(const char (&patternToMatch)[N]) Q_DECL_NOTHROW
+ : QStaticByteArrayMatcherBase(patternToMatch, N - 1), m_pattern()
+ {
+ for (uint i = 0; i < N; ++i)
+ m_pattern[i] = patternToMatch[i];
+ }
+
+ int indexIn(const QByteArray &haystack, int from = 0) const Q_DECL_NOTHROW
+ { return this->indexOfIn(m_pattern, N - 1, haystack.data(), haystack.size(), from); }
+ int indexIn(const char *haystack, int hlen, int from = 0) const Q_DECL_NOTHROW
+ { return this->indexOfIn(m_pattern, N - 1, haystack, hlen, from); }
+
+ QByteArray pattern() const { return QByteArray(m_pattern, int(N - 1)); }
+};
+
+template <uint N>
+Q_DECL_RELAXED_CONSTEXPR QStaticByteArrayMatcher<N> qMakeStaticByteArrayMatcher(const char (&pattern)[N]) Q_DECL_NOTHROW
+{ return QStaticByteArrayMatcher<N>(pattern); }
+
QT_END_NAMESPACE
#endif // QBYTEARRAYMATCHER_H
diff --git a/tests/auto/corelib/tools/qbytearraymatcher/qbytearraymatcher.pro b/tests/auto/corelib/tools/qbytearraymatcher/qbytearraymatcher.pro
index 0624e1886c..9d4d5964c9 100644
--- a/tests/auto/corelib/tools/qbytearraymatcher/qbytearraymatcher.pro
+++ b/tests/auto/corelib/tools/qbytearraymatcher/qbytearraymatcher.pro
@@ -2,3 +2,4 @@ CONFIG += testcase
TARGET = tst_qbytearraymatcher
QT = core testlib
SOURCES = tst_qbytearraymatcher.cpp
+contains(QT_CONFIG, c++14):CONFIG += c++14
diff --git a/tests/auto/corelib/tools/qbytearraymatcher/tst_qbytearraymatcher.cpp b/tests/auto/corelib/tools/qbytearraymatcher/tst_qbytearraymatcher.cpp
index 82cc432d55..647a3ae379 100644
--- a/tests/auto/corelib/tools/qbytearraymatcher/tst_qbytearraymatcher.cpp
+++ b/tests/auto/corelib/tools/qbytearraymatcher/tst_qbytearraymatcher.cpp
@@ -43,10 +43,9 @@ class tst_QByteArrayMatcher : public QObject
private slots:
void interface();
void indexIn();
+ void staticByteArrayMatcher();
};
-static QByteArrayMatcher matcher1;
-
void tst_QByteArrayMatcher::interface()
{
const char needle[] = "abc123";
@@ -56,6 +55,8 @@ void tst_QByteArrayMatcher::interface()
haystack.insert(42, "abc123");
haystack.insert(84, "abc123");
+ QByteArrayMatcher matcher1;
+
matcher1 = QByteArrayMatcher(QByteArray(needle));
QByteArrayMatcher matcher2;
matcher2.setPattern(QByteArray(needle));
@@ -90,8 +91,10 @@ void tst_QByteArrayMatcher::interface()
QCOMPARE(matcher7.indexIn(haystack), 42);
}
-
-static QByteArrayMatcher matcher;
+#define LONG_STRING__32 "abcdefghijklmnopqrstuvwxyz012345"
+#define LONG_STRING__64 LONG_STRING__32 LONG_STRING__32
+#define LONG_STRING_128 LONG_STRING__64 LONG_STRING__64
+#define LONG_STRING_256 LONG_STRING_128 LONG_STRING_128
void tst_QByteArrayMatcher::indexIn()
{
@@ -101,6 +104,8 @@ void tst_QByteArrayMatcher::indexIn()
QByteArray haystack(8, '\0');
haystack[7] = 0x1;
+ QByteArrayMatcher matcher;
+
matcher = QByteArrayMatcher(pattern);
QCOMPARE(matcher.indexIn(haystack, 0), 5);
QCOMPARE(matcher.indexIn(haystack, 1), 5);
@@ -110,7 +115,103 @@ void tst_QByteArrayMatcher::indexIn()
QCOMPARE(matcher.indexIn(haystack, 0), 5);
QCOMPARE(matcher.indexIn(haystack, 1), 5);
QCOMPARE(matcher.indexIn(haystack, 2), 5);
+
+ QByteArray allChars(256, Qt::Uninitialized);
+ for (int i = 0; i < 256; ++i)
+ allChars[i] = char(i);
+
+ matcher = QByteArrayMatcher(allChars);
+ haystack = LONG_STRING__32 "x" + matcher.pattern();
+ QCOMPARE(matcher.indexIn(haystack, 0), 33);
+ QCOMPARE(matcher.indexIn(haystack, 1), 33);
+ QCOMPARE(matcher.indexIn(haystack, 2), 33);
+ QCOMPARE(matcher.indexIn(haystack, 33), 33);
+ QCOMPARE(matcher.indexIn(haystack, 34), -1);
+
+ matcher = QByteArrayMatcher(LONG_STRING_256);
+ haystack = LONG_STRING__32 "x" + matcher.pattern();
+ QCOMPARE(matcher.indexIn(haystack, 0), 33);
+ QCOMPARE(matcher.indexIn(haystack, 1), 33);
+ QCOMPARE(matcher.indexIn(haystack, 2), 33);
+ QCOMPARE(matcher.indexIn(haystack, 33), 33);
+ QCOMPARE(matcher.indexIn(haystack, 34), -1);
}
+void tst_QByteArrayMatcher::staticByteArrayMatcher()
+{
+ {
+ static Q_RELAXED_CONSTEXPR auto smatcher = qMakeStaticByteArrayMatcher("Hello");
+ QCOMPARE(smatcher.pattern(), QByteArrayLiteral("Hello"));
+
+ QCOMPARE(smatcher.indexIn(QByteArray("Hello, World!")), 0);
+ QCOMPARE(smatcher.indexIn(QByteArray("Hello, World!"), 0), 0);
+ QCOMPARE(smatcher.indexIn(QByteArray("Hello, World!"), 1), -1);
+ QCOMPARE(smatcher.indexIn(QByteArray("aHello, World!")), 1);
+ QCOMPARE(smatcher.indexIn(QByteArray("aaHello, World!")), 2);
+ QCOMPARE(smatcher.indexIn(QByteArray("aaaHello, World!")), 3);
+ QCOMPARE(smatcher.indexIn(QByteArray("aaaaHello, World!")), 4);
+ QCOMPARE(smatcher.indexIn(QByteArray("aaaaaHello, World!")), 5);
+ QCOMPARE(smatcher.indexIn(QByteArray("aaaaaaHello, World!")), 6);
+ QCOMPARE(smatcher.indexIn(QByteArray("HHello, World!")), 1);
+ QCOMPARE(smatcher.indexIn(QByteArray("HeHello, World!")), 2);
+ QCOMPARE(smatcher.indexIn(QByteArray("HelHello, World!")), 3);
+ QCOMPARE(smatcher.indexIn(QByteArray("HellHello, World!")), 4);
+ QCOMPARE(smatcher.indexIn(QByteArray("HellaHello, World!")), 5);
+ QCOMPARE(smatcher.indexIn(QByteArray("HellauHello, World!")), 6);
+ QCOMPARE(smatcher.indexIn(QByteArray("aHella, World!")), -1);
+ QCOMPARE(smatcher.indexIn(QByteArray("aaHella, World!")), -1);
+ QCOMPARE(smatcher.indexIn(QByteArray("aaaHella, World!")), -1);
+ QCOMPARE(smatcher.indexIn(QByteArray("aaaaHella, World!")), -1);
+ QCOMPARE(smatcher.indexIn(QByteArray("aaaaaHella, World!")), -1);
+ QCOMPARE(smatcher.indexIn(QByteArray("aaaaaaHella, World!")), -1);
+
+ QCOMPARE(smatcher.indexIn(QByteArray("aHello")), 1);
+ QCOMPARE(smatcher.indexIn(QByteArray("aaHello")), 2);
+ QCOMPARE(smatcher.indexIn(QByteArray("aaaHello")), 3);
+ QCOMPARE(smatcher.indexIn(QByteArray("aaaaHello")), 4);
+ QCOMPARE(smatcher.indexIn(QByteArray("aaaaaHello")), 5);
+ QCOMPARE(smatcher.indexIn(QByteArray("aaaaaaHello")), 6);
+ QCOMPARE(smatcher.indexIn(QByteArray("HHello")), 1);
+ QCOMPARE(smatcher.indexIn(QByteArray("HeHello")), 2);
+ QCOMPARE(smatcher.indexIn(QByteArray("HelHello")), 3);
+ QCOMPARE(smatcher.indexIn(QByteArray("HellHello")), 4);
+ QCOMPARE(smatcher.indexIn(QByteArray("HellaHello")), 5);
+ QCOMPARE(smatcher.indexIn(QByteArray("HellauHello")), 6);
+ QCOMPARE(smatcher.indexIn(QByteArray("aHella")), -1);
+ QCOMPARE(smatcher.indexIn(QByteArray("aaHella")), -1);
+ QCOMPARE(smatcher.indexIn(QByteArray("aaaHella")), -1);
+ QCOMPARE(smatcher.indexIn(QByteArray("aaaaHella")), -1);
+ QCOMPARE(smatcher.indexIn(QByteArray("aaaaaHella")), -1);
+ QCOMPARE(smatcher.indexIn(QByteArray("aaaaaaHella")), -1);
+ }
+
+ {
+ static Q_RELAXED_CONSTEXPR auto smatcher = qMakeStaticByteArrayMatcher(LONG_STRING_256);
+ QCOMPARE(smatcher.pattern(), QByteArrayLiteral(LONG_STRING_256));
+
+ QCOMPARE(smatcher.indexIn(QByteArray("a" LONG_STRING_256)), 1);
+ QCOMPARE(smatcher.indexIn(QByteArray("aa" LONG_STRING_256)), 2);
+ QCOMPARE(smatcher.indexIn(QByteArray("aaa" LONG_STRING_256)), 3);
+ QCOMPARE(smatcher.indexIn(QByteArray("aaaa" LONG_STRING_256)), 4);
+ QCOMPARE(smatcher.indexIn(QByteArray("aaaaa" LONG_STRING_256)), 5);
+ QCOMPARE(smatcher.indexIn(QByteArray("aaaaaa" LONG_STRING_256)), 6);
+ QCOMPARE(smatcher.indexIn(QByteArray("a" LONG_STRING_256 "a")), 1);
+ QCOMPARE(smatcher.indexIn(QByteArray("aa" LONG_STRING_256 "a")), 2);
+ QCOMPARE(smatcher.indexIn(QByteArray("aaa" LONG_STRING_256 "a")), 3);
+ QCOMPARE(smatcher.indexIn(QByteArray("aaaa" LONG_STRING_256 "a")), 4);
+ QCOMPARE(smatcher.indexIn(QByteArray("aaaaa" LONG_STRING_256 "a")), 5);
+ QCOMPARE(smatcher.indexIn(QByteArray("aaaaaa" LONG_STRING_256 "a")), 6);
+ QCOMPARE(smatcher.indexIn(QByteArray(LONG_STRING__32 "x" LONG_STRING_256)), 33);
+ QCOMPARE(smatcher.indexIn(QByteArray(LONG_STRING__64 "x" LONG_STRING_256)), 65);
+ QCOMPARE(smatcher.indexIn(QByteArray(LONG_STRING_128 "x" LONG_STRING_256)), 129);
+ }
+
+}
+
+#undef LONG_STRING_256
+#undef LONG_STRING_128
+#undef LONG_STRING__64
+#undef LONG_STRING__32
+
QTEST_APPLESS_MAIN(tst_QByteArrayMatcher)
#include "tst_qbytearraymatcher.moc"