summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMarc Mutz <marc.mutz@qt.io>2022-01-21 15:02:08 +0100
committerMarc Mutz <marc.mutz@qt.io>2022-01-23 01:06:57 +0000
commit3d3558dc8f0a1885f416b4650037364f4ef11bd4 (patch)
tree33c0a28a52ec17c86057ba4632e68f92bb2266a0
parent3ec587666f89c996cc0a403775c352954d8b804f (diff)
QStaticByteArrayMatcher: fix searching in 2+GiB haystacks
Add a test (same techniques as for the 4+GiB check in tst_qcryptographichash). Takes ~1s to build the 4GiB test data here, and skips when RAM is too low: $ qtbase/tests/auto/corelib/text/qbytearraymatcher/tst_qbytearraymatcher haystacksWithMoreThan4GiBWork [...] QDEBUG : tst_QByteArrayMatcher::haystacksWithMoreThan4GiBWork() created dataset in 891 ms [...] $ (ulimit -v 2000000; qtbase/tests/auto/corelib/text/qbytearraymatcher/tst_qbytearraymatcher haystacksWithMoreThan4GiBWork) ********* Start testing of tst_QByteArrayMatcher ********* [...] SKIP : tst_QByteArrayMatcher::haystacksWithMoreThan4GiBWork() Could not allocate 4GiB plus a couple hundred bytes of RAM. Loc: [/home/marc/Qt/qt5/qtbase/tests/auto/corelib/text/qbytearraymatcher/tst_qbytearraymatcher.cpp(242)] [...] Found during 6.3 API review. [ChangeLog][QtCore][QStaticByteArrayMatcher] Fixed searching in strings with size > 2GiB (on 64-bit platforms). Fixes: QTBUG-100118 Pick-to: 6.3 Change-Id: I1df420965673b5555fef2b75e785954cc50b654f Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
-rw-r--r--src/corelib/compat/removed_api.cpp13
-rw-r--r--src/corelib/text/qbytearraymatcher.cpp12
-rw-r--r--src/corelib/text/qbytearraymatcher.h21
-rw-r--r--tests/auto/corelib/text/qbytearraymatcher/tst_qbytearraymatcher.cpp74
4 files changed, 106 insertions, 14 deletions
diff --git a/src/corelib/compat/removed_api.cpp b/src/corelib/compat/removed_api.cpp
index d27656bf7a..4cc0bffe20 100644
--- a/src/corelib/compat/removed_api.cpp
+++ b/src/corelib/compat/removed_api.cpp
@@ -62,6 +62,19 @@ int QMetaType::id() const
#if QT_REMOVED_SINCE(6, 3)
+#include "qbytearraymatcher.h"
+
+# if QT_POINTER_SIZE != 4
+
+int QStaticByteArrayMatcherBase::indexOfIn(const char *needle, uint nlen, const char *haystack, int hlen, int from) const noexcept
+{ // pre-6.3 int-based overload kept for binary compatibility; parameter order is needle first, then haystack
+    const qsizetype r = indexOfIn(needle, size_t(nlen), haystack, qsizetype(hlen), qsizetype(from)); // forwards to the qsizetype-based overload
+    Q_ASSERT(r == int(r)); // hlen is an int here, so the resulting index is expected to fit into an int
+    return int(r); // explicit narrowing, guarded by the assertion above
+}
+
+# endif // QT_POINTER_SIZE != 4
+
#include "tools/qcryptographichash.h"
void QCryptographicHash::addData(const QByteArray &data)
diff --git a/src/corelib/text/qbytearraymatcher.cpp b/src/corelib/text/qbytearraymatcher.cpp
index 7ade5bc5b7..ee4d8d265b 100644
--- a/src/corelib/text/qbytearraymatcher.cpp
+++ b/src/corelib/text/qbytearraymatcher.cpp
@@ -395,7 +395,7 @@ qsizetype qFindByteArray(
*/
/*!
- \fn template <uint N> int QStaticByteArrayMatcher<N>::indexIn(const char *haystack, int hlen, int from = 0) const
+ \fn template <size_t N> qsizetype QStaticByteArrayMatcher<N>::indexIn(const char *haystack, qsizetype hlen, qsizetype from = 0) const
Searches the char string \a haystack, which has length \a hlen, from
byte position \a from (default 0, i.e. from the first byte), for
@@ -405,7 +405,7 @@ qsizetype qFindByteArray(
*/
/*!
- \fn template <uint N> int QStaticByteArrayMatcher<N>::indexIn(const QByteArray &haystack, int from = 0) const
+ \fn template <size_t N> qsizetype QStaticByteArrayMatcher<N>::indexIn(const QByteArray &haystack, qsizetype from = 0) const
Searches the char string \a haystack, from byte position \a from
(default 0, i.e. from the first byte), for the byte array pattern()
@@ -415,7 +415,7 @@ qsizetype qFindByteArray(
*/
/*!
- \fn template <uint N> QByteArray QStaticByteArrayMatcher<N>::pattern() const
+ \fn template <size_t N> QByteArray QStaticByteArrayMatcher<N>::pattern() const
Returns the byte array pattern that this byte array matcher will
search for.
@@ -426,7 +426,7 @@ qsizetype qFindByteArray(
/*!
\internal
*/
-int QStaticByteArrayMatcherBase::indexOfIn(const char *needle, uint nlen, const char *haystack, int hlen, int from) const noexcept
+qsizetype QStaticByteArrayMatcherBase::indexOfIn(const char *needle, size_t nlen, const char *haystack, qsizetype hlen, qsizetype from) const noexcept
{
if (from < 0)
from = 0;
@@ -435,12 +435,12 @@ int QStaticByteArrayMatcherBase::indexOfIn(const char *needle, uint nlen, const
}
/*!
- \fn template <uint N> QStaticByteArrayMatcher<N>::QStaticByteArrayMatcher(const char (&pattern)[N])
+ \fn template <size_t N> QStaticByteArrayMatcher<N>::QStaticByteArrayMatcher(const char (&pattern)[N])
\internal
*/
/*!
- \fn template <uint N> QStaticByteArrayMatcher qMakeStaticByteArrayMatcher(const char (&pattern)[N])
+ \fn template <size_t N> QStaticByteArrayMatcher qMakeStaticByteArrayMatcher(const char (&pattern)[N])
\since 5.9
\relates QStaticByteArrayMatcher
diff --git a/src/corelib/text/qbytearraymatcher.h b/src/corelib/text/qbytearraymatcher.h
index 2a65a257ee..80473c7585 100644
--- a/src/corelib/text/qbytearraymatcher.h
+++ b/src/corelib/text/qbytearraymatcher.h
@@ -99,15 +99,20 @@ class QStaticByteArrayMatcherBase
uchar data[256];
} m_skiptable;
protected:
- explicit constexpr QStaticByteArrayMatcherBase(const char *pattern, uint n) noexcept
+ explicit constexpr QStaticByteArrayMatcherBase(const char *pattern, size_t n) noexcept
: m_skiptable(generate(pattern, n)) {}
// compiler-generated copy/move ctors/assignment operators are ok!
// compiler-generated dtor is ok!
+#if QT_REMOVED_SINCE(6, 3) && QT_POINTER_SIZE != 4
Q_CORE_EXPORT int indexOfIn(const char *needle, uint nlen, const char *haystack, int hlen, int from) const noexcept;
+#endif
+ Q_CORE_EXPORT qsizetype indexOfIn(const char *needle, size_t nlen,
+ const char *haystack, qsizetype hlen,
+ qsizetype from) const noexcept;
private:
- static constexpr Skiptable generate(const char *pattern, uint n) noexcept
+ static constexpr Skiptable generate(const char *pattern, size_t n) noexcept
{
const auto uchar_max = (std::numeric_limits<uchar>::max)();
uchar max = n > uchar_max ? uchar_max : uchar(n);
@@ -143,7 +148,7 @@ private:
}
};
-template <uint N>
+template <size_t N>
class QStaticByteArrayMatcher : QStaticByteArrayMatcherBase
{
char m_pattern[N];
@@ -153,19 +158,19 @@ public:
explicit constexpr QStaticByteArrayMatcher(const char (&patternToMatch)[N]) noexcept
: QStaticByteArrayMatcherBase(patternToMatch, N - 1), m_pattern()
{
- for (uint i = 0; i < N; ++i)
+ for (size_t i = 0; i < N; ++i)
m_pattern[i] = patternToMatch[i];
}
- int indexIn(const QByteArray &haystack, int from = 0) const noexcept
+ qsizetype indexIn(const QByteArray &haystack, qsizetype from = 0) const noexcept
{ return this->indexOfIn(m_pattern, N - 1, haystack.data(), haystack.size(), from); }
- int indexIn(const char *haystack, int hlen, int from = 0) const noexcept
+ qsizetype indexIn(const char *haystack, qsizetype hlen, qsizetype from = 0) const noexcept
{ return this->indexOfIn(m_pattern, N - 1, haystack, hlen, from); }
- QByteArray pattern() const { return QByteArray(m_pattern, int(N - 1)); }
+ QByteArray pattern() const { return QByteArray(m_pattern, qsizetype(N - 1)); }
};
-template <uint N>
+template <size_t N>
constexpr QStaticByteArrayMatcher<N> qMakeStaticByteArrayMatcher(const char (&pattern)[N]) noexcept
{ return QStaticByteArrayMatcher<N>(pattern); }
diff --git a/tests/auto/corelib/text/qbytearraymatcher/tst_qbytearraymatcher.cpp b/tests/auto/corelib/text/qbytearraymatcher/tst_qbytearraymatcher.cpp
index 42b978acc8..aa2b928e95 100644
--- a/tests/auto/corelib/text/qbytearraymatcher/tst_qbytearraymatcher.cpp
+++ b/tests/auto/corelib/text/qbytearraymatcher/tst_qbytearraymatcher.cpp
@@ -31,6 +31,13 @@
#include <qbytearraymatcher.h>
+#include <numeric>
+#include <string>
+
+#if QT_CONFIG(cxx11_future)
+# include <thread>
+#endif
+
// COM interface
#if defined(Q_OS_WIN) && defined(interface)
# undef interface
@@ -44,6 +51,7 @@ private slots:
void interface();
void indexIn();
void staticByteArrayMatcher();
+ void haystacksWithMoreThan4GiBWork();
};
void tst_QByteArrayMatcher::interface()
@@ -208,6 +216,72 @@ void tst_QByteArrayMatcher::staticByteArrayMatcher()
}
+void tst_QByteArrayMatcher::haystacksWithMoreThan4GiBWork()
+{ // checks that both matchers report a match located past the 4 GiB mark without truncating the index
+#if QT_POINTER_SIZE > 4 // the haystack below only fits into a 64-bit address space
+ // use a large needle to trigger long skips in the Boyer-Moore algorithm
+ // (to speed up the test)
+ constexpr std::string_view needle = LONG_STRING_256;
+
+ //
+ // GIVEN: a haystack with more than 4 GiB of data (4 GiB + 1 NUL bytes, followed by the needle)
+ //
+
+ // don't use QByteArray because freeSpaceAtEnd() may break reserve()
+ // semantics and a realloc is the last thing we need here
+ std::string large;
+ QElapsedTimer timer;
+ timer.start();
+ constexpr size_t GiB = 1024 * 1024 * 1024;
+ constexpr size_t BaseSize = 4 * GiB + 1; // number of filler bytes; also the offset at which the needle is appended
+ try {
+ large.reserve(BaseSize + needle.size()); // reserve the final size up front so the append below does not need to grow the buffer
+ large.resize(BaseSize, '\0');
+ large.append(needle);
+ } catch (const std::bad_alloc &) { // not enough memory available: skip instead of failing
+ QSKIP("Could not allocate 4GiB plus a couple hundred bytes of RAM.");
+ }
+ QCOMPARE(large.size(), BaseSize + needle.size());
+ qDebug("created dataset in %lld ms", timer.elapsed());
+
+# if QT_CONFIG(cxx11_future)
+ using MaybeThread = std::thread;
+# else
+ struct MaybeThread { // fallback without std::thread: the callable runs synchronously inside join()
+ std::function<void()> func;
+ void join() { func(); }
+ };
+# endif
+
+ //
+ // WHEN: trying to match an occurrence past the 4GiB mark (with both the dynamic and the static matcher)
+ //
+
+ qsizetype dynamicResult, staticResult; // each is assigned exactly once below, before being compared
+
+ auto t = MaybeThread{[&]{ // dynamic matcher; with std::thread this runs alongside the static matcher below
+ QByteArrayMatcher m(needle);
+ dynamicResult = m.indexIn(large);
+ }};
+ {
+ static_assert(needle == LONG_STRING_256); // need a string literal in the following line:
+ QStaticByteArrayMatcher m(LONG_STRING_256);
+ staticResult = m.indexIn(large.data(), large.size());
+ }
+ t.join(); // dynamicResult is only read after the join
+
+ //
+ // THEN: the result index is not truncated (the needle was appended at offset BaseSize)
+ //
+
+ QCOMPARE(staticResult, qsizetype(BaseSize));
+ QCOMPARE(dynamicResult, qsizetype(BaseSize));
+#else // QT_POINTER_SIZE <= 4
+ QSKIP("This test is 64-bit only.");
+#endif // QT_POINTER_SIZE > 4
+
+}
+
#undef LONG_STRING_256
#undef LONG_STRING_128
#undef LONG_STRING__64