diff options
Diffstat (limited to 'tests/benchmarks/corelib/text/qregexp')
-rw-r--r-- | tests/benchmarks/corelib/text/qregexp/main.cpp | 615 | ||||
-rw-r--r-- | tests/benchmarks/corelib/text/qregexp/qregexp.pro | 20 | ||||
-rw-r--r-- | tests/benchmarks/corelib/text/qregexp/qregexp.qrc | 6 |
3 files changed, 641 insertions, 0 deletions
diff --git a/tests/benchmarks/corelib/text/qregexp/main.cpp b/tests/benchmarks/corelib/text/qregexp/main.cpp new file mode 100644 index 0000000000..798b23f2b0 --- /dev/null +++ b/tests/benchmarks/corelib/text/qregexp/main.cpp @@ -0,0 +1,615 @@ +/**************************************************************************** +** +** Copyright (C) 2016 The Qt Company Ltd. +** Contact: https://www.qt.io/licensing/ +** +** This file is part of the test suite of the Qt Toolkit. +** +** $QT_BEGIN_LICENSE:GPL-EXCEPT$ +** Commercial License Usage +** Licensees holding valid commercial Qt licenses may use this file in +** accordance with the commercial license agreement provided with the +** Software or, alternatively, in accordance with the terms contained in +** a written agreement between you and The Qt Company. For licensing terms +** and conditions see https://www.qt.io/terms-conditions. For further +** information use the contact form at https://www.qt.io/contact-us. +** +** GNU General Public License Usage +** Alternatively, this file may be used under the terms of the GNU +** General Public License version 3 as published by the Free Software +** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT +** included in the packaging of this file. Please review the following +** information to ensure the GNU General Public License requirements will +** be met: https://www.gnu.org/licenses/gpl-3.0.html. +** +** $QT_END_LICENSE$ +** +****************************************************************************/ + +#include <QDebug> +#include <QRegExp> +#include <QString> +#include <QFile> + +#include <qtest.h> +#ifdef HAVE_BOOST +#include <boost/regex.hpp> +#endif + +#ifdef HAVE_JSC +#include <QtScript> +#include "pcre/pcre.h" +#endif +#define ZLIB_VERSION "1.2.3.4" + +class tst_qregexp : public QObject +{ + Q_OBJECT +public: + tst_qregexp(); +private slots: + void escape_old(); + void escape_old_data() { escape_data(); } + void escape_new1(); + void escape_new1_data() { escape_data(); } + void escape_new2(); + void escape_new2_data() { escape_data(); } + void escape_new3(); + void escape_new3_data() { escape_data(); } + void escape_new4(); + void escape_new4_data() { escape_data(); } +/* + JSC outperforms everything. + Boost is less impressive then expected. + */ + void simpleFind1(); + void rangeReplace1(); + void matchReplace1(); + + void simpleFind2(); + void rangeReplace2(); + void matchReplace2(); + + void simpleFindJSC(); + void rangeReplaceJSC(); + void matchReplaceJSC(); + + void simpleFindBoost(); + void rangeReplaceBoost(); + void matchReplaceBoost(); + +/* those apply an (incorrect) regexp on entire source + (this main.cpp). JSC appears to handle this + (ab)use case best. QRegExp performs extremly bad. + */ + void horribleWrongReplace1(); + void horribleReplace1(); + void horribleReplace2(); + void horribleWrongReplace2(); + void horribleWrongReplaceJSC(); + void horribleReplaceJSC(); + void horribleWrongReplaceBoost(); + void horribleReplaceBoost(); +private: + QString str1; + QString str2; + void escape_data(); +}; + +tst_qregexp::tst_qregexp() + :QObject() + ,str1("We are all happy monkeys") +{ + QFile f(":/main.cpp"); + f.open(QFile::ReadOnly); + str2=f.readAll(); +} + +static void verify(const QString "ed, const QString &expected) +{ + if (quoted != expected) + qDebug() << "ERROR:" << quoted << expected; +} + +void tst_qregexp::escape_data() +{ + QTest::addColumn<QString>("pattern"); + QTest::addColumn<QString>("expected"); + + QTest::newRow("escape 0") << "Hello world" << "Hello world"; + QTest::newRow("escape 1") << "(Hello world)" << "\\(Hello world\\)"; + { + QString s; + for (int i = 0; i < 10; ++i) + s += "(escape)"; + QTest::newRow("escape 10") << s << QRegExp::escape(s); + } + { + QString s; + for (int i = 0; i < 100; ++i) + s += "(escape)"; + QTest::newRow("escape 100") << s << QRegExp::escape(s); + } +} + +void tst_qregexp::escape_old() +{ + QFETCH(QString, pattern); + QFETCH(QString, expected); + + QBENCHMARK { + static const char meta[] = "$()*+.?[\\]^{|}"; + QString quoted = pattern; + int i = 0; + + while (i < quoted.length()) { + if (strchr(meta, quoted.at(i).toLatin1()) != 0) + quoted.insert(i++, QLatin1Char('\\')); + ++i; + } + + verify(quoted, expected); + } +} + +void tst_qregexp::escape_new1() +{ + QFETCH(QString, pattern); + QFETCH(QString, expected); + + QBENCHMARK { + QString quoted; + const int count = pattern.count(); + quoted.reserve(count * 2); + const QLatin1Char backslash('\\'); + for (int i = 0; i < count; i++) { + switch (pattern.at(i).toLatin1()) { + case '$': + case '(': + case ')': + case '*': + case '+': + case '.': + case '?': + case '[': + case '\\': + case ']': + case '^': + case '{': + case '|': + case '}': + quoted.append(backslash); + } + quoted.append(pattern.at(i)); + } + verify(quoted, expected); + } +} + +void tst_qregexp::escape_new2() +{ + QFETCH(QString, pattern); + QFETCH(QString, expected); + + QBENCHMARK { + int count = pattern.count(); + const QLatin1Char backslash('\\'); + QString quoted(count * 2, backslash); + const QChar *patternData = pattern.data(); + QChar *quotedData = quoted.data(); + int escaped = 0; + for ( ; --count >= 0; ++patternData) { + const QChar c = *patternData; + switch (c.unicode()) { + case '$': + case '(': + case ')': + case '*': + case '+': + case '.': + case '?': + case '[': + case '\\': + case ']': + case '^': + case '{': + case '|': + case '}': + ++escaped; + ++quotedData; + } + *quotedData = c; + ++quotedData; + } + quoted.resize(pattern.size() + escaped); + + verify(quoted, expected); + } +} + +void tst_qregexp::escape_new3() +{ + QFETCH(QString, pattern); + QFETCH(QString, expected); + + QBENCHMARK { + QString quoted; + const int count = pattern.count(); + quoted.reserve(count * 2); + const QLatin1Char backslash('\\'); + for (int i = 0; i < count; i++) { + switch (pattern.at(i).toLatin1()) { + case '$': + case '(': + case ')': + case '*': + case '+': + case '.': + case '?': + case '[': + case '\\': + case ']': + case '^': + case '{': + case '|': + case '}': + quoted += backslash; + } + quoted += pattern.at(i); + } + + verify(quoted, expected); + } +} + + +static inline bool needsEscaping(int c) +{ + switch (c) { + case '$': + case '(': + case ')': + case '*': + case '+': + case '.': + case '?': + case '[': + case '\\': + case ']': + case '^': + case '{': + case '|': + case '}': + return true; + } + return false; +} + +void tst_qregexp::escape_new4() +{ + QFETCH(QString, pattern); + QFETCH(QString, expected); + + QBENCHMARK { + const int n = pattern.size(); + const QChar *patternData = pattern.data(); + // try to prevent copy if no escape is needed + int i = 0; + for (int i = 0; i != n; ++i) { + const QChar c = patternData[i]; + if (needsEscaping(c.unicode())) + break; + } + if (i == n) { + verify(pattern, expected); + // no escaping needed, "return pattern" should be done here. + return; + } + const QLatin1Char backslash('\\'); + QString quoted(n * 2, backslash); + QChar *quotedData = quoted.data(); + for (int j = 0; j != i; ++j) + *quotedData++ = *patternData++; + int escaped = 0; + for (; i != n; ++i) { + const QChar c = *patternData; + if (needsEscaping(c.unicode())) { + ++escaped; + ++quotedData; + } + *quotedData = c; + ++quotedData; + ++patternData; + } + quoted.resize(n + escaped); + verify(quoted, expected); + // "return quoted" + } +} + + +void tst_qregexp::simpleFind1() +{ + int roff; + QRegExp rx("happy"); + rx.setPatternSyntax(QRegExp::RegExp); + QBENCHMARK{ + roff = rx.indexIn(str1); + } + QCOMPARE(roff, 11); +} + +void tst_qregexp::rangeReplace1() +{ + QString r; + QRegExp rx("[a-f]"); + rx.setPatternSyntax(QRegExp::RegExp); + QBENCHMARK{ + r = QString(str1).replace(rx, "-"); + } + QCOMPARE(r, QString("W- -r- -ll h-ppy monk-ys")); +} + +void tst_qregexp::matchReplace1() +{ + QString r; + QRegExp rx("[^a-f]*([a-f]+)[^a-f]*"); + rx.setPatternSyntax(QRegExp::RegExp); + QBENCHMARK{ + r = QString(str1).replace(rx, "\\1"); + } + QCOMPARE(r, QString("eaeaae")); +} + +void tst_qregexp::horribleWrongReplace1() +{ + QString r; + QRegExp rx(".*#""define ZLIB_VERSION \"([0-9]+)\\.([0-9]+)\\.([0-9]+)\".*"); + rx.setPatternSyntax(QRegExp::RegExp); + QBENCHMARK{ + r = QString(str2).replace(rx, "\\1.\\2.\\3"); + } + QCOMPARE(r, str2); +} + +void tst_qregexp::horribleReplace1() +{ + QString r; + QRegExp rx(".*#""define ZLIB_VERSION \"([0-9]+)\\.([0-9]+)\\.([0-9]+).*"); + rx.setPatternSyntax(QRegExp::RegExp); + QBENCHMARK{ + r = QString(str2).replace(rx, "\\1.\\2.\\3"); + } + QCOMPARE(r, QString("1.2.3")); +} + + +void tst_qregexp::simpleFind2() +{ + int roff; + QRegExp rx("happy"); + rx.setPatternSyntax(QRegExp::RegExp2); + QBENCHMARK{ + roff = rx.indexIn(str1); + } + QCOMPARE(roff, 11); +} + +void tst_qregexp::rangeReplace2() +{ + QString r; + QRegExp rx("[a-f]"); + rx.setPatternSyntax(QRegExp::RegExp2); + QBENCHMARK{ + r = QString(str1).replace(rx, "-"); + } + QCOMPARE(r, QString("W- -r- -ll h-ppy monk-ys")); +} + +void tst_qregexp::matchReplace2() +{ + QString r; + QRegExp rx("[^a-f]*([a-f]+)[^a-f]*"); + rx.setPatternSyntax(QRegExp::RegExp2); + QBENCHMARK{ + r = QString(str1).replace(rx, "\\1"); + } + QCOMPARE(r, QString("eaeaae")); +} + +void tst_qregexp::horribleWrongReplace2() +{ + QString r; + QRegExp rx(".*#""define ZLIB_VERSION \"([0-9]+)\\.([0-9]+)\\.([0-9]+)\".*"); + rx.setPatternSyntax(QRegExp::RegExp2); + QBENCHMARK{ + r = QString(str2).replace(rx, "\\1.\\2.\\3"); + } + QCOMPARE(r, str2); +} + +void tst_qregexp::horribleReplace2() +{ + QString r; + QRegExp rx(".*#""define ZLIB_VERSION \"([0-9]+)\\.([0-9]+)\\.([0-9]+).*"); + rx.setPatternSyntax(QRegExp::RegExp2); + QBENCHMARK{ + r = QString(str2).replace(rx, "\\1.\\2.\\3"); + } + QCOMPARE(r, QString("1.2.3")); +} +void tst_qregexp::simpleFindJSC() +{ +#ifdef HAVE_JSC + int numr; + const char * errmsg=" "; + QString rxs("happy"); + JSRegExp *rx = jsRegExpCompile(rxs.utf16(), rxs.length(), JSRegExpDoNotIgnoreCase, JSRegExpSingleLine, 0, &errmsg); + QVERIFY(rx != 0); + QString s(str1); + int offsetVector[3]; + QBENCHMARK{ + numr = jsRegExpExecute(rx, s.utf16(), s.length(), 0, offsetVector, 3); + } + jsRegExpFree(rx); + QCOMPARE(numr, 1); + QCOMPARE(offsetVector[0], 11); +#else + QSKIP("JSC is not enabled for this platform"); +#endif +} + +void tst_qregexp::rangeReplaceJSC() +{ +#ifdef HAVE_JSC + QScriptValue r; + QScriptEngine engine; + engine.globalObject().setProperty("s", str1); + QScriptValue replaceFunc = engine.evaluate("(function() { return s.replace(/[a-f]/g, '-') } )"); + QVERIFY(replaceFunc.isFunction()); + QBENCHMARK{ + r = replaceFunc.call(QScriptValue()); + } + QCOMPARE(r.toString(), QString("W- -r- -ll h-ppy monk-ys")); +#else + QSKIP("JSC is not enabled for this platform"); +#endif +} + +void tst_qregexp::matchReplaceJSC() +{ +#ifdef HAVE_JSC + QScriptValue r; + QScriptEngine engine; + engine.globalObject().setProperty("s", str1); + QScriptValue replaceFunc = engine.evaluate("(function() { return s.replace(/[^a-f]*([a-f]+)[^a-f]*/g, '$1') } )"); + QVERIFY(replaceFunc.isFunction()); + QBENCHMARK{ + r = replaceFunc.call(QScriptValue()); + } + QCOMPARE(r.toString(), QString("eaeaae")); +#else + QSKIP("JSC is not enabled for this platform"); +#endif +} + +void tst_qregexp::horribleWrongReplaceJSC() +{ +#ifdef HAVE_JSC + QScriptValue r; + QScriptEngine engine; + engine.globalObject().setProperty("s", str2); + QScriptValue replaceFunc = engine.evaluate("(function() { return s.replace(/.*#""define ZLIB_VERSION \"([0-9]+)\\.([0-9]+)\\.([0-9]+)\".*/gm, '$1.$2.$3') } )"); + QVERIFY(replaceFunc.isFunction()); + QBENCHMARK{ + r = replaceFunc.call(QScriptValue()); + } + QCOMPARE(r.toString(), str2); +#else + QSKIP("JSC is not enabled for this platform"); +#endif +} + +void tst_qregexp::horribleReplaceJSC() +{ +#ifdef HAVE_JSC + QScriptValue r; + QScriptEngine engine; + // the m flag doesn't actually work here; dunno + engine.globalObject().setProperty("s", str2.replace('\n', ' ')); + QScriptValue replaceFunc = engine.evaluate("(function() { return s.replace(/.*#""define ZLIB_VERSION \"([0-9]+)\\.([0-9]+)\\.([0-9]+).*/gm, '$1.$2.$3') } )"); + QVERIFY(replaceFunc.isFunction()); + QBENCHMARK{ + r = replaceFunc.call(QScriptValue()); + } + QCOMPARE(r.toString(), QString("1.2.3")); +#else + QSKIP("JSC is not enabled for this platform"); +#endif +} + +void tst_qregexp::simpleFindBoost() +{ +#ifdef HAVE_BOOST + int roff; + boost::regex rx ("happy", boost::regex_constants::perl); + std::string s = str1.toStdString(); + std::string::const_iterator start, end; + start = s.begin(); + end = s.end(); + boost::match_flag_type flags = boost::match_default; + QBENCHMARK{ + boost::match_results<std::string::const_iterator> what; + regex_search(start, end, what, rx, flags); + roff = (what[0].first)-start; + } + QCOMPARE(roff, 11); +#else + QSKIP("Boost is not enabled for this platform"); +#endif + +} + +void tst_qregexp::rangeReplaceBoost() +{ +#ifdef HAVE_BOOST + boost::regex pattern ("[a-f]", boost::regex_constants::perl); + std::string s = str1.toStdString(); + std::string r; + QBENCHMARK{ + r = boost::regex_replace (s, pattern, "-"); + } + QCOMPARE(r, std::string("W- -r- -ll h-ppy monk-ys")); +#else + QSKIP("Boost is not enabled for this platform"); +#endif +} + +void tst_qregexp::matchReplaceBoost() +{ +#ifdef HAVE_BOOST + boost::regex pattern ("[^a-f]*([a-f]+)[^a-f]*",boost::regex_constants::perl); + std::string s = str1.toStdString(); + std::string r; + QBENCHMARK{ + r = boost::regex_replace (s, pattern, "$1"); + } + QCOMPARE(r, std::string("eaeaae")); +#else + QSKIP("Boost is not enabled for this platform"); +#endif +} + +void tst_qregexp::horribleWrongReplaceBoost() +{ +#ifdef HAVE_BOOST + boost::regex pattern (".*#""define ZLIB_VERSION \"([0-9]+)\\.([0-9]+)\\.([0-9]+)\".*", boost::regex_constants::perl); + std::string s = str2.toStdString(); + std::string r; + QBENCHMARK{ + r = boost::regex_replace (s, pattern, "$1.$2.$3"); + } + QCOMPARE(r, s); +#else + QSKIP("Boost is not enabled for this platform"); +#endif +} + +void tst_qregexp::horribleReplaceBoost() +{ +#ifdef HAVE_BOOST + boost::regex pattern (".*#""define ZLIB_VERSION \"([0-9]+)\\.([0-9]+)\\.([0-9]+).*", boost::regex_constants::perl); + std::string s = str2.toStdString(); + std::string r; + QBENCHMARK{ + r = boost::regex_replace (s, pattern, "$1.$2.$3"); + } + QCOMPARE(r, std::string("1.2.3")); +#else + QSKIP("Boost is not enabled for this platform"); +#endif +} + +QTEST_MAIN(tst_qregexp) + +#include "main.moc" diff --git a/tests/benchmarks/corelib/text/qregexp/qregexp.pro b/tests/benchmarks/corelib/text/qregexp/qregexp.pro new file mode 100644 index 0000000000..f64ae781a2 --- /dev/null +++ b/tests/benchmarks/corelib/text/qregexp/qregexp.pro @@ -0,0 +1,20 @@ +TEMPLATE = app +TARGET = tst_bench_qregexp +QT = core testlib +CONFIG += release exceptions + +SOURCES += main.cpp +RESOURCES += qregexp.qrc + +qtHaveModule(script):!pcre { + DEFINES += HAVE_JSC + QT += script +} + +!qnx { + exists($$[QT_SYSROOT]/usr/include/boost/regex.hpp) { + DEFINES += HAVE_BOOST + LIBS += -lboost_regex + } +} + diff --git a/tests/benchmarks/corelib/text/qregexp/qregexp.qrc b/tests/benchmarks/corelib/text/qregexp/qregexp.qrc new file mode 100644 index 0000000000..a7fe13c035 --- /dev/null +++ b/tests/benchmarks/corelib/text/qregexp/qregexp.qrc @@ -0,0 +1,6 @@ +<!DOCTYPE RCC><RCC version="1.0"> +<qresource> + <file>main.cpp</file> +</qresource> +</RCC> + |