summaryrefslogtreecommitdiffstats
path: root/tests/benchmarks/corelib/text/qregexp/main.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'tests/benchmarks/corelib/text/qregexp/main.cpp')
-rw-r--r--tests/benchmarks/corelib/text/qregexp/main.cpp615
1 files changed, 615 insertions, 0 deletions
diff --git a/tests/benchmarks/corelib/text/qregexp/main.cpp b/tests/benchmarks/corelib/text/qregexp/main.cpp
new file mode 100644
index 0000000000..798b23f2b0
--- /dev/null
+++ b/tests/benchmarks/corelib/text/qregexp/main.cpp
@@ -0,0 +1,615 @@
+/****************************************************************************
+**
+** Copyright (C) 2016 The Qt Company Ltd.
+** Contact: https://www.qt.io/licensing/
+**
+** This file is part of the test suite of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:GPL-EXCEPT$
+** Commercial License Usage
+** Licensees holding valid commercial Qt licenses may use this file in
+** accordance with the commercial license agreement provided with the
+** Software or, alternatively, in accordance with the terms contained in
+** a written agreement between you and The Qt Company. For licensing terms
+** and conditions see https://www.qt.io/terms-conditions. For further
+** information use the contact form at https://www.qt.io/contact-us.
+**
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU
+** General Public License version 3 as published by the Free Software
+** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
+** included in the packaging of this file. Please review the following
+** information to ensure the GNU General Public License requirements will
+** be met: https://www.gnu.org/licenses/gpl-3.0.html.
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#include <QDebug>
+#include <QRegExp>
+#include <QString>
+#include <QFile>
+
+#include <qtest.h>
+#ifdef HAVE_BOOST
+#include <boost/regex.hpp>
+#endif
+
+#ifdef HAVE_JSC
+#include <QtScript>
+#include "pcre/pcre.h"
+#endif
+#define ZLIB_VERSION "1.2.3.4"
+
+class tst_qregexp : public QObject
+{
+ Q_OBJECT
+public:
+ tst_qregexp();
+private slots:
+ void escape_old();
+ void escape_old_data() { escape_data(); }
+ void escape_new1();
+ void escape_new1_data() { escape_data(); }
+ void escape_new2();
+ void escape_new2_data() { escape_data(); }
+ void escape_new3();
+ void escape_new3_data() { escape_data(); }
+ void escape_new4();
+ void escape_new4_data() { escape_data(); }
+/*
+ JSC outperforms everything.
+ Boost is less impressive then expected.
+ */
+ void simpleFind1();
+ void rangeReplace1();
+ void matchReplace1();
+
+ void simpleFind2();
+ void rangeReplace2();
+ void matchReplace2();
+
+ void simpleFindJSC();
+ void rangeReplaceJSC();
+ void matchReplaceJSC();
+
+ void simpleFindBoost();
+ void rangeReplaceBoost();
+ void matchReplaceBoost();
+
+/* those apply an (incorrect) regexp on entire source
+ (this main.cpp). JSC appears to handle this
+ (ab)use case best. QRegExp performs extremly bad.
+ */
+ void horribleWrongReplace1();
+ void horribleReplace1();
+ void horribleReplace2();
+ void horribleWrongReplace2();
+ void horribleWrongReplaceJSC();
+ void horribleReplaceJSC();
+ void horribleWrongReplaceBoost();
+ void horribleReplaceBoost();
+private:
+ QString str1;
+ QString str2;
+ void escape_data();
+};
+
+tst_qregexp::tst_qregexp()
+ :QObject()
+ ,str1("We are all happy monkeys")
+{
+ QFile f(":/main.cpp");
+ f.open(QFile::ReadOnly);
+ str2=f.readAll();
+}
+
+static void verify(const QString &quoted, const QString &expected)
+{
+ if (quoted != expected)
+ qDebug() << "ERROR:" << quoted << expected;
+}
+
+void tst_qregexp::escape_data()
+{
+ QTest::addColumn<QString>("pattern");
+ QTest::addColumn<QString>("expected");
+
+ QTest::newRow("escape 0") << "Hello world" << "Hello world";
+ QTest::newRow("escape 1") << "(Hello world)" << "\\(Hello world\\)";
+ {
+ QString s;
+ for (int i = 0; i < 10; ++i)
+ s += "(escape)";
+ QTest::newRow("escape 10") << s << QRegExp::escape(s);
+ }
+ {
+ QString s;
+ for (int i = 0; i < 100; ++i)
+ s += "(escape)";
+ QTest::newRow("escape 100") << s << QRegExp::escape(s);
+ }
+}
+
+void tst_qregexp::escape_old()
+{
+ QFETCH(QString, pattern);
+ QFETCH(QString, expected);
+
+ QBENCHMARK {
+ static const char meta[] = "$()*+.?[\\]^{|}";
+ QString quoted = pattern;
+ int i = 0;
+
+ while (i < quoted.length()) {
+ if (strchr(meta, quoted.at(i).toLatin1()) != 0)
+ quoted.insert(i++, QLatin1Char('\\'));
+ ++i;
+ }
+
+ verify(quoted, expected);
+ }
+}
+
+void tst_qregexp::escape_new1()
+{
+ QFETCH(QString, pattern);
+ QFETCH(QString, expected);
+
+ QBENCHMARK {
+ QString quoted;
+ const int count = pattern.count();
+ quoted.reserve(count * 2);
+ const QLatin1Char backslash('\\');
+ for (int i = 0; i < count; i++) {
+ switch (pattern.at(i).toLatin1()) {
+ case '$':
+ case '(':
+ case ')':
+ case '*':
+ case '+':
+ case '.':
+ case '?':
+ case '[':
+ case '\\':
+ case ']':
+ case '^':
+ case '{':
+ case '|':
+ case '}':
+ quoted.append(backslash);
+ }
+ quoted.append(pattern.at(i));
+ }
+ verify(quoted, expected);
+ }
+}
+
+void tst_qregexp::escape_new2()
+{
+ QFETCH(QString, pattern);
+ QFETCH(QString, expected);
+
+ QBENCHMARK {
+ int count = pattern.count();
+ const QLatin1Char backslash('\\');
+ QString quoted(count * 2, backslash);
+ const QChar *patternData = pattern.data();
+ QChar *quotedData = quoted.data();
+ int escaped = 0;
+ for ( ; --count >= 0; ++patternData) {
+ const QChar c = *patternData;
+ switch (c.unicode()) {
+ case '$':
+ case '(':
+ case ')':
+ case '*':
+ case '+':
+ case '.':
+ case '?':
+ case '[':
+ case '\\':
+ case ']':
+ case '^':
+ case '{':
+ case '|':
+ case '}':
+ ++escaped;
+ ++quotedData;
+ }
+ *quotedData = c;
+ ++quotedData;
+ }
+ quoted.resize(pattern.size() + escaped);
+
+ verify(quoted, expected);
+ }
+}
+
+void tst_qregexp::escape_new3()
+{
+ QFETCH(QString, pattern);
+ QFETCH(QString, expected);
+
+ QBENCHMARK {
+ QString quoted;
+ const int count = pattern.count();
+ quoted.reserve(count * 2);
+ const QLatin1Char backslash('\\');
+ for (int i = 0; i < count; i++) {
+ switch (pattern.at(i).toLatin1()) {
+ case '$':
+ case '(':
+ case ')':
+ case '*':
+ case '+':
+ case '.':
+ case '?':
+ case '[':
+ case '\\':
+ case ']':
+ case '^':
+ case '{':
+ case '|':
+ case '}':
+ quoted += backslash;
+ }
+ quoted += pattern.at(i);
+ }
+
+ verify(quoted, expected);
+ }
+}
+
+
+static inline bool needsEscaping(int c)
+{
+ switch (c) {
+ case '$':
+ case '(':
+ case ')':
+ case '*':
+ case '+':
+ case '.':
+ case '?':
+ case '[':
+ case '\\':
+ case ']':
+ case '^':
+ case '{':
+ case '|':
+ case '}':
+ return true;
+ }
+ return false;
+}
+
+void tst_qregexp::escape_new4()
+{
+ QFETCH(QString, pattern);
+ QFETCH(QString, expected);
+
+ QBENCHMARK {
+ const int n = pattern.size();
+ const QChar *patternData = pattern.data();
+ // try to prevent copy if no escape is needed
+ int i = 0;
+ for (int i = 0; i != n; ++i) {
+ const QChar c = patternData[i];
+ if (needsEscaping(c.unicode()))
+ break;
+ }
+ if (i == n) {
+ verify(pattern, expected);
+ // no escaping needed, "return pattern" should be done here.
+ return;
+ }
+ const QLatin1Char backslash('\\');
+ QString quoted(n * 2, backslash);
+ QChar *quotedData = quoted.data();
+ for (int j = 0; j != i; ++j)
+ *quotedData++ = *patternData++;
+ int escaped = 0;
+ for (; i != n; ++i) {
+ const QChar c = *patternData;
+ if (needsEscaping(c.unicode())) {
+ ++escaped;
+ ++quotedData;
+ }
+ *quotedData = c;
+ ++quotedData;
+ ++patternData;
+ }
+ quoted.resize(n + escaped);
+ verify(quoted, expected);
+ // "return quoted"
+ }
+}
+
+
+void tst_qregexp::simpleFind1()
+{
+ int roff;
+ QRegExp rx("happy");
+ rx.setPatternSyntax(QRegExp::RegExp);
+ QBENCHMARK{
+ roff = rx.indexIn(str1);
+ }
+ QCOMPARE(roff, 11);
+}
+
+void tst_qregexp::rangeReplace1()
+{
+ QString r;
+ QRegExp rx("[a-f]");
+ rx.setPatternSyntax(QRegExp::RegExp);
+ QBENCHMARK{
+ r = QString(str1).replace(rx, "-");
+ }
+ QCOMPARE(r, QString("W- -r- -ll h-ppy monk-ys"));
+}
+
+void tst_qregexp::matchReplace1()
+{
+ QString r;
+ QRegExp rx("[^a-f]*([a-f]+)[^a-f]*");
+ rx.setPatternSyntax(QRegExp::RegExp);
+ QBENCHMARK{
+ r = QString(str1).replace(rx, "\\1");
+ }
+ QCOMPARE(r, QString("eaeaae"));
+}
+
+void tst_qregexp::horribleWrongReplace1()
+{
+ QString r;
+ QRegExp rx(".*#""define ZLIB_VERSION \"([0-9]+)\\.([0-9]+)\\.([0-9]+)\".*");
+ rx.setPatternSyntax(QRegExp::RegExp);
+ QBENCHMARK{
+ r = QString(str2).replace(rx, "\\1.\\2.\\3");
+ }
+ QCOMPARE(r, str2);
+}
+
+void tst_qregexp::horribleReplace1()
+{
+ QString r;
+ QRegExp rx(".*#""define ZLIB_VERSION \"([0-9]+)\\.([0-9]+)\\.([0-9]+).*");
+ rx.setPatternSyntax(QRegExp::RegExp);
+ QBENCHMARK{
+ r = QString(str2).replace(rx, "\\1.\\2.\\3");
+ }
+ QCOMPARE(r, QString("1.2.3"));
+}
+
+
+void tst_qregexp::simpleFind2()
+{
+ int roff;
+ QRegExp rx("happy");
+ rx.setPatternSyntax(QRegExp::RegExp2);
+ QBENCHMARK{
+ roff = rx.indexIn(str1);
+ }
+ QCOMPARE(roff, 11);
+}
+
+void tst_qregexp::rangeReplace2()
+{
+ QString r;
+ QRegExp rx("[a-f]");
+ rx.setPatternSyntax(QRegExp::RegExp2);
+ QBENCHMARK{
+ r = QString(str1).replace(rx, "-");
+ }
+ QCOMPARE(r, QString("W- -r- -ll h-ppy monk-ys"));
+}
+
+void tst_qregexp::matchReplace2()
+{
+ QString r;
+ QRegExp rx("[^a-f]*([a-f]+)[^a-f]*");
+ rx.setPatternSyntax(QRegExp::RegExp2);
+ QBENCHMARK{
+ r = QString(str1).replace(rx, "\\1");
+ }
+ QCOMPARE(r, QString("eaeaae"));
+}
+
+void tst_qregexp::horribleWrongReplace2()
+{
+ QString r;
+ QRegExp rx(".*#""define ZLIB_VERSION \"([0-9]+)\\.([0-9]+)\\.([0-9]+)\".*");
+ rx.setPatternSyntax(QRegExp::RegExp2);
+ QBENCHMARK{
+ r = QString(str2).replace(rx, "\\1.\\2.\\3");
+ }
+ QCOMPARE(r, str2);
+}
+
+void tst_qregexp::horribleReplace2()
+{
+ QString r;
+ QRegExp rx(".*#""define ZLIB_VERSION \"([0-9]+)\\.([0-9]+)\\.([0-9]+).*");
+ rx.setPatternSyntax(QRegExp::RegExp2);
+ QBENCHMARK{
+ r = QString(str2).replace(rx, "\\1.\\2.\\3");
+ }
+ QCOMPARE(r, QString("1.2.3"));
+}
+void tst_qregexp::simpleFindJSC()
+{
+#ifdef HAVE_JSC
+ int numr;
+ const char * errmsg=" ";
+ QString rxs("happy");
+ JSRegExp *rx = jsRegExpCompile(rxs.utf16(), rxs.length(), JSRegExpDoNotIgnoreCase, JSRegExpSingleLine, 0, &errmsg);
+ QVERIFY(rx != 0);
+ QString s(str1);
+ int offsetVector[3];
+ QBENCHMARK{
+ numr = jsRegExpExecute(rx, s.utf16(), s.length(), 0, offsetVector, 3);
+ }
+ jsRegExpFree(rx);
+ QCOMPARE(numr, 1);
+ QCOMPARE(offsetVector[0], 11);
+#else
+ QSKIP("JSC is not enabled for this platform");
+#endif
+}
+
+void tst_qregexp::rangeReplaceJSC()
+{
+#ifdef HAVE_JSC
+ QScriptValue r;
+ QScriptEngine engine;
+ engine.globalObject().setProperty("s", str1);
+ QScriptValue replaceFunc = engine.evaluate("(function() { return s.replace(/[a-f]/g, '-') } )");
+ QVERIFY(replaceFunc.isFunction());
+ QBENCHMARK{
+ r = replaceFunc.call(QScriptValue());
+ }
+ QCOMPARE(r.toString(), QString("W- -r- -ll h-ppy monk-ys"));
+#else
+ QSKIP("JSC is not enabled for this platform");
+#endif
+}
+
+void tst_qregexp::matchReplaceJSC()
+{
+#ifdef HAVE_JSC
+ QScriptValue r;
+ QScriptEngine engine;
+ engine.globalObject().setProperty("s", str1);
+ QScriptValue replaceFunc = engine.evaluate("(function() { return s.replace(/[^a-f]*([a-f]+)[^a-f]*/g, '$1') } )");
+ QVERIFY(replaceFunc.isFunction());
+ QBENCHMARK{
+ r = replaceFunc.call(QScriptValue());
+ }
+ QCOMPARE(r.toString(), QString("eaeaae"));
+#else
+ QSKIP("JSC is not enabled for this platform");
+#endif
+}
+
+void tst_qregexp::horribleWrongReplaceJSC()
+{
+#ifdef HAVE_JSC
+ QScriptValue r;
+ QScriptEngine engine;
+ engine.globalObject().setProperty("s", str2);
+ QScriptValue replaceFunc = engine.evaluate("(function() { return s.replace(/.*#""define ZLIB_VERSION \"([0-9]+)\\.([0-9]+)\\.([0-9]+)\".*/gm, '$1.$2.$3') } )");
+ QVERIFY(replaceFunc.isFunction());
+ QBENCHMARK{
+ r = replaceFunc.call(QScriptValue());
+ }
+ QCOMPARE(r.toString(), str2);
+#else
+ QSKIP("JSC is not enabled for this platform");
+#endif
+}
+
+void tst_qregexp::horribleReplaceJSC()
+{
+#ifdef HAVE_JSC
+ QScriptValue r;
+ QScriptEngine engine;
+ // the m flag doesn't actually work here; dunno
+ engine.globalObject().setProperty("s", str2.replace('\n', ' '));
+ QScriptValue replaceFunc = engine.evaluate("(function() { return s.replace(/.*#""define ZLIB_VERSION \"([0-9]+)\\.([0-9]+)\\.([0-9]+).*/gm, '$1.$2.$3') } )");
+ QVERIFY(replaceFunc.isFunction());
+ QBENCHMARK{
+ r = replaceFunc.call(QScriptValue());
+ }
+ QCOMPARE(r.toString(), QString("1.2.3"));
+#else
+ QSKIP("JSC is not enabled for this platform");
+#endif
+}
+
+void tst_qregexp::simpleFindBoost()
+{
+#ifdef HAVE_BOOST
+ int roff;
+ boost::regex rx ("happy", boost::regex_constants::perl);
+ std::string s = str1.toStdString();
+ std::string::const_iterator start, end;
+ start = s.begin();
+ end = s.end();
+ boost::match_flag_type flags = boost::match_default;
+ QBENCHMARK{
+ boost::match_results<std::string::const_iterator> what;
+ regex_search(start, end, what, rx, flags);
+ roff = (what[0].first)-start;
+ }
+ QCOMPARE(roff, 11);
+#else
+ QSKIP("Boost is not enabled for this platform");
+#endif
+
+}
+
+void tst_qregexp::rangeReplaceBoost()
+{
+#ifdef HAVE_BOOST
+ boost::regex pattern ("[a-f]", boost::regex_constants::perl);
+ std::string s = str1.toStdString();
+ std::string r;
+ QBENCHMARK{
+ r = boost::regex_replace (s, pattern, "-");
+ }
+ QCOMPARE(r, std::string("W- -r- -ll h-ppy monk-ys"));
+#else
+ QSKIP("Boost is not enabled for this platform");
+#endif
+}
+
+void tst_qregexp::matchReplaceBoost()
+{
+#ifdef HAVE_BOOST
+ boost::regex pattern ("[^a-f]*([a-f]+)[^a-f]*",boost::regex_constants::perl);
+ std::string s = str1.toStdString();
+ std::string r;
+ QBENCHMARK{
+ r = boost::regex_replace (s, pattern, "$1");
+ }
+ QCOMPARE(r, std::string("eaeaae"));
+#else
+ QSKIP("Boost is not enabled for this platform");
+#endif
+}
+
+void tst_qregexp::horribleWrongReplaceBoost()
+{
+#ifdef HAVE_BOOST
+ boost::regex pattern (".*#""define ZLIB_VERSION \"([0-9]+)\\.([0-9]+)\\.([0-9]+)\".*", boost::regex_constants::perl);
+ std::string s = str2.toStdString();
+ std::string r;
+ QBENCHMARK{
+ r = boost::regex_replace (s, pattern, "$1.$2.$3");
+ }
+ QCOMPARE(r, s);
+#else
+ QSKIP("Boost is not enabled for this platform");
+#endif
+}
+
+void tst_qregexp::horribleReplaceBoost()
+{
+#ifdef HAVE_BOOST
+ boost::regex pattern (".*#""define ZLIB_VERSION \"([0-9]+)\\.([0-9]+)\\.([0-9]+).*", boost::regex_constants::perl);
+ std::string s = str2.toStdString();
+ std::string r;
+ QBENCHMARK{
+ r = boost::regex_replace (s, pattern, "$1.$2.$3");
+ }
+ QCOMPARE(r, std::string("1.2.3"));
+#else
+ QSKIP("Boost is not enabled for this platform");
+#endif
+}
+
+QTEST_MAIN(tst_qregexp)
+
+#include "main.moc"