/**************************************************************************** ** ** Copyright (C) 2011 Nokia Corporation and/or its subsidiary(-ies). ** All rights reserved. ** Contact: Nokia Corporation (qt-info@nokia.com) ** ** This file is part of the test suite of the Qt Toolkit. ** ** $QT_BEGIN_LICENSE:LGPL$ ** GNU Lesser General Public License Usage ** This file may be used under the terms of the GNU Lesser General Public ** License version 2.1 as published by the Free Software Foundation and ** appearing in the file LICENSE.LGPL included in the packaging of this ** file. Please review the following information to ensure the GNU Lesser ** General Public License version 2.1 requirements will be met: ** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. ** ** In addition, as a special exception, Nokia gives you certain additional ** rights. These rights are described in the Nokia Qt LGPL Exception ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package. ** ** GNU General Public License Usage ** Alternatively, this file may be used under the terms of the GNU General ** Public License version 3.0 as published by the Free Software Foundation ** and appearing in the file LICENSE.GPL included in the packaging of this ** file. Please review the following information to ensure the GNU General ** Public License version 3.0 requirements will be met: ** http://www.gnu.org/copyleft/gpl.html. ** ** Other Usage ** Alternatively, this file may be used in accordance with the terms and ** conditions contained in a signed written agreement between you and Nokia. ** ** ** ** ** ** $QT_END_LICENSE$ ** ****************************************************************************/ #include #include #include #include #include #ifdef HAVE_BOOST #include #endif #ifdef HAVE_JSC #include #include "pcre/pcre.h" #endif #define ZLIB_VERSION "1.2.3.4" class tst_qregexp : public QObject { Q_OBJECT public: tst_qregexp(); private slots: void escape_old(); void escape_old_data() { escape_data(); } void escape_new1(); void escape_new1_data() { escape_data(); } void escape_new2(); void escape_new2_data() { escape_data(); } void escape_new3(); void escape_new3_data() { escape_data(); } void escape_new4(); void escape_new4_data() { escape_data(); } /* JSC outperforms everything. Boost is less impressive then expected. */ void simpleFind1(); void rangeReplace1(); void matchReplace1(); void simpleFind2(); void rangeReplace2(); void matchReplace2(); #ifdef HAVE_JSC void simpleFindJSC(); void rangeReplaceJSC(); void matchReplaceJSC(); #endif #ifdef HAVE_BOOST void simpleFindBoost(); void rangeReplaceBoost(); void matchReplaceBoost(); #endif /* those apply an (incorrect) regexp on entire source (this main.cpp). JSC appears to handle this (ab)use case best. QRegExp performs extremly bad. */ void horribleWrongReplace1(); void horribleReplace1(); void horribleReplace2(); void horribleWrongReplace2(); #ifdef HAVE_JSC void horribleWrongReplaceJSC(); void horribleReplaceJSC(); #endif #ifdef HAVE_BOOST void horribleWrongReplaceBoost(); void horribleReplaceBoost(); #endif private: QString str1; QString str2; void escape_data(); }; tst_qregexp::tst_qregexp() :QObject() ,str1("We are all happy monkeys") { QFile f(":/main.cpp"); f.open(QFile::ReadOnly); str2=f.readAll(); } static void verify(const QString "ed, const QString &expected) { if (quoted != expected) qDebug() << "ERROR:" << quoted << expected; } void tst_qregexp::escape_data() { QTest::addColumn("pattern"); QTest::addColumn("expected"); QTest::newRow("escape 0") << "Hello world" << "Hello world"; QTest::newRow("escape 1") << "(Hello world)" << "\\(Hello world\\)"; { QString s; for (int i = 0; i < 10; ++i) s += "(escape)"; QTest::newRow("escape 10") << s << QRegExp::escape(s); } { QString s; for (int i = 0; i < 100; ++i) s += "(escape)"; QTest::newRow("escape 100") << s << QRegExp::escape(s); } } void tst_qregexp::escape_old() { QFETCH(QString, pattern); QFETCH(QString, expected); QBENCHMARK { static const char meta[] = "$()*+.?[\\]^{|}"; QString quoted = pattern; int i = 0; while (i < quoted.length()) { if (strchr(meta, quoted.at(i).toLatin1()) != 0) quoted.insert(i++, QLatin1Char('\\')); ++i; } verify(quoted, expected); } } void tst_qregexp::escape_new1() { QFETCH(QString, pattern); QFETCH(QString, expected); QBENCHMARK { QString quoted; const int count = pattern.count(); quoted.reserve(count * 2); const QLatin1Char backslash('\\'); for (int i = 0; i < count; i++) { switch (pattern.at(i).toLatin1()) { case '$': case '(': case ')': case '*': case '+': case '.': case '?': case '[': case '\\': case ']': case '^': case '{': case '|': case '}': quoted.append(backslash); } quoted.append(pattern.at(i)); } verify(quoted, expected); } } void tst_qregexp::escape_new2() { QFETCH(QString, pattern); QFETCH(QString, expected); QBENCHMARK { int count = pattern.count(); const QLatin1Char backslash('\\'); QString quoted(count * 2, backslash); const QChar *patternData = pattern.data(); QChar *quotedData = quoted.data(); int escaped = 0; for ( ; --count >= 0; ++patternData) { const QChar c = *patternData; switch (c.unicode()) { case '$': case '(': case ')': case '*': case '+': case '.': case '?': case '[': case '\\': case ']': case '^': case '{': case '|': case '}': ++escaped; ++quotedData; } *quotedData = c; ++quotedData; } quoted.resize(pattern.size() + escaped); verify(quoted, expected); } } void tst_qregexp::escape_new3() { QFETCH(QString, pattern); QFETCH(QString, expected); QBENCHMARK { QString quoted; const int count = pattern.count(); quoted.reserve(count * 2); const QLatin1Char backslash('\\'); for (int i = 0; i < count; i++) { switch (pattern.at(i).toLatin1()) { case '$': case '(': case ')': case '*': case '+': case '.': case '?': case '[': case '\\': case ']': case '^': case '{': case '|': case '}': quoted += backslash; } quoted += pattern.at(i); } verify(quoted, expected); } } static inline bool needsEscaping(int c) { switch (c) { case '$': case '(': case ')': case '*': case '+': case '.': case '?': case '[': case '\\': case ']': case '^': case '{': case '|': case '}': return true; } return false; } void tst_qregexp::escape_new4() { QFETCH(QString, pattern); QFETCH(QString, expected); QBENCHMARK { const int n = pattern.size(); const QChar *patternData = pattern.data(); // try to prevent copy if no escape is needed int i = 0; for (int i = 0; i != n; ++i) { const QChar c = patternData[i]; if (needsEscaping(c.unicode())) break; } if (i == n) { verify(pattern, expected); // no escaping needed, "return pattern" should be done here. return; } const QLatin1Char backslash('\\'); QString quoted(n * 2, backslash); QChar *quotedData = quoted.data(); for (int j = 0; j != i; ++j) *quotedData++ = *patternData++; int escaped = 0; for (; i != n; ++i) { const QChar c = *patternData; if (needsEscaping(c.unicode())) { ++escaped; ++quotedData; } *quotedData = c; ++quotedData; ++patternData; } quoted.resize(n + escaped); verify(quoted, expected); // "return quoted" } } void tst_qregexp::simpleFind1() { int roff; QRegExp rx("happy"); rx.setPatternSyntax(QRegExp::RegExp); QBENCHMARK{ roff = rx.indexIn(str1); } QCOMPARE(roff, 11); } void tst_qregexp::rangeReplace1() { QString r; QRegExp rx("[a-f]"); rx.setPatternSyntax(QRegExp::RegExp); QBENCHMARK{ r = QString(str1).replace(rx, "-"); } QCOMPARE(r, QString("W- -r- -ll h-ppy monk-ys")); } void tst_qregexp::matchReplace1() { QString r; QRegExp rx("[^a-f]*([a-f]+)[^a-f]*"); rx.setPatternSyntax(QRegExp::RegExp); QBENCHMARK{ r = QString(str1).replace(rx, "\\1"); } QCOMPARE(r, QString("eaeaae")); } void tst_qregexp::horribleWrongReplace1() { QString r; QRegExp rx(".*#""define ZLIB_VERSION \"([0-9]+)\\.([0-9]+)\\.([0-9]+)\".*"); rx.setPatternSyntax(QRegExp::RegExp); QBENCHMARK{ r = QString(str2).replace(rx, "\\1.\\2.\\3"); } QCOMPARE(r, str2); } void tst_qregexp::horribleReplace1() { QString r; QRegExp rx(".*#""define ZLIB_VERSION \"([0-9]+)\\.([0-9]+)\\.([0-9]+).*"); rx.setPatternSyntax(QRegExp::RegExp); QBENCHMARK{ r = QString(str2).replace(rx, "\\1.\\2.\\3"); } QCOMPARE(r, QString("1.2.3")); } void tst_qregexp::simpleFind2() { int roff; QRegExp rx("happy"); rx.setPatternSyntax(QRegExp::RegExp2); QBENCHMARK{ roff = rx.indexIn(str1); } QCOMPARE(roff, 11); } void tst_qregexp::rangeReplace2() { QString r; QRegExp rx("[a-f]"); rx.setPatternSyntax(QRegExp::RegExp2); QBENCHMARK{ r = QString(str1).replace(rx, "-"); } QCOMPARE(r, QString("W- -r- -ll h-ppy monk-ys")); } void tst_qregexp::matchReplace2() { QString r; QRegExp rx("[^a-f]*([a-f]+)[^a-f]*"); rx.setPatternSyntax(QRegExp::RegExp2); QBENCHMARK{ r = QString(str1).replace(rx, "\\1"); } QCOMPARE(r, QString("eaeaae")); } void tst_qregexp::horribleWrongReplace2() { QString r; QRegExp rx(".*#""define ZLIB_VERSION \"([0-9]+)\\.([0-9]+)\\.([0-9]+)\".*"); rx.setPatternSyntax(QRegExp::RegExp2); QBENCHMARK{ r = QString(str2).replace(rx, "\\1.\\2.\\3"); } QCOMPARE(r, str2); } void tst_qregexp::horribleReplace2() { QString r; QRegExp rx(".*#""define ZLIB_VERSION \"([0-9]+)\\.([0-9]+)\\.([0-9]+).*"); rx.setPatternSyntax(QRegExp::RegExp2); QBENCHMARK{ r = QString(str2).replace(rx, "\\1.\\2.\\3"); } QCOMPARE(r, QString("1.2.3")); } #ifdef HAVE_JSC void tst_qregexp::simpleFindJSC() { int numr; const char * errmsg=" "; QString rxs("happy"); JSRegExp *rx = jsRegExpCompile(rxs.utf16(), rxs.length(), JSRegExpDoNotIgnoreCase, JSRegExpSingleLine, 0, &errmsg); QVERIFY(rx != 0); QString s(str1); int offsetVector[3]; QBENCHMARK{ numr = jsRegExpExecute(rx, s.utf16(), s.length(), 0, offsetVector, 3); } jsRegExpFree(rx); QCOMPARE(numr, 1); QCOMPARE(offsetVector[0], 11); } void tst_qregexp::rangeReplaceJSC() { QScriptValue r; QScriptEngine engine; engine.globalObject().setProperty("s", str1); QScriptValue replaceFunc = engine.evaluate("(function() { return s.replace(/[a-f]/g, '-') } )"); QVERIFY(replaceFunc.isFunction()); QBENCHMARK{ r = replaceFunc.call(QScriptValue()); } QCOMPARE(r.toString(), QString("W- -r- -ll h-ppy monk-ys")); } void tst_qregexp::matchReplaceJSC() { QScriptValue r; QScriptEngine engine; engine.globalObject().setProperty("s", str1); QScriptValue replaceFunc = engine.evaluate("(function() { return s.replace(/[^a-f]*([a-f]+)[^a-f]*/g, '$1') } )"); QVERIFY(replaceFunc.isFunction()); QBENCHMARK{ r = replaceFunc.call(QScriptValue()); } QCOMPARE(r.toString(), QString("eaeaae")); } void tst_qregexp::horribleWrongReplaceJSC() { QScriptValue r; QScriptEngine engine; engine.globalObject().setProperty("s", str2); QScriptValue replaceFunc = engine.evaluate("(function() { return s.replace(/.*#""define ZLIB_VERSION \"([0-9]+)\\.([0-9]+)\\.([0-9]+)\".*/gm, '$1.$2.$3') } )"); QVERIFY(replaceFunc.isFunction()); QBENCHMARK{ r = replaceFunc.call(QScriptValue()); } QCOMPARE(r.toString(), str2); } void tst_qregexp::horribleReplaceJSC() { QScriptValue r; QScriptEngine engine; // the m flag doesnt actually work here; dunno engine.globalObject().setProperty("s", str2.replace('\n', ' ')); QScriptValue replaceFunc = engine.evaluate("(function() { return s.replace(/.*#""define ZLIB_VERSION \"([0-9]+)\\.([0-9]+)\\.([0-9]+).*/gm, '$1.$2.$3') } )"); QVERIFY(replaceFunc.isFunction()); QBENCHMARK{ r = replaceFunc.call(QScriptValue()); } QCOMPARE(r.toString(), QString("1.2.3")); } #endif #ifdef HAVE_BOOST void tst_qregexp::simpleFindBoost(){ int roff; boost::regex rx ("happy", boost::regex_constants::perl); std::string s = str1.toStdString(); std::string::const_iterator start, end; start = s.begin(); end = s.end(); boost::match_flag_type flags = boost::match_default; QBENCHMARK{ boost::match_results what; regex_search(start, end, what, rx, flags); roff = (what[0].first)-start; } QCOMPARE(roff, 11); } void tst_qregexp::rangeReplaceBoost() { boost::regex pattern ("[a-f]", boost::regex_constants::perl); std::string s = str1.toStdString(); std::string r; QBENCHMARK{ r = boost::regex_replace (s, pattern, "-"); } QCOMPARE(r, std::string("W- -r- -ll h-ppy monk-ys")); } void tst_qregexp::matchReplaceBoost() { boost::regex pattern ("[^a-f]*([a-f]+)[^a-f]*",boost::regex_constants::perl); std::string s = str1.toStdString(); std::string r; QBENCHMARK{ r = boost::regex_replace (s, pattern, "$1"); } QCOMPARE(r, std::string("eaeaae")); } void tst_qregexp::horribleWrongReplaceBoost() { boost::regex pattern (".*#""define ZLIB_VERSION \"([0-9]+)\\.([0-9]+)\\.([0-9]+)\".*", boost::regex_constants::perl); std::string s = str2.toStdString(); std::string r; QBENCHMARK{ r = boost::regex_replace (s, pattern, "$1.$2.$3"); } QCOMPARE(r, s); } void tst_qregexp::horribleReplaceBoost() { boost::regex pattern (".*#""define ZLIB_VERSION \"([0-9]+)\\.([0-9]+)\\.([0-9]+).*", boost::regex_constants::perl); std::string s = str2.toStdString(); std::string r; QBENCHMARK{ r = boost::regex_replace (s, pattern, "$1.$2.$3"); } QCOMPARE(r, std::string("1.2.3")); } #endif //HAVE_BOOST QTEST_MAIN(tst_qregexp) #include "main.moc"