summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSona Kurazyan <sona.kurazyan@qt.io>2020-06-29 16:54:15 +0200
committerSona Kurazyan <sona.kurazyan@qt.io>2020-07-13 10:53:23 +0200
commit361dc074f2301b4b68435c05ccaa7279c0170776 (patch)
tree5b22e926cae55437c46aa9507d8dd36bb252ebb0
parentac14858e85cfee06c1e19843b92d50e38bc969dd (diff)
Move QRegExp and its remaining mentions out of QtCore
Task-number: QTBUG-85235
Change-Id: Ibd6c98d952c1bb9916b64715c6430fb0d3fe3843
Reviewed-by: Lars Knoll <lars.knoll@qt.io>
-rw-r--r--qmake/.prev_CMakeLists.txt1
-rw-r--r--qmake/CMakeLists.txt2
-rw-r--r--qmake/Makefile.unix6
-rw-r--r--qmake/Makefile.win321
-rw-r--r--qmake/doc/src/qmake-manual.qdoc3
-rw-r--r--qmake/generators/makefile.cpp1
-rw-r--r--qmake/main.cpp2
-rw-r--r--qmake/qmake.pro2
-rw-r--r--qmake/qmake_pch.h1
-rw-r--r--src/corelib/.prev_CMakeLists.txt1
-rw-r--r--src/corelib/CMakeLists.txt1
-rw-r--r--src/corelib/doc/snippets/code/src_corelib_text_qregexp.cpp243
-rw-r--r--src/corelib/text/qregexp.cpp5039
-rw-r--r--src/corelib/text/qregexp.h151
-rw-r--r--src/corelib/text/text.pri2
-rw-r--r--src/tools/uic/qclass_lib_map.h1
-rw-r--r--tests/auto/corelib/text/.prev_CMakeLists.txt1
-rw-r--r--tests/auto/corelib/text/CMakeLists.txt1
-rw-r--r--tests/auto/corelib/text/qregexp/.gitignore1
-rw-r--r--tests/auto/corelib/text/qregexp/CMakeLists.txt24
-rw-r--r--tests/auto/corelib/text/qregexp/data/qdatastream_4.9.binbin30 -> 0 bytes
-rw-r--r--tests/auto/corelib/text/qregexp/data/qdatastream_5.0.binbin30 -> 0 bytes
-rw-r--r--tests/auto/corelib/text/qregexp/qregexp.pro5
-rw-r--r--tests/auto/corelib/text/qregexp/qregexp.qrc6
-rw-r--r--tests/auto/corelib/text/qregexp/tst_qregexp.cpp1726
-rw-r--r--tests/auto/corelib/text/qstring/tst_qstring.cpp2
-rw-r--r--tests/auto/corelib/text/text.pro1
-rw-r--r--tests/benchmarks/corelib/text/qregexp/CMakeLists.txt46
-rw-r--r--tests/benchmarks/corelib/text/qregexp/main.cpp615
-rw-r--r--tests/benchmarks/corelib/text/qregexp/qregexp.pro20
-rw-r--r--tests/benchmarks/corelib/text/qregexp/qregexp.qrc6
31 files changed, 5 insertions, 7906 deletions
diff --git a/qmake/.prev_CMakeLists.txt b/qmake/.prev_CMakeLists.txt
index 2bfc56b37f..127da3d71f 100644
--- a/qmake/.prev_CMakeLists.txt
+++ b/qmake/.prev_CMakeLists.txt
@@ -77,7 +77,6 @@ qt_add_tool(${target_name}
../src/corelib/text/qchar.h
../src/corelib/text/qlocale.cpp ../src/corelib/text/qlocale.h
../src/corelib/text/qlocale_tools.cpp ../src/corelib/text/qlocale_tools_p.h
- ../src/corelib/text/qregexp.cpp ../src/corelib/text/qregexp.h
../src/corelib/text/qregularexpression.cpp ../src/corelib/text/qregularexpression.h
../src/corelib/text/qstring.cpp ../src/corelib/text/qstring.h
../src/corelib/text/qstringbuilder.cpp ../src/corelib/text/qstringbuilder.h
diff --git a/qmake/CMakeLists.txt b/qmake/CMakeLists.txt
index 9931a2e14a..2a64d49c54 100644
--- a/qmake/CMakeLists.txt
+++ b/qmake/CMakeLists.txt
@@ -85,7 +85,6 @@ qt_add_tool(${target_name}
../src/corelib/text/qchar.h
../src/corelib/text/qlocale.cpp ../src/corelib/text/qlocale.h
../src/corelib/text/qlocale_tools.cpp ../src/corelib/text/qlocale_tools_p.h
- ../src/corelib/text/qregexp.cpp ../src/corelib/text/qregexp.h
../src/corelib/text/qregularexpression.cpp ../src/corelib/text/qregularexpression.h
../src/corelib/text/qstring.cpp ../src/corelib/text/qstring.h
../src/corelib/text/qstringbuilder.cpp ../src/corelib/text/qstringbuilder.h
@@ -114,7 +113,6 @@ qt_add_tool(${target_name}
../src/corelib/text/qlocale.cpp ../src/corelib/text/qlocale.h
../src/corelib/text/qlocale_tools.cpp ../src/corelib/text/qlocale_tools_p.h
../src/corelib/tools/qmap.cpp ../src/corelib/tools/qmap.h
- ../src/corelib/text/qregexp.cpp ../src/corelib/text/qregexp.h
../src/corelib/text/qregularexpression.cpp ../src/corelib/text/qregularexpression.h
../src/corelib/tools/qringbuffer.cpp # special case
../src/corelib/text/qstring.cpp ../src/corelib/text/qstring.h
diff --git a/qmake/Makefile.unix b/qmake/Makefile.unix
index 660a21daa0..75d3fe3ca7 100644
--- a/qmake/Makefile.unix
+++ b/qmake/Makefile.unix
@@ -30,7 +30,7 @@ QOBJS = \
qarraydata.o qbitarray.o qbytearray.o qbytearraylist.o qbytearraymatcher.o \
qcalendar.o qgregoriancalendar.o qromancalendar.o \
qcryptographichash.o qdatetime.o qhash.o \
- qlocale.o qlocale_tools.o qmap.o qregularexpression.o qregexp.o qringbuffer.o \
+ qlocale.o qlocale_tools.o qmap.o qregularexpression.o qringbuffer.o \
qstringbuilder.o qstring.o qstringconverter.o qstringlist.o qversionnumber.o \
qvsnprintf.o \
pcre2_auto_possess.o pcre2_chartables.o pcre2_compile.o pcre2_config.o \
@@ -116,7 +116,6 @@ DEPEND_SRC = \
$(SOURCE_PATH)/src/corelib/text/qlocale.cpp \
$(SOURCE_PATH)/src/corelib/text/qlocale_tools.cpp \
$(SOURCE_PATH)/src/corelib/text/qregularexpression.cpp \
- $(SOURCE_PATH)/src/corelib/text/qregexp.cpp \
$(SOURCE_PATH)/src/corelib/text/qstringbuilder.cpp \
$(SOURCE_PATH)/src/corelib/text/qstringconverter.cpp \
$(SOURCE_PATH)/src/corelib/text/qstring.cpp \
@@ -449,9 +448,6 @@ qtemporaryfile.o: $(SOURCE_PATH)/src/corelib/io/qtemporaryfile.cpp
qregularexpression.o: $(SOURCE_PATH)/src/corelib/text/qregularexpression.cpp
$(CXX) -c -o $@ $(CXXFLAGS) $<
-qregexp.o: $(SOURCE_PATH)/src/corelib/text/qregexp.cpp
- $(CXX) -c -o $@ $(CXXFLAGS) $<
-
qbitarray.o: $(SOURCE_PATH)/src/corelib/tools/qbitarray.cpp
$(CXX) -c -o $@ $(CXXFLAGS) $<
diff --git a/qmake/Makefile.win32 b/qmake/Makefile.win32
index 6478c43edd..05ea1fb338 100644
--- a/qmake/Makefile.win32
+++ b/qmake/Makefile.win32
@@ -102,7 +102,6 @@ QTOBJS= \
qmap.obj \
qoperatingsystemversion.obj \
qoperatingsystemversion_win.obj \
- qregexp.obj \
qromancalendar.obj \
qstring.obj \
qstringconverter.obj \
diff --git a/qmake/doc/src/qmake-manual.qdoc b/qmake/doc/src/qmake-manual.qdoc
index 9b37756b79..085cd5c461 100644
--- a/qmake/doc/src/qmake-manual.qdoc
+++ b/qmake/doc/src/qmake-manual.qdoc
@@ -3535,7 +3535,8 @@
\section2 re_escape(string)
Returns the \c string with every special regular expression character
- escaped with a backslash. This function is a wrapper around QRegExp::escape.
+ escaped with a backslash. This function is a wrapper around
+ QRegularExpression::escape.
\section2 read_registry(tree, key[, flag])
diff --git a/qmake/generators/makefile.cpp b/qmake/generators/makefile.cpp
index dfe9a1c0b9..d38b8ad9f4 100644
--- a/qmake/generators/makefile.cpp
+++ b/qmake/generators/makefile.cpp
@@ -41,7 +41,6 @@
#include <qdebug.h>
#include <qbuffer.h>
#include <qdatetime.h>
-#include <qregexp.h>
#if defined(Q_OS_UNIX)
#include <unistd.h>
diff --git a/qmake/main.cpp b/qmake/main.cpp
index 3cd79145e5..36ba631127 100644
--- a/qmake/main.cpp
+++ b/qmake/main.cpp
@@ -110,7 +110,7 @@ static int doSed(int argc, char **argv)
&& (c == QLatin1Char('+') || c == QLatin1Char('?') || c == QLatin1Char('|')
|| c == QLatin1Char('{') || c == QLatin1Char('}')
|| c == QLatin1Char('(') || c == QLatin1Char(')'))) {
- // translate sed rx to QRegExp
+ // translate sed rx to QRegularExpression
escaped ^= 1;
}
if (escaped) {
diff --git a/qmake/qmake.pro b/qmake/qmake.pro
index ee190a820a..98eb3d1f6f 100644
--- a/qmake/qmake.pro
+++ b/qmake/qmake.pro
@@ -152,7 +152,6 @@ SOURCES += \
qmap.cpp \
qmetatype.cpp \
qnumeric.cpp \
- qregexp.cpp \
qregularexpression.cpp \
qromancalendar.cpp \
qsettings.cpp \
@@ -209,7 +208,6 @@ HEADERS += \
qmap.h \
qmetatype.h \
qnumeric.h \
- qregexp.h \
qregularexpression.h \
qromancalendar_p.h \
qstring.h \
diff --git a/qmake/qmake_pch.h b/qmake/qmake_pch.h
index c97c872311..fd8c78d7b6 100644
--- a/qmake/qmake_pch.h
+++ b/qmake/qmake_pch.h
@@ -52,7 +52,6 @@
#include <qhash.h>
#include <time.h>
#include <stdlib.h>
-#include <qregexp.h>
//#include <qdir.h>
//#include "option.h"
diff --git a/src/corelib/.prev_CMakeLists.txt b/src/corelib/.prev_CMakeLists.txt
index f93c934c85..23b44f739d 100644
--- a/src/corelib/.prev_CMakeLists.txt
+++ b/src/corelib/.prev_CMakeLists.txt
@@ -145,7 +145,6 @@ qt_add_module(Core
text/qlocale.cpp text/qlocale.h text/qlocale_p.h
text/qlocale_data_p.h
text/qlocale_tools.cpp text/qlocale_tools_p.h
- text/qregexp.cpp text/qregexp.h
text/qstring.cpp text/qstring.h
text/qstring_compat.cpp
text/qstringalgorithms.h text/qstringalgorithms_p.h
diff --git a/src/corelib/CMakeLists.txt b/src/corelib/CMakeLists.txt
index 9deda274b7..84d1a79b9b 100644
--- a/src/corelib/CMakeLists.txt
+++ b/src/corelib/CMakeLists.txt
@@ -164,7 +164,6 @@ qt_add_module(Core
text/qlocale.cpp text/qlocale.h text/qlocale_p.h
text/qlocale_data_p.h
text/qlocale_tools.cpp text/qlocale_tools_p.h
- text/qregexp.cpp text/qregexp.h
text/qstring.cpp text/qstring.h
text/qstring_compat.cpp
text/qstringalgorithms.h text/qstringalgorithms_p.h
diff --git a/src/corelib/doc/snippets/code/src_corelib_text_qregexp.cpp b/src/corelib/doc/snippets/code/src_corelib_text_qregexp.cpp
deleted file mode 100644
index 8339ea413e..0000000000
--- a/src/corelib/doc/snippets/code/src_corelib_text_qregexp.cpp
+++ /dev/null
@@ -1,243 +0,0 @@
-/****************************************************************************
-**
-** Copyright (C) 2016 The Qt Company Ltd.
-** Contact: https://www.qt.io/licensing/
-**
-** This file is part of the documentation of the Qt Toolkit.
-**
-** $QT_BEGIN_LICENSE:BSD$
-** Commercial License Usage
-** Licensees holding valid commercial Qt licenses may use this file in
-** accordance with the commercial license agreement provided with the
-** Software or, alternatively, in accordance with the terms contained in
-** a written agreement between you and The Qt Company. For licensing terms
-** and conditions see https://www.qt.io/terms-conditions. For further
-** information use the contact form at https://www.qt.io/contact-us.
-**
-** BSD License Usage
-** Alternatively, you may use this file under the terms of the BSD license
-** as follows:
-**
-** "Redistribution and use in source and binary forms, with or without
-** modification, are permitted provided that the following conditions are
-** met:
-** * Redistributions of source code must retain the above copyright
-** notice, this list of conditions and the following disclaimer.
-** * Redistributions in binary form must reproduce the above copyright
-** notice, this list of conditions and the following disclaimer in
-** the documentation and/or other materials provided with the
-** distribution.
-** * Neither the name of The Qt Company Ltd nor the names of its
-** contributors may be used to endorse or promote products derived
-** from this software without specific prior written permission.
-**
-**
-** THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-** "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-** LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-** A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-** OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-** SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-** LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-** DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-** THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-** (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-** OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE."
-**
-** $QT_END_LICENSE$
-**
-****************************************************************************/
-
-//! [0]
-QRegExp rx("(\\d+)");
-QString str = "Offsets: 12 14 99 231 7";
-QStringList list;
-int pos = 0;
-
-while ((pos = rx.indexIn(str, pos)) != -1) {
- list << rx.cap(1);
- pos += rx.matchedLength();
-}
-// list: ["12", "14", "99", "231", "7"]
-//! [0]
-
-
-//! [1]
-QRegExp rx("*.txt");
-rx.setPatternSyntax(QRegExp::Wildcard);
-rx.exactMatch("README.txt"); // returns true
-rx.exactMatch("welcome.txt.bak"); // returns false
-//! [1]
-
-
-//! [2]
-QRegExp rx("ro+m");
-rx.setMinimal(true);
-//! [2]
-
-
-//! [3]
-QRegExp mark("\\b" // word boundary
- "[Mm]ark" // the word we want to match
- );
-//! [3]
-
-
-//! [4]
-QRegExp rx("^\\d\\d?$"); // match integers 0 to 99
-rx.indexIn("123"); // returns -1 (no match)
-rx.indexIn("-6"); // returns -1 (no match)
-rx.indexIn("6"); // returns 0 (matched at position 0)
-//! [4]
-
-
-//! [5]
-QRegExp rx("^\\S+$"); // match strings without whitespace
-rx.indexIn("Hello world"); // returns -1 (no match)
-rx.indexIn("This_is-OK"); // returns 0 (matched at position 0)
-//! [5]
-
-
-//! [6]
-QRegExp rx("\\b(mail|letter|correspondence)\\b");
-rx.indexIn("I sent you an email"); // returns -1 (no match)
-rx.indexIn("Please write the letter"); // returns 17
-//! [6]
-
-
-//! [7]
-QString captured = rx.cap(1); // captured == "letter"
-//! [7]
-
-
-//! [8]
-QRegExp rx("&(?!amp;)"); // match ampersands but not &amp;
-QString line1 = "This & that";
-line1.replace(rx, "&amp;");
-// line1 == "This &amp; that"
-QString line2 = "His &amp; hers & theirs";
-line2.replace(rx, "&amp;");
-// line2 == "His &amp; hers &amp; theirs"
-//! [8]
-
-
-//! [9]
-QString str = "One Eric another Eirik, and an Ericsson. "
- "How many Eiriks, Eric?";
-QRegExp rx("\\b(Eric|Eirik)\\b"); // match Eric or Eirik
-int pos = 0; // where we are in the string
-int count = 0; // how many Eric and Eirik's we've counted
-while (pos >= 0) {
- pos = rx.indexIn(str, pos);
- if (pos >= 0) {
- ++pos; // move along in str
- ++count; // count our Eric or Eirik
- }
-}
-//! [9]
-
-
-//! [10]
-str = "The Qt Company Ltd\tqt.io\tFinland";
-QString company, web, country;
-rx.setPattern("^([^\t]+)\t([^\t]+)\t([^\t]+)$");
-if (rx.indexIn(str) != -1) {
- company = rx.cap(1);
- web = rx.cap(2);
- country = rx.cap(3);
-}
-//! [10]
-
-
-//! [11]
-QStringList field = str.split("\t");
-//! [11]
-
-
-//! [12]
-QRegExp rx("*.html");
-rx.setPatternSyntax(QRegExp::Wildcard);
-rx.exactMatch("index.html"); // returns true
-rx.exactMatch("default.htm"); // returns false
-rx.exactMatch("readme.txt"); // returns false
-//! [12]
-
-
-//! [13]
-QString str = "offsets: 1.23 .50 71.00 6.00";
-QRegExp rx("\\d*\\.\\d+"); // primitive floating point matching
-int count = 0;
-int pos = 0;
-while ((pos = rx.indexIn(str, pos)) != -1) {
- ++count;
- pos += rx.matchedLength();
-}
-// pos will be 9, 14, 18 and finally 24; count will end up as 4
-//! [13]
-
-
-//! [14]
-QRegExp rx("(\\d+)(\\s*)(cm|inch(es)?)");
-int pos = rx.indexIn("Length: 36 inches");
-QStringList list = rx.capturedTexts();
-// list is now ("36 inches", "36", " ", "inches", "es")
-//! [14]
-
-
-//! [15]
-QRegExp rx("(\\d+)(?:\\s*)(cm|inch(?:es)?)");
-int pos = rx.indexIn("Length: 36 inches");
-QStringList list = rx.capturedTexts();
-// list is now ("36 inches", "36", "inches")
-//! [15]
-
-
-//! [16]
-QStringList list = rx.capturedTexts();
-QStringList::iterator it = list.begin();
-while (it != list.end()) {
- myProcessing(*it);
- ++it;
-}
-//! [16]
-
-
-//! [17]
-QRegExp rxlen("(\\d+)(?:\\s*)(cm|inch)");
-int pos = rxlen.indexIn("Length: 189cm");
-if (pos > -1) {
- QString value = rxlen.cap(1); // "189"
- QString unit = rxlen.cap(2); // "cm"
- // ...
-}
-//! [17]
-
-
-//! [18]
-QRegExp rx("/([a-z]+)/([a-z]+)");
-rx.indexIn("Output /dev/null"); // returns 7 (position of /dev/null)
-rx.pos(0); // returns 7 (position of /dev/null)
-rx.pos(1); // returns 8 (position of dev)
-rx.pos(2); // returns 12 (position of null)
-//! [18]
-
-
-//! [19]
-s1 = QRegExp::escape("bingo"); // s1 == "bingo"
-s2 = QRegExp::escape("f(x)"); // s2 == "f\\(x\\)"
-//! [19]
-
-
-//! [20]
-QRegExp rx("(" + QRegExp::escape(name) +
- "|" + QRegExp::escape(alias) + ")");
-//! [20]
-
-{
-//! [21]
-QString p("a .*|pattern");
-
-// re matches exactly the pattern string p
-QRegularExpression re(QRegularExpression::anchoredPattern(p));
-//! [21]
-}
diff --git a/src/corelib/text/qregexp.cpp b/src/corelib/text/qregexp.cpp
deleted file mode 100644
index d7a2434b52..0000000000
--- a/src/corelib/text/qregexp.cpp
+++ /dev/null
@@ -1,5039 +0,0 @@
-/****************************************************************************
-**
-** Copyright (C) 2016 The Qt Company Ltd.
-** Contact: https://www.qt.io/licensing/
-**
-** This file is part of the QtCore module of the Qt Toolkit.
-**
-** $QT_BEGIN_LICENSE:LGPL$
-** Commercial License Usage
-** Licensees holding valid commercial Qt licenses may use this file in
-** accordance with the commercial license agreement provided with the
-** Software or, alternatively, in accordance with the terms contained in
-** a written agreement between you and The Qt Company. For licensing terms
-** and conditions see https://www.qt.io/terms-conditions. For further
-** information use the contact form at https://www.qt.io/contact-us.
-**
-** GNU Lesser General Public License Usage
-** Alternatively, this file may be used under the terms of the GNU Lesser
-** General Public License version 3 as published by the Free Software
-** Foundation and appearing in the file LICENSE.LGPL3 included in the
-** packaging of this file. Please review the following information to
-** ensure the GNU Lesser General Public License version 3 requirements
-** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
-**
-** GNU General Public License Usage
-** Alternatively, this file may be used under the terms of the GNU
-** General Public License version 2.0 or (at your option) the GNU General
-** Public license version 3 or any later version approved by the KDE Free
-** Qt Foundation. The licenses are as published by the Free Software
-** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
-** included in the packaging of this file. Please review the following
-** information to ensure the GNU General Public License requirements will
-** be met: https://www.gnu.org/licenses/gpl-2.0.html and
-** https://www.gnu.org/licenses/gpl-3.0.html.
-**
-** $QT_END_LICENSE$
-**
-****************************************************************************/
-
-#include "qregexp.h"
-
-#include "qalgorithms.h"
-#include "qbitarray.h"
-#include "qcache.h"
-#include "qdatastream.h"
-#include "qdebug.h"
-#include "qhashfunctions.h"
-#include "qlist.h"
-#include "qmap.h"
-#include "qmutex.h"
-#include "qstring.h"
-#include "qstringlist.h"
-#include "qstringmatcher.h"
-#include "private/qlocking_p.h"
-
-#include <limits.h>
-#include <algorithm>
-
-QT_BEGIN_NAMESPACE
-
-// error strings for the regexp parser
-#define RXERR_OK QT_TRANSLATE_NOOP("QRegExp", "no error occurred")
-#define RXERR_DISABLED QT_TRANSLATE_NOOP("QRegExp", "disabled feature used")
-#define RXERR_CHARCLASS QT_TRANSLATE_NOOP("QRegExp", "bad char class syntax")
-#define RXERR_LOOKAHEAD QT_TRANSLATE_NOOP("QRegExp", "bad lookahead syntax")
-#define RXERR_LOOKBEHIND QT_TRANSLATE_NOOP("QRegExp", "lookbehinds not supported, see QTBUG-2371")
-#define RXERR_REPETITION QT_TRANSLATE_NOOP("QRegExp", "bad repetition syntax")
-#define RXERR_OCTAL QT_TRANSLATE_NOOP("QRegExp", "invalid octal value")
-#define RXERR_LEFTDELIM QT_TRANSLATE_NOOP("QRegExp", "missing left delim")
-#define RXERR_END QT_TRANSLATE_NOOP("QRegExp", "unexpected end")
-#define RXERR_LIMIT QT_TRANSLATE_NOOP("QRegExp", "met internal limit")
-#define RXERR_INTERVAL QT_TRANSLATE_NOOP("QRegExp", "invalid interval")
-#define RXERR_CATEGORY QT_TRANSLATE_NOOP("QRegExp", "invalid category")
-
-/*!
- \class QRegExp
- \inmodule QtCore
- \obsolete Use QRegularExpression instead
- \reentrant
- \brief The QRegExp class provides pattern matching using regular expressions.
-
- \ingroup tools
- \ingroup shared
-
- \keyword regular expression
-
- This class is deprecated in Qt 6. Please use QRegularExpression instead
- for all new code. For guidelines on porting old code from QRegExp to
- QRegularExpression, see \l{Porting to QRegularExpression}.
-
- A regular expression, or "regexp", is a pattern for matching
- substrings in a text. This is useful in many contexts, e.g.,
-
- \table
- \row \li Validation
- \li A regexp can test whether a substring meets some criteria,
- e.g. is an integer or contains no whitespace.
- \row \li Searching
- \li A regexp provides more powerful pattern matching than
- simple substring matching, e.g., match one of the words
- \e{mail}, \e{letter} or \e{correspondence}, but none of the
- words \e{email}, \e{mailman}, \e{mailer}, \e{letterbox}, etc.
- \row \li Search and Replace
- \li A regexp can replace all occurrences of a substring with a
- different substring, e.g., replace all occurrences of \e{&}
- with \e{\&amp;} except where the \e{&} is already followed by
- an \e{amp;}.
- \row \li String Splitting
- \li A regexp can be used to identify where a string should be
- split apart, e.g. splitting tab-delimited strings.
- \endtable
-
- This documentation presents a brief introduction to regexps, a
- description of Qt's regexp language, some examples, and the function
- documentation itself. QRegExp is modeled on Perl's regexp
- language. It fully supports Unicode. QRegExp can also be used in a
- simpler, \e{wildcard mode} that is similar to the functionality
- found in command shells. The syntax rules used by QRegExp can be
- changed with setPatternSyntax(). In particular, the pattern syntax
- can be set to QRegExp::FixedString, which means the pattern to be
- matched is interpreted as a plain string, i.e., special characters
- (e.g., backslash) are not escaped.
-
- A good text on regexps is \e {Mastering Regular Expressions}
- (Third Edition) by Jeffrey E. F. Friedl, ISBN 0-596-52812-4.
-
- \note In Qt 5, the new QRegularExpression class provides a Perl
- compatible implementation of regular expressions and is recommended
- in place of QRegExp.
-
- \tableofcontents
-
- \section1 Introduction
-
- Regexps are built up from expressions, quantifiers, and
- assertions. The simplest expression is a character, e.g. \b{x}
- or \b{5}. An expression can also be a set of characters
- enclosed in square brackets. \b{[ABCD]} will match an \b{A}
- or a \b{B} or a \b{C} or a \b{D}. We can write this same
- expression as \b{[A-D]}, and an expression to match any
- capital letter in the English alphabet is written as
- \b{[A-Z]}.
-
- A quantifier specifies the number of occurrences of an expression
- that must be matched. \b{x{1,1}} means match one and only one
- \b{x}. \b{x{1,5}} means match a sequence of \b{x}
- characters that contains at least one \b{x} but no more than
- five.
-
- Note that in general regexps cannot be used to check for balanced
- brackets or tags. For example, a regexp can be written to match an
- opening html \c{<b>} and its closing \c{</b>}, if the \c{<b>} tags
- are not nested, but if the \c{<b>} tags are nested, that same
- regexp will match an opening \c{<b>} tag with the wrong closing
- \c{</b>}. For the fragment \c{<b>bold <b>bolder</b></b>}, the
- first \c{<b>} would be matched with the first \c{</b>}, which is
- not correct. However, it is possible to write a regexp that will
- match nested brackets or tags correctly, but only if the number of
- nesting levels is fixed and known. If the number of nesting levels
- is not fixed and known, it is impossible to write a regexp that
- will not fail.
-
- Suppose we want a regexp to match integers in the range 0 to 99.
- At least one digit is required, so we start with the expression
- \b{[0-9]{1,1}}, which matches a single digit exactly once. This
- regexp matches integers in the range 0 to 9. To match integers up
- to 99, increase the maximum number of occurrences to 2, so the
- regexp becomes \b{[0-9]{1,2}}. This regexp satisfies the
- original requirement to match integers from 0 to 99, but it will
- also match integers that occur in the middle of strings. If we
- want the matched integer to be the whole string, we must use the
- anchor assertions, \b{^} (caret) and \b{$} (dollar). When
- \b{^} is the first character in a regexp, it means the regexp
- must match from the beginning of the string. When \b{$} is the
- last character of the regexp, it means the regexp must match to
- the end of the string. The regexp becomes \b{^[0-9]{1,2}$}.
- Note that assertions, e.g. \b{^} and \b{$}, do not match
- characters but locations in the string.
-
- If you have seen regexps described elsewhere, they may have looked
- different from the ones shown here. This is because some sets of
- characters and some quantifiers are so common that they have been
- given special symbols to represent them. \b{[0-9]} can be
- replaced with the symbol \b{\\d}. The quantifier to match
- exactly one occurrence, \b{{1,1}}, can be replaced with the
- expression itself, i.e. \b{x{1,1}} is the same as \b{x}. So
- our 0 to 99 matcher could be written as \b{^\\d{1,2}$}. It can
- also be written \b{^\\d\\d{0,1}$}, i.e. \e{From the start of
- the string, match a digit, followed immediately by 0 or 1 digits}.
- In practice, it would be written as \b{^\\d\\d?$}. The \b{?}
- is shorthand for the quantifier \b{{0,1}}, i.e. 0 or 1
- occurrences. \b{?} makes an expression optional. The regexp
- \b{^\\d\\d?$} means \e{From the beginning of the string, match
- one digit, followed immediately by 0 or 1 more digits, followed
- immediately by end of string}.
-
- To write a regexp that matches one of the words 'mail' \e or
- 'letter' \e or 'correspondence' but does not match words that
- contain these words, e.g., 'email', 'mailman', 'mailer', and
- 'letterbox', start with a regexp that matches 'mail'. Expressed
- fully, the regexp is \b{m{1,1}a{1,1}i{1,1}l{1,1}}, but because
- a character expression is automatically quantified by
- \b{{1,1}}, we can simplify the regexp to \b{mail}, i.e., an
- 'm' followed by an 'a' followed by an 'i' followed by an 'l'. Now
- we can use the vertical bar \b{|}, which means \b{or}, to
- include the other two words, so our regexp for matching any of the
- three words becomes \b{mail|letter|correspondence}. Match
- 'mail' \b{or} 'letter' \b{or} 'correspondence'. While this
- regexp will match one of the three words we want to match, it will
- also match words we don't want to match, e.g., 'email'. To
- prevent the regexp from matching unwanted words, we must tell it
- to begin and end the match at word boundaries. First we enclose
- our regexp in parentheses, \b{(mail|letter|correspondence)}.
- Parentheses group expressions together, and they identify a part
- of the regexp that we wish to \l{capturing text}{capture}.
- Enclosing the expression in parentheses allows us to use it as a
- component in more complex regexps. It also allows us to examine
- which of the three words was actually matched. To force the match
- to begin and end on word boundaries, we enclose the regexp in
- \b{\\b} \e{word boundary} assertions:
- \b{\\b(mail|letter|correspondence)\\b}. Now the regexp means:
- \e{Match a word boundary, followed by the regexp in parentheses,
- followed by a word boundary}. The \b{\\b} assertion matches a
- \e position in the string, not a \e character. A word boundary is
- the position between a word character and a non-word character
- (e.g., a space or newline) or the beginning or end of the string.
-
- If we want to replace ampersand characters with the HTML entity
- \b{\&amp;}, the regexp to match is simply \b{\&}. But this
- regexp will also match ampersands that have already been converted
- to HTML entities. We want to replace only ampersands that are not
- already followed by \b{amp;}. For this, we need the negative
- lookahead assertion, \b{(?!}__\b{)}. The regexp can then be
- written as \b{\&(?!amp;)}, i.e. \e{Match an ampersand that is}
- \b{not} \e{followed by} \b{amp;}.
-
- If we want to count all the occurrences of 'Eric' and 'Eirik' in a
- string, two valid solutions are \b{\\b(Eric|Eirik)\\b} and
- \b{\\bEi?ri[ck]\\b}. The word boundary assertion '\\b' is
- required to avoid matching words that contain either name,
- e.g. 'Ericsson'. Note that the second regexp matches more
- spellings than we want: 'Eric', 'Erik', 'Eiric' and 'Eirik'.
-
- Some of the examples discussed above are implemented in the
- \l{#code-examples}{code examples} section.
-
- \target characters-and-abbreviations-for-sets-of-characters
- \section1 Characters and Abbreviations for Sets of Characters
-
- \table
- \header \li Element \li Meaning
- \row \li \b{c}
- \li A character represents itself unless it has a special
- regexp meaning. e.g. \b{c} matches the character \e c.
- \row \li \b{\\c}
- \li A character that follows a backslash matches the character
- itself, except as specified below. e.g., To match a literal
- caret at the beginning of a string, write \b{\\^}.
- \row \li \b{\\a}
- \li Matches the ASCII bell (BEL, 0x07).
- \row \li \b{\\f}
- \li Matches the ASCII form feed (FF, 0x0C).
- \row \li \b{\\n}
- \li Matches the ASCII line feed (LF, 0x0A, Unix newline).
- \row \li \b{\\r}
- \li Matches the ASCII carriage return (CR, 0x0D).
- \row \li \b{\\t}
- \li Matches the ASCII horizontal tab (HT, 0x09).
- \row \li \b{\\v}
- \li Matches the ASCII vertical tab (VT, 0x0B).
- \row \li \b{\\x\e{hhhh}}
- \li Matches the Unicode character corresponding to the
- hexadecimal number \e{hhhh} (between 0x0000 and 0xFFFF).
- \row \li \b{\\0\e{ooo}} (i.e., \\zero \e{ooo})
- \li matches the ASCII/Latin1 character for the octal number
- \e{ooo} (between 0 and 0377).
- \row \li \b{. (dot)}
- \li Matches any character (including newline).
- \row \li \b{\\d}
- \li Matches a digit (QChar::isDigit()).
- \row \li \b{\\D}
- \li Matches a non-digit.
- \row \li \b{\\s}
- \li Matches a whitespace character (QChar::isSpace()).
- \row \li \b{\\S}
- \li Matches a non-whitespace character.
- \row \li \b{\\w}
- \li Matches a word character (QChar::isLetterOrNumber(), QChar::isMark(), or '_').
- \row \li \b{\\W}
- \li Matches a non-word character.
- \row \li \b{\\\e{n}}
- \li The \e{n}-th backreference, e.g. \\1, \\2, etc.
- \endtable
-
- \b{Note:} The C++ compiler transforms backslashes in strings.
- To include a \b{\\} in a regexp, enter it twice, i.e. \c{\\}.
- To match the backslash character itself, enter it four times, i.e.
- \c{\\\\}.
-
- \target sets-of-characters
- \section1 Sets of Characters
-
- Square brackets mean match any character contained in the square
- brackets. The character set abbreviations described above can
- appear in a character set in square brackets. Except for the
- character set abbreviations and the following two exceptions,
- characters do not have special meanings in square brackets.
-
- \table
- \row \li \b{^}
-
- \li The caret negates the character set if it occurs as the
- first character (i.e. immediately after the opening square
- bracket). \b{[abc]} matches 'a' or 'b' or 'c', but
- \b{[^abc]} matches anything \e but 'a' or 'b' or 'c'.
-
- \row \li \b{-}
-
- \li The dash indicates a range of characters. \b{[W-Z]}
- matches 'W' or 'X' or 'Y' or 'Z'.
-
- \endtable
-
- Using the predefined character set abbreviations is more portable
- than using character ranges across platforms and languages. For
- example, \b{[0-9]} matches a digit in Western alphabets but
- \b{\\d} matches a digit in \e any alphabet.
-
- Note: In other regexp documentation, sets of characters are often
- called "character classes".
-
- \target quantifiers
- \section1 Quantifiers
-
- By default, an expression is automatically quantified by
- \b{{1,1}}, i.e. it should occur exactly once. In the following
- list, \b{\e {E}} stands for expression. An expression is a
- character, or an abbreviation for a set of characters, or a set of
- characters in square brackets, or an expression in parentheses.
-
- \table
- \row \li \b{\e {E}?}
-
- \li Matches zero or one occurrence of \e E. This quantifier
- means \e{The previous expression is optional}, because it
- will match whether or not the expression is found. \b{\e
- {E}?} is the same as \b{\e {E}{0,1}}. e.g., \b{dents?}
- matches 'dent' or 'dents'.
-
- \row \li \b{\e {E}+}
-
- \li Matches one or more occurrences of \e E. \b{\e {E}+} is
- the same as \b{\e {E}{1,}}. e.g., \b{0+} matches '0',
- '00', '000', etc.
-
- \row \li \b{\e {E}*}
-
- \li Matches zero or more occurrences of \e E. It is the same
- as \b{\e {E}{0,}}. The \b{*} quantifier is often used
- in error where \b{+} should be used. For example, if
- \b{\\s*$} is used in an expression to match strings that
- end in whitespace, it will match every string because
- \b{\\s*$} means \e{Match zero or more whitespaces followed
- by end of string}. The correct regexp to match strings that
- have at least one trailing whitespace character is
- \b{\\s+$}.
-
- \row \li \b{\e {E}{n}}
-
- \li Matches exactly \e n occurrences of \e E. \b{\e {E}{n}}
- is the same as repeating \e E \e n times. For example,
- \b{x{5}} is the same as \b{xxxxx}. It is also the same
- as \b{\e {E}{n,n}}, e.g. \b{x{5,5}}.
-
- \row \li \b{\e {E}{n,}}
- \li Matches at least \e n occurrences of \e E.
-
- \row \li \b{\e {E}{,m}}
- \li Matches at most \e m occurrences of \e E. \b{\e {E}{,m}}
- is the same as \b{\e {E}{0,m}}.
-
- \row \li \b{\e {E}{n,m}}
- \li Matches at least \e n and at most \e m occurrences of \e E.
- \endtable
-
- To apply a quantifier to more than just the preceding character,
- use parentheses to group characters together in an expression. For
- example, \b{tag+} matches a 't' followed by an 'a' followed by
- at least one 'g', whereas \b{(tag)+} matches at least one
- occurrence of 'tag'.
-
- Note: Quantifiers are normally "greedy". They always match as much
- text as they can. For example, \b{0+} matches the first zero it
- finds and all the consecutive zeros after the first zero. Applied
- to '20005', it matches '2\underline{000}5'. Quantifiers can be made
- non-greedy, see setMinimal().
-
- \target capturing parentheses
- \target backreferences
- \section1 Capturing Text
-
- Parentheses allow us to group elements together so that we can
- quantify and capture them. For example if we have the expression
- \b{mail|letter|correspondence} that matches a string we know
- that \e one of the words matched but not which one. Using
- parentheses allows us to "capture" whatever is matched within
- their bounds, so if we used \b{(mail|letter|correspondence)}
- and matched this regexp against the string "I sent you some email"
- we can use the cap() or capturedTexts() functions to extract the
- matched characters, in this case 'mail'.
-
- We can use captured text within the regexp itself. To refer to the
- captured text we use \e backreferences which are indexed from 1,
- the same as for cap(). For example we could search for duplicate
- words in a string using \b{\\b(\\w+)\\W+\\1\\b} which means match a
- word boundary followed by one or more word characters followed by
- one or more non-word characters followed by the same text as the
- first parenthesized expression followed by a word boundary.
-
- If we want to use parentheses purely for grouping and not for
- capturing we can use the non-capturing syntax, e.g.
- \b{(?:green|blue)}. Non-capturing parentheses begin '(?:' and
- end ')'. In this example we match either 'green' or 'blue' but we
- do not capture the match so we only know whether or not we matched
- but not which color we actually found. Using non-capturing
- parentheses is more efficient than using capturing parentheses
- since the regexp engine has to do less book-keeping.
-
- Both capturing and non-capturing parentheses may be nested.
-
- \target greedy quantifiers
-
- For historical reasons, quantifiers (e.g. \b{*}) that apply to
- capturing parentheses are more "greedy" than other quantifiers.
- For example, \b{a*(a*)} will match "aaa" with cap(1) == "aaa".
- This behavior is different from what other regexp engines do
- (notably, Perl). To obtain a more intuitive capturing behavior,
- specify QRegExp::RegExp2 to the QRegExp constructor or call
- setPatternSyntax(QRegExp::RegExp2).
-
- \target cap_in_a_loop
-
- When the number of matches cannot be determined in advance, a
- common idiom is to use cap() in a loop. For example:
-
- \snippet code/src_corelib_text_qregexp.cpp 0
-
- \target assertions
- \section1 Assertions
-
- Assertions make some statement about the text at the point where
- they occur in the regexp but they do not match any characters. In
- the following list \b{\e {E}} stands for any expression.
-
- \table
- \row \li \b{^}
- \li The caret signifies the beginning of the string. If you
- wish to match a literal \c{^} you must escape it by
- writing \c{\\^}. For example, \b{^#include} will only
- match strings which \e begin with the characters '#include'.
- (When the caret is the first character of a character set it
- has a special meaning, see \l{#sets-of-characters}{Sets of Characters}.)
-
- \row \li \b{$}
- \li The dollar signifies the end of the string. For example
- \b{\\d\\s*$} will match strings which end with a digit
- optionally followed by whitespace. If you wish to match a
- literal \c{$} you must escape it by writing
- \c{\\$}.
-
- \row \li \b{\\b}
- \li A word boundary. For example the regexp
- \b{\\bOK\\b} means match immediately after a word
- boundary (e.g. start of string or whitespace) the letter 'O'
- then the letter 'K' immediately before another word boundary
- (e.g. end of string or whitespace). But note that the
- assertion does not actually match any whitespace so if we
- write \b{(\\bOK\\b)} and we have a match it will only
- contain 'OK' even if the string is "It's \underline{OK} now".
-
- \row \li \b{\\B}
- \li A non-word boundary. This assertion is true wherever
- \b{\\b} is false. For example if we searched for
- \b{\\Bon\\B} in "Left on" the match would fail (space
- and end of string aren't non-word boundaries), but it would
- match in "t\underline{on}ne".
-
- \row \li \b{(?=\e E)}
- \li Positive lookahead. This assertion is true if the
- expression matches at this point in the regexp. For example,
- \b{const(?=\\s+char)} matches 'const' whenever it is
- followed by 'char', as in 'static \underline{const} char *'.
- (Compare with \b{const\\s+char}, which matches 'static
- \underline{const char} *'.)
-
- \row \li \b{(?!\e E)}
- \li Negative lookahead. This assertion is true if the
- expression does not match at this point in the regexp. For
- example, \b{const(?!\\s+char)} matches 'const' \e except
- when it is followed by 'char'.
- \endtable
-
- \target QRegExp wildcard matching
- \section1 Wildcard Matching
-
- Most command shells such as \e bash or \e cmd.exe support "file
- globbing", the ability to identify a group of files by using
- wildcards. The setPatternSyntax() function is used to switch
- between regexp and wildcard mode. Wildcard matching is much
- simpler than full regexps and has only four features:
-
- \table
- \row \li \b{c}
- \li Any character represents itself apart from those mentioned
- below. Thus \b{c} matches the character \e c.
- \row \li \b{?}
- \li Matches any single character. It is the same as
- \b{.} in full regexps.
- \row \li \b{*}
- \li Matches zero or more of any characters. It is the
- same as \b{.*} in full regexps.
- \row \li \b{[...]}
- \li Sets of characters can be represented in square brackets,
- similar to full regexps. Within the character class, like
- outside, backslash has no special meaning.
- \endtable
-
- In the mode Wildcard, the wildcard characters cannot be
- escaped. In the mode WildcardUnix, the character '\\' escapes the
- wildcard.
-
- For example if we are in wildcard mode and have strings which
- contain filenames we could identify HTML files with \b{*.html}.
- This will match zero or more characters followed by a dot followed
- by 'h', 't', 'm' and 'l'.
-
- To test a string against a wildcard expression, use exactMatch().
- For example:
-
- \snippet code/src_corelib_text_qregexp.cpp 1
-
- \target perl-users
- \section1 Notes for Perl Users
-
- Most of the character class abbreviations supported by Perl are
- supported by QRegExp, see \l{#characters-and-abbreviations-for-sets-of-characters}
- {characters and abbreviations for sets of characters}.
-
- In QRegExp, apart from within character classes, \c{^} always
- signifies the start of the string, so carets must always be
- escaped unless used for that purpose. In Perl the meaning of caret
- varies automagically depending on where it occurs so escaping it
- is rarely necessary. The same applies to \c{$} which in
- QRegExp always signifies the end of the string.
-
- QRegExp's quantifiers are the same as Perl's greedy quantifiers
- (but see the \l{greedy quantifiers}{note above}). Non-greedy
- matching cannot be applied to individual quantifiers, but can be
- applied to all the quantifiers in the pattern. For example, to
- match the Perl regexp \b{ro+?m} requires:
-
- \snippet code/src_corelib_text_qregexp.cpp 2
-
- The equivalent of Perl's \c{/i} option is
- setCaseSensitivity(Qt::CaseInsensitive).
-
- Perl's \c{/g} option can be emulated using a \l{#cap_in_a_loop}{loop}.
-
- In QRegExp \b{.} matches any character, therefore all QRegExp
- regexps have the equivalent of Perl's \c{/s} option. QRegExp
- does not have an equivalent to Perl's \c{/m} option, but this
- can be emulated in various ways for example by splitting the input
- into lines or by looping with a regexp that searches for newlines.
-
- Because QRegExp is string oriented, there are no \\A, \\Z, or \\z
- assertions. The \\G assertion is not supported but can be emulated
- in a loop.
-
- Perl's $& is cap(0) or capturedTexts()[0]. There are no QRegExp
- equivalents for $`, $' or $+. Perl's capturing variables, $1, $2,
- ... correspond to cap(1) or capturedTexts()[1], cap(2) or
- capturedTexts()[2], etc.
-
- To substitute a pattern use QString::replace().
-
- Perl's extended \c{/x} syntax is not supported, nor are
- directives, e.g. (?i), or regexp comments, e.g. (?#comment). On
- the other hand, C++'s rules for literal strings can be used to
- achieve the same:
-
- \snippet code/src_corelib_text_qregexp.cpp 3
-
- Both zero-width positive and zero-width negative lookahead
- assertions (?=pattern) and (?!pattern) are supported with the same
- syntax as Perl. Perl's lookbehind assertions, "independent"
- subexpressions and conditional expressions are not supported.
-
- Non-capturing parentheses are also supported, with the same
- (?:pattern) syntax.
-
- See QString::split() and QStringList::join() for equivalents
- to Perl's split and join functions.
-
- Note: because C++ transforms \\'s they must be written \e twice in
- code, e.g. \b{\\b} must be written \b{\\\\b}.
-
- \target code-examples
- \section1 Code Examples
-
- \snippet code/src_corelib_text_qregexp.cpp 4
-
- The third string matches '\underline{6}'. This is a simple validation
- regexp for integers in the range 0 to 99.
-
- \snippet code/src_corelib_text_qregexp.cpp 5
-
- The second string matches '\underline{This_is-OK}'. We've used the
- character set abbreviation '\\S' (non-whitespace) and the anchors
- to match strings which contain no whitespace.
-
- In the following example we match strings containing 'mail' or
- 'letter' or 'correspondence' but only match whole words i.e. not
- 'email'
-
- \snippet code/src_corelib_text_qregexp.cpp 6
-
- The second string matches "Please write the \underline{letter}". The
- word 'letter' is also captured (because of the parentheses). We
- can see what text we've captured like this:
-
- \snippet code/src_corelib_text_qregexp.cpp 7
-
- This will capture the text from the first set of capturing
- parentheses (counting capturing left parentheses from left to
- right). The parentheses are counted from 1 since cap(0) is the
- whole matched regexp (equivalent to '&' in most regexp engines).
-
- \snippet code/src_corelib_text_qregexp.cpp 8
-
- Here we've passed the QRegExp to QString's replace() function to
- replace the matched text with new text.
-
- \snippet code/src_corelib_text_qregexp.cpp 9
-
- We've used the indexIn() function to repeatedly match the regexp in
- the string. Note that instead of moving forward by one character
- at a time \c pos++ we could have written \c {pos +=
- rx.matchedLength()} to skip over the already matched string. The
- count will equal 3, matching 'One \underline{Eric} another
- \underline{Eirik}, and an Ericsson. How many Eiriks, \underline{Eric}?'; it
- doesn't match 'Ericsson' or 'Eiriks' because they are not bounded
- by word boundaries.
-
- One common use of regexps is to split lines of delimited data into
- their component fields.
-
- \snippet code/src_corelib_text_qregexp.cpp 10
-
- In this example our input lines have the format company name, web
- address and country. Unfortunately the regexp is rather long and
- not very versatile -- the code will break if we add any more
- fields. A simpler and better solution is to look for the
- separator, '\\t' in this case, and take the surrounding text. The
- QString::split() function can take a separator string or regexp
- as an argument and split a string accordingly.
-
- \snippet code/src_corelib_text_qregexp.cpp 11
-
- Here field[0] is the company, field[1] the web address and so on.
-
- To imitate the matching of a shell we can use wildcard mode.
-
- \snippet code/src_corelib_text_qregexp.cpp 12
-
- Wildcard matching can be convenient because of its simplicity, but
- any wildcard regexp can be defined using full regexps, e.g.
- \b{.*\\.html$}. Notice that we can't match both \c .html and \c
- .htm files with a wildcard unless we use \b{*.htm*} which will
- also match 'test.html.bak'. A full regexp gives us the precision
- we need, \b{.*\\.html?$}.
-
- QRegExp can match case insensitively using setCaseSensitivity(),
- and can use non-greedy matching, see setMinimal(). By
- default QRegExp uses full regexps but this can be changed with
- setPatternSyntax(). Searching can be done forward with indexIn() or backward
- with lastIndexIn(). Captured text can be accessed using
- capturedTexts() which returns a string list of all captured
- strings, or using cap() which returns the captured string for the
- given index. The pos() function takes a match index and returns
- the position in the string where the match was made (or -1 if
- there was no match).
-
- \sa QString, QStringList, QSortFilterProxyModel,
- {tools/regexp}{Regular Expression Example}
-
-
- \section1 Porting to QRegularExpression
-
- The QRegularExpression class introduced in Qt 5 is a big improvement upon
- QRegExp, in terms of APIs offered, supported pattern syntax and speed of
- execution. The biggest difference is that QRegularExpression simply holds a
- regular expression, and it's \e{not} modified when a match is requested.
- Instead, a QRegularExpressionMatch object is returned, in order to check
- the result of a match and extract the captured substring. The same applies
- with global matching and QRegularExpressionMatchIterator.
-
- Other differences are outlined below.
-
- \section2 Different pattern syntax
-
- Porting a regular expression from QRegExp to QRegularExpression may require
- changes to the pattern itself.
-
- In certain scenarios, QRegExp was too lenient and accepted patterns that
- are simply invalid when using QRegularExpression. These are fairly easy
- to detect, because the QRegularExpression objects built with these patterns
- are not valid (cf. QRegularExpression::isValid()).
-
- In other cases, a pattern ported from QRegExp to QRegularExpression may
- silently change semantics. Therefore, it is necessary to review the
- patterns used. The most notable cases of silent incompatibility are:
-
- \list
-
- \li Curly braces are needed in order to use a hexadecimal escape like
- \c{\xHHHH} with more than 2 digits. A pattern like \c{\x2022} needs to
- be ported to \c{\x{2022}}, or it will match a space (\c{0x20}) followed
- by the string \c{"22"}. In general, it is highly recommended to always use
- curly braces with the \c{\x} escape, no matter the amount of digits
- specified.
-
- \li A 0-to-n quantification like \c{{,n}} needs to be ported to \c{{0,n}} to
- preserve semantics. Otherwise, a pattern such as \c{\d{,3}} would
- actually match a digit followed by the exact string \c{"{,3}"}.
-
- \li QRegExp by default does Unicode-aware matching, while
- QRegularExpression requires a separate option; see below for more details.
-
- \li \c{.} in QRegExp does by default match all characters, including the
- newline character. QRegularExpression excludes the newline character by
- default. To include the newline character, set the
- QRegularExpression::DotMatchesEverythingOption pattern option.
-
- \endlist
-
- \section2 Porting from QRegExp::exactMatch()
-
- QRegExp::exactMatch() in Qt 4 served two purposes: it exactly matched
- a regular expression against a subject string, and it implemented partial
- matching.
-
- \section3 Porting from QRegExp's Exact Matching
-
- Exact matching indicates whether the regular expression matches the entire
- subject string. For example, on the subject string \c{"abc123"} the two
- classes yield the following results:
-
- \table
- \header \li \li QRegExp::exactMatch() \li QRegularExpressionMatch::hasMatch()
- \row \li \c{"\\d+"} \li \b false \li \b true
- \row \li \c{"[a-z]+\\d+"} \li \b true \li \b true
- \endtable
-
- Exact matching is not reflected in QRegularExpression. If you want
- to be sure that the subject string matches the regular expression
- exactly, you can wrap the pattern using the QRegularExpression::anchoredPattern()
- function:
-
- \snippet code/src_corelib_text_qregexp.cpp 21
-
- \section3 Porting from QRegExp's Partial Matching
-
- When using QRegExp::exactMatch(), if an exact match was not found, one
- could still find out how much of the subject string was matched by the
- regular expression by calling QRegExp::matchedLength(). If the returned length
- was equal to the subject string's length, then one could conclude that a partial
- match was found.
-
- QRegularExpression supports partial matching explicitly by means of the
- appropriate MatchType.
-
- \section2 Global matching
-
- Due to limitations of the QRegExp API it was impossible to implement global
- matching correctly (that is, like Perl does). In particular, patterns that
- can match 0 characters (like \c{"a*"}) are problematic.
-
- QRegularExpression::globalMatch() implements Perl global match correctly, and
- the returned iterator can be used to examine each result.
-
- \section2 Unicode properties support
-
- When using QRegExp, character classes such as \c{\w}, \c{\d}, etc. match
- characters with the corresponding Unicode property: for instance, \c{\d}
- matches any character with the Unicode Nd (decimal digit) property.
-
- Those character classes only match ASCII characters by default when using
- QRegularExpression: for instance, \c{\d} matches exactly a character in the
- \c{0-9} ASCII range. It is possible to change this behavior by using the
- UseUnicodePropertiesOption pattern option.
-
- \section2 Wildcard matching
-
- There is no direct way to do wildcard matching in QRegularExpression.
- However, the QRegularExpression::wildcardToRegularExpression() function is provided to translate
- glob patterns into a Perl-compatible regular expression that can be used
- for that purpose.
-
- \section2 Other pattern syntaxes
-
- QRegularExpression supports only Perl-compatible regular expressions.
-
- \section2 Minimal matching
-
- QRegExp::setMinimal() implemented minimal matching by simply reversing the
- greediness of the quantifiers (QRegExp did not support lazy quantifiers,
- like \c{*?}, \c{+?}, etc.). QRegularExpression instead does support greedy,
- lazy and possessive quantifiers. The InvertedGreedinessOption
- pattern option can be useful to emulate the effects of QRegExp::setMinimal():
- if enabled, it inverts the greediness of quantifiers (greedy ones become
- lazy and vice versa).
-
- \section2 Caret modes
-
- The AnchorAtOffsetMatchOption match option can be used to emulate the
- QRegExp::CaretAtOffset behavior. There is no equivalent for the other
- QRegExp::CaretMode modes.
-*/
-
-#if defined(Q_OS_VXWORKS) && defined(EOS)
-# undef EOS // drop the pre-existing EOS macro so the EOS constant below can be declared
-#endif
-
-const int NumBadChars = 64; // modulus used by BadChar(); its results lie in [0, NumBadChars)
-#define BadChar(ch) ((ch).unicode() % NumBadChars) // reduces a character's code unit to an index below NumBadChars
-
-const int NoOccurrence = INT_MAX; // presumably the "never occurs" marker of the first-occurrence arrays -- confirm
-const int EmptyCapture = INT_MAX; // presumably the marker for a capture that matched nothing -- confirm
-const int InftyLen = INT_MAX; // presumably stands for an unbounded length -- confirm
-const int InftyRep = 1025; // presumably the repetition count treated as unbounded -- confirm
-const int EOS = -1; // presumably the end-of-string marker -- confirm
-
-/*
-    Returns true if ch is an underscore, a letter or number, or a mark.
-*/
-static bool isWord(QChar ch)
-{
-    if (ch == QLatin1Char('_'))
-        return true;
-    return ch.isLetterOrNumber() || ch.isMark();
-}
-
-/*
-    Merges the ints of b into *a and stores the result in *a. A value
-    found in both lists at the merge point is kept only once.
-
-    The merge walks both lists front to back, so it presumably expects
-    both of them to be sorted in ascending order -- confirm with callers.
-*/
-static void mergeInto(QList<int> *a, const QList<int> &b)
-{
-    const int lenA = a->size();
-    const int lenB = b.size();
-
-    // Nothing to merge into: the result is simply b.
-    if (lenA == 0) {
-        *a = b;
-        return;
-    }
-#ifndef QT_NO_REGEXP_OPTIM
-    // Fast path: a single value larger than the last one in *a is appended.
-    if (lenB == 1 && a->at(lenA - 1) < b.at(0)) {
-        a->resize(lenA + 1);
-        (*a)[lenA] = b.at(0);
-        return;
-    }
-#endif
-    if (lenB < 1)
-        return;
-
-    int total = lenA + lenB; // shrinks by one for every duplicate skipped
-    QList<int> merged(total);
-    int ia = 0;
-    int ib = 0;
-    int out = 0;
-    for (; ia < lenA; ) {
-        if (ib >= lenB) {
-            // b is exhausted: copy the tail of *a in one go.
-            memcpy(merged.data() + out, a->constData() + ia, (lenA - ia) * sizeof(int));
-            break;
-        }
-        const int fromA = a->at(ia);
-        const int fromB = b.at(ib);
-        if (fromA == fromB) {
-            // Skip the copy from *a; the one from b is written later.
-            ++ia;
-            --total;
-        } else if (fromA < fromB) {
-            merged[out++] = fromA;
-            ++ia;
-        } else {
-            merged[out++] = fromB;
-            ++ib;
-        }
-    }
-    merged.resize(total);
-    // *a is exhausted first: copy the tail of b.
-    if (ib < lenB)
-        memcpy(merged.data() + out, b.constData() + ib, (lenB - ib) * sizeof(int));
-    *a = merged;
-}
-
-#ifndef QT_NO_REGEXP_WILDCARD
-/*
-    Translates a wildcard pattern to an equivalent regular expression
-    pattern (e.g., *.cpp to .*\.cpp).
-
-    '*' becomes ".*", '?' becomes ".", and the regexp metacharacters
-    $ ( ) + . ^ { | } are emitted with a backslash in front so that they
-    match literally. An unescaped '[' starts a character set that is
-    copied through up to its closing ']', with every backslash inside
-    it doubled.
-
-    If enableEscaping is true, it is possible to escape the wildcard
-    characters with a backslash.
-
-    NOTE(review): isEscaping is set after every backslash, even when
-    enableEscaping is false, and it stays set after a doubled
-    backslash; a '*' or '?' that follows is then emitted as a literal.
-    Confirm that this is intended before relying on it.
-*/
-static QString wc2rx(const QString &wc_str, const bool enableEscaping)
-{
-    const int wclen = wc_str.length();
-    QString rx;
-    int i = 0;
-    bool isEscaping = false; // the previous character is '\'
-    const QChar *wc = wc_str.unicode();
-
-    while (i < wclen) {
-        const QChar c = wc[i++];
-        switch (c.unicode()) {
-        case '\\':
-            if (enableEscaping) {
-                if (isEscaping) {
-                    rx += QLatin1String("\\\\");
-                } // we insert the \\ later if necessary
-                if (i == wclen) { // the end
-                    rx += QLatin1String("\\\\");
-                }
-            } else {
-                rx += QLatin1String("\\\\");
-            }
-            isEscaping = true;
-            break;
-        case '*':
-            if (isEscaping) {
-                rx += QLatin1String("\\*");
-                isEscaping = false;
-            } else {
-                rx += QLatin1String(".*");
-            }
-            break;
-        case '?':
-            if (isEscaping) {
-                rx += QLatin1String("\\?");
-                isEscaping = false;
-            } else {
-                rx += QLatin1Char('.');
-            }
-
-            break;
-        case '$':
-        case '(':
-        case ')':
-        case '+':
-        case '.':
-        case '^':
-        case '{':
-        case '|':
-        case '}':
-            // Regexp metacharacters with no wildcard meaning: always quoted.
-            if (isEscaping) {
-                isEscaping = false;
-                rx += QLatin1String("\\\\");
-            }
-            rx += QLatin1Char('\\');
-            rx += c;
-            break;
-        case '[':
-            if (isEscaping) {
-                isEscaping = false;
-                rx += QLatin1String("\\[");
-            } else {
-                rx += c;
-                // Check the bound first: the buffer returned by unicode()
-                // is not guaranteed to be terminated, so wc[wclen] must
-                // not be read when '[' is the last character.
-                if (i < wclen && wc[i] == QLatin1Char('^'))
-                    rx += wc[i++];
-                if (i < wclen) {
-                    // A ']' directly after the opening is part of the set.
-                    if (wc[i] == QLatin1Char(']'))
-                        rx += wc[i++];
-                    while (i < wclen && wc[i] != QLatin1Char(']')) {
-                        if (wc[i] == QLatin1Char('\\'))
-                            rx += QLatin1Char('\\');
-                        rx += wc[i++];
-                    }
-                }
-            }
-            break;
-
-        case ']':
-            if (isEscaping) {
-                isEscaping = false;
-                rx += QLatin1String("\\");
-            }
-            rx += c;
-            break;
-
-        default:
-            // An ordinary character after a backslash: keep the backslash
-            // as a literal one.
-            if (isEscaping) {
-                isEscaping = false;
-                rx += QLatin1String("\\\\");
-            }
-            rx += c;
-        }
-    }
-    return rx;
-}
-#endif
-
-/*
-    Returns the index associated with caretMode: 0 for CaretAtZero,
-    offset for CaretAtOffset and -1 for any other mode.
-*/
-static int caretIndex(int offset, QRegExp::CaretMode caretMode)
-{
-    switch (caretMode) {
-    case QRegExp::CaretAtZero:
-        return 0;
-    case QRegExp::CaretAtOffset:
-        return offset;
-    default: // QRegExp::CaretWontMatch
-        return -1;
-    }
-}
-
-/*
-    The QRegExpEngineKey struct uniquely identifies an engine by its
-    pattern text, pattern syntax and case sensitivity.
-*/
-struct QRegExpEngineKey
-{
-    QString pattern;
-    QRegExp::PatternSyntax patternSyntax;
-    Qt::CaseSensitivity cs;
-
-    QRegExpEngineKey(const QString &pat, QRegExp::PatternSyntax syntax,
-                     Qt::CaseSensitivity sensitivity)
-        : pattern(pat), patternSyntax(syntax), cs(sensitivity)
-    {
-    }
-
-    // Resets the key: empty pattern, QRegExp::RegExp syntax, case sensitive.
-    void clear()
-    {
-        cs = Qt::CaseSensitive;
-        patternSyntax = QRegExp::RegExp;
-        pattern.clear();
-    }
-};
-
-/*
-    Two keys are equal when their pattern, pattern syntax and case
-    sensitivity are all equal.
-*/
-static bool operator==(const QRegExpEngineKey &key1, const QRegExpEngineKey &key2)
-{
-    if (key1.cs != key2.cs || key1.patternSyntax != key2.patternSyntax)
-        return false;
-    return key1.pattern == key2.pattern;
-}
-
-/*
-    Hashes a key by feeding seed, pattern, pattern syntax and case
-    sensitivity, in that order, to qHashMulti().
-*/
-static size_t qHash(const QRegExpEngineKey &key, size_t seed = 0) noexcept
-{
-    const size_t combined = qHashMulti(seed, key.pattern, key.patternSyntax, key.cs);
-    return combined;
-}
-
-class QRegExpEngine;
-
-/*
- This is the engine state during matching.
-
- NOTE(review): the destructor frees bigArray, but no copy constructor or
- copy assignment is declared in this struct; confirm that instances are
- never copied, since a copy would hold the same bigArray pointer.
-*/
-struct QRegExpMatchState
-{
- const QChar *in; // a pointer to the input string data
- int pos; // the current position in the string
- int caretPos; // presumably the index at which ^ may match -- confirm
- int len; // the length of the input string
- bool minimal; // minimal matching?
- int *bigArray; // big array holding the data for the next pointers
- int *inNextStack; // is state in nextStack?
- int *curStack; // stack of current states
- int *nextStack; // stack of next states
- int *curCapBegin; // start of current states' captures
- int *nextCapBegin; // start of next states' captures
- int *curCapEnd; // end of current states' captures
- int *nextCapEnd; // end of next states' captures
- int *tempCapBegin; // start of temporary captures
- int *tempCapEnd; // end of temporary captures
- int *capBegin; // start of captures for a next state
- int *capEnd; // end of captures for a next state
- int *slideTab; // bump-along slide table for bad-character heuristic
- int *captured; // what match() returned last
- int slideTabSize; // size of slide table
- int capturedSize; // presumably the number of ints in captured -- confirm
-#ifndef QT_NO_REGEXP_BACKREF
- QList<QList<int>> sleeping; // list of back-reference sleepers
-#endif
- int matchLen; // length of match
- int oneTestMatchedLen; // length of partial match
-
- const QRegExpEngine *eng; // presumably the engine set by prepareForMatch() -- confirm
-
- inline QRegExpMatchState() : bigArray(nullptr), captured(nullptr) {} // only these two members are initialized here
- inline ~QRegExpMatchState() { free(bigArray); }
-
- void drain() { free(bigArray); bigArray = nullptr; captured = nullptr; } // frees bigArray and resets both pointers, to save memory
- void prepareForMatch(QRegExpEngine *eng);
- void match(const QChar *str, int len, int pos, bool minimal,
- bool oneTest, int caretIndex);
- bool matchHere();
- bool testAnchor(int i, int a, const int *capBegin);
-};
-
-/*
- The struct QRegExpAutomatonState represents one state in a modified NFA. The
- input characters matched are stored in the state instead of on
- the transitions, something possible for an automaton
- constructed from a regular expression.
-
- The atom member and the two-argument constructor exist only when
- QT_NO_REGEXP_CAPTURE is not defined; otherwise the constructor takes
- the match value alone.
-*/
-struct QRegExpAutomatonState
-{
-#ifndef QT_NO_REGEXP_CAPTURE
- int atom; // which atom does this state belong to?
-#endif
- int match; // what does it match? (see CharClassBit and BackRefBit)
- QList<int> outs; // out-transitions
- QMap<int, int> reenter; // atoms reentered when transiting out
- QMap<int, int> anchors; // anchors met when transiting out
-
- inline QRegExpAutomatonState() { } // leaves the int members uninitialized
-#ifndef QT_NO_REGEXP_CAPTURE
- inline QRegExpAutomatonState(int a, int m)
- : atom(a), match(m) { }
-#else
- inline QRegExpAutomatonState(int m)
- : match(m) { }
-#endif
-};
-
-Q_DECLARE_TYPEINFO(QRegExpAutomatonState, Q_MOVABLE_TYPE);
-
-/*
- The struct QRegExpCharClassRange represents a range of characters (e.g.,
- [0-9] denotes range 48 to 57). The range is stored as its first
- character code and its length rather than as a pair of end points.
-*/
-struct QRegExpCharClassRange
-{
- ushort from; // first character code of the range (48 for [0-9])
- ushort len; // number of character codes in the range (10 for [0-9])
-};
-
-Q_DECLARE_TYPEINFO(QRegExpCharClassRange, Q_PRIMITIVE_TYPE);
-
-#ifndef QT_NO_REGEXP_CAPTURE
-/*
- The struct QRegExpAtom represents one node in the hierarchy of regular
- expression atoms.
-
- The negative enum values are presumably stored in the capture member
- for atoms that have no capture index assigned -- confirm.
-*/
-struct QRegExpAtom
-{
- enum { NoCapture = -1, OfficialCapture = -2, UnofficialCapture = -3 };
-
- int parent; // index of parent in array of atoms
- int capture; // index of capture, from 1 to ncap - 1
-};
-
-Q_DECLARE_TYPEINFO(QRegExpAtom, Q_PRIMITIVE_TYPE);
-#endif
-
-struct QRegExpLookahead;
-
-#ifndef QT_NO_REGEXP_ANCHOR_ALT
-/*
- The struct QRegExpAnchorAlternation represents a pair of anchors with
- OR semantics.
-
- Both members are plain ints; presumably each holds an anchor value as
- used by the engine's anchor functions -- confirm.
-*/
-struct QRegExpAnchorAlternation
-{
- int a; // this anchor...
- int b; // ...or this one
-};
-
-Q_DECLARE_TYPEINFO(QRegExpAnchorAlternation, Q_PRIMITIVE_TYPE);
-#endif
-
-#ifndef QT_NO_REGEXP_CCLASS
-
-#define FLAG(x) (1 << (x)) // int value with only bit x set
-/*
- The class QRegExpCharClass represents a set of characters, such as can
- be found in regular expressions (e.g., [a-z] denotes the set
- {a, b, ..., z}).
-
- When QT_NO_REGEXP_CCLASS is defined, a placeholder struct of the same
- name is declared instead; it offers only the first-occurrence array,
- and only when QT_NO_REGEXP_OPTIM is not defined.
-*/
-class QRegExpCharClass
-{
-public:
- QRegExpCharClass();
-
- void clear();
- bool negative() const { return n; }
- void setNegative(bool negative);
- void addCategories(uint cats); // cats is presumably a bit set built with FLAG() -- confirm
- void addRange(ushort from, ushort to);
- void addSingleton(ushort ch) { addRange(ch, ch); } // a singleton is the range from ch to ch
-
- bool in(QChar ch) const;
-#ifndef QT_NO_REGEXP_OPTIM
- const QList<int> &firstOccurrence() const { return occ1; }
-#endif
-
-#if defined(QT_DEBUG)
- void dump() const;
-#endif
-
-private:
- QList<QRegExpCharClassRange> r; // character ranges
-#ifndef QT_NO_REGEXP_OPTIM
- QList<int> occ1; // first-occurrence array
-#endif
- uint c; // character classes
- bool n; // negative?
-};
-#else
-struct QRegExpCharClass
-{
- int dummy;
-
-#ifndef QT_NO_REGEXP_OPTIM
- QRegExpCharClass() { occ1.fill(0, NumBadChars); } // occ1 becomes NumBadChars zeros
-
- const QList<int> &firstOccurrence() const { return occ1; }
- QList<int> occ1; // first-occurrence array
-#endif
-};
-#endif
-
-Q_DECLARE_TYPEINFO(QRegExpCharClass, Q_MOVABLE_TYPE);
-
-/*
- The QRegExpEngine class encapsulates a modified nondeterministic
- finite automaton (NFA).
-*/
-class QRegExpEngine
-{
-public:
- QRegExpEngine(Qt::CaseSensitivity cs, bool greedyQuantifiers)
- : cs(cs), greedyQuantifiers(greedyQuantifiers) { setup(); }
-
- QRegExpEngine(const QRegExpEngineKey &key);
- ~QRegExpEngine();
-
- bool isValid() const { return valid; }
- const QString &errorString() const { return yyError; }
- int captureCount() const { return officialncap; }
-
- int createState(QChar ch);
- int createState(const QRegExpCharClass &cc);
-#ifndef QT_NO_REGEXP_BACKREF
- int createState(int bref);
-#endif
-
- void addCatTransitions(const QList<int> &from, const QList<int> &to);
-#ifndef QT_NO_REGEXP_CAPTURE
- void addPlusTransitions(const QList<int> &from, const QList<int> &to, int atom);
-#endif
-
-#ifndef QT_NO_REGEXP_ANCHOR_ALT
- int anchorAlternation(int a, int b);
- int anchorConcatenation(int a, int b);
-#else
- int anchorAlternation(int a, int b) { return a & b; }
- int anchorConcatenation(int a, int b) { return a | b; }
-#endif
- void addAnchors(int from, int to, int a);
-
-#ifndef QT_NO_REGEXP_OPTIM
- void heuristicallyChooseHeuristic();
-#endif
-
-#if defined(QT_DEBUG)
- void dump() const;
-#endif
-
- QAtomicInt ref;
-
-private:
- enum { CharClassBit = 0x10000, BackRefBit = 0x20000 };
- enum { InitialState = 0, FinalState = 1 };
-
- void setup();
- int setupState(int match);
-
- /*
- Let's hope that 13 lookaheads and 14 back-references are
- enough.
- */
- enum { MaxLookaheads = 13, MaxBackRefs = 14 };
- enum { Anchor_Dollar = 0x00000001, Anchor_Caret = 0x00000002, Anchor_Word = 0x00000004,
- Anchor_NonWord = 0x00000008, Anchor_FirstLookahead = 0x00000010,
- Anchor_BackRef1Empty = Anchor_FirstLookahead << MaxLookaheads,
- Anchor_BackRef0Empty = Anchor_BackRef1Empty >> 1,
- Anchor_Alternation = unsigned(Anchor_BackRef1Empty) << MaxBackRefs,
-
- Anchor_LookaheadMask = (Anchor_FirstLookahead - 1) ^
- ((Anchor_FirstLookahead << MaxLookaheads) - 1) };
-#ifndef QT_NO_REGEXP_CAPTURE
- int startAtom(bool officialCapture);
- void finishAtom(int atom, bool needCapture);
-#endif
-
-#ifndef QT_NO_REGEXP_LOOKAHEAD
- int addLookahead(QRegExpEngine *eng, bool negative);
-#endif
-
-#ifndef QT_NO_REGEXP_OPTIM
- bool goodStringMatch(QRegExpMatchState &matchState) const;
- bool badCharMatch(QRegExpMatchState &matchState) const;
-#else
- bool bruteMatch(QRegExpMatchState &matchState) const;
-#endif
-
- QList<QRegExpAutomatonState> s; // array of states
-#ifndef QT_NO_REGEXP_CAPTURE
- QList<QRegExpAtom> f; // atom hierarchy
- int nf; // number of atoms
- int cf; // current atom
- QList<int> captureForOfficialCapture;
-#endif
- int officialncap; // number of captures, seen from the outside
- int ncap; // number of captures, seen from the inside
-#ifndef QT_NO_REGEXP_CCLASS
- QList<QRegExpCharClass> cl; // array of character classes
-#endif
-#ifndef QT_NO_REGEXP_LOOKAHEAD
- QList<QRegExpLookahead *> ahead; // array of lookaheads
-#endif
-#ifndef QT_NO_REGEXP_ANCHOR_ALT
- QList<QRegExpAnchorAlternation> aa; // array of (a, b) pairs of anchors
-#endif
-#ifndef QT_NO_REGEXP_OPTIM
- bool caretAnchored; // does the regexp start with ^?
- bool trivial; // is the good-string all that needs to match?
-#endif
- bool valid; // is the regular expression valid?
- Qt::CaseSensitivity cs; // case sensitive?
- bool greedyQuantifiers; // RegExp2?
- bool xmlSchemaExtensions;
-#ifndef QT_NO_REGEXP_BACKREF
- int nbrefs; // number of back-references
-#endif
-
-#ifndef QT_NO_REGEXP_OPTIM
- bool useGoodStringHeuristic; // use goodStringMatch? otherwise badCharMatch
-
- int goodEarlyStart; // the index where goodStr can first occur in a match
- int goodLateStart; // the index where goodStr can last occur in a match
- QString goodStr; // the string that any match has to contain
-
- int minl; // the minimum length of a match
- QList<int> occ1; // first-occurrence array
-#endif
-
- /*
- The class Box is an abstraction for a regular expression
- fragment. It can also be seen as one node in the syntax tree of
- a regular expression with synthesized attributes.
-
- A Box keeps a pointer to the engine under construction, its left
- and right state lists, its left, right and skip anchors, its
- minimum length and, unless QT_NO_REGEXP_OPTIM is defined, the
- data used by the matching heuristics (see the member comments).
-
- Its interface is ugly for performance reasons.
- */
- class Box
- {
- public:
- Box(QRegExpEngine *engine);
- Box(const Box &b) { operator=(b); } // copy construction delegates to operator=
-
- Box &operator=(const Box &b);
-
- void clear() { operator=(Box(eng)); } // overwrite with a fresh Box bound to the same engine
- void set(QChar ch);
- void set(const QRegExpCharClass &cc);
-#ifndef QT_NO_REGEXP_BACKREF
- void set(int bref);
-#endif
-
- void cat(const Box &b);
- void orx(const Box &b);
- void plus(int atom);
- void opt();
- void catAnchor(int a);
-#ifndef QT_NO_REGEXP_OPTIM
- void setupHeuristics();
-#endif
-
-#if defined(QT_DEBUG)
- void dump() const;
-#endif
-
- private:
- void addAnchorsToEngine(const Box &to) const;
-
- QRegExpEngine *eng; // the automaton under construction
- QList<int> ls; // the left states (firstpos)
- QList<int> rs; // the right states (lastpos)
- QMap<int, int> lanchors; // the left anchors
- QMap<int, int> ranchors; // the right anchors
- int skipanchors; // the anchors to match if the box is skipped
-
-#ifndef QT_NO_REGEXP_OPTIM
- int earlyStart; // the index where str can first occur
- int lateStart; // the index where str can last occur
- QString str; // a string that has to occur in any match
- QString leftStr; // a string occurring at the left of this box
- QString rightStr; // a string occurring at the right of this box
- int maxl; // the maximum length of this box (possibly InftyLen)
-#endif
-
- int minl; // the minimum length of this box
-#ifndef QT_NO_REGEXP_OPTIM
- QList<int> occ1; // first-occurrence array
-#endif
- };
-
- friend class Box;
-
- /*
- This is the lexical analyzer for regular expressions.
- */
- enum { Tok_Eos, Tok_Dollar, Tok_LeftParen, Tok_MagicLeftParen, Tok_PosLookahead,
- Tok_NegLookahead, Tok_RightParen, Tok_CharClass, Tok_Caret, Tok_Quantifier, Tok_Bar,
- Tok_Word, Tok_NonWord, Tok_Char = 0x10000, Tok_BackRef = 0x20000 };
- int getChar();
- int getEscape();
-#ifndef QT_NO_REGEXP_INTERVAL
- int getRep(int def);
-#endif
-#ifndef QT_NO_REGEXP_LOOKAHEAD
- void skipChars(int n);
-#endif
- void error(const char *msg);
- void startTokenizer(const QChar *rx, int len);
- int getToken();
-
- const QChar *yyIn; // a pointer to the input regular expression pattern
- int yyPos0; // the position of yyTok in the input pattern
- int yyPos; // the position of the next character to read
- int yyLen; // the length of yyIn
- int yyCh; // the last character read
- QScopedPointer<QRegExpCharClass> yyCharClass; // attribute for Tok_CharClass tokens
- int yyMinRep; // attribute for Tok_Quantifier
- int yyMaxRep; // ditto
- QString yyError; // syntax error or overflow during parsing?
-
- /*
- This is the syntactic analyzer for regular expressions.
- */
- int parse(const QChar *rx, int len);
- void parseAtom(Box *box);
- void parseFactor(Box *box);
- void parseTerm(Box *box);
- void parseExpression(Box *box);
-
- int yyTok; // the last token read
- bool yyMayCapture; // set this to false to disable capturing
-
- friend struct QRegExpMatchState;
-};
-
-#ifndef QT_NO_REGEXP_LOOKAHEAD
-/*
-    The struct QRegExpLookahead represents a lookahead a la Perl (e.g.,
-    (?=foo) and (?!bar)).
-
-    The struct owns the engine it is given: the destructor deletes it.
-    Copying is therefore disabled, since a copy would share the same
-    pointer and delete the engine a second time.
-*/
-struct QRegExpLookahead
-{
-    QRegExpEngine *eng; // NFA representing the embedded regular expression (owned)
-    bool neg; // negative lookahead?
-
-    inline QRegExpLookahead(QRegExpEngine *eng0, bool neg0)
-        : eng(eng0), neg(neg0) { }
-    inline ~QRegExpLookahead() { delete eng; }
-
-    // Single owner of eng: forbid copies to rule out a double delete.
-    QRegExpLookahead(const QRegExpLookahead &) = delete;
-    QRegExpLookahead &operator=(const QRegExpLookahead &) = delete;
-};
-#endif
-
-/*!
-    \internal
-    Translates \a pattern, written in the syntax \a patternSyntax, into
-    the plain RegExp syntax understood by the engine. Wildcard patterns
-    go through wc2rx(), fixed strings are escaped, and every other
-    syntax is returned unchanged.
-
-    This is also used by QScriptEngine::newRegExp to convert to a pattern
-    that JavaScriptCore can understand.
- */
-Q_CORE_EXPORT QString qt_regexp_toCanonical(const QString &pattern, QRegExp::PatternSyntax patternSyntax)
-{
-#ifndef QT_NO_REGEXP_WILDCARD
-    if (patternSyntax == QRegExp::Wildcard)
-        return wc2rx(pattern, false);
-    if (patternSyntax == QRegExp::WildcardUnix)
-        return wc2rx(pattern, true);
-#endif
-    if (patternSyntax == QRegExp::FixedString)
-        return QRegExp::escape(pattern);
-
-    // W3CXmlSchema11 and all remaining syntaxes are already canonical.
-    return pattern;
-}
-
-// Builds an engine for the pattern described by key. The engine is valid
-// only if parse() consumed the whole canonical pattern; otherwise an
-// error is recorded.
-QRegExpEngine::QRegExpEngine(const QRegExpEngineKey &key)
-    : cs(key.cs), greedyQuantifiers(key.patternSyntax == QRegExp::RegExp2),
-      xmlSchemaExtensions(key.patternSyntax == QRegExp::W3CXmlSchema11)
-{
-    setup();
-
-    const QString canonical = qt_regexp_toCanonical(key.pattern, key.patternSyntax);
-    const auto consumed = parse(canonical.unicode(), canonical.length());
-
-    valid = (consumed == canonical.length());
-    if (valid)
-        return;
-
-#ifndef QT_NO_REGEXP_OPTIM
-    trivial = false;
-#endif
-    error(RXERR_LEFTDELIM);
-}
-
-// Releases the lookahead descriptors collected in ahead.
-QRegExpEngine::~QRegExpEngine()
-{
-#ifndef QT_NO_REGEXP_LOOKAHEAD
-    qDeleteAll(ahead.constBegin(), ahead.constEnd());
-#endif
-}
-
-// Sizes the shared int buffer for eng and carves it into the individual
-// working arrays used by matchHere() and friends.
-void QRegExpMatchState::prepareForMatch(QRegExpEngine *eng)
-{
-    const int ns = eng->s.size(); // number of states
-    const int ncap = eng->ncap;
-#ifndef QT_NO_REGEXP_OPTIM
-    const int newSlideTabSize = qMax(eng->minl + 1, 16);
-#else
-    const int newSlideTabSize = 0;
-#endif
-    const int newCapturedSize = 2 + 2 * eng->captureCount();
-
-    // One allocation holds every array laid out below.
-    const int totalInts = (3 + 4 * ncap) * ns + 4 * ncap + newSlideTabSize + newCapturedSize;
-    bigArray = q_check_ptr((int *)realloc(bigArray, totalInts * sizeof(int)));
-
-    // set all internal variables only _after_ bigArray is realloc'ed
-    // to prevent a broken regexp in oom case
-    slideTabSize = newSlideTabSize;
-    capturedSize = newCapturedSize;
-
-    // Three per-state stacks, ns entries each.
-    inNextStack = bigArray;
-    memset(inNextStack, -1, ns * sizeof(int));
-    curStack = inNextStack + ns;
-    nextStack = curStack + ns;
-
-    // Four per-state capture tables, ncap * ns entries each.
-    const int capBlock = ncap * ns;
-    curCapBegin = nextStack + ns;
-    nextCapBegin = curCapBegin + capBlock;
-    curCapEnd = nextCapBegin + capBlock;
-    nextCapEnd = curCapEnd + capBlock;
-
-    // Four single capture rows, ncap entries each.
-    tempCapBegin = nextCapEnd + capBlock;
-    tempCapEnd = tempCapBegin + ncap;
-    capBegin = tempCapEnd + ncap;
-    capEnd = capBegin + ncap;
-
-    // Slide table followed by the externally visible capture pairs.
-    slideTab = capEnd + ncap;
-    captured = slideTab + slideTabSize;
-    memset(captured, -1, capturedSize * sizeof(int));
-
-    this->eng = eng;
-}
-
-/*
-    Searches str0 (len0 characters) from pos0 and fills captured with
-    (begin, length) pairs: the whole match first, then one pair per
-    capture. When nothing matches, every pair is (-1, -1).
-
-    With oneTest set, only the starting position is tried.
-*/
-void QRegExpMatchState::match(const QChar *str0, int len0, int pos0,
-                              bool minimal0, bool oneTest, int caretIndex)
-{
-    bool found = false;
-    QChar nullChar;
-
-#ifndef QT_NO_REGEXP_OPTIM
-    if (!oneTest && eng->trivial) {
-        // The pattern is just the good string: a plain substring search suffices.
-        // ### Qt6: qsizetype
-        const QStringView haystack(str0, len0);
-        const QStringView needle(eng->goodStr.unicode(), eng->goodStr.length());
-        pos = int(QtPrivate::findString(haystack, pos0, needle, eng->cs));
-        matchLen = eng->goodStr.length();
-        found = (pos != -1);
-    } else
-#endif
-    {
-        // Never leave in null; point it at a dummy character instead.
-        in = (str0 != nullptr) ? str0 : &nullChar;
-        pos = pos0;
-        caretPos = caretIndex;
-        len = len0;
-        minimal = minimal0;
-        matchLen = 0;
-        oneTestMatchedLen = 0;
-
-        if (eng->valid && pos >= 0 && pos <= len) {
-#ifndef QT_NO_REGEXP_OPTIM
-            if (oneTest) {
-                found = matchHere();
-            } else if (pos <= len - eng->minl) {
-                if (eng->caretAnchored)
-                    found = matchHere();
-                else if (eng->useGoodStringHeuristic)
-                    found = eng->goodStringMatch(*this);
-                else
-                    found = eng->badCharMatch(*this);
-            }
-#else
-            found = oneTest ? matchHere() : eng->bruteMatch(*this);
-#endif
-        }
-    }
-
-    if (!found) {
-        // we rely on 2's complement here
-        memset(captured, -1, capturedSize * sizeof(int));
-        return;
-    }
-
-    int *out = captured;
-    *out++ = pos;
-    *out++ = matchLen;
-
-#ifndef QT_NO_REGEXP_CAPTURE
-    const int numCaptures = (capturedSize - 2) >> 1;
-    for (int idx = 0; idx < numCaptures; ++idx) {
-        const int slot = eng->captureForOfficialCapture.at(idx);
-        if (capBegin[slot] == EmptyCapture) {
-            *out++ = -1;
-            *out++ = -1;
-        } else {
-            const int capLen = capEnd[slot] - capBegin[slot];
-            *out++ = (capLen > 0) ? pos + capBegin[slot] : 0;
-            *out++ = capLen;
-        }
-    }
-#endif
-}
-
-/*
- The three following functions add one state to the automaton and
- return the number of the state.
-*/
-
-// Adds a state matching the single character ch; its match code is the
-// value returned by ch.unicode().
-int QRegExpEngine::createState(QChar ch)
-{
-    const int matchCode = ch.unicode();
-    return setupState(matchCode);
-}
-
-// Adds a state matching the character class cc. A copy of cc is stored in
-// cl and the state's match code is CharClassBit combined with its index.
-int QRegExpEngine::createState(const QRegExpCharClass &cc)
-{
-#ifndef QT_NO_REGEXP_CCLASS
-    const int classIndex = cl.size();
-    cl.append(QRegExpCharClass(cc));
-    return setupState(CharClassBit | classIndex);
-#else
-    Q_UNUSED(cc);
-    return setupState(CharClassBit);
-#endif
-}
-
-#ifndef QT_NO_REGEXP_BACKREF
-// Adds a state matching back-reference number bref. nbrefs tracks the
-// highest back-reference seen; exceeding MaxBackRefs records RXERR_LIMIT
-// and yields state 0.
-int QRegExpEngine::createState(int bref)
-{
-    const bool newHighest = bref > nbrefs;
-    if (newHighest)
-        nbrefs = bref;
-
-    if (newHighest && nbrefs > MaxBackRefs) {
-        error(RXERR_LIMIT);
-        return 0;
-    }
-    return setupState(BackRefBit | bref);
-}
-#endif
-
-/*
- The two following functions add a transition between all pairs of
- states (i, j) where i is found in from, and j is found in to.
-
- Cat-transitions are distinguished from plus-transitions for
- capturing.
-*/
-
-// Merges every state of to into the outgoing transitions of each state
-// listed in from.
-void QRegExpEngine::addCatTransitions(const QList<int> &from, const QList<int> &to)
-{
-    for (int source : from)
-        mergeInto(&s[source].outs, to);
-}
-
-#ifndef QT_NO_REGEXP_CAPTURE
-// Like addCatTransitions(), but for a capturing atom each newly added
-// transition is also recorded in the source state's reenter map.
-void QRegExpEngine::addPlusTransitions(const QList<int> &from, const QList<int> &to, int atom)
-{
-    for (int source : from) {
-        QRegExpAutomatonState &state = s[source];
-        // Snapshot taken before merging, to tell old transitions from new ones.
-        const QList<int> previousOuts = state.outs;
-        mergeInto(&state.outs, to);
-
-        if (f.at(atom).capture == QRegExpAtom::NoCapture)
-            continue;
-
-        for (int target : to) {
-            // ### st.reenter.contains(to.at(j)) check looks suspicious
-            if (state.reenter.contains(target))
-                continue;
-            if (std::binary_search(previousOuts.constBegin(), previousOuts.constEnd(), target))
-                continue;
-            state.reenter.insert(target, atom);
-        }
-    }
-}
-#endif
-
-#ifndef QT_NO_REGEXP_ANCHOR_ALT
-/*
-    Returns an anchor that means a OR b.
-
-    When neither operand is itself an alternation and one operand's bits
-    are contained in the other's, the shared bits are returned directly.
-    Otherwise the pair is stored in aa and the result refers to that entry.
-*/
-int QRegExpEngine::anchorAlternation(int a, int b)
-{
-    const int shared = a & b;
-    const bool oneContainsOther = (shared == a) || (shared == b);
-    const bool noAlternationBit = ((a | b) & Anchor_Alternation) == 0;
-    if (oneContainsOther && noAlternationBit)
-        return shared;
-
-    const int count = aa.size();
-#ifndef QT_NO_REGEXP_OPTIM
-    // Reuse the most recent entry when it is the very same pair.
-    if (count > 0) {
-        const QRegExpAnchorAlternation &last = aa.at(count - 1);
-        if (last.a == a && last.b == b)
-            return Anchor_Alternation | (count - 1);
-    }
-#endif
-
-    const QRegExpAnchorAlternation entry = {a, b};
-    aa.append(entry);
-    return Anchor_Alternation | count;
-}
-
-/*
-    Returns an anchor that means a AND b.
-
-    Two plain anchors are simply OR-ed together. If an alternation is
-    involved, b is combined with each of its branches and the two results
-    are joined again with anchorAlternation().
-*/
-int QRegExpEngine::anchorConcatenation(int a, int b)
-{
-    const bool noAlternationBit = ((a | b) & Anchor_Alternation) == 0;
-    if (noAlternationBit)
-        return a | b;
-
-    // Make sure a is the operand carrying the alternation bit.
-    if ((b & Anchor_Alternation) != 0)
-        qSwap(a, b);
-
-    // Look entries up by index: the recursive calls may append to aa.
-    const auto altIndex = a ^ Anchor_Alternation;
-    const int firstBranch = anchorConcatenation(aa.at(altIndex).a, b);
-    const int secondBranch = anchorConcatenation(aa.at(altIndex).b, b);
-    return anchorAlternation(firstBranch, secondBranch);
-}
-#endif
-
-/*
-    Adds anchor a on a transition characterised by its from state and
-    its to state. If the transition already carries an anchor, the stored
-    value becomes the alternation of the old anchor and a.
-*/
-void QRegExpEngine::addAnchors(int from, int to, int a)
-{
-    QRegExpAutomatonState &state = s[from];
-    const auto existing = state.anchors.constFind(to);
-    if (existing != state.anchors.constEnd())
-        a = anchorAlternation(existing.value(), a);
-    state.anchors.insert(to, a);
-}
-
-#ifndef QT_NO_REGEXP_OPTIM
-/*
-    Decides whether matching should use the good-string heuristic or the
-    bad-character heuristic, by scoring both and keeping the higher one.
-
-    Anyone changing the formulas should keep these constraints in mind:
-    (1) an empty goodStr gives the good-string heuristic a score of 0;
-    (2) a trivial regular expression uses the good-string heuristic;
-    (3) a case-insensitive search uses the good-string heuristic unless
-    it scores 0 (case insensitivity turns all entries of occ1 to 0);
-    (4) a large (goodLateStart - goodEarlyStart) lowers the good-string
-    score.
-*/
-void QRegExpEngine::heuristicallyChooseHeuristic()
-{
-    if (minl == 0) {
-        useGoodStringHeuristic = false;
-        return;
-    }
-    if (trivial) {
-        useGoodStringHeuristic = true;
-        return;
-    }
-
-    // Magic formula: the good string should make up a good share of the
-    // minimum-length string and sit at a more-or-less known index.
-    const int goodStringScore = (64 * goodStr.length() / minl) -
-                                (goodLateStart - goodEarlyStart);
-
-    // Less magic formula: sample characters at a fixed stride and check
-    // whether they are good or bad.
-    const int stride = qMax(1, NumBadChars / 32);
-    int badCharScore = 0;
-    for (int sample = 1; sample < NumBadChars; sample += stride) {
-        const int firstOccurrence = occ1.at(sample);
-        badCharScore += (firstOccurrence == NoOccurrence) ? minl : firstOccurrence;
-    }
-    badCharScore /= minl;
-
-    useGoodStringHeuristic = (goodStringScore > badCharScore);
-}
-#endif
-
-#if defined(QT_DEBUG)
-// Debug helper: prints the automaton with qDebug(): every state with
-// what it matches and its transitions, then the atom table and the
-// anchor alternations when those features are compiled in.
-void QRegExpEngine::dump() const
-{
-    int i, j;
-    qDebug("Case %ssensitive engine", cs ? "" : "in");
-    qDebug(" States");
-    for (i = 0; i < s.size(); i++) {
-        qDebug(" %d%s", i, i == InitialState ? " (initial)" : i == FinalState ? " (final)" : "");
-#ifndef QT_NO_REGEXP_CAPTURE
-        if (nf > 0)
-            qDebug(" in atom %d", s[i].atom);
-#endif
-        // The match code is a character class index, a back-reference
-        // number or a plain character value, depending on its flag bits.
-        int m = s[i].match;
-        if ((m & CharClassBit) != 0) {
-            qDebug(" match character class %d", m ^ CharClassBit);
-#ifndef QT_NO_REGEXP_CCLASS
-            cl[m ^ CharClassBit].dump();
-#else
-            qDebug(" negative character class");
-#endif
-        } else if ((m & BackRefBit) != 0) {
-            qDebug(" match back-reference %d", m ^ BackRefBit);
-        } else if (m >= 0x20 && m <= 0x7e) {
-            // printable ASCII: show the character as well
-            qDebug(" match 0x%.4x (%c)", m, m);
-        } else {
-            qDebug(" match 0x%.4x", m);
-        }
-        for (j = 0; j < s[i].outs.size(); j++) {
-            int next = s[i].outs[j];
-            qDebug(" -> %d", next);
-            if (s[i].reenter.contains(next))
-                qDebug(" [reenter %d]", s[i].reenter[next]);
-            if (s[i].anchors.value(next) != 0)
-                qDebug(" [anchors 0x%.8x]", s[i].anchors[next]);
-        }
-    }
-#ifndef QT_NO_REGEXP_CAPTURE
-    if (nf > 0) {
-        qDebug(" Atom Parent Capture");
-        for (i = 0; i < nf; i++) {
-            if (f[i].capture == QRegExpAtom::NoCapture) {
-                qDebug(" %6d %6d nil", i, f[i].parent);
-            } else {
-                int cap = f[i].capture;
-                bool official = captureForOfficialCapture.contains(cap);
-                qDebug(" %6d %6d %6d %s", i, f[i].parent, f[i].capture,
-                       official ? "official" : "");
-            }
-        }
-    }
-#endif
-#ifndef QT_NO_REGEXP_ANCHOR_ALT
-    // Both anchors are printed with the same 8-digit width (was "%.9x"
-    // for the second one).
-    for (i = 0; i < aa.size(); i++)
-        qDebug(" Anchor alternation 0x%.8x: 0x%.8x 0x%.8x", i, aa[i].a, aa[i].b);
-#endif
-}
-#endif
-
-// Puts the engine into its initial, not-yet-parsed state.
-void QRegExpEngine::setup()
-{
-    ref.storeRelaxed(1);
-    valid = false;
-    officialncap = 0;
-    ncap = 0;
-
-#ifndef QT_NO_REGEXP_CAPTURE
-    nf = 0;
-    cf = -1;
-    f.resize(32);
-#endif
-
-#ifndef QT_NO_REGEXP_BACKREF
-    nbrefs = 0;
-#endif
-
-#ifndef QT_NO_REGEXP_OPTIM
-    caretAnchored = true;
-    trivial = true;
-    useGoodStringHeuristic = true;
-    minl = 0;
-    occ1.fill(0, NumBadChars);
-#endif
-}
-
-// Appends a state with the given match code (tagged with the current
-// atom when captures are compiled in) and returns its index.
-int QRegExpEngine::setupState(int match)
-{
-    const int stateIndex = s.size();
-#ifndef QT_NO_REGEXP_CAPTURE
-    s.append(QRegExpAutomatonState(cf, match));
-#else
-    s.append(QRegExpAutomatonState(match));
-#endif
-    return stateIndex;
-}
-
-#ifndef QT_NO_REGEXP_CAPTURE
-/*
- Functions startAtom() and finishAtom() should be called to delimit
- atoms. When a state is created, it is assigned to the current atom.
- The information is later used for capturing.
-*/
-// Opens a new atom whose parent is the current atom, makes it current
-// and returns its number. officialCapture selects OfficialCapture over
-// NoCapture for the new atom.
-int QRegExpEngine::startAtom(bool officialCapture)
-{
-    // Double the atom table when nf + 1 is a power of two and the table is full.
-    const bool nextIsPowerOfTwo = (nf & (nf + 1)) == 0;
-    if (nextIsPowerOfTwo && nf + 1 >= f.size())
-        f.resize((nf + 1) << 1);
-
-    const int atom = nf++;
-    f[atom].parent = cf;
-    f[atom].capture = officialCapture ? QRegExpAtom::OfficialCapture : QRegExpAtom::NoCapture;
-    cf = atom;
-    return atom;
-}
-
-// Closes atom and makes its parent the current atom again. With greedy
-// quantifiers, a non-capturing atom that needs a capture is promoted to
-// UnofficialCapture first.
-void QRegExpEngine::finishAtom(int atom, bool needCapture)
-{
-    const bool promote = greedyQuantifiers && needCapture
-            && f.at(atom).capture == QRegExpAtom::NoCapture;
-    if (promote)
-        f[atom].capture = QRegExpAtom::UnofficialCapture;
-
-    cf = f.at(atom).parent;
-}
-#endif
-
-#ifndef QT_NO_REGEXP_LOOKAHEAD
-/*
-    Creates a lookahead anchor for the sub-engine eng and returns its
-    anchor bit (Anchor_FirstLookahead shifted by the lookahead's index).
-
-    The engine is stored in a QRegExpLookahead, which deletes it on
-    destruction. When the MaxLookaheads limit is reached, RXERR_LIMIT is
-    recorded, 0 is returned, and eng is deleted here so that it does not
-    leak.
-
-    NOTE(review): the caller is not visible in this part of the file;
-    confirm it does not touch eng after this call.
-*/
-int QRegExpEngine::addLookahead(QRegExpEngine *eng, bool negative)
-{
-    int n = ahead.size();
-    if (n == MaxLookaheads) {
-        // Nobody else will own eng on this path.
-        delete eng;
-        error(RXERR_LIMIT);
-        return 0;
-    }
-    ahead += new QRegExpLookahead(eng, negative);
-    return Anchor_FirstLookahead << n;
-}
-#endif
-
-#ifndef QT_NO_REGEXP_CAPTURE
-/*
-    We want the longest leftmost captures.
-
-    Compares capture set 1 against capture set 2, zone by zone. The first
-    zone that differs decides: set 1 is better if it begins earlier or,
-    for equal beginnings, ends later. Returns false when all ncap zones
-    are equal.
-*/
-static bool isBetterCapture(int ncap, const int *begin1, const int *end1, const int *begin2,
-                            const int *end2)
-{
-    for (int zone = 0; zone < ncap; ++zone) {
-        if (begin1[zone] != begin2[zone])
-            return begin1[zone] < begin2[zone]; // it has to start early...
-        if (end1[zone] != end2[zone])
-            return end1[zone] > end2[zone]; // ...and end late
-    }
-    return false;
-}
-#endif
-
-/*
-    Returns \c true if anchor a matches at position pos + i in the input
-    string, otherwise false.
-
-    An alternation anchor matches if either of its branches matches. For
-    a plain anchor, every bit that is set (caret, dollar, word boundary,
-    lookaheads, empty back-references) must hold.
-*/
-bool QRegExpMatchState::testAnchor(int i, int a, const int *capBegin)
-{
-#ifndef QT_NO_REGEXP_ANCHOR_ALT
-    if ((a & QRegExpEngine::Anchor_Alternation) != 0) {
-        const QRegExpAnchorAlternation &alt = eng->aa.at(a ^ QRegExpEngine::Anchor_Alternation);
-        return testAnchor(i, alt.a, capBegin) || testAnchor(i, alt.b, capBegin);
-    }
-#endif
-
-    const int here = pos + i;
-
-    if ((a & QRegExpEngine::Anchor_Caret) != 0 && here != caretPos)
-        return false;
-    if ((a & QRegExpEngine::Anchor_Dollar) != 0 && here != len)
-        return false;
-
-#ifndef QT_NO_REGEXP_ESCAPE
-    if ((a & (QRegExpEngine::Anchor_Word | QRegExpEngine::Anchor_NonWord)) != 0) {
-        // Outside the string counts as "not a word character".
-        const bool before = (here != 0) && isWord(in[here - 1]);
-        const bool after = (here != len) && isWord(in[here]);
-        if ((a & QRegExpEngine::Anchor_Word) != 0 && before == after)
-            return false;
-        if ((a & QRegExpEngine::Anchor_NonWord) != 0 && before != after)
-            return false;
-    }
-#endif
-
-#ifndef QT_NO_REGEXP_LOOKAHEAD
-    if ((a & QRegExpEngine::Anchor_LookaheadMask) != 0) {
-        const QList<QRegExpLookahead *> &ahead = eng->ahead;
-        for (int idx = 0; idx < ahead.size(); ++idx) {
-            if ((a & (QRegExpEngine::Anchor_FirstLookahead << idx)) == 0)
-                continue;
-            // Run the embedded engine once, right at the current position.
-            QRegExpMatchState lookaheadState;
-            lookaheadState.prepareForMatch(ahead[idx]->eng);
-            lookaheadState.match(in + here, len - here, 0,
-                                 true, true, caretPos - here);
-            if ((lookaheadState.captured[0] == 0) == ahead[idx]->neg)
-                return false;
-        }
-    }
-#endif
-
-#ifndef QT_NO_REGEXP_CAPTURE
-#ifndef QT_NO_REGEXP_BACKREF
-    for (int idx = 0; idx < eng->nbrefs; ++idx) {
-        if ((a & (QRegExpEngine::Anchor_BackRef1Empty << idx)) == 0)
-            continue;
-        const int cap = eng->captureForOfficialCapture.at(idx);
-        if (capBegin[cap] != EmptyCapture)
-            return false;
-    }
-#endif
-#endif
-    return true;
-}
-
-#ifndef QT_NO_REGEXP_OPTIM
-/*
- The three following functions are what Jeffrey Friedl would call
- transmissions (or bump-alongs). Using one or the other should make
- no difference except in performance.
-*/
-
-// Bump-along driven by the good string: finds each occurrence of goodStr
-// and tries matchHere() only at the start positions compatible with it,
-// i.e. from (hit - goodLateStart) to (hit - goodEarlyStart).
-bool QRegExpEngine::goodStringMatch(QRegExpMatchState &matchState) const
-{
-    QStringMatcher matcher(goodStr.unicode(), goodStr.length(), cs);
-    int hit = matchState.pos + goodEarlyStart;
-
-    for (;;) {
-        hit = matcher.indexIn(matchState.in, matchState.len, hit);
-        if (hit == -1)
-            return false;
-
-        const int firstCandidate = hit - goodLateStart;
-        const int lastCandidate = hit - goodEarlyStart;
-        // Never move backwards over positions that were already tried.
-        if (matchState.pos < firstCandidate)
-            matchState.pos = firstCandidate;
-
-        for (; matchState.pos <= lastCandidate; ++matchState.pos) {
-            if (matchState.matchHere())
-                return true;
-        }
-        ++hit;
-    }
-}
-
-bool QRegExpEngine::badCharMatch(QRegExpMatchState &matchState) const
-{ /*
- Bump-along driven by the bad-character heuristic. slideTab is used
- as a circular table: a positive entry for the current position means
- the position is skipped, otherwise matchHere() is tried there.
- Returns true as soon as matchHere() succeeds, false once lastPos has
- been tried without success.
- */
- int slideHead = 0; // slot consulted for the current position
- int slideNext = 0; // slot for the following position (wraps around)
- int i;
- int lastPos = matchState.len - minl; // last start position that leaves room for minl characters
- memset(matchState.slideTab, 0, matchState.slideTabSize * sizeof(int));
-
- /*
- Set up the slide table, used for the bad-character heuristic,
- using the table of first occurrence of each character.
- */
- for (i = 0; i < minl; i++) {
- int sk = occ1[BadChar(matchState.in[matchState.pos + i])];
- if (sk == NoOccurrence)
- sk = i + 1;
- if (sk > 0) {
- int k = i + 1 - sk;
- if (k < 0) { // clamp to the first slot
- sk = i + 1;
- k = 0;
- }
- if (sk > matchState.slideTab[k]) // keep the larger value per slot
- matchState.slideTab[k] = sk;
- }
- }
-
- if (matchState.pos > lastPos)
- return false;
-
- for (;;) {
- if (++slideNext >= matchState.slideTabSize)
- slideNext = 0;
- if (matchState.slideTab[slideHead] > 0) {
- // skip this position; carry the remaining count (minus one) to the next slot
- if (matchState.slideTab[slideHead] - 1 > matchState.slideTab[slideNext])
- matchState.slideTab[slideNext] = matchState.slideTab[slideHead] - 1;
- matchState.slideTab[slideHead] = 0;
- } else {
- if (matchState.matchHere())
- return true;
- }
-
- if (matchState.pos == lastPos)
- break;
-
- /*
- Update the slide table. This code has much in common with
- the initialization code.
- */
- int sk = occ1[BadChar(matchState.in[matchState.pos + minl])];
- if (sk == NoOccurrence) {
- matchState.slideTab[slideNext] = minl;
- } else if (sk > 0) {
- int k = slideNext + minl - sk;
- if (k >= matchState.slideTabSize) // wrap around the circular table
- k -= matchState.slideTabSize;
- if (sk > matchState.slideTab[k])
- matchState.slideTab[k] = sk;
- }
- slideHead = slideNext;
- ++matchState.pos;
- }
- return false;
-}
-#else
-// Bump-along without heuristics: tries matchHere() at every position
-// from the current one up to and including matchState.len.
-bool QRegExpEngine::bruteMatch(QRegExpMatchState &matchState) const
-{
-    for (; matchState.pos <= matchState.len; ++matchState.pos) {
-        if (matchState.matchHere())
-            return true;
-    }
-    return false;
-}
-#endif
-
-/*
- Here's the core of the engine. It tries to do a match here and now.
-
- Starting from the initial state, the automaton is advanced one input
- position at a time (i is the offset from pos). curStack holds the
- states reached so far and nextStack collects the states reachable on
- the current character; inNextStack maps a state to its slot in
- nextStack (or -1). For every candidate transition the anchors are
- tested first, then the input character is compared against the
- target state's match (plain character, character class or
- back-reference), and finally the capture zones are updated.
- Back-references spanning more than one character put the target
- state to sleep until the referenced text has been consumed.
-
- matchLen is set whenever the final state is reached; the function
- returns true if that happened at least once. oneTestMatchedLen is
- set to i - 1 on exit.
-*/
-bool QRegExpMatchState::matchHere()
-{
- int ncur = 1, nnext = 0; // number of entries in curStack / nextStack
- int i = 0, j, k, m;
- bool stop = false;
-
- matchLen = -1;
- oneTestMatchedLen = -1;
- curStack[0] = QRegExpEngine::InitialState;
-
- int ncap = eng->ncap;
-#ifndef QT_NO_REGEXP_CAPTURE
- if (ncap > 0) {
- for (j = 0; j < ncap; j++) {
- curCapBegin[j] = EmptyCapture;
- curCapEnd[j] = EmptyCapture;
- }
- }
-#endif
-
-#ifndef QT_NO_REGEXP_BACKREF
- while ((ncur > 0 || !sleeping.isEmpty()) && i <= len - pos && !stop)
-#else
- while (ncur > 0 && i <= len - pos && !stop)
-#endif
- {
- int ch = (i < len - pos) ? in[pos + i].unicode() : 0; // 0 stands for "past the end of the input"
- for (j = 0; j < ncur; j++) {
- int cur = curStack[j];
- const QRegExpAutomatonState &scur = eng->s.at(cur);
- const QList<int> &outs = scur.outs;
- for (k = 0; k < outs.size(); k++) {
- int next = outs.at(k);
- const QRegExpAutomatonState &snext = eng->s.at(next);
- bool inside = true; // does this transition apply at the current position?
-#if !defined(QT_NO_REGEXP_BACKREF) && !defined(QT_NO_REGEXP_CAPTURE)
- int needSomeSleep = 0;
-#endif
-
- /*
- First, check if the anchors are anchored properly.
- */
- int a = scur.anchors.value(next);
- if (a != 0 && !testAnchor(i, a, curCapBegin + j * ncap))
- inside = false;
-
- /*
- If indeed they are, check if the input character is
- correct for this transition.
- */
- if (inside) {
- m = snext.match;
- if ((m & (QRegExpEngine::CharClassBit | QRegExpEngine::BackRefBit)) == 0) {
- if (eng->cs)
- inside = (m == ch);
- else
- inside = (QChar(m).toLower() == QChar(ch).toLower());
- } else if (next == QRegExpEngine::FinalState) {
- matchLen = i;
- stop = minimal; // a minimal match ends at the first arrival in the final state
- inside = true;
- } else if ((m & QRegExpEngine::CharClassBit) != 0) {
-#ifndef QT_NO_REGEXP_CCLASS
- const QRegExpCharClass &cc = eng->cl.at(m ^ QRegExpEngine::CharClassBit);
- if (eng->cs)
- inside = cc.in(QChar(ch));
- else if (cc.negative())
- inside = cc.in(QChar(ch).toLower()) &&
- cc.in(QChar(ch).toUpper());
- else
- inside = cc.in(QChar(ch).toLower()) ||
- cc.in(QChar(ch).toUpper());
-#endif
-#if !defined(QT_NO_REGEXP_BACKREF) && !defined(QT_NO_REGEXP_CAPTURE)
- } else { /* ((m & QRegExpEngine::BackRefBit) != 0) */
- int bref = m ^ QRegExpEngine::BackRefBit;
- int ell = j * ncap + eng->captureForOfficialCapture.at(bref - 1);
-
- inside = bref <= ncap && curCapBegin[ell] != EmptyCapture;
- if (inside) {
- if (eng->cs)
- inside = (in[pos + curCapBegin[ell]] == QChar(ch));
- else
- inside = (in[pos + curCapBegin[ell]].toLower()
- == QChar(ch).toLower());
- }
-
- if (inside) {
- int delta; // length of the captured text being referenced
- if (curCapEnd[ell] == EmptyCapture)
- delta = i - curCapBegin[ell];
- else
- delta = curCapEnd[ell] - curCapBegin[ell];
-
- inside = (delta <= len - (pos + i));
- if (inside && delta > 1) {
- int n = 1; // the first character was already compared above
- if (eng->cs) {
- while (n < delta) {
- if (in[pos + curCapBegin[ell] + n]
- != in[pos + i + n])
- break;
- ++n;
- }
- } else {
- while (n < delta) {
- QChar a = in[pos + curCapBegin[ell] + n];
- QChar b = in[pos + i + n];
- if (a.toLower() != b.toLower())
- break;
- ++n;
- }
- }
- inside = (n == delta);
- if (inside)
- needSomeSleep = delta - 1;
- }
- }
-#endif
- }
- }
-
- /*
- We must now update our data structures.
- */
- if (inside) {
-#ifndef QT_NO_REGEXP_CAPTURE
- int *capBegin, *capEnd;
-#endif
- /*
- If the next state was not encountered yet, all
- is fine.
- */
- if ((m = inNextStack[next]) == -1) {
- m = nnext++;
- nextStack[m] = next;
- inNextStack[next] = m;
-#ifndef QT_NO_REGEXP_CAPTURE
- capBegin = nextCapBegin + m * ncap;
- capEnd = nextCapEnd + m * ncap;
-
- /*
- Otherwise, we'll first maintain captures in
- temporary arrays, and decide at the end whether
- it's best to keep the previous capture zones or
- the new ones.
- */
- } else {
- capBegin = tempCapBegin;
- capEnd = tempCapEnd;
-#endif
- }
-
-#ifndef QT_NO_REGEXP_CAPTURE
- /*
- Updating the capture zones is much of a task.
- */
- if (ncap > 0) {
- memcpy(capBegin, curCapBegin + j * ncap, ncap * sizeof(int));
- memcpy(capEnd, curCapEnd + j * ncap, ncap * sizeof(int));
- int c = scur.atom, n = snext.atom;
- int p = -1, q = -1;
- int cap;
-
- /*
- Lemma 1. For any x in the range [0..nf), we
- have f[x].parent < x.
-
- Proof. By looking at startAtom(), it is
- clear that cf < nf holds all the time, and
- thus that f[nf].parent < nf.
- */
-
- /*
- If we are reentering an atom, we empty all
- capture zones inside it.
- */
- if ((q = scur.reenter.value(next)) != 0) {
- QBitArray b(eng->nf, false); // marks q and the atoms nested inside it
- b.setBit(q, true);
- for (int ell = q + 1; ell < eng->nf; ell++) {
- if (b.testBit(eng->f.at(ell).parent)) {
- b.setBit(ell, true);
- cap = eng->f.at(ell).capture;
- if (cap >= 0) {
- capBegin[cap] = EmptyCapture;
- capEnd[cap] = EmptyCapture;
- }
- }
- }
- p = eng->f.at(q).parent;
-
- /*
- Otherwise, close the capture zones we are
- leaving. We are leaving f[c].capture,
- f[f[c].parent].capture,
- f[f[f[c].parent].parent].capture, ...,
- until f[x].capture, with x such that
- f[x].parent is the youngest common ancestor
- for c and n.
-
- We go up along c's and n's ancestry until
- we find x.
- */
- } else {
- p = c;
- q = n;
- while (p != q) {
- if (p > q) {
- cap = eng->f.at(p).capture;
- if (cap >= 0) {
- if (capBegin[cap] == i) {
- capBegin[cap] = EmptyCapture;
- capEnd[cap] = EmptyCapture;
- } else {
- capEnd[cap] = i;
- }
- }
- p = eng->f.at(p).parent;
- } else {
- q = eng->f.at(q).parent;
- }
- }
- }
-
- /*
- In any case, we now open the capture zones
- we are entering. We work upwards from n
- until we reach p (the parent of the atom we
- reenter or the youngest common ancestor).
- */
- while (n > p) {
- cap = eng->f.at(n).capture;
- if (cap >= 0) {
- capBegin[cap] = i;
- capEnd[cap] = EmptyCapture;
- }
- n = eng->f.at(n).parent;
- }
- /*
- If the next state was already in
- nextStack, we must choose carefully which
- capture zones we want to keep.
- */
- if (capBegin == tempCapBegin &&
- isBetterCapture(ncap, capBegin, capEnd, nextCapBegin + m * ncap,
- nextCapEnd + m * ncap)) {
- memcpy(nextCapBegin + m * ncap, capBegin, ncap * sizeof(int));
- memcpy(nextCapEnd + m * ncap, capEnd, ncap * sizeof(int));
- }
- }
-#ifndef QT_NO_REGEXP_BACKREF
- /*
- We are done with updating the capture zones.
- It's now time to put the next state to sleep,
- if it needs to, and to remove it from
- nextStack.
- */
- if (needSomeSleep > 0) {
- QList<int> zzZ(2 + 2 * ncap); // layout: wake-up offset, state, capBegin[ncap], capEnd[ncap]
- zzZ[0] = i + needSomeSleep;
- zzZ[1] = next;
- if (ncap > 0) {
- memcpy(zzZ.data() + 2, capBegin, ncap * sizeof(int));
- memcpy(zzZ.data() + 2 + ncap, capEnd, ncap * sizeof(int));
- }
- inNextStack[nextStack[--nnext]] = -1;
- sleeping.append(zzZ);
- }
-#endif
-#endif
- }
- }
- }
-#ifndef QT_NO_REGEXP_CAPTURE
- /*
- If we reached the final state, hurray! Copy the captured
- zone.
- */
- if (ncap > 0 && (m = inNextStack[QRegExpEngine::FinalState]) != -1) {
- memcpy(capBegin, nextCapBegin + m * ncap, ncap * sizeof(int));
- memcpy(capEnd, nextCapEnd + m * ncap, ncap * sizeof(int));
- }
-#ifndef QT_NO_REGEXP_BACKREF
- /*
- It's time to wake up the sleepers.
- */
- j = 0;
- while (j < sleeping.count()) {
- if (sleeping.at(j)[0] == i) {
- const QList<int> &zzZ = sleeping.at(j);
- int next = zzZ[1];
- const int *capBegin = zzZ.data() + 2;
- const int *capEnd = zzZ.data() + 2 + ncap;
- bool copyOver = true;
-
- if ((m = inNextStack[next]) == -1) {
- m = nnext++;
- nextStack[m] = next;
- inNextStack[next] = m;
- } else {
- copyOver = isBetterCapture(ncap, nextCapBegin + m * ncap, nextCapEnd + m * ncap,
- capBegin, capEnd);
- }
- if (copyOver) {
- memcpy(nextCapBegin + m * ncap, capBegin, ncap * sizeof(int));
- memcpy(nextCapEnd + m * ncap, capEnd, ncap * sizeof(int));
- }
-
- sleeping.removeAt(j); // j is not advanced: the following entry moved into slot j
- } else {
- ++j;
- }
- }
-#endif
-#endif
- for (j = 0; j < nnext; j++)
- inNextStack[nextStack[j]] = -1; // reset the lookup table for the next round
-
- // avoid needless iteration that confuses oneTestMatchedLen
- if (nnext == 1 && nextStack[0] == QRegExpEngine::FinalState
-#ifndef QT_NO_REGEXP_BACKREF
- && sleeping.isEmpty()
-#endif
- )
- stop = true;
-
- qSwap(curStack, nextStack);
-#ifndef QT_NO_REGEXP_CAPTURE
- qSwap(curCapBegin, nextCapBegin);
- qSwap(curCapEnd, nextCapEnd);
-#endif
- ncur = nnext;
- nnext = 0;
- ++i;
- }
-
-#ifndef QT_NO_REGEXP_BACKREF
- /*
- If minimal matching is enabled, we might have some sleepers
- left.
- */
- if (!sleeping.isEmpty())
- sleeping.clear();
-#endif
-
- oneTestMatchedLen = i - 1;
- return (matchLen >= 0);
-}
-
-#ifndef QT_NO_REGEXP_CCLASS
-
-// Creates an empty, positive character class: no categories, no ranges,
-// and (with optimizations) no first occurrence for any character.
-QRegExpCharClass::QRegExpCharClass()
-    : c(0),
-      n(false)
-{
-#ifndef QT_NO_REGEXP_OPTIM
-    occ1.fill(NoOccurrence, NumBadChars);
-#endif
-}
-
-// Drops all categories and ranges and makes the class positive again.
-// The occ1 table is left untouched.
-void QRegExpCharClass::clear()
-{
-    r.clear();
-    c = 0;
-    n = false;
-}
-
-// Sets whether the class is negated. With optimizations enabled, every
-// entry of occ1 is reset to 0.
-void QRegExpCharClass::setNegative(bool negative)
-{
-#ifndef QT_NO_REGEXP_OPTIM
-    occ1.fill(0, NumBadChars);
-#endif
-    n = negative;
-}
-
-// Adds the character categories whose flags are set in cats. Bits that
-// do not correspond to one of the categories listed below are ignored.
-// With optimizations enabled, every entry of occ1 is reset to 0.
-void QRegExpCharClass::addCategories(uint cats)
-{
-    static const int knownCategories =
-            // marks
-            FLAG(QChar::Mark_NonSpacing) |
-            FLAG(QChar::Mark_SpacingCombining) |
-            FLAG(QChar::Mark_Enclosing) |
-            // numbers
-            FLAG(QChar::Number_DecimalDigit) |
-            FLAG(QChar::Number_Letter) |
-            FLAG(QChar::Number_Other) |
-            // separators
-            FLAG(QChar::Separator_Space) |
-            FLAG(QChar::Separator_Line) |
-            FLAG(QChar::Separator_Paragraph) |
-            // others
-            FLAG(QChar::Other_Control) |
-            FLAG(QChar::Other_Format) |
-            FLAG(QChar::Other_Surrogate) |
-            FLAG(QChar::Other_PrivateUse) |
-            FLAG(QChar::Other_NotAssigned) |
-            // letters
-            FLAG(QChar::Letter_Uppercase) |
-            FLAG(QChar::Letter_Lowercase) |
-            FLAG(QChar::Letter_Titlecase) |
-            FLAG(QChar::Letter_Modifier) |
-            FLAG(QChar::Letter_Other) |
-            // punctuation
-            FLAG(QChar::Punctuation_Connector) |
-            FLAG(QChar::Punctuation_Dash) |
-            FLAG(QChar::Punctuation_Open) |
-            FLAG(QChar::Punctuation_Close) |
-            FLAG(QChar::Punctuation_InitialQuote) |
-            FLAG(QChar::Punctuation_FinalQuote) |
-            FLAG(QChar::Punctuation_Other) |
-            // symbols
-            FLAG(QChar::Symbol_Math) |
-            FLAG(QChar::Symbol_Currency) |
-            FLAG(QChar::Symbol_Modifier) |
-            FLAG(QChar::Symbol_Other);
-
-    c |= (knownCategories & cats);
-#ifndef QT_NO_REGEXP_OPTIM
-    occ1.fill(0, NumBadChars);
-#endif
-}
-
-// Adds the inclusive range [from, to] to the class; the bounds are
-// swapped if given in the wrong order. With optimizations enabled, the
-// occ1 entries the range can hit (indices taken modulo NumBadChars) are
-// set to 0.
-void QRegExpCharClass::addRange(ushort from, ushort to)
-{
-    if (from > to)
-        qSwap(from, to);
-
-    const int last = r.size();
-    r.resize(last + 1);
-    QRegExpCharClassRange &added = r[last];
-    added.from = from;
-    added.len = to - from + 1;
-
-#ifndef QT_NO_REGEXP_OPTIM
-    if (to - from >= NumBadChars) {
-        // The range covers every slot of the table.
-        occ1.fill(0, NumBadChars);
-    } else {
-        const int lowSlot = from % NumBadChars;
-        const int highSlot = to % NumBadChars;
-        if (lowSlot <= highSlot) {
-            for (int slot = lowSlot; slot <= highSlot; ++slot)
-                occ1[slot] = 0;
-        } else {
-            // The range wraps around the end of the table.
-            for (int slot = 0; slot <= highSlot; ++slot)
-                occ1[slot] = 0;
-            for (int slot = lowSlot; slot < NumBadChars; ++slot)
-                occ1[slot] = 0;
-        }
-    }
-#endif
-}
-
-// Returns whether ch belongs to the class. A character is a member if its
-// category or one of the ranges contains it; the answer is inverted for
-// a negated class.
-bool QRegExpCharClass::in(QChar ch) const
-{
-#ifndef QT_NO_REGEXP_OPTIM
-    // Quick rejection through the first-occurrence table.
-    if (occ1.at(BadChar(ch)) == NoOccurrence)
-        return n;
-#endif
-
-    const bool categoryHit = (c != 0) && ((c & FLAG(ch.category())) != 0);
-    if (categoryHit)
-        return !n;
-
-    const int uc = ch.unicode();
-    for (const QRegExpCharClassRange &range : r) {
-        // The unsigned comparison checks from <= uc < from + len in one go.
-        if (uint(uc - range.from) < uint(range.len))
-            return !n;
-    }
-    return n;
-}
-
-#if defined(QT_DEBUG)
-/*
-    Debug helper: prints the polarity, the category mask and every range
-    of this character class using qDebug().
-*/
-void QRegExpCharClass::dump() const
-{
-    const char *polarity = n ? "nega" : "posi";
-    qDebug(" %stive character class", polarity);
-#ifndef QT_NO_REGEXP_CCLASS
-    if (c != 0)
-        qDebug(" categories 0x%.8x", c);
-#endif
-    for (const QRegExpCharClassRange &range : r)
-        qDebug(" 0x%.4x through 0x%.4x", range.from, range.from + range.len - 1);
-}
-#endif
-#endif
-
-/*
-    Constructs an empty box attached to \a engine: no skip anchors, a
-    minimum length of zero and, when optimizations are enabled, zeroed
-    heuristics with every occ1 slot set to NoOccurrence.
-*/
-QRegExpEngine::Box::Box(QRegExpEngine *engine)
-    : eng(engine), skipanchors(0)
-#ifndef QT_NO_REGEXP_OPTIM
-    , earlyStart(0), lateStart(0), maxl(0)
-#endif
-{
-    minl = 0;
-#ifndef QT_NO_REGEXP_OPTIM
-    occ1.fill(NoOccurrence, NumBadChars);
-#endif
-}
-
-/*
-    Member-wise copy of \a b into this box. Returns a reference to this
-    box.
-*/
-QRegExpEngine::Box &QRegExpEngine::Box::operator=(const Box &b)
-{
-    eng = b.eng;
-
-    // Left and right state sets and their anchors.
-    ls = b.ls;
-    lanchors = b.lanchors;
-    rs = b.rs;
-    ranchors = b.ranchors;
-    skipanchors = b.skipanchors;
-
-    minl = b.minl;
-#ifndef QT_NO_REGEXP_OPTIM
-    // Optimization heuristics.
-    maxl = b.maxl;
-    earlyStart = b.earlyStart;
-    lateStart = b.lateStart;
-    str = b.str;
-    leftStr = b.leftStr;
-    rightStr = b.rightStr;
-    occ1 = b.occ1;
-#endif
-    return *this;
-}
-
-/*
-    Turns this box into one that matches exactly the character \a ch:
-    a single engine state serves as both the left and the right state,
-    and the box is exactly one character long.
-*/
-void QRegExpEngine::Box::set(QChar ch)
-{
-    ls.resize(1);
-    ls[0] = eng->createState(ch);
-    rs = ls;
-    minl = 1;
-#ifndef QT_NO_REGEXP_OPTIM
-    maxl = 1;
-    // The only possible match is the character itself.
-    str = ch;
-    leftStr = ch;
-    rightStr = ch;
-    occ1[BadChar(ch)] = 0;
-#endif
-}
-
-/*
-    Turns this box into one that matches a single character taken from
-    the character class \a cc.
-*/
-void QRegExpEngine::Box::set(const QRegExpCharClass &cc)
-{
-    ls.resize(1);
-    ls[0] = eng->createState(cc);
-    rs = ls;
-    minl = 1;
-#ifndef QT_NO_REGEXP_OPTIM
-    occ1 = cc.firstOccurrence();
-    maxl = 1;
-#endif
-}
-
-#ifndef QT_NO_REGEXP_BACKREF
-/*
-    Turns this box into one that matches the back-reference number
-    \a bref. The box has a minimum length of zero and, when optimizations
-    are enabled, an unbounded maximum length.
-*/
-void QRegExpEngine::Box::set(int bref)
-{
-    ls.resize(1);
-    ls[0] = eng->createState(bref);
-    rs = ls;
-
-    const bool inRange = bref >= 1 && bref <= MaxBackRefs;
-    if (inRange)
-        skipanchors = Anchor_BackRef0Empty << bref;
-
-    minl = 0;
-#ifndef QT_NO_REGEXP_OPTIM
-    maxl = InftyLen;
-#endif
-}
-#endif
-
-/*
-    Concatenates box \a b after this box.
-
-    Transitions are added from this box's right states to b's left
-    states, anchors are merged, and the left/right state sets, the
-    length bounds (minl, maxl), the string heuristics (str, leftStr,
-    rightStr, earlyStart, lateStart), the occ1 table and skipanchors are
-    updated to describe the concatenation.
-
-    The statement order matters: the heuristics below read the old
-    values of minl and maxl before they are updated.
-*/
-void QRegExpEngine::Box::cat(const Box &b)
-{
- eng->addCatTransitions(rs, b.ls);
- addAnchorsToEngine(b);
-    // This box has a minimum length of zero, so b's left states are also
-    // left states of the result (prefixed by this box's skip anchors).
- if (minl == 0) {
- lanchors.insert(b.lanchors);
- if (skipanchors != 0) {
- for (int i = 0; i < b.ls.size(); i++) {
- int a = eng->anchorConcatenation(lanchors.value(b.ls.at(i), 0), skipanchors);
- lanchors.insert(b.ls.at(i), a);
- }
- }
- mergeInto(&ls, b.ls);
- }
-    // Symmetric case: b has a minimum length of zero, so this box's right
-    // states stay right states; otherwise b's right states replace them.
- if (b.minl == 0) {
- ranchors.insert(b.ranchors);
- if (b.skipanchors != 0) {
- for (int i = 0; i < rs.size(); i++) {
- int a = eng->anchorConcatenation(ranchors.value(rs.at(i), 0), b.skipanchors);
- ranchors.insert(rs.at(i), a);
- }
- }
- mergeInto(&rs, b.rs);
- } else {
- ranchors = b.ranchors;
- rs = b.rs;
- }
-
-#ifndef QT_NO_REGEXP_OPTIM
-    // Keep the longest candidate string: either the one spanning the
-    // junction (rightStr + b.leftStr) or b's own string.
- if (maxl != InftyLen) {
- if (rightStr.length() + b.leftStr.length() >
- qMax(str.length(), b.str.length())) {
- earlyStart = minl - rightStr.length();
- lateStart = maxl - rightStr.length();
- str = rightStr + b.leftStr;
- } else if (b.str.length() > str.length()) {
- earlyStart = minl + b.earlyStart;
- lateStart = maxl + b.lateStart;
- str = b.str;
- }
- }
-
-    // leftStr spans this whole box, so it can be extended with b's leftStr.
- if (leftStr.length() == maxl)
- leftStr += b.leftStr;
-
-    // Likewise rightStr is extended only when b.rightStr spans all of b.
- if (b.rightStr.length() == b.maxl) {
- rightStr += b.rightStr;
- } else {
- rightStr = b.rightStr;
- }
-
- if (maxl == InftyLen || b.maxl == InftyLen) {
- maxl = InftyLen;
- } else {
- maxl += b.maxl;
- }
-
-    // Entries of b.occ1 are shifted by this box's (old) minimum length.
- for (int i = 0; i < NumBadChars; i++) {
- if (b.occ1.at(i) != NoOccurrence && minl + b.occ1.at(i) < occ1.at(i))
- occ1[i] = minl + b.occ1.at(i);
- }
-#endif
-
- minl += b.minl;
-    // Skip anchors are kept only while the combined box can still have a
-    // length of zero.
- if (minl == 0)
- skipanchors = eng->anchorConcatenation(skipanchors, b.skipanchors);
- else
- skipanchors = 0;
-}
-
-/*
-    Turns this box into the alternation of itself and \a b: the state
-    sets and anchors are merged, the length bounds become the widest of
-    both, and the string heuristics are dropped.
-*/
-void QRegExpEngine::Box::orx(const Box &b)
-{
-    mergeInto(&ls, b.ls);
-    lanchors.insert(b.lanchors);
-    mergeInto(&rs, b.rs);
-    ranchors.insert(b.ranchors);
-
-    if (b.minl == 0) {
-        skipanchors = (minl == 0) ? eng->anchorAlternation(skipanchors, b.skipanchors)
-                                  : b.skipanchors;
-    }
-
-#ifndef QT_NO_REGEXP_OPTIM
-    // Per slot, keep the smaller of the two occ1 entries.
-    for (int slot = 0; slot < NumBadChars; ++slot) {
-        const int other = b.occ1.at(slot);
-        if (occ1.at(slot) > other)
-            occ1[slot] = other;
-    }
-    str = QString();
-    leftStr = QString();
-    rightStr = QString();
-    earlyStart = 0;
-    lateStart = 0;
-    maxl = qMax(maxl, b.maxl);
-#endif
-    minl = qMin(minl, b.minl);
-}
-
-/*
-    Applies the one-or-more quantifier to this box by adding transitions
-    from its right states back to its left states. \a atom is forwarded
-    to the engine only when capturing is compiled in.
-*/
-void QRegExpEngine::Box::plus(int atom)
-{
-#ifdef QT_NO_REGEXP_CAPTURE
-    Q_UNUSED(atom);
-    eng->addCatTransitions(rs, ls);
-#else
-    eng->addPlusTransitions(rs, ls, atom);
-#endif
-    addAnchorsToEngine(*this);
-#ifndef QT_NO_REGEXP_OPTIM
-    // The box may now repeat without bound.
-    maxl = InftyLen;
-#endif
-}
-
-/*
-    Makes this box optional: its minimum length becomes zero, its skip
-    anchors are cleared and the string heuristics are dropped.
-*/
-void QRegExpEngine::Box::opt()
-{
-    minl = 0;
-    skipanchors = 0;
-#ifndef QT_NO_REGEXP_OPTIM
-    str = QString();
-    leftStr = QString();
-    rightStr = QString();
-    earlyStart = 0;
-    lateStart = 0;
-#endif
-}
-
-/*
-    Appends the anchor \a a to this box: the anchor is concatenated to
-    the right anchors of every right state and, if the box has a minimum
-    length of zero, to the skip anchors as well. Does nothing if \a a
-    is 0.
-*/
-void QRegExpEngine::Box::catAnchor(int a)
-{
-    if (a == 0)
-        return;
-
-    for (int i = 0; i < rs.size(); i++) {
-        // Use a fresh local for each state, as Box::cat() does. Writing the
-        // result back into 'a' would leak the anchors of earlier right
-        // states into later ones and into skipanchors.
-        const int combined = eng->anchorConcatenation(ranchors.value(rs.at(i), 0), a);
-        ranchors.insert(rs.at(i), combined);
-    }
-    if (minl == 0)
-        skipanchors = eng->anchorConcatenation(skipanchors, a);
-}
-
-#ifndef QT_NO_REGEXP_OPTIM
-/*
-    Copies the heuristics gathered in this box (good string, start
-    bounds, minimum length and occ1 table) into the engine and lets the
-    engine choose which heuristic to use.
-*/
-void QRegExpEngine::Box::setupHeuristics()
-{
-    eng->goodEarlyStart = earlyStart;
-    eng->goodLateStart = lateStart;
-    eng->goodStr = eng->cs ? str : str.toLower();
-
-    eng->minl = minl;
-    if (!eng->cs) {
-        // Case-insensitive matching: every occ1 slot is set to 0.
-        eng->occ1.fill(0, NumBadChars);
-    } else {
-        /*
-            A regular expression such as 112|1 has occ1['2'] = 2 and
-            minl = 1 at this point. An entry of occ1 has to be at most
-            minl or infinity for the rest of the algorithm to go well.
-
-            The normalization is done here rather than in Box::orx()
-            because sometimes things improve by themselves. Consider for
-            example (112|1)34.
-        */
-        for (int slot = 0; slot < NumBadChars; ++slot) {
-            const bool known = occ1.at(slot) != NoOccurrence;
-            if (known && occ1.at(slot) >= minl)
-                occ1[slot] = minl;
-        }
-        eng->occ1 = occ1;
-    }
-
-    eng->heuristicallyChooseHeuristic();
-}
-#endif
-
-#if defined(QT_DEBUG)
-/*
-    Debug helper: prints the minimum length, the left and right states
-    (with their anchors, if any) and the skip anchors of this box.
-*/
-void QRegExpEngine::Box::dump() const
-{
-    qDebug("Box of at least %d character%s", minl, minl == 1 ? "" : "s");
-
-    qDebug(" Left states:");
-    for (int k = 0; k < ls.size(); ++k) {
-        if (lanchors.value(ls[k], 0) != 0)
-            qDebug(" %d [anchors 0x%.8x]", ls[k], lanchors[ls[k]]);
-        else
-            qDebug(" %d", ls[k]);
-    }
-
-    qDebug(" Right states:");
-    for (int k = 0; k < rs.size(); ++k) {
-        if (ranchors.value(rs[k], 0) != 0)
-            qDebug(" %d [anchors 0x%.8x]", rs[k], ranchors[rs[k]]);
-        else
-            qDebug(" %d", rs[k]);
-    }
-
-    qDebug(" Skip anchors: 0x%.8x", skipanchors);
-}
-#endif
-
-/*
-    For every pair made of a right state of this box and a left state of
-    \a to, registers in the engine the concatenation of the right
-    state's anchors with the left state's anchors.
-*/
-void QRegExpEngine::Box::addAnchorsToEngine(const Box &to) const
-{
-    const int targetCount = to.ls.size();
-    for (int t = 0; t < targetCount; ++t) {
-        const int leftAnchors = to.lanchors.value(to.ls.at(t), 0);
-        for (int s = 0; s < rs.size(); ++s) {
-            const int rightAnchors = ranchors.value(rs.at(s), 0);
-            const int a = eng->anchorConcatenation(rightAnchors, leftAnchors);
-            eng->addAnchors(rs[s], to.ls[t], a);
-        }
-    }
-}
-
-#ifndef QT_NO_REGEXP_CCLASS
-// Lookup table for the XML Schema extensions: maps a block name to the
-// first and last code point of its range.
-// This is a plain array, not a hash. It must stay sorted by name (in
-// qstrcmp() order) because it is searched with std::lower_bound() through
-// the operator< overloads defined after it.
-// Each name, including its terminating NUL, has to fit into name[40].
-static const struct CategoriesRangeMapEntry {
- const char name[40];
- uint first, second;
-} categoriesRangeMap[] = {
- { "AegeanNumbers", 0x10100, 0x1013F },
- { "AlphabeticPresentationForms", 0xFB00, 0xFB4F },
- { "AncientGreekMusicalNotation", 0x1D200, 0x1D24F },
- { "AncientGreekNumbers", 0x10140, 0x1018F },
- { "Arabic", 0x0600, 0x06FF },
- { "ArabicPresentationForms-A", 0xFB50, 0xFDFF },
- { "ArabicPresentationForms-B", 0xFE70, 0xFEFF },
- { "ArabicSupplement", 0x0750, 0x077F },
- { "Armenian", 0x0530, 0x058F },
- { "Arrows", 0x2190, 0x21FF },
- { "BasicLatin", 0x0000, 0x007F },
- { "Bengali", 0x0980, 0x09FF },
- { "BlockElements", 0x2580, 0x259F },
- { "Bopomofo", 0x3100, 0x312F },
- { "BopomofoExtended", 0x31A0, 0x31BF },
- { "BoxDrawing", 0x2500, 0x257F },
- { "BraillePatterns", 0x2800, 0x28FF },
- { "Buginese", 0x1A00, 0x1A1F },
- { "Buhid", 0x1740, 0x175F },
- { "ByzantineMusicalSymbols", 0x1D000, 0x1D0FF },
- { "CJKCompatibility", 0x3300, 0x33FF },
- { "CJKCompatibilityForms", 0xFE30, 0xFE4F },
- { "CJKCompatibilityIdeographs", 0xF900, 0xFAFF },
- { "CJKCompatibilityIdeographsSupplement", 0x2F800, 0x2FA1F },
- { "CJKRadicalsSupplement", 0x2E80, 0x2EFF },
- { "CJKStrokes", 0x31C0, 0x31EF },
- { "CJKSymbolsandPunctuation", 0x3000, 0x303F },
- { "CJKUnifiedIdeographs", 0x4E00, 0x9FFF },
- { "CJKUnifiedIdeographsExtensionA", 0x3400, 0x4DB5 },
- { "CJKUnifiedIdeographsExtensionB", 0x20000, 0x2A6DF },
- { "Cherokee", 0x13A0, 0x13FF },
- { "CombiningDiacriticalMarks", 0x0300, 0x036F },
- { "CombiningDiacriticalMarksSupplement", 0x1DC0, 0x1DFF },
- { "CombiningHalfMarks", 0xFE20, 0xFE2F },
- { "CombiningMarksforSymbols", 0x20D0, 0x20FF },
- { "ControlPictures", 0x2400, 0x243F },
- { "Coptic", 0x2C80, 0x2CFF },
- { "CurrencySymbols", 0x20A0, 0x20CF },
- { "CypriotSyllabary", 0x10800, 0x1083F },
- { "Cyrillic", 0x0400, 0x04FF },
- { "CyrillicSupplement", 0x0500, 0x052F },
- { "Deseret", 0x10400, 0x1044F },
- { "Devanagari", 0x0900, 0x097F },
- { "Dingbats", 0x2700, 0x27BF },
- { "EnclosedAlphanumerics", 0x2460, 0x24FF },
- { "EnclosedCJKLettersandMonths", 0x3200, 0x32FF },
- { "Ethiopic", 0x1200, 0x137F },
- { "EthiopicExtended", 0x2D80, 0x2DDF },
- { "EthiopicSupplement", 0x1380, 0x139F },
- { "GeneralPunctuation", 0x2000, 0x206F },
- { "GeometricShapes", 0x25A0, 0x25FF },
- { "Georgian", 0x10A0, 0x10FF },
- { "GeorgianSupplement", 0x2D00, 0x2D2F },
- { "Glagolitic", 0x2C00, 0x2C5F },
- { "Gothic", 0x10330, 0x1034F },
- { "Greek", 0x0370, 0x03FF },
- { "GreekExtended", 0x1F00, 0x1FFF },
- { "Gujarati", 0x0A80, 0x0AFF },
- { "Gurmukhi", 0x0A00, 0x0A7F },
- { "HalfwidthandFullwidthForms", 0xFF00, 0xFFEF },
- { "HangulCompatibilityJamo", 0x3130, 0x318F },
- { "HangulJamo", 0x1100, 0x11FF },
- { "HangulSyllables", 0xAC00, 0xD7A3 },
- { "Hanunoo", 0x1720, 0x173F },
- { "Hebrew", 0x0590, 0x05FF },
- { "Hiragana", 0x3040, 0x309F },
- { "IPAExtensions", 0x0250, 0x02AF },
- { "IdeographicDescriptionCharacters", 0x2FF0, 0x2FFF },
- { "Kanbun", 0x3190, 0x319F },
- { "KangxiRadicals", 0x2F00, 0x2FDF },
- { "Kannada", 0x0C80, 0x0CFF },
- { "Katakana", 0x30A0, 0x30FF },
- { "KatakanaPhoneticExtensions", 0x31F0, 0x31FF },
- { "Kharoshthi", 0x10A00, 0x10A5F },
- { "Khmer", 0x1780, 0x17FF },
- { "KhmerSymbols", 0x19E0, 0x19FF },
- { "Lao", 0x0E80, 0x0EFF },
- { "Latin-1Supplement", 0x0080, 0x00FF },
- { "LatinExtended-A", 0x0100, 0x017F },
- { "LatinExtended-B", 0x0180, 0x024F },
- { "LatinExtendedAdditional", 0x1E00, 0x1EFF },
- { "LetterlikeSymbols", 0x2100, 0x214F },
- { "Limbu", 0x1900, 0x194F },
- { "LinearBIdeograms", 0x10080, 0x100FF },
- { "LinearBSyllabary", 0x10000, 0x1007F },
- { "Malayalam", 0x0D00, 0x0D7F },
- { "MathematicalAlphanumericSymbols", 0x1D400, 0x1D7FF },
- { "MathematicalOperators", 0x2200, 0x22FF },
- { "MiscellaneousMathematicalSymbols-A", 0x27C0, 0x27EF },
- { "MiscellaneousMathematicalSymbols-B", 0x2980, 0x29FF },
- { "MiscellaneousSymbols", 0x2600, 0x26FF },
- { "MiscellaneousSymbolsandArrows", 0x2B00, 0x2BFF },
- { "MiscellaneousTechnical", 0x2300, 0x23FF },
- { "ModifierToneLetters", 0xA700, 0xA71F },
- { "Mongolian", 0x1800, 0x18AF },
- { "MusicalSymbols", 0x1D100, 0x1D1FF },
- { "Myanmar", 0x1000, 0x109F },
- { "NewTaiLue", 0x1980, 0x19DF },
- { "NumberForms", 0x2150, 0x218F },
- { "Ogham", 0x1680, 0x169F },
- { "OldItalic", 0x10300, 0x1032F },
- { "OldPersian", 0x103A0, 0x103DF },
- { "OpticalCharacterRecognition", 0x2440, 0x245F },
- { "Oriya", 0x0B00, 0x0B7F },
- { "Osmanya", 0x10480, 0x104AF },
- { "PhoneticExtensions", 0x1D00, 0x1D7F },
- { "PhoneticExtensionsSupplement", 0x1D80, 0x1DBF },
- { "PrivateUse", 0xE000, 0xF8FF },
- { "Runic", 0x16A0, 0x16FF },
- { "Shavian", 0x10450, 0x1047F },
- { "Sinhala", 0x0D80, 0x0DFF },
- { "SmallFormVariants", 0xFE50, 0xFE6F },
- { "SpacingModifierLetters", 0x02B0, 0x02FF },
- { "Specials", 0xFFF0, 0xFFFF },
- { "SuperscriptsandSubscripts", 0x2070, 0x209F },
- { "SupplementalArrows-A", 0x27F0, 0x27FF },
- { "SupplementalArrows-B", 0x2900, 0x297F },
- { "SupplementalMathematicalOperators", 0x2A00, 0x2AFF },
- { "SupplementalPunctuation", 0x2E00, 0x2E7F },
- { "SupplementaryPrivateUseArea-A", 0xF0000, 0xFFFFF },
- { "SupplementaryPrivateUseArea-B", 0x100000, 0x10FFFF },
- { "SylotiNagri", 0xA800, 0xA82F },
- { "Syriac", 0x0700, 0x074F },
- { "Tagalog", 0x1700, 0x171F },
- { "Tagbanwa", 0x1760, 0x177F },
- { "Tags", 0xE0000, 0xE007F },
- { "TaiLe", 0x1950, 0x197F },
- { "TaiXuanJingSymbols", 0x1D300, 0x1D35F },
- { "Tamil", 0x0B80, 0x0BFF },
- { "Telugu", 0x0C00, 0x0C7F },
- { "Thaana", 0x0780, 0x07BF },
- { "Thai", 0x0E00, 0x0E7F },
- { "Tibetan", 0x0F00, 0x0FFF },
- { "Tifinagh", 0x2D30, 0x2D7F },
- { "Ugaritic", 0x10380, 0x1039F },
- { "UnifiedCanadianAboriginalSyllabics", 0x1400, 0x167F },
- { "VariationSelectors", 0xFE00, 0xFE0F },
- { "VariationSelectorsSupplement", 0xE0100, 0xE01EF },
- { "VerticalForms", 0xFE10, 0xFE1F },
- { "YiRadicals", 0xA490, 0xA4CF },
- { "YiSyllables", 0xA000, 0xA48F },
- { "YijingHexagramSymbols", 0x4DC0, 0x4DFF }
-};
-
-// Ordering helpers for CategoriesRangeMapEntry: entries are ordered by
-// name using qstrcmp(). The mixed overloads allow a plain C string to be
-// compared against an entry.
-inline bool operator<(const CategoriesRangeMapEntry &entry1, const CategoriesRangeMapEntry &entry2)
-{
-    const int order = qstrcmp(entry1.name, entry2.name);
-    return order < 0;
-}
-
-inline bool operator<(const char *name, const CategoriesRangeMapEntry &entry)
-{
-    const int order = qstrcmp(name, entry.name);
-    return order < 0;
-}
-
-inline bool operator<(const CategoriesRangeMapEntry &entry, const char *name)
-{
-    const int order = qstrcmp(entry.name, name);
-    return order < 0;
-}
-#endif // QT_NO_REGEXP_CCLASS
-
-/*
-    Returns the UTF-16 code unit at the current input position and
-    advances past it, or EOS once the whole input has been consumed.
-*/
-int QRegExpEngine::getChar()
-{
-    if (yyPos == yyLen)
-        return EOS;
-    return yyIn[yyPos++].unicode();
-}
-
-/*
-    Tokenizes an escape sequence. On entry yyCh holds the character that
-    follows the backslash; on return yyCh holds the first character after
-    the whole escape.
-
-    Returns Tok_Char | value for a literal character, Tok_CharClass when
-    the escape filled in *yyCharClass, Tok_Word / Tok_NonWord for \b and
-    \B, or Tok_BackRef | number for a back-reference. Problems are
-    reported through error().
-*/
-int QRegExpEngine::getEscape()
-{
-#ifndef QT_NO_REGEXP_ESCAPE
- const char tab[] = "afnrtv"; // no b, as \b means word boundary
- const char backTab[] = "\a\f\n\r\t\v";
- ushort low;
- int i;
-#endif
- ushort val;
- int prevCh = yyCh;
-
-    // A backslash at the very end of the pattern.
- if (prevCh == EOS) {
- error(RXERR_END);
- return Tok_Char | '\\';
- }
- yyCh = getChar();
-#ifndef QT_NO_REGEXP_ESCAPE
-    // Simple C-style escapes: tab[] and backTab[] are parallel arrays.
- if ((prevCh & ~0xff) == 0) {
- const char *p = strchr(tab, prevCh);
- if (p != nullptr)
- return Tok_Char | backTab[p - tab];
- }
-#endif
-
- switch (prevCh) {
-#ifndef QT_NO_REGEXP_ESCAPE
- case '0':
-        // \0ooo: up to three octal digits; values above 0377 are an error.
- val = 0;
- for (i = 0; i < 3; i++) {
- if (yyCh >= '0' && yyCh <= '7')
- val = (val << 3) | (yyCh - '0');
- else
- break;
- yyCh = getChar();
- }
- if ((val & ~0377) != 0)
- error(RXERR_OCTAL);
- return Tok_Char | val;
-#endif
-#ifndef QT_NO_REGEXP_ESCAPE
- case 'B':
- return Tok_NonWord;
-#endif
-#ifndef QT_NO_REGEXP_CCLASS
- case 'D':
- // see QChar::isDigit()
- yyCharClass->addCategories(uint(-1) ^ FLAG(QChar::Number_DecimalDigit));
- return Tok_CharClass;
- case 'S':
- // see QChar::isSpace()
- yyCharClass->addCategories(uint(-1) ^ (FLAG(QChar::Separator_Space) |
- FLAG(QChar::Separator_Line) |
- FLAG(QChar::Separator_Paragraph) |
- FLAG(QChar::Other_Control)));
-        // Other_Control was excluded above; add back the control characters
-        // that \s does not match (everything except 0x09-0x0d and 0x85).
- yyCharClass->addRange(0x0000, 0x0008);
- yyCharClass->addRange(0x000e, 0x001f);
- yyCharClass->addRange(0x007f, 0x0084);
- yyCharClass->addRange(0x0086, 0x009f);
- return Tok_CharClass;
- case 'W':
- // see QChar::isLetterOrNumber() and QChar::isMark()
- yyCharClass->addCategories(uint(-1) ^ (FLAG(QChar::Mark_NonSpacing) |
- FLAG(QChar::Mark_SpacingCombining) |
- FLAG(QChar::Mark_Enclosing) |
- FLAG(QChar::Number_DecimalDigit) |
- FLAG(QChar::Number_Letter) |
- FLAG(QChar::Number_Other) |
- FLAG(QChar::Letter_Uppercase) |
- FLAG(QChar::Letter_Lowercase) |
- FLAG(QChar::Letter_Titlecase) |
- FLAG(QChar::Letter_Modifier) |
- FLAG(QChar::Letter_Other) |
- FLAG(QChar::Punctuation_Connector)));
-        // Punctuation_Connector was excluded above; the ranges below add
-        // individual characters back. 0x005f ('_') is not among them.
-        // NOTE(review): the singleton 0x2040 is already covered by the
-        // range 0x203f-0x2040 on the line before it.
- yyCharClass->addRange(0x203f, 0x2040);
- yyCharClass->addSingleton(0x2040);
- yyCharClass->addSingleton(0x2054);
- yyCharClass->addSingleton(0x30fb);
- yyCharClass->addRange(0xfe33, 0xfe34);
- yyCharClass->addRange(0xfe4d, 0xfe4f);
- yyCharClass->addSingleton(0xff3f);
- yyCharClass->addSingleton(0xff65);
- return Tok_CharClass;
-#endif
-#ifndef QT_NO_REGEXP_ESCAPE
- case 'b':
- return Tok_Word;
-#endif
-#ifndef QT_NO_REGEXP_CCLASS
- case 'd':
- // see QChar::isDigit()
- yyCharClass->addCategories(FLAG(QChar::Number_DecimalDigit));
- return Tok_CharClass;
- case 's':
- // see QChar::isSpace()
- yyCharClass->addCategories(FLAG(QChar::Separator_Space) |
- FLAG(QChar::Separator_Line) |
- FLAG(QChar::Separator_Paragraph));
- yyCharClass->addRange(0x0009, 0x000d);
- yyCharClass->addSingleton(0x0085);
- return Tok_CharClass;
- case 'w':
- // see QChar::isLetterOrNumber() and QChar::isMark()
- yyCharClass->addCategories(FLAG(QChar::Mark_NonSpacing) |
- FLAG(QChar::Mark_SpacingCombining) |
- FLAG(QChar::Mark_Enclosing) |
- FLAG(QChar::Number_DecimalDigit) |
- FLAG(QChar::Number_Letter) |
- FLAG(QChar::Number_Other) |
- FLAG(QChar::Letter_Uppercase) |
- FLAG(QChar::Letter_Lowercase) |
- FLAG(QChar::Letter_Titlecase) |
- FLAG(QChar::Letter_Modifier) |
- FLAG(QChar::Letter_Other));
- yyCharClass->addSingleton(0x005f); // '_'
- return Tok_CharClass;
-        // \I, \C and \P (XML Schema extensions only) toggle the polarity of
-        // the class and then fall through to their lower-case counterpart.
- case 'I':
- if (!xmlSchemaExtensions)
- break;
- yyCharClass->setNegative(!yyCharClass->negative());
- Q_FALLTHROUGH();
- case 'i':
- if (xmlSchemaExtensions) {
- yyCharClass->addCategories(FLAG(QChar::Mark_NonSpacing) |
- FLAG(QChar::Mark_SpacingCombining) |
- FLAG(QChar::Mark_Enclosing) |
- FLAG(QChar::Number_DecimalDigit) |
- FLAG(QChar::Number_Letter) |
- FLAG(QChar::Number_Other) |
- FLAG(QChar::Letter_Uppercase) |
- FLAG(QChar::Letter_Lowercase) |
- FLAG(QChar::Letter_Titlecase) |
- FLAG(QChar::Letter_Modifier) |
- FLAG(QChar::Letter_Other));
- yyCharClass->addSingleton(0x003a); // ':'
- yyCharClass->addSingleton(0x005f); // '_'
- yyCharClass->addRange(0x0041, 0x005a); // [A-Z]
- yyCharClass->addRange(0x0061, 0x007a); // [a-z]
- yyCharClass->addRange(0xc0, 0xd6);
- yyCharClass->addRange(0xd8, 0xf6);
- yyCharClass->addRange(0xf8, 0x2ff);
- yyCharClass->addRange(0x370, 0x37d);
- yyCharClass->addRange(0x37f, 0x1fff);
- yyCharClass->addRange(0x200c, 0x200d);
- yyCharClass->addRange(0x2070, 0x218f);
- yyCharClass->addRange(0x2c00, 0x2fef);
- yyCharClass->addRange(0x3001, 0xd7ff);
- yyCharClass->addRange(0xf900, 0xfdcf);
- yyCharClass->addRange(0xfdf0, 0xfffd);
-            // NOTE(review): the casts truncate to 16 bits, so this call
-            // receives (0x0000, 0xffff), not the supplementary planes.
- yyCharClass->addRange((ushort)0x10000, (ushort)0xeffff);
- return Tok_CharClass;
- } else {
- break;
- }
- case 'C':
- if (!xmlSchemaExtensions)
- break;
- yyCharClass->setNegative(!yyCharClass->negative());
- Q_FALLTHROUGH();
- case 'c':
- if (xmlSchemaExtensions) {
- yyCharClass->addCategories(FLAG(QChar::Mark_NonSpacing) |
- FLAG(QChar::Mark_SpacingCombining) |
- FLAG(QChar::Mark_Enclosing) |
- FLAG(QChar::Number_DecimalDigit) |
- FLAG(QChar::Number_Letter) |
- FLAG(QChar::Number_Other) |
- FLAG(QChar::Letter_Uppercase) |
- FLAG(QChar::Letter_Lowercase) |
- FLAG(QChar::Letter_Titlecase) |
- FLAG(QChar::Letter_Modifier) |
- FLAG(QChar::Letter_Other));
- yyCharClass->addSingleton(0x002d); // '-'
- yyCharClass->addSingleton(0x002e); // '.'
- yyCharClass->addSingleton(0x003a); // ':'
- yyCharClass->addSingleton(0x005f); // '_'
- yyCharClass->addSingleton(0xb7);
- yyCharClass->addRange(0x0030, 0x0039); // [0-9]
- yyCharClass->addRange(0x0041, 0x005a); // [A-Z]
- yyCharClass->addRange(0x0061, 0x007a); // [a-z]
- yyCharClass->addRange(0xc0, 0xd6);
- yyCharClass->addRange(0xd8, 0xf6);
- yyCharClass->addRange(0xf8, 0x2ff);
- yyCharClass->addRange(0x370, 0x37d);
- yyCharClass->addRange(0x37f, 0x1fff);
- yyCharClass->addRange(0x200c, 0x200d);
- yyCharClass->addRange(0x2070, 0x218f);
- yyCharClass->addRange(0x2c00, 0x2fef);
- yyCharClass->addRange(0x3001, 0xd7ff);
- yyCharClass->addRange(0xf900, 0xfdcf);
- yyCharClass->addRange(0xfdf0, 0xfffd);
-            // NOTE(review): same 16-bit truncation as in case 'i'.
- yyCharClass->addRange((ushort)0x10000, (ushort)0xeffff);
- yyCharClass->addRange(0x0300, 0x036f);
- yyCharClass->addRange(0x203f, 0x2040);
- return Tok_CharClass;
- } else {
- break;
- }
- case 'P':
- if (!xmlSchemaExtensions)
- break;
- yyCharClass->setNegative(!yyCharClass->negative());
- Q_FALLTHROUGH();
- case 'p':
- if (xmlSchemaExtensions) {
-            // \p{...}: a one- or two-letter general category, or "Is"
-            // followed by a block name from categoriesRangeMap.
- if (yyCh != '{') {
- error(RXERR_CHARCLASS);
- return Tok_CharClass;
- }
-
-            // Collect everything up to the closing '}'.
- QByteArray category;
- yyCh = getChar();
- while (yyCh != '}') {
- if (yyCh == EOS) {
- error(RXERR_END);
- return Tok_CharClass;
- }
- category.append(yyCh);
- yyCh = getChar();
- }
- yyCh = getChar(); // skip closing '}'
-
- int catlen = category.length();
- if (catlen == 1 || catlen == 2) {
-                // First letter selects the major class; an optional second
-                // letter narrows it down to a single category.
- switch (category.at(0)) {
- case 'M':
- if (catlen == 1) {
- yyCharClass->addCategories(FLAG(QChar::Mark_NonSpacing) |
- FLAG(QChar::Mark_SpacingCombining) |
- FLAG(QChar::Mark_Enclosing));
- } else {
- switch (category.at(1)) {
- case 'n': yyCharClass->addCategories(FLAG(QChar::Mark_NonSpacing)); break; // Mn
- case 'c': yyCharClass->addCategories(FLAG(QChar::Mark_SpacingCombining)); break; // Mc
- case 'e': yyCharClass->addCategories(FLAG(QChar::Mark_Enclosing)); break; // Me
- default: error(RXERR_CATEGORY); break;
- }
- }
- break;
- case 'N':
- if (catlen == 1) {
- yyCharClass->addCategories(FLAG(QChar::Number_DecimalDigit) |
- FLAG(QChar::Number_Letter) |
- FLAG(QChar::Number_Other));
- } else {
- switch (category.at(1)) {
- case 'd': yyCharClass->addCategories(FLAG(QChar::Number_DecimalDigit)); break; // Nd
- case 'l': yyCharClass->addCategories(FLAG(QChar::Number_Letter)); break; // Nl
- case 'o': yyCharClass->addCategories(FLAG(QChar::Number_Other)); break; // No
- default: error(RXERR_CATEGORY); break;
- }
- }
- break;
- case 'Z':
- if (catlen == 1) {
- yyCharClass->addCategories(FLAG(QChar::Separator_Space) |
- FLAG(QChar::Separator_Line) |
- FLAG(QChar::Separator_Paragraph));
- } else {
- switch (category.at(1)) {
- case 's': yyCharClass->addCategories(FLAG(QChar::Separator_Space)); break; // Zs
- case 'l': yyCharClass->addCategories(FLAG(QChar::Separator_Line)); break; // Zl
- case 'p': yyCharClass->addCategories(FLAG(QChar::Separator_Paragraph)); break; // Zp
- default: error(RXERR_CATEGORY); break;
- }
- }
- break;
- case 'C':
- if (catlen == 1) {
- yyCharClass->addCategories(FLAG(QChar::Other_Control) |
- FLAG(QChar::Other_Format) |
- FLAG(QChar::Other_Surrogate) |
- FLAG(QChar::Other_PrivateUse) |
- FLAG(QChar::Other_NotAssigned));
- } else {
- switch (category.at(1)) {
- case 'c': yyCharClass->addCategories(FLAG(QChar::Other_Control)); break; // Cc
- case 'f': yyCharClass->addCategories(FLAG(QChar::Other_Format)); break; // Cf
- case 's': yyCharClass->addCategories(FLAG(QChar::Other_Surrogate)); break; // Cs
- case 'o': yyCharClass->addCategories(FLAG(QChar::Other_PrivateUse)); break; // Co
- case 'n': yyCharClass->addCategories(FLAG(QChar::Other_NotAssigned)); break; // Cn
- default: error(RXERR_CATEGORY); break;
- }
- }
- break;
- case 'L':
- if (catlen == 1) {
- yyCharClass->addCategories(FLAG(QChar::Letter_Uppercase) |
- FLAG(QChar::Letter_Lowercase) |
- FLAG(QChar::Letter_Titlecase) |
- FLAG(QChar::Letter_Modifier) |
- FLAG(QChar::Letter_Other));
- } else {
- switch (category.at(1)) {
- case 'u': yyCharClass->addCategories(FLAG(QChar::Letter_Uppercase)); break; // Lu
- case 'l': yyCharClass->addCategories(FLAG(QChar::Letter_Lowercase)); break; // Ll
- case 't': yyCharClass->addCategories(FLAG(QChar::Letter_Titlecase)); break; // Lt
- case 'm': yyCharClass->addCategories(FLAG(QChar::Letter_Modifier)); break; // Lm
- case 'o': yyCharClass->addCategories(FLAG(QChar::Letter_Other)); break; // Lo
- default: error(RXERR_CATEGORY); break;
- }
- }
- break;
- case 'P':
- if (catlen == 1) {
- yyCharClass->addCategories(FLAG(QChar::Punctuation_Connector) |
- FLAG(QChar::Punctuation_Dash) |
- FLAG(QChar::Punctuation_Open) |
- FLAG(QChar::Punctuation_Close) |
- FLAG(QChar::Punctuation_InitialQuote) |
- FLAG(QChar::Punctuation_FinalQuote) |
- FLAG(QChar::Punctuation_Other));
- } else {
- switch (category.at(1)) {
- case 'c': yyCharClass->addCategories(FLAG(QChar::Punctuation_Connector)); break; // Pc
- case 'd': yyCharClass->addCategories(FLAG(QChar::Punctuation_Dash)); break; // Pd
- case 's': yyCharClass->addCategories(FLAG(QChar::Punctuation_Open)); break; // Ps
- case 'e': yyCharClass->addCategories(FLAG(QChar::Punctuation_Close)); break; // Pe
- case 'i': yyCharClass->addCategories(FLAG(QChar::Punctuation_InitialQuote)); break; // Pi
- case 'f': yyCharClass->addCategories(FLAG(QChar::Punctuation_FinalQuote)); break; // Pf
- case 'o': yyCharClass->addCategories(FLAG(QChar::Punctuation_Other)); break; // Po
- default: error(RXERR_CATEGORY); break;
- }
- }
- break;
- case 'S':
- if (catlen == 1) {
- yyCharClass->addCategories(FLAG(QChar::Symbol_Math) |
- FLAG(QChar::Symbol_Currency) |
- FLAG(QChar::Symbol_Modifier) |
- FLAG(QChar::Symbol_Other));
- } else {
- switch (category.at(1)) {
- case 'm': yyCharClass->addCategories(FLAG(QChar::Symbol_Math)); break; // Sm
- case 'c': yyCharClass->addCategories(FLAG(QChar::Symbol_Currency)); break; // Sc
- case 'k': yyCharClass->addCategories(FLAG(QChar::Symbol_Modifier)); break; // Sk
- case 'o': yyCharClass->addCategories(FLAG(QChar::Symbol_Other)); break; // So
- default: error(RXERR_CATEGORY); break;
- }
- }
- break;
- default:
- error(RXERR_CATEGORY);
- break;
- }
- } else if (catlen > 2 && category.at(0) == 'I' && category.at(1) == 's') {
-                // \p{IsBlockName}: binary search of the sorted block table.
- static const int N = sizeof(categoriesRangeMap) / sizeof(categoriesRangeMap[0]);
- const char * const categoryFamily = category.constData() + 2;
- const CategoriesRangeMapEntry *r = std::lower_bound(categoriesRangeMap, categoriesRangeMap + N, categoryFamily);
-                // NOTE(review): first/second are uint but addRange() takes
-                // ushort, so blocks above 0xFFFF are truncated to 16 bits.
- if (r != categoriesRangeMap + N && qstrcmp(r->name, categoryFamily) == 0)
- yyCharClass->addRange(r->first, r->second);
- else
- error(RXERR_CATEGORY);
- } else {
- error(RXERR_CATEGORY);
- }
- return Tok_CharClass;
- } else {
- break;
- }
-#endif
-#ifndef QT_NO_REGEXP_ESCAPE
- case 'x':
-        // \xhhhh: up to four hexadecimal digits, upper or lower case.
- val = 0;
- for (i = 0; i < 4; i++) {
- low = QChar(yyCh).toLower().unicode();
- if (low >= '0' && low <= '9')
- val = (val << 4) | (low - '0');
- else if (low >= 'a' && low <= 'f')
- val = (val << 4) | (low - 'a' + 10);
- else
- break;
- yyCh = getChar();
- }
- return Tok_Char | val;
-#endif
- default:
- break;
- }
-    // \1 through \9, possibly followed by more digits: a back-reference.
- if (prevCh >= '1' && prevCh <= '9') {
-#ifndef QT_NO_REGEXP_BACKREF
- val = prevCh - '0';
- while (yyCh >= '0' && yyCh <= '9') {
- val = (val * 10) + (yyCh - '0');
- yyCh = getChar();
- }
- return Tok_BackRef | val;
-#else
- error(RXERR_DISABLED);
-#endif
- }
-    // Any other escaped character stands for itself.
- return Tok_Char | prevCh;
-}
-
-#ifndef QT_NO_REGEXP_INTERVAL
-/*
-    Reads a decimal repetition count starting at the current character
-    and returns it. Returns \a def if the current character is not a
-    digit. A count that reaches InftyRep is reported as an error and the
-    running value is reset to \a def.
-*/
-int QRegExpEngine::getRep(int def)
-{
-    if (yyCh < '0' || yyCh > '9')
-        return def;
-
-    int rep = 0;
-    while (yyCh >= '0' && yyCh <= '9') {
-        rep = 10 * rep + yyCh - '0';
-        if (rep >= InftyRep) {
-            error(RXERR_REPETITION);
-            rep = def;
-        }
-        yyCh = getChar();
-    }
-    return rep;
-}
-#endif
-
-#ifndef QT_NO_REGEXP_LOOKAHEAD
-/*
-    Advances the tokenizer by \a n characters and reloads yyCh. Does
-    nothing if \a n is not positive.
-*/
-void QRegExpEngine::skipChars(int n)
-{
-    if (n <= 0)
-        return;
-
-    // getChar() consumes one more character, hence n - 1.
-    yyPos += n - 1;
-    yyCh = getChar();
-}
-#endif
-
-/*
-    Records \a msg as the parse error. Only the first error is kept;
-    later calls are ignored.
-*/
-void QRegExpEngine::error(const char *msg)
-{
-    if (!yyError.isEmpty())
-        return;
-    yyError = QLatin1String(msg);
-}
-
-/*
-    Resets the tokenizer so that it reads the \a len characters starting
-    at \a rx, and loads the first character into yyCh.
-*/
-void QRegExpEngine::startTokenizer(const QChar *rx, int len)
-{
-    // The input window must be in place before the first getChar() call.
-    yyIn = rx;
-    yyLen = len;
-    yyPos0 = 0;
-    yyPos = 0;
-    yyCh = getChar();
-
-    yyCharClass.reset(new QRegExpCharClass);
-    yyMinRep = 0;
-    yyMaxRep = 0;
-    yyError = QString();
-}
-
-/*
-    Reads the next token of the pattern and returns it.
-
-    On entry yyCh holds the first character of the token; on return it
-    holds the first character after the token. yyPos0 is set to the
-    position where the token starts. For Tok_Quantifier the bounds are
-    left in yyMinRep / yyMaxRep; for Tok_CharClass the class is left in
-    *yyCharClass. Problems are reported through error().
-*/
-int QRegExpEngine::getToken()
-{
-#ifndef QT_NO_REGEXP_CCLASS
- ushort pendingCh = 0;
- bool charPending;
- bool rangePending;
- int tok;
-#endif
- int prevCh = yyCh;
-
-    // yyPos already points one past yyCh, hence the - 1.
- yyPos0 = yyPos - 1;
-#ifndef QT_NO_REGEXP_CCLASS
- yyCharClass->clear();
-#endif
- yyMinRep = 0;
- yyMaxRep = 0;
- yyCh = getChar();
-
- switch (prevCh) {
- case EOS:
- yyPos0 = yyPos;
- return Tok_Eos;
- case '$':
- return Tok_Dollar;
- case '(':
-        // "(?" introduces (?:...), (?=...) or (?!...); "(?<" is rejected.
- if (yyCh == '?') {
- prevCh = getChar();
- yyCh = getChar();
- switch (prevCh) {
-#ifndef QT_NO_REGEXP_LOOKAHEAD
- case '!':
- return Tok_NegLookahead;
- case '=':
- return Tok_PosLookahead;
-#endif
- case ':':
- return Tok_MagicLeftParen;
- case '<':
- error(RXERR_LOOKBEHIND);
- return Tok_MagicLeftParen;
- default:
- error(RXERR_LOOKAHEAD);
- return Tok_MagicLeftParen;
- }
- } else {
- return Tok_LeftParen;
- }
- case ')':
- return Tok_RightParen;
- case '*':
- yyMinRep = 0;
- yyMaxRep = InftyRep;
- return Tok_Quantifier;
- case '+':
- yyMinRep = 1;
- yyMaxRep = InftyRep;
- return Tok_Quantifier;
- case '.':
-        // '.' is returned as a negated character class with nothing added.
-#ifndef QT_NO_REGEXP_CCLASS
- yyCharClass->setNegative(true);
-#endif
- return Tok_CharClass;
- case '?':
- yyMinRep = 0;
- yyMaxRep = 1;
- return Tok_Quantifier;
- case '[':
-#ifndef QT_NO_REGEXP_CCLASS
- if (yyCh == '^') {
- yyCharClass->setNegative(true);
- yyCh = getChar();
- }
-        // charPending: pendingCh holds a character not yet added, because
-        // it may turn out to be the start of a range.
-        // rangePending: a '-' was seen after pendingCh.
- charPending = false;
- rangePending = false;
- do {
- if (yyCh == '-' && charPending && !rangePending) {
- rangePending = true;
- yyCh = getChar();
- } else {
- if (charPending && !rangePending) {
- yyCharClass->addSingleton(pendingCh);
- charPending = false;
- }
- if (yyCh == '\\') {
- yyCh = getChar();
- tok = getEscape();
-                    // NOTE(review): '\b' is assigned without Tok_Char, so
-                    // this appears to end in the error branch below unless
-                    // '\b' happens to overlap Tok_Char - confirm.
- if (tok == Tok_Word)
- tok = '\b';
- } else {
- tok = Tok_Char | yyCh;
- yyCh = getChar();
- }
- if (tok == Tok_CharClass) {
-                    // An escape such as \d ends a pending "x-": both the
-                    // '-' and the pending character are added literally.
- if (rangePending) {
- yyCharClass->addSingleton('-');
- yyCharClass->addSingleton(pendingCh);
- charPending = false;
- rangePending = false;
- }
- } else if ((tok & Tok_Char) != 0) {
- if (rangePending) {
- yyCharClass->addRange(pendingCh, tok ^ Tok_Char);
- charPending = false;
- rangePending = false;
- } else {
- pendingCh = tok ^ Tok_Char;
- charPending = true;
- }
- } else {
- error(RXERR_CHARCLASS);
- }
- }
- } while (yyCh != ']' && yyCh != EOS);
-        // Flush whatever is still pending when the class ends.
- if (rangePending)
- yyCharClass->addSingleton('-');
- if (charPending)
- yyCharClass->addSingleton(pendingCh);
- if (yyCh == EOS)
- error(RXERR_END);
- else
- yyCh = getChar();
- return Tok_CharClass;
-#else
- error(RXERR_END);
- return Tok_Char | '[';
-#endif
- case '\\':
- return getEscape();
- case ']':
- error(RXERR_LEFTDELIM);
- return Tok_Char | ']';
- case '^':
- return Tok_Caret;
- case '{':
-#ifndef QT_NO_REGEXP_INTERVAL
-        // {n}, {n,} or {n,m}; a missing n defaults to 0 and a missing m
-        // to InftyRep.
- yyMinRep = getRep(0);
- yyMaxRep = yyMinRep;
- if (yyCh == ',') {
- yyCh = getChar();
- yyMaxRep = getRep(InftyRep);
- }
- if (yyMaxRep < yyMinRep)
- error(RXERR_INTERVAL);
- if (yyCh != '}')
- error(RXERR_REPETITION);
- yyCh = getChar();
- return Tok_Quantifier;
-#else
- error(RXERR_DISABLED);
- return Tok_Char | '{';
-#endif
- case '|':
- return Tok_Bar;
- case '}':
- error(RXERR_LEFTDELIM);
- return Tok_Char | '}';
- default:
- return Tok_Char | prevCh;
- }
-}
-
-/*
-    Parses the \a len characters starting at \a pattern and builds the
-    automaton for them.
-
-    Returns -1 if an error was recorded in yyError; otherwise returns
-    yyPos0, the start position of the last token read.
-*/
-int QRegExpEngine::parse(const QChar *pattern, int len)
-{
- valid = true;
- startTokenizer(pattern, len);
- yyTok = getToken();
-#ifndef QT_NO_REGEXP_CAPTURE
- yyMayCapture = true;
-#else
- yyMayCapture = false;
-#endif
-
-#ifndef QT_NO_REGEXP_CAPTURE
- int atom = startAtom(false);
-#endif
-    // The parsed expression (middleBox) is sandwiched between two boxes
-    // built from a default-constructed character class.
- QRegExpCharClass anything;
- Box box(this); // create InitialState
- box.set(anything);
- Box rightBox(this); // create FinalState
- rightBox.set(anything);
-
- Box middleBox(this);
- parseExpression(&middleBox);
-#ifndef QT_NO_REGEXP_CAPTURE
- finishAtom(atom, false);
-#endif
-#ifndef QT_NO_REGEXP_OPTIM
- middleBox.setupHeuristics();
-#endif
- box.cat(middleBox);
- box.cat(rightBox);
- yyCharClass.reset();
-
-#ifndef QT_NO_REGEXP_CAPTURE
-    // Replace the capture markers stored in the atoms by capture indexes.
- for (int i = 0; i < nf; ++i) {
- switch (f[i].capture) {
- case QRegExpAtom::NoCapture:
- break;
- case QRegExpAtom::OfficialCapture:
- f[i].capture = ncap;
- captureForOfficialCapture.append(ncap);
- ++ncap;
- ++officialncap;
- break;
- case QRegExpAtom::UnofficialCapture:
- f[i].capture = greedyQuantifiers ? ncap++ : QRegExpAtom::NoCapture;
- }
- }
-
-#ifndef QT_NO_REGEXP_BACKREF
-#ifndef QT_NO_REGEXP_OPTIM
-    // Nothing is captured and nothing is referenced: drop the atoms.
- if (officialncap == 0 && nbrefs == 0) {
- ncap = nf = 0;
- f.clear();
- }
-#endif
- // handle the case where there's a \5 with no corresponding capture
- // (captureForOfficialCapture.size() != officialncap)
- for (int i = 0; i < nbrefs - officialncap; ++i) {
- captureForOfficialCapture.append(ncap);
- ++ncap;
- }
-#endif
-#endif
-
- if (!yyError.isEmpty())
- return -1;
-
-#ifndef QT_NO_REGEXP_OPTIM
-    // caretAnchored is true only if the initial state has anchors and every
-    // one of them contains Anchor_Caret and is not an alternation.
- const QRegExpAutomatonState &sinit = s.at(InitialState);
- caretAnchored = !sinit.anchors.isEmpty();
- if (caretAnchored) {
- const QMap<int, int> &anchors = sinit.anchors;
- QMap<int, int>::const_iterator a;
- for (a = anchors.constBegin(); a != anchors.constEnd(); ++a) {
- if (
-#ifndef QT_NO_REGEXP_ANCHOR_ALT
- (*a & Anchor_Alternation) != 0 ||
-#endif
- (*a & Anchor_Caret) == 0)
- {
- caretAnchored = false;
- break;
- }
- }
- }
-#endif
-
- // cleanup anchors
-    // (removes every anchor entry whose value is 0 from every state)
- int numStates = s.count();
- for (int i = 0; i < numStates; ++i) {
- QRegExpAutomatonState &state = s[i];
- if (!state.anchors.isEmpty()) {
- QMap<int, int>::iterator a = state.anchors.begin();
- while (a != state.anchors.end()) {
- if (a.value() == 0)
- a = state.anchors.erase(a);
- else
- ++a;
- }
- }
- }
-
- return yyPos0;
-}
-
-void QRegExpEngine::parseAtom(Box *box)
-{
- // Parses the atom described by the current token yyTok into box, then
- // fetches the next token. Anything that is not a plain character clears
- // the `trivial` flag (when the optimizations are compiled in).
-#ifndef QT_NO_REGEXP_LOOKAHEAD
- QRegExpEngine *eng = nullptr;
- bool neg;
- int len;
-#endif
-
- if ((yyTok & Tok_Char) != 0) {
- // Plain character: the code point is the token with Tok_Char removed.
- box->set(QChar(yyTok ^ Tok_Char));
- } else {
-#ifndef QT_NO_REGEXP_OPTIM
- trivial = false;
-#endif
- switch (yyTok) {
- case Tok_Dollar:
- box->catAnchor(Anchor_Dollar);
- break;
- case Tok_Caret:
- box->catAnchor(Anchor_Caret);
- break;
-#ifndef QT_NO_REGEXP_LOOKAHEAD
- case Tok_PosLookahead:
- case Tok_NegLookahead:
- // The lookahead body is compiled by a separate engine that starts
- // one character before yyPos. On success its return value is the
- // number of characters to skip in this tokenizer.
- // NOTE(review): eng is never deleted here; presumably
- // addLookahead() takes ownership of it - confirm.
- neg = (yyTok == Tok_NegLookahead);
- eng = new QRegExpEngine(cs, greedyQuantifiers);
- len = eng->parse(yyIn + yyPos - 1, yyLen - yyPos + 1);
- if (len >= 0)
- skipChars(len);
- else
- error(RXERR_LOOKAHEAD);
- box->catAnchor(addLookahead(eng, neg));
- yyTok = getToken();
- if (yyTok != Tok_RightParen)
- error(RXERR_LOOKAHEAD);
- break;
-#endif
-#ifndef QT_NO_REGEXP_ESCAPE
- case Tok_Word:
- box->catAnchor(Anchor_Word);
- break;
- case Tok_NonWord:
- box->catAnchor(Anchor_NonWord);
- break;
-#endif
- case Tok_LeftParen:
- case Tok_MagicLeftParen:
- // Parenthesised sub-expression; the closing parenthesis is consumed
- // by the getToken() call at the end of this function.
- yyTok = getToken();
- parseExpression(box);
- if (yyTok != Tok_RightParen)
- error(RXERR_END);
- break;
- case Tok_CharClass:
- box->set(*yyCharClass);
- break;
- case Tok_Quantifier:
- // A quantifier with nothing in front of it to repeat.
- error(RXERR_REPETITION);
- break;
- default:
-#ifndef QT_NO_REGEXP_BACKREF
- // Back-reference: the number is the token with Tok_BackRef removed.
- if ((yyTok & Tok_BackRef) != 0)
- box->set(yyTok ^ Tok_BackRef);
- else
-#endif
- error(RXERR_DISABLED);
- }
- }
- yyTok = getToken();
-}
-
-void QRegExpEngine::parseFactor(Box *box)
-{
- // Parses one atom plus an optional quantifier into box. When interval
- // support is compiled in, a bounded repetition is built by rewinding the
- // tokenizer and parsing the same atom again for every extra copy.
-#ifndef QT_NO_REGEXP_CAPTURE
- int outerAtom = greedyQuantifiers ? startAtom(false) : -1;
- int innerAtom = startAtom(yyMayCapture && yyTok == Tok_LeftParen);
- bool magicLeftParen = (yyTok == Tok_MagicLeftParen);
-#else
- const int innerAtom = -1;
-#endif
-
-#ifndef QT_NO_REGEXP_INTERVAL
- // YYREDO() restores the tokenizer state saved in the locals below, so
- // that the next parseAtom() call re-reads the atom from its first token.
-#define YYREDO() \
- yyIn = in, yyPos0 = pos0, yyPos = pos, yyLen = len, yyCh = ch, \
- *yyCharClass = charClass, yyMinRep = 0, yyMaxRep = 0, yyTok = tok
-
- const QChar *in = yyIn;
- int pos0 = yyPos0;
- int pos = yyPos;
- int len = yyLen;
- int ch = yyCh;
- QRegExpCharClass charClass;
- if (yyTok == Tok_CharClass)
- charClass = *yyCharClass;
- int tok = yyTok;
- bool mayCapture = yyMayCapture;
-#endif
-
- parseAtom(box);
-#ifndef QT_NO_REGEXP_CAPTURE
- finishAtom(innerAtom, magicLeftParen);
-#endif
-
- bool hasQuantifier = (yyTok == Tok_Quantifier);
- if (hasQuantifier) {
-#ifndef QT_NO_REGEXP_OPTIM
- trivial = false;
-#endif
- // Shape the copy already in box: unbounded maximum -> plus(), maximum
- // of zero -> cleared, minimum of zero -> optional.
- if (yyMaxRep == InftyRep) {
- box->plus(innerAtom);
-#ifndef QT_NO_REGEXP_INTERVAL
- } else if (yyMaxRep == 0) {
- box->clear();
-#endif
- }
- if (yyMinRep == 0)
- box->opt();
-
-#ifndef QT_NO_REGEXP_INTERVAL
- // The re-parsed copies must not create additional captures.
- yyMayCapture = false;
- // alpha: extra copies that are mandatory (minimum minus the one in box).
- // beta: extra copies that are optional (0 when the maximum is unbounded).
- int alpha = (yyMinRep == 0) ? 0 : yyMinRep - 1;
- int beta = (yyMaxRep == InftyRep) ? 0 : yyMaxRep - (alpha + 1);
-
- Box rightBox(this);
- int i;
-
- // Optional copies: each new copy is followed by the chain built so far
- // and the whole is made optional.
- for (i = 0; i < beta; i++) {
- YYREDO();
- Box leftBox(this);
- parseAtom(&leftBox);
- leftBox.cat(rightBox);
- leftBox.opt();
- rightBox = leftBox;
- }
- // Mandatory copies are prepended to that chain without opt().
- for (i = 0; i < alpha; i++) {
- YYREDO();
- Box leftBox(this);
- parseAtom(&leftBox);
- leftBox.cat(rightBox);
- rightBox = leftBox;
- }
- // The copy parsed first (already shaped above) goes last in the chain.
- rightBox.cat(*box);
- *box = rightBox;
-#endif
- yyTok = getToken();
-#ifndef QT_NO_REGEXP_INTERVAL
- yyMayCapture = mayCapture;
-#endif
- }
-#undef YYREDO
-#ifndef QT_NO_REGEXP_CAPTURE
- if (greedyQuantifiers)
- finishAtom(outerAtom, hasQuantifier);
-#endif
-}
-
-void QRegExpEngine::parseTerm(Box *box)
-{
-    // A term is a run of factors; it ends at the end of the input, at a
-    // closing parenthesis or at an alternation bar.
-    const auto moreFactors = [this]() {
-        return !(yyTok == Tok_Eos || yyTok == Tok_RightParen || yyTok == Tok_Bar);
-    };
-
-#ifndef QT_NO_REGEXP_OPTIM
-    // The first factor is parsed straight into box instead of being
-    // concatenated onto it.
-    if (moreFactors())
-        parseFactor(box);
-#endif
-    while (moreFactors()) {
-        Box factorBox(this);
-        parseFactor(&factorBox);
-        box->cat(factorBox);
-    }
-}
-
-void QRegExpEngine::parseExpression(Box *box)
-{
-    // An expression is one term followed by any number of "| term"
-    // alternatives, each of which is or'ed into box.
-    parseTerm(box);
-    for (;;) {
-        if (yyTok != Tok_Bar)
-            break;
-#ifndef QT_NO_REGEXP_OPTIM
-        trivial = false;
-#endif
-        Box alternative(this);
-        yyTok = getToken();
-        parseTerm(&alternative);
-        box->orx(alternative);
-    }
-}
-
-/*
-  QRegExpPrivate holds everything a QRegExp object owns apart from the
-  automaton itself. Keeping it separate from QRegExpEngine lets several
-  QRegExp objects use one QRegExpEngine while each has its own
-  QRegExpPrivate.
-*/
-struct QRegExpPrivate
-{
-    QRegExpEngine *eng;
-    QRegExpEngineKey engineKey;
-    bool minimal;
-#ifndef QT_NO_REGEXP_CAPTURE
-    QString t; // last string passed to QRegExp::indexIn() or lastIndexIn()
-    QStringList capturedCache; // what QRegExp::capturedTexts() returned last
-#endif
-    QRegExpMatchState matchState;
-
-    // Default state: empty RegExp-syntax, case-sensitive key and no engine.
-    inline QRegExpPrivate()
-        : QRegExpPrivate(QRegExpEngineKey(QString(), QRegExp::RegExp, Qt::CaseSensitive))
-    {
-    }
-
-    // Starts from the given key; the engine is attached later.
-    inline QRegExpPrivate(const QRegExpEngineKey &key)
-        : eng(nullptr), engineKey(key), minimal(false)
-    {
-    }
-};
-
-#if !defined(QT_NO_REGEXP_OPTIM)
-// Engine cache used by derefEngine() and prepareEngine_helper():
-// usedEngines maps keys to engines that are currently referenced,
-// unusedEngines keeps engines whose reference count dropped to zero.
-struct QRECache
-{
-    using EngineCache = QHash<QRegExpEngineKey, QRegExpEngine *>;
-    using UnusedEngineCache = QCache<QRegExpEngineKey, QRegExpEngine>;
-
-    EngineCache usedEngines;
-    UnusedEngineCache unusedEngines;
-};
-Q_GLOBAL_STATIC(QRECache, engineCache)
-// Locked by both functions before they touch the cache.
-static QBasicMutex engineCacheMutex;
-#endif // QT_NO_REGEXP_OPTIM
-
-static void derefEngine(QRegExpEngine *eng, const QRegExpEngineKey &key)
-{
-    // Drops one reference to eng. When the last reference goes away the
-    // engine is parked in the unused-engine cache if the cache is available,
-    // and deleted otherwise.
-#if !defined(QT_NO_REGEXP_OPTIM)
-    const auto locker = qt_scoped_lock(engineCacheMutex);
-    if (eng->ref.deref())
-        return; // still referenced elsewhere
-
-    QRECache *cache = engineCache();
-    if (!cache) {
-        delete eng;
-        return;
-    }
-    // The cache cost grows with the pattern length.
-    const auto cost = 4 + key.pattern.length() / 4;
-    cache->unusedEngines.insert(key, eng, cost);
-    cache->usedEngines.remove(key);
-#else
-    Q_UNUSED(key);
-    if (eng->ref.deref())
-        return;
-    delete eng;
-#endif
-}
-
-static void prepareEngine_helper(QRegExpPrivate *priv)
-{
-    // Attaches an engine for priv->engineKey to priv. With the cache
-    // available, an engine with the same key is reused (first from the
-    // unused cache, then from the engines in use) before a new one is built.
-    Q_ASSERT(!priv->eng);
-
-#if !defined(QT_NO_REGEXP_OPTIM)
-    const auto locker = qt_scoped_lock(engineCacheMutex);
-    if (QRECache *cache = engineCache()) {
-        const QRegExpEngineKey &key = priv->engineKey;
-        QRegExpEngine *shared = cache->unusedEngines.take(key);
-        if (!shared)
-            shared = cache->usedEngines.value(key);
-
-        if (shared) {
-            shared->ref.ref();
-            priv->eng = shared;
-        } else {
-            priv->eng = new QRegExpEngine(key);
-        }
-
-        cache->usedEngines.insert(key, priv->eng);
-        return;
-    }
-#endif // QT_NO_REGEXP_OPTIM
-
-    priv->eng = new QRegExpEngine(priv->engineKey);
-}
-
-inline static void prepareEngine(QRegExpPrivate *priv)
-{
-    // Only does work when no engine is attached yet: fetch or build one and
-    // set up the match state for it.
-    if (!priv->eng) {
-        prepareEngine_helper(priv);
-        priv->matchState.prepareForMatch(priv->eng);
-    }
-}
-
-static void prepareEngineForMatch(QRegExpPrivate *priv, const QString &str)
-{
-    // Makes sure an engine is attached, prepares the match state for it and,
-    // when captures are compiled in, remembers str and clears the cached
-    // captured texts.
-    prepareEngine(priv);
-    QRegExpMatchState &state = priv->matchState;
-    state.prepareForMatch(priv->eng);
-#ifdef QT_NO_REGEXP_CAPTURE
-    Q_UNUSED(str);
-#else
-    priv->t = str;
-    priv->capturedCache.clear();
-#endif
-}
-
-static void invalidateEngine(QRegExpPrivate *priv)
-{
-    // Detaches priv from its engine, if it has one: release the reference,
-    // forget the pointer and drain the match state.
-    if (!priv->eng)
-        return;
-
-    derefEngine(priv->eng, priv->engineKey);
-    priv->eng = nullptr;
-    priv->matchState.drain();
-}
-
-/*!
- \enum QRegExp::CaretMode
-
- The CaretMode enum defines the different meanings of the caret
- (\b{^}) in a regular expression. The possible values are:
-
- \value CaretAtZero
- The caret corresponds to index 0 in the searched string.
-
- \value CaretAtOffset
- The caret corresponds to the start offset of the search.
-
- \value CaretWontMatch
- The caret never matches.
-*/
-
-/*!
- \enum QRegExp::PatternSyntax
-
- The syntax used to interpret the meaning of the pattern.
-
- \value RegExp A rich Perl-like pattern matching syntax. This is
- the default.
-
- \value RegExp2 Like RegExp, but with \l{greedy quantifiers}.
- (Introduced in Qt 4.2.)
-
- \value Wildcard This provides a simple pattern matching syntax
- similar to that used by shells (command interpreters) for "file
- globbing". See \l{QRegExp wildcard matching}.
-
- \value WildcardUnix This is similar to Wildcard but with the
- behavior of a Unix shell. The wildcard characters can be escaped
- with the character "\\".
-
- \value FixedString The pattern is a fixed string. This is
- equivalent to using the RegExp pattern on a string in
- which all metacharacters are escaped using escape().
-
- \value W3CXmlSchema11 The pattern is a regular expression as
- defined by the W3C XML Schema 1.1 specification.
-
- \sa setPatternSyntax()
-*/
-
-/*!
- Constructs an empty regexp.
-
- \sa isValid(), errorString()
-*/
-QRegExp::QRegExp()
-    : priv(new QRegExpPrivate)
-{
-    // The engine is attached immediately, even for the empty pattern.
-    prepareEngine(priv);
-}
-
-/*!
- Constructs a regular expression object for the given \a pattern
- string. The pattern must be given using wildcard notation if \a
- syntax is \l Wildcard; the default is \l RegExp. The pattern is
- case sensitive, unless \a cs is Qt::CaseInsensitive. Matching is
- greedy (maximal), but can be changed by calling
- setMinimal().
-
- \sa setPattern(), setCaseSensitivity(), setPatternSyntax()
-*/
-QRegExp::QRegExp(const QString &pattern, Qt::CaseSensitivity cs, PatternSyntax syntax)
-    : priv(new QRegExpPrivate(QRegExpEngineKey(pattern, syntax, cs)))
-{
-    // Note the argument order of the key: pattern, syntax, case sensitivity.
-    prepareEngine(priv);
-}
-
-/*!
- Constructs a regular expression as a copy of \a rx.
-
- \sa operator=()
-*/
-QRegExp::QRegExp(const QRegExp &rx)
-    : priv(new QRegExpPrivate)
-{
-    // Start from default private data and let the assignment operator do
-    // the actual copying.
-    *this = rx;
-}
-
-/*!
- Destroys the regular expression and cleans up its internal data.
-*/
-QRegExp::~QRegExp()
-{
-    // Give up the engine reference before the private data is freed.
-    QRegExpPrivate *d = priv;
-    invalidateEngine(d);
-    delete d;
-}
-
-/*!
- Copies the regular expression \a rx and returns a reference to the
- copy. The case sensitivity, wildcard, and minimal matching options
- are also copied.
-*/
-QRegExp &QRegExp::operator=(const QRegExp &rx)
-{
- prepareEngine(rx.priv); // to allow sharing
- // Take the extra reference on rx's engine before releasing our own
- // engine below.
- QRegExpEngine *otherEng = rx.priv->eng;
- if (otherEng)
- otherEng->ref.ref();
- invalidateEngine(priv);
- priv->eng = otherEng;
- priv->engineKey = rx.priv->engineKey;
- priv->minimal = rx.priv->minimal;
-#ifndef QT_NO_REGEXP_CAPTURE
- priv->t = rx.priv->t;
- priv->capturedCache = rx.priv->capturedCache;
-#endif
- if (priv->eng)
- priv->matchState.prepareForMatch(priv->eng);
- // NOTE(review): this copies the `captured` member itself. If that member
- // is a raw pointer into rx's match-state storage (QRegExpMatchState is
- // not visible here), this object would alias rx's buffer - confirm.
- priv->matchState.captured = rx.priv->matchState.captured;
- return *this;
-}
-
-/*!
- \fn QRegExp &QRegExp::operator=(QRegExp &&other)
-
- Move-assigns \a other to this QRegExp instance.
-
- \since 5.2
-*/
-
-/*!
- \fn void QRegExp::swap(QRegExp &other)
- \since 4.8
-
- Swaps regular expression \a other with this regular
- expression. This operation is very fast and never fails.
-*/
-
-/*!
- Returns \c true if this regular expression is equal to \a rx;
- otherwise returns \c false.
-
- Two QRegExp objects are equal if they have the same pattern
- strings and the same settings for case sensitivity, wildcard and
- minimal matching.
-*/
-bool QRegExp::operator==(const QRegExp &rx) const
-{
-    // Equality is decided by the engine key and the minimal flag only;
-    // match results are not compared.
-    const bool sameKey = (priv->engineKey == rx.priv->engineKey);
-    return sameKey && priv->minimal == rx.priv->minimal;
-}
-
-/*!
- \since 5.6
- \relates QRegExp
-
- Returns the hash value for \a key, using
- \a seed to seed the calculation.
-*/
-size_t qHash(const QRegExp &key, size_t seed) noexcept
-{
-    // Fold in the same two fields that operator==() compares.
-    QtPrivate::QHashCombine combine;
-    const size_t withEngineKey = combine(seed, key.priv->engineKey);
-    return combine(withEngineKey, key.priv->minimal);
-}
-
-/*!
- \fn bool QRegExp::operator!=(const QRegExp &rx) const
-
- Returns \c true if this regular expression is not equal to \a rx;
- otherwise returns \c false.
-
- \sa operator==()
-*/
-
-/*!
- Returns \c true if the pattern string is empty; otherwise returns
- false.
-
- If you call exactMatch() with an empty pattern on an empty string
- it will return true; otherwise it returns \c false since it operates
- over the whole string. If you call indexIn() with an empty pattern
- on \e any string it will return the start offset (0 by default)
- because the empty pattern matches the 'emptiness' at the start of
- the string. In this case the length of the match returned by
- matchedLength() will be 0.
-
- See QString::isEmpty().
-*/
-
-bool QRegExp::isEmpty() const
-{
-    // Looks at the stored pattern string only; no engine is needed.
-    const QString &currentPattern = priv->engineKey.pattern;
-    return currentPattern.isEmpty();
-}
-
-/*!
- Returns \c true if the regular expression is valid; otherwise returns
- false. An invalid regular expression never matches.
-
- The pattern \b{[a-z} is an example of an invalid pattern, since
- it lacks a closing square bracket.
-
- Note that the validity of a regexp may also depend on the setting
- of the wildcard flag, for example \b{*.html} is a valid
- wildcard regexp but an invalid full regexp.
-
- \sa errorString()
-*/
-bool QRegExp::isValid() const
-{
-    // The empty pattern is reported as valid without consulting an engine.
-    if (priv->engineKey.pattern.isEmpty())
-        return true;
-
-    prepareEngine(priv);
-    return priv->eng->isValid();
-}
-
-/*!
- Returns the pattern string of the regular expression. The pattern
- has either regular expression syntax or wildcard syntax, depending
- on patternSyntax().
-
- \sa patternSyntax(), caseSensitivity()
-*/
-QString QRegExp::pattern() const
-{
-    // The pattern is stored as part of the engine key.
-    const QRegExpEngineKey &key = priv->engineKey;
-    return key.pattern;
-}
-
-/*!
- Sets the pattern string to \a pattern. The case sensitivity,
- wildcard, and minimal matching options are not changed.
-
- \sa setPatternSyntax(), setCaseSensitivity()
-*/
-void QRegExp::setPattern(const QString &pattern)
-{
-    // Setting the same pattern again keeps the current engine.
-    if (priv->engineKey.pattern == pattern)
-        return;
-
-    invalidateEngine(priv);
-    priv->engineKey.pattern = pattern;
-}
-
-/*!
- Returns Qt::CaseSensitive if the regexp is matched case
- sensitively; otherwise returns Qt::CaseInsensitive.
-
- \sa patternSyntax(), pattern(), isMinimal()
-*/
-Qt::CaseSensitivity QRegExp::caseSensitivity() const
-{
-    // The case sensitivity is stored as part of the engine key.
-    const QRegExpEngineKey &key = priv->engineKey;
-    return key.cs;
-}
-
-/*!
- Sets case sensitive matching to \a cs.
-
- If \a cs is Qt::CaseSensitive, \b{\\.txt$} matches
- \c{readme.txt} but not \c{README.TXT}.
-
- \sa setPatternSyntax(), setPattern(), setMinimal()
-*/
-void QRegExp::setCaseSensitivity(Qt::CaseSensitivity cs)
-{
-    // Both values are compared as booleans, so only a real change of the
-    // setting drops the current engine.
-    const bool requested = (bool)cs;
-    const bool current = (bool)priv->engineKey.cs;
-    if (requested == current)
-        return;
-
-    invalidateEngine(priv);
-    priv->engineKey.cs = cs;
-}
-
-/*!
- Returns the syntax used by the regular expression. The default is
- QRegExp::RegExp.
-
- \sa pattern(), caseSensitivity()
-*/
-QRegExp::PatternSyntax QRegExp::patternSyntax() const
-{
-    // The syntax is stored as part of the engine key.
-    const QRegExpEngineKey &key = priv->engineKey;
-    return key.patternSyntax;
-}
-
-/*!
- Sets the syntax mode for the regular expression. The default is
- QRegExp::RegExp.
-
- Setting \a syntax to QRegExp::Wildcard enables simple shell-like
- \l{QRegExp wildcard matching}. For example, \b{r*.txt} matches the
- string \c{readme.txt} in wildcard mode, but does not match
- \c{readme}.
-
- Setting \a syntax to QRegExp::FixedString means that the pattern
- is interpreted as a plain string. Special characters (e.g.,
- backslash) don't need to be escaped then.
-
- \sa setPattern(), setCaseSensitivity(), escape()
-*/
-void QRegExp::setPatternSyntax(PatternSyntax syntax)
-{
-    // Setting the same syntax again keeps the current engine.
-    if (syntax == priv->engineKey.patternSyntax)
-        return;
-
-    invalidateEngine(priv);
-    priv->engineKey.patternSyntax = syntax;
-}
-
-/*!
- Returns \c true if minimal (non-greedy) matching is enabled;
- otherwise returns \c false.
-
- \sa caseSensitivity(), setMinimal()
-*/
-bool QRegExp::isMinimal() const
-{
-    // The flag lives in the private data, not in the engine key.
-    const bool minimalMatching = priv->minimal;
-    return minimalMatching;
-}
-
-/*!
- Enables or disables minimal matching. If \a minimal is false,
- matching is greedy (maximal) which is the default.
-
- For example, suppose we have the input string "We must be
- <b>bold</b>, very <b>bold</b>!" and the pattern
- \b{<b>.*</b>}. With the default greedy (maximal) matching,
- the match is "We must be \underline{<b>bold</b>, very
- <b>bold</b>}!". But with minimal (non-greedy) matching, the
- first match is: "We must be \underline{<b>bold</b>}, very
- <b>bold</b>!" and the second match is "We must be <b>bold</b>,
- very \underline{<b>bold</b>}!". In practice we might use the pattern
- \b{<b>[^<]*\</b>} instead, although this will still fail for
- nested tags.
-
- \sa setCaseSensitivity()
-*/
-void QRegExp::setMinimal(bool minimal)
-{
-    // Only the flag changes; the engine is left untouched.
-    QRegExpPrivate *d = priv;
-    d->minimal = minimal;
-}
-
-// ### Qt 5: make non-const
-/*!
- Returns \c true if \a str is matched exactly by this regular
- expression; otherwise returns \c false. You can determine how much of
- the string was matched by calling matchedLength().
-
- For a given regexp string R, exactMatch("R") is the equivalent of
- indexIn("^R$") since exactMatch() effectively encloses the regexp
- in the start of string and end of string anchors, except that it
- sets matchedLength() differently.
-
- For example, if the regular expression is \b{blue}, then
- exactMatch() returns \c true only for input \c blue. For inputs \c
- bluebell, \c blutak and \c lightblue, exactMatch() returns \c false
- and matchedLength() will return 4, 3 and 0 respectively.
-
- Although const, this function sets matchedLength(),
- capturedTexts(), and pos().
-
- \sa indexIn(), lastIndexIn()
-*/
-bool QRegExp::exactMatch(const QString &str) const
-{
-    prepareEngineForMatch(priv, str);
-    QRegExpMatchState &state = priv->matchState;
-    state.match(str.unicode(), str.length(), 0, priv->minimal, true, 0);
-
-    // Success means the match at position 0 covers the whole string.
-    const bool wholeStringMatched = (state.captured[1] == str.length());
-    if (!wholeStringMatched) {
-        // On failure, report position 0 and the length recorded in
-        // oneTestMatchedLen.
-        state.captured[0] = 0;
-        state.captured[1] = state.oneTestMatchedLen;
-    }
-    return wholeStringMatched;
-}
-
-/*!
- Returns the regexp as a QVariant
-*/
-QRegExp::operator QVariant() const
-{
-    // Deprecation warnings are disabled for the body of this conversion.
-QT_WARNING_PUSH QT_WARNING_DISABLE_DEPRECATED
-    QVariant wrapped;
-    wrapped.setValue(*this);
-    return wrapped;
-QT_WARNING_POP
-}
-
-// ### Qt 5: make non-const
-/*!
- Attempts to find a match in \a str from position \a offset (0 by
- default). If \a offset is -1, the search starts at the last
- character; if -2, at the next to last character; etc.
-
- Returns the position of the first match, or -1 if there was no
- match.
-
- The \a caretMode parameter can be used to instruct whether \b{^}
- should match at index 0 or at \a offset.
-
- You might prefer to use QString::indexOf(), QString::contains(),
- or even QStringList::filter(). To replace matches use
- QString::replace().
-
- Example:
- \snippet code/src_corelib_text_qregexp.cpp 13
-
- Although const, this function sets matchedLength(),
- capturedTexts() and pos().
-
- If the QRegExp is a wildcard expression (see setPatternSyntax())
- and you want to test a string against the whole wildcard expression,
- use exactMatch() instead of this function.
-
- \sa lastIndexIn(), exactMatch()
-*/
-
-int QRegExp::indexIn(const QString &str, int offset, CaretMode caretMode) const
-{
-    prepareEngineForMatch(priv, str);
-
-    // A negative offset counts from the end of the string.
-    if (offset < 0)
-        offset += str.length();
-
-    QRegExpMatchState &state = priv->matchState;
-    state.match(str.unicode(), str.length(), offset,
-                priv->minimal, false, caretIndex(offset, caretMode));
-    return state.captured[0];
-}
-
-// ### Qt 5: make non-const
-/*!
- Attempts to find a match backwards in \a str from position \a
- offset. If \a offset is -1 (the default), the search starts at the
- last character; if -2, at the next to last character; etc.
-
- Returns the position of the first match, or -1 if there was no
- match.
-
- The \a caretMode parameter can be used to instruct whether \b{^}
- should match at index 0 or at \a offset.
-
- Although const, this function sets matchedLength(),
- capturedTexts() and pos().
-
- \warning Searching backwards is much slower than searching
- forwards.
-
- \sa indexIn(), exactMatch()
-*/
-
-int QRegExp::lastIndexIn(const QString &str, int offset, CaretMode caretMode) const
-{
-    prepareEngineForMatch(priv, str);
-    QRegExpMatchState &state = priv->matchState;
-
-    // A negative offset counts from the end of the string.
-    if (offset < 0)
-        offset += str.length();
-
-    const bool outOfRange = offset < 0 || offset > str.length();
-    if (outOfRange) {
-        // Mark every capture slot as "no match".
-        memset(state.captured, -1, state.capturedSize*sizeof(int));
-        return -1;
-    }
-
-    // Try a one-position match at each candidate, walking towards index 0.
-    for (int candidate = offset; candidate >= 0; --candidate) {
-        state.match(str.unicode(), str.length(), candidate,
-                    priv->minimal, true, caretIndex(candidate, caretMode));
-        if (state.captured[0] == candidate)
-            return candidate;
-    }
-    return -1;
-}
-
-/*!
- Returns the length of the last matched string, or -1 if there was
- no match.
-
- \sa exactMatch(), indexIn(), lastIndexIn()
-*/
-int QRegExp::matchedLength() const
-{
-    // Slot 1 of the capture array holds the length of the whole match.
-    const int *captured = priv->matchState.captured;
-    return captured[1];
-}
-
-
-/*!
- Replaces every occurrence of this regular expression in
- \a str with \a after and returns the result.
-
- For regular expressions containing \l{capturing parentheses},
- occurrences of \b{\\1}, \b{\\2}, ..., in \a after are replaced
- with \a{rx}.cap(1), cap(2), ...
-
- \sa indexIn(), lastIndexIn(), QRegExp::cap()
-*/
-QString QRegExp::replaceIn(const QString &str, const QString &after) const
-{
- // Describes one "\N" reference found in `after`: where it starts, how
- // many characters it spans (2 or 3) and which capture it names.
- struct QStringCapture
- {
- int pos;
- int len;
- int no;
- };
-
- // All matching is done on a copy, so this object's match state is not
- // touched.
- QRegExp rx2(*this);
-
- if (str.isEmpty() && rx2.indexIn(str) == -1)
- return str;
-
- QString s(str);
-
- int index = 0;
- int numCaptures = rx2.captureCount();
- int al = after.length();
- QRegExp::CaretMode caretMode = QRegExp::CaretAtZero;
-
- if (numCaptures > 0) {
- const QChar *uc = after.unicode();
- int numBackRefs = 0;
-
- // First pass: count the "\digit" sequences that name an existing capture.
- for (int i = 0; i < al - 1; i++) {
- if (uc[i] == QLatin1Char('\\')) {
- int no = uc[i + 1].digitValue();
- if (no > 0 && no <= numCaptures)
- numBackRefs++;
- }
- }
-
- /*
- This is the harder case where we have back-references.
- */
- if (numBackRefs > 0) {
- QVarLengthArray<QStringCapture, 16> captures(numBackRefs);
- int j = 0;
-
- // Second pass: record each reference. A second digit is taken as part
- // of the number only if the two-digit value still names a capture.
- for (int i = 0; i < al - 1; i++) {
- if (uc[i] == QLatin1Char('\\')) {
- int no = uc[i + 1].digitValue();
- if (no > 0 && no <= numCaptures) {
- QStringCapture capture;
- capture.pos = i;
- capture.len = 2;
-
- if (i < al - 2) {
- int secondDigit = uc[i + 2].digitValue();
- if (secondDigit != -1 && ((no * 10) + secondDigit) <= numCaptures) {
- no = (no * 10) + secondDigit;
- ++capture.len;
- }
- }
-
- capture.no = no;
- captures[j++] = capture;
- }
- }
- }
-
- while (index <= s.length()) {
- index = rx2.indexIn(s, index, caretMode);
- if (index == -1)
- break;
-
- // Expand the references from last to first, so the positions
- // recorded for the earlier ones stay valid.
- QString after2(after);
- for (j = numBackRefs - 1; j >= 0; j--) {
- const QStringCapture &capture = captures[j];
- after2.replace(capture.pos, capture.len, rx2.cap(capture.no));
- }
-
- s.replace(index, rx2.matchedLength(), after2);
- index += after2.length();
-
- // avoid infinite loop on 0-length matches (e.g., QRegExp("[a-z]*"))
- if (rx2.matchedLength() == 0)
- ++index;
-
- // After the first replacement the caret no longer matches.
- caretMode = QRegExp::CaretWontMatch;
- }
- return s;
- }
- }
-
- /*
- This is the simple and optimized case where we don't have
- back-references.
- */
- // Matches are collected in batches of at most 2047; each batch is then
- // applied by building a new string in a single pass.
- while (index != -1) {
- struct {
- int pos;
- int length;
- } replacements[2048];
-
- int pos = 0;
- int adjust = 0;
- while (pos < 2047) {
- index = rx2.indexIn(s, index, caretMode);
- if (index == -1)
- break;
- int ml = rx2.matchedLength();
- replacements[pos].pos = index;
- replacements[pos++].length = ml;
- index += ml;
- adjust += al - ml;
- // avoid infinite loop
- if (!ml)
- index++;
- }
- if (!pos)
- break;
- // Extra entry after the last match, holding the end of the string.
- replacements[pos].pos = s.size();
- int newlen = s.size() + adjust;
-
- // to continue searching at the right position after we did
- // the first round of replacements
- if (index != -1)
- index += adjust;
- // The characters are written straight into the reserved buffer; the
- // string only gets its final size from the resize() call below.
- QString newstring;
- newstring.reserve(newlen + 1);
- QChar *newuc = newstring.data();
- QChar *uc = newuc;
- int copystart = 0;
- int i = 0;
- while (i < pos) {
- // Copy the text between the previous match and this one, then `after`.
- int copyend = replacements[i].pos;
- int size = copyend - copystart;
- memcpy(static_cast<void*>(uc), static_cast<const void *>(s.constData() + copystart), size * sizeof(QChar));
- uc += size;
- memcpy(static_cast<void *>(uc), static_cast<const void *>(after.constData()), al * sizeof(QChar));
- uc += al;
- copystart = copyend + replacements[i].length;
- i++;
- }
- // Copy whatever follows the last match of this batch.
- memcpy(static_cast<void *>(uc), static_cast<const void *>(s.constData() + copystart), (s.size() - copystart) * sizeof(QChar));
- newstring.resize(newlen);
- s = newstring;
- caretMode = QRegExp::CaretWontMatch;
- }
- return s;
-
-}
-
-
-/*!
- \fn QString QRegExp::removeIn(const QString &str)
-
- Removes every occurrence of this regular expression in \a str, and
- returns the result.
-
- Does the same as replaceIn(str, QString()).
-
- \sa indexIn(), lastIndexIn(), replaceIn()
-*/
-
-
-/*!
- \fn int QRegExp::countIn(const QString &str) const
-
- Returns the number of times this regular expression matches
- in \a str.
-
- \sa indexIn(), lastIndexIn(), replaceIn()
-*/
-
-int QRegExp::countIn(const QString &str) const
-{
-    // Matching is done on a copy so this object's match state is untouched.
-    QRegExp matcher(*this);
-    const int len = str.length();
-    int count = 0;
-
-    // Each search restarts one character after the previous hit, so
-    // overlapping matches are counted too.
-    int searchFrom = 0;
-    while (searchFrom < len) {
-        const int hit = matcher.indexIn(str, searchFrom);
-        if (hit == -1)
-            break;
-        ++count;
-        searchFrom = hit + 1;
-    }
-    return count;
-}
-
-/*!
- Splits \a str into substrings wherever this regular expression
- matches, and returns the list of those strings. If this regular
- expression does not match anywhere in the string, splitString() returns a
- single-element list containing \a str.
-
- \sa QStringList::join(), section(), QString::split()
-*/
-QStringList QRegExp::splitString(const QString &str, Qt::SplitBehavior behavior) const
-{
-    QRegExp matcher(*this);
-    QStringList parts;
-    const bool keepEmpty = (behavior == Qt::KeepEmptyParts);
-
-    int start = 0;      // where the next part begins
-    int searchFrom = 0; // where the next search begins
-    for (;;) {
-        const int end = matcher.indexIn(str, searchFrom);
-        if (end == -1)
-            break;
-        const int matchedLen = matcher.matchedLength();
-        if (keepEmpty || start != end)
-            parts.append(str.mid(start, end - start));
-        start = end + matchedLen;
-        // After an empty match, search one character further on so the
-        // loop makes progress.
-        searchFrom = (matchedLen == 0) ? start + 1 : start;
-    }
-
-    // The text after the last separator.
-    if (keepEmpty || start != str.size())
-        parts.append(str.mid(start, -1));
-    return parts;
-}
-
-/*!
- Returns a list of all the strings in \a stringList that match this
- regular expression.
-*/
-QStringList QRegExp::filterList(const QStringList &stringList) const
-{
-    // Keep, in their original order, the strings accepted by containedIn().
-    QStringList matching;
-    for (auto it = stringList.cbegin(), end = stringList.cend(); it != end; ++it) {
-        if (!containedIn(*it))
-            continue;
-        matching << *it;
-    }
-    return matching;
-}
-
-/*!
- Replaces every occurrence of this regexp, in each of the strings of
- \a stringList, with \a after. Returns the resulting list; \a stringList
- itself is not modified.
-*/
-QStringList QRegExp::replaceIn(const QStringList &stringList, const QString &after) const
-{
-    // Apply the single-string replaceIn() to every element, in order.
-    QStringList replaced;
-    for (auto it = stringList.cbegin(), end = stringList.cend(); it != end; ++it)
-        replaced << replaceIn(*it, after);
-    return replaced;
-}
-
-/*!
- Returns the index position of the first exact match of this regexp in
- \a list, searching forward from index position \a from. Returns
- -1 if no item matched.
-
- \sa lastIndexIn(), contains(), exactMatch()
-*/
-int QRegExp::indexIn(const QStringList &list, int from) const
-{
-    QRegExp matcher(*this);
-
-    // A negative start counts from the end of the list, clamped to 0.
-    if (from < 0)
-        from = qMax(from + list.size(), 0);
-
-    int i = from;
-    while (i < list.size()) {
-        if (matcher.exactMatch(list.at(i)))
-            return i;
-        ++i;
-    }
-    return -1;
-}
-
-/*!
- Returns the index position of the last exact match of this regexp in
- \a list, searching backward from index position \a from. If \a
- from is -1 (the default), the search starts at the last item.
- Returns -1 if no item matched.
-
- \sa indexOf(), contains(), QRegExp::exactMatch()
-*/
-int QRegExp::lastIndexIn(const QStringList &list, int from) const
-{
-    QRegExp matcher(*this);
-
-    // A negative start counts from the end; a start past the end is
-    // clamped to the last item.
-    if (from < 0)
-        from += list.size();
-    else if (from >= list.size())
-        from = list.size() - 1;
-
-    int i = from;
-    while (i >= 0) {
-        if (matcher.exactMatch(list.at(i)))
-            return i;
-        --i;
-    }
-    return -1;
-}
-
-#ifndef QT_NO_REGEXP_CAPTURE
-
-/*!
- \since 4.6
- Returns the number of captures contained in the regular expression.
- */
-int QRegExp::captureCount() const
-{
-    // The count comes from the engine, so make sure one is attached.
-    prepareEngine(priv);
-    const QRegExpEngine *engine = priv->eng;
-    return engine->captureCount();
-}
-
-/*!
- Returns a list of the captured text strings.
-
- The first string in the list is the entire matched string. Each
- subsequent list element contains a string that matched a
- (capturing) subexpression of the regexp.
-
- For example:
- \snippet code/src_corelib_text_qregexp.cpp 14
-
- The above example also captures elements that may be present but
- which we have no interest in. This problem can be solved by using
- non-capturing parentheses:
-
- \snippet code/src_corelib_text_qregexp.cpp 15
-
- Note that if you want to iterate over the list, you should iterate
- over a copy, e.g.
- \snippet code/src_corelib_text_qregexp.cpp 16
-
- Some regexps can match an indeterminate number of times. For
- example if the input string is "Offsets: 12 14 99 231 7" and the
- regexp, \c{rx}, is \b{(\\d+)+}, we would hope to get a list of
- all the numbers matched. However, after calling
- \c{rx.indexIn(str)}, capturedTexts() will return the list ("12",
- "12"), i.e. the entire match was "12" and the first subexpression
- matched was "12". The correct approach is to use cap() in a
- \l{QRegExp#cap_in_a_loop}{loop}.
-
- The order of elements in the string list is as follows. The first
- element is the entire matching string. Each subsequent element
- corresponds to the next capturing open left parentheses. Thus
- capturedTexts()[1] is the text of the first capturing parentheses,
- capturedTexts()[2] is the text of the second and so on
- (corresponding to $1, $2, etc., in some other regexp languages).
-
- \sa cap(), pos()
-*/
-QStringList QRegExp::capturedTexts() const
-{
-    // The list is built once per match and served from the cache afterwards.
-    if (!priv->capturedCache.isEmpty())
-        return priv->capturedCache;
-
-    prepareEngine(priv);
-    const int *captured = priv->matchState.captured;
-    const int n = priv->matchState.capturedSize;
-
-    // The capture array holds (start, length) pairs.
-    for (int i = 0; i < n; i += 2) {
-        const int start = captured[i];
-        const int length = captured[i + 1];
-        QString m;
-        if (length == 0)
-            m = QLatin1String(""); // ### Qt 5: don't distinguish between null and empty
-        else if (start >= 0)
-            m = priv->t.mid(start, length);
-        priv->capturedCache.append(m);
-    }
-    // The searched string is no longer needed once the cache is filled.
-    priv->t.clear();
-    return priv->capturedCache;
-}
-
-/*!
- \internal
-*/
-QStringList QRegExp::capturedTexts()
-{
-    // Forward to the const overload.
-    const QRegExp &self = *this;
-    return self.capturedTexts();
-}
-
-/*!
- Returns the text captured by the \a nth subexpression. The entire
- match has index 0 and the parenthesized subexpressions have
- indexes starting from 1 (excluding non-capturing parentheses).
-
- \snippet code/src_corelib_text_qregexp.cpp 17
-
- The order of elements matched by cap() is as follows. The first
- element, cap(0), is the entire matching string. Each subsequent
- element corresponds to the next capturing open left parentheses.
- Thus cap(1) is the text of the first capturing parentheses, cap(2)
- is the text of the second, and so on.
-
- \sa capturedTexts(), pos()
-*/
-QString QRegExp::cap(int nth) const
-{
-    // Looked up with value(), not at().
-    const QStringList texts = capturedTexts();
-    return texts.value(nth);
-}
-
-/*!
- \internal
-*/
-QString QRegExp::cap(int nth)
-{
-    // Forward to the const overload.
-    const QRegExp &self = *this;
-    return self.cap(nth);
-}
-
-/*!
- Returns the position of the \a nth captured text in the searched
- string. If \a nth is 0 (the default), pos() returns the position
- of the whole match.
-
- Example:
- \snippet code/src_corelib_text_qregexp.cpp 18
-
- For zero-length matches, pos() always returns -1. (For example, if
- cap(4) would return an empty string, pos(4) returns -1.) This is
- a feature of the implementation.
-
- \sa cap(), capturedTexts()
-*/
-int QRegExp::pos(int nth) const
-{
-    // Two ints are stored per capture; the first of each pair is returned.
-    const int pairs = priv->matchState.capturedSize / 2;
-    if (nth >= 0 && nth < pairs)
-        return priv->matchState.captured[2 * nth];
-    return -1;
-}
-
-/*!
- \internal
-*/
-int QRegExp::pos(int nth)
-{
-    // Forward to the const overload.
-    const QRegExp &self = *this;
-    return self.pos(nth);
-}
-
-/*!
- Returns a text string that explains why a regexp pattern is
- invalid, if that is the case; otherwise returns "no error occurred".
-
- \sa isValid()
-*/
-QString QRegExp::errorString() const
-{
-    // Only an invalid pattern has an engine-provided message.
-    if (!isValid())
-        return priv->eng->errorString();
-    return QString::fromLatin1(RXERR_OK);
-}
-
-/*!
- \internal
-*/
-QString QRegExp::errorString()
-{
-    // Forward to the const overload.
-    const QRegExp &self = *this;
-    return self.errorString();
-}
-
-#endif
-
-/*!
- Returns the string \a str with every regexp special character
- escaped with a backslash. The special characters are $, (, ), *, +,
- ., ?, [, \, ], ^, {, | and }.
-
- Example:
-
- \snippet code/src_corelib_text_qregexp.cpp 19
-
- This function is useful to construct regexp patterns dynamically:
-
- \snippet code/src_corelib_text_qregexp.cpp 20
-
- \sa setPatternSyntax()
-*/
-QString QRegExp::escape(const QString &str)
-{
-    const int count = str.count();
-    QString quoted;
-    // Room for the case where every character needs a backslash.
-    quoted.reserve(count * 2);
-    const QLatin1Char backslash('\\');
-
-    for (int i = 0; i < count; i++) {
-        const QChar ch = str.at(i);
-        bool special = false;
-        switch (ch.toLatin1()) {
-        case '$':
-        case '(':
-        case ')':
-        case '*':
-        case '+':
-        case '.':
-        case '?':
-        case '[':
-        case '\\':
-        case ']':
-        case '^':
-        case '{':
-        case '|':
-        case '}':
-            special = true;
-            break;
-        default:
-            break;
-        }
-        if (special)
-            quoted.append(backslash);
-        quoted.append(ch);
-    }
-    return quoted;
-}
-
-
-#ifndef QT_NO_DATASTREAM
-/*!
- \relates QRegExp
-
- Writes the regular expression \a regExp to stream \a out.
-
- \sa {Serializing Qt Data Types}
-*/
-QDataStream &operator<<(QDataStream &out, const QRegExp &regExp)
-{
-    // Wire format: the pattern, then three quint8 fields for case
-    // sensitivity, pattern syntax and the minimal flag (as 0 or 1).
-    out << regExp.pattern();
-    out << (quint8)regExp.caseSensitivity();
-    out << (quint8)regExp.patternSyntax();
-    out << (quint8)!!regExp.isMinimal();
-    return out;
-}
-
-/*!
- \relates QRegExp
-
- Reads a regular expression from stream \a in into \a regExp.
-
- \sa {Serializing Qt Data Types}
-*/
-QDataStream &operator>>(QDataStream &in, QRegExp &regExp)
-{
-    // Read the fields in the order the stream-out operator writes them.
-    QString pattern;
-    quint8 cs;
-    quint8 patternSyntax;
-    quint8 isMinimal;
-    in >> pattern;
-    in >> cs;
-    in >> patternSyntax;
-    in >> isMinimal;
-
-    // Build the new object completely, then assign it to regExp.
-    QRegExp loaded(pattern, Qt::CaseSensitivity(cs),
-                   QRegExp::PatternSyntax(patternSyntax));
-    loaded.setMinimal(isMinimal);
-    regExp = loaded;
-    return in;
-}
-#endif // QT_NO_DATASTREAM
-
-#ifndef QT_NO_DEBUG_STREAM
-QDebug operator<<(QDebug dbg, const QRegExp &r)
-{
-    // The saver is in scope while nospace() is applied to dbg.
-    QDebugStateSaver saver(dbg);
-    dbg.nospace();
-    dbg << "QRegExp(patternSyntax=" << r.patternSyntax();
-    dbg << ", pattern='" << r.pattern() << "')";
-    return dbg;
-}
-#endif
-
-QT_END_NAMESPACE
diff --git a/src/corelib/text/qregexp.h b/src/corelib/text/qregexp.h
deleted file mode 100644
index 0c117fd17f..0000000000
--- a/src/corelib/text/qregexp.h
+++ /dev/null
@@ -1,151 +0,0 @@
-/****************************************************************************
-**
-** Copyright (C) 2016 The Qt Company Ltd.
-** Contact: https://www.qt.io/licensing/
-**
-** This file is part of the QtCore module of the Qt Toolkit.
-**
-** $QT_BEGIN_LICENSE:LGPL$
-** Commercial License Usage
-** Licensees holding valid commercial Qt licenses may use this file in
-** accordance with the commercial license agreement provided with the
-** Software or, alternatively, in accordance with the terms contained in
-** a written agreement between you and The Qt Company. For licensing terms
-** and conditions see https://www.qt.io/terms-conditions. For further
-** information use the contact form at https://www.qt.io/contact-us.
-**
-** GNU Lesser General Public License Usage
-** Alternatively, this file may be used under the terms of the GNU Lesser
-** General Public License version 3 as published by the Free Software
-** Foundation and appearing in the file LICENSE.LGPL3 included in the
-** packaging of this file. Please review the following information to
-** ensure the GNU Lesser General Public License version 3 requirements
-** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
-**
-** GNU General Public License Usage
-** Alternatively, this file may be used under the terms of the GNU
-** General Public License version 2.0 or (at your option) the GNU General
-** Public license version 3 or any later version approved by the KDE Free
-** Qt Foundation. The licenses are as published by the Free Software
-** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
-** included in the packaging of this file. Please review the following
-** information to ensure the GNU General Public License requirements will
-** be met: https://www.gnu.org/licenses/gpl-2.0.html and
-** https://www.gnu.org/licenses/gpl-3.0.html.
-**
-** $QT_END_LICENSE$
-**
-****************************************************************************/
-
-#ifndef QREGEXP_H
-#define QREGEXP_H
-
-#include <QtCore/qglobal.h>
-
-#ifndef QT_NO_REGEXP
-
-#include <QtCore/qstring.h>
-#include <QtCore/qvariant.h>
-
-QT_BEGIN_NAMESPACE
-
-
-struct QRegExpPrivate;
-class QStringList;
-class QRegExp;
-
-Q_CORE_EXPORT size_t qHash(const QRegExp &key, size_t seed = 0) noexcept;
-
-class Q_CORE_EXPORT QRegExp
-{
-public:
- enum PatternSyntax {
- RegExp,
- Wildcard,
- FixedString,
- RegExp2,
- WildcardUnix,
- W3CXmlSchema11 };
- enum CaretMode { CaretAtZero, CaretAtOffset, CaretWontMatch };
-
- QRegExp();
- explicit QRegExp(const QString &pattern, Qt::CaseSensitivity cs = Qt::CaseSensitive,
- PatternSyntax syntax = RegExp);
- QRegExp(const QRegExp &rx);
- ~QRegExp();
- QRegExp &operator=(const QRegExp &rx);
- QRegExp &operator=(QRegExp &&other) noexcept { swap(other); return *this; }
- void swap(QRegExp &other) noexcept { qSwap(priv, other.priv); }
-
- bool operator==(const QRegExp &rx) const;
- inline bool operator!=(const QRegExp &rx) const { return !operator==(rx); }
-
- bool isEmpty() const;
- bool isValid() const;
- QString pattern() const;
- void setPattern(const QString &pattern);
- Qt::CaseSensitivity caseSensitivity() const;
- void setCaseSensitivity(Qt::CaseSensitivity cs);
- PatternSyntax patternSyntax() const;
- void setPatternSyntax(PatternSyntax syntax);
-
- bool isMinimal() const;
- void setMinimal(bool minimal);
-
- bool exactMatch(const QString &str) const;
-
- operator QVariant() const;
-
- int indexIn(const QString &str, int offset = 0, CaretMode caretMode = CaretAtZero) const;
- int lastIndexIn(const QString &str, int offset = -1, CaretMode caretMode = CaretAtZero) const;
- int matchedLength() const;
-#ifndef QT_NO_REGEXP_CAPTURE
- int captureCount() const;
- QStringList capturedTexts() const;
- QStringList capturedTexts();
- QString cap(int nth = 0) const;
- QString cap(int nth = 0);
- int pos(int nth = 0) const;
- int pos(int nth = 0);
- QString errorString() const;
- QString errorString();
-#endif
-
- QString replaceIn(const QString &str, const QString &after) const;
- QString removeIn(const QString &str) const
- { return replaceIn(str, QString()); }
- bool containedIn(const QString &str) const
- { return indexIn(str) != -1; }
- int countIn(const QString &str) const;
-
- QStringList splitString(const QString &str, Qt::SplitBehavior behavior = Qt::KeepEmptyParts) const;
-
- int indexIn(const QStringList &list, int from) const;
- int lastIndexIn(const QStringList &list, int from) const;
- QStringList replaceIn(const QStringList &stringList, const QString &after) const;
- QStringList filterList(const QStringList &stringList) const;
-
- static QString escape(const QString &str);
-
- friend Q_CORE_EXPORT size_t qHash(const QRegExp &key, size_t seed) noexcept;
-
-private:
- QRegExpPrivate *priv;
-};
-
-#ifndef QT_NO_DATASTREAM
-Q_CORE_EXPORT QDataStream &operator<<(QDataStream &out, const QRegExp &regExp);
-Q_CORE_EXPORT QDataStream &operator>>(QDataStream &in, QRegExp &regExp);
-#endif
-
-#ifndef QT_NO_DEBUG_STREAM
-Q_CORE_EXPORT QDebug operator<<(QDebug, const QRegExp &);
-#endif
-
-QT_END_NAMESPACE
-
-Q_DECLARE_METATYPE(QRegExp)
-
-#endif // QT_NO_REGEXP
-
-#endif // QREGEXP_H
diff --git a/src/corelib/text/text.pri b/src/corelib/text/text.pri
index 89fbdddd83..0d9a6af454 100644
--- a/src/corelib/text/text.pri
+++ b/src/corelib/text/text.pri
@@ -16,7 +16,6 @@ HEADERS += \
text/qlocale_p.h \
text/qlocale_tools_p.h \
text/qlocale_data_p.h \
- text/qregexp.h \
text/qstring.h \
text/qstringalgorithms.h \
text/qstringalgorithms_p.h \
@@ -41,7 +40,6 @@ SOURCES += \
text/qcollator.cpp \
text/qlocale.cpp \
text/qlocale_tools.cpp \
- text/qregexp.cpp \
text/qstring.cpp \
text/qstringbuilder.cpp \
text/qstringconverter.cpp \
diff --git a/src/tools/uic/qclass_lib_map.h b/src/tools/uic/qclass_lib_map.h
index b63b05107b..c0862a9c02 100644
--- a/src/tools/uic/qclass_lib_map.h
+++ b/src/tools/uic/qclass_lib_map.h
@@ -214,7 +214,6 @@ QT_CLASS_LIB(QPointF, QtCore, qpoint.h)
QT_CLASS_LIB(QQueue, QtCore, qqueue.h)
QT_CLASS_LIB(QRect, QtCore, qrect.h)
QT_CLASS_LIB(QRectF, QtCore, qrect.h)
-QT_CLASS_LIB(QRegExp, QtCore, qregexp.h)
QT_CLASS_LIB(QScopedPointerDeleter, QtCore, qscopedpointer.h)
QT_CLASS_LIB(QScopedPointerArrayDeleter, QtCore, qscopedpointer.h)
QT_CLASS_LIB(QScopedPointerPodDeleter, QtCore, qscopedpointer.h)
diff --git a/tests/auto/corelib/text/.prev_CMakeLists.txt b/tests/auto/corelib/text/.prev_CMakeLists.txt
index e23de92c8c..d68a8e9c2d 100644
--- a/tests/auto/corelib/text/.prev_CMakeLists.txt
+++ b/tests/auto/corelib/text/.prev_CMakeLists.txt
@@ -8,7 +8,6 @@ add_subdirectory(qchar)
add_subdirectory(qcollator)
add_subdirectory(qlatin1string)
add_subdirectory(qlocale)
-add_subdirectory(qregexp)
add_subdirectory(qregularexpression)
add_subdirectory(qstring)
add_subdirectory(qstring_no_cast_from_bytearray)
diff --git a/tests/auto/corelib/text/CMakeLists.txt b/tests/auto/corelib/text/CMakeLists.txt
index 0c36f1e088..285fdb27c3 100644
--- a/tests/auto/corelib/text/CMakeLists.txt
+++ b/tests/auto/corelib/text/CMakeLists.txt
@@ -10,7 +10,6 @@ add_subdirectory(qchar)
add_subdirectory(qcollator)
add_subdirectory(qlatin1string)
add_subdirectory(qlocale)
-add_subdirectory(qregexp)
add_subdirectory(qregularexpression)
add_subdirectory(qstring)
add_subdirectory(qstring_no_cast_from_bytearray)
diff --git a/tests/auto/corelib/text/qregexp/.gitignore b/tests/auto/corelib/text/qregexp/.gitignore
deleted file mode 100644
index e6e629ee2c..0000000000
--- a/tests/auto/corelib/text/qregexp/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-tst_qregexp
diff --git a/tests/auto/corelib/text/qregexp/CMakeLists.txt b/tests/auto/corelib/text/qregexp/CMakeLists.txt
deleted file mode 100644
index 257e87a684..0000000000
--- a/tests/auto/corelib/text/qregexp/CMakeLists.txt
+++ /dev/null
@@ -1,24 +0,0 @@
-# Generated from qregexp.pro.
-
-#####################################################################
-## tst_qregexp Test:
-#####################################################################
-
-qt_add_test(tst_qregexp
- SOURCES
- tst_qregexp.cpp
-)
-
-# Resources:
-set(qregexp_resource_files
- "data/qdatastream_4.9.bin"
- "data/qdatastream_5.0.bin"
-)
-
-qt_add_resource(tst_qregexp "qregexp"
- PREFIX
- "/"
- FILES
- ${qregexp_resource_files}
-)
-
diff --git a/tests/auto/corelib/text/qregexp/data/qdatastream_4.9.bin b/tests/auto/corelib/text/qregexp/data/qdatastream_4.9.bin
deleted file mode 100644
index db8518e064..0000000000
--- a/tests/auto/corelib/text/qregexp/data/qdatastream_4.9.bin
+++ /dev/null
Binary files differ
diff --git a/tests/auto/corelib/text/qregexp/data/qdatastream_5.0.bin b/tests/auto/corelib/text/qregexp/data/qdatastream_5.0.bin
deleted file mode 100644
index db8518e064..0000000000
--- a/tests/auto/corelib/text/qregexp/data/qdatastream_5.0.bin
+++ /dev/null
Binary files differ
diff --git a/tests/auto/corelib/text/qregexp/qregexp.pro b/tests/auto/corelib/text/qregexp/qregexp.pro
deleted file mode 100644
index 748e6a248c..0000000000
--- a/tests/auto/corelib/text/qregexp/qregexp.pro
+++ /dev/null
@@ -1,5 +0,0 @@
-CONFIG += testcase
-TARGET = tst_qregexp
-QT = core testlib
-SOURCES = tst_qregexp.cpp
-RESOURCES += qregexp.qrc
diff --git a/tests/auto/corelib/text/qregexp/qregexp.qrc b/tests/auto/corelib/text/qregexp/qregexp.qrc
deleted file mode 100644
index 8fd168793f..0000000000
--- a/tests/auto/corelib/text/qregexp/qregexp.qrc
+++ /dev/null
@@ -1,6 +0,0 @@
-<RCC>
- <qresource prefix="/">
- <file>data/qdatastream_4.9.bin</file>
- <file>data/qdatastream_5.0.bin</file>
- </qresource>
-</RCC>
diff --git a/tests/auto/corelib/text/qregexp/tst_qregexp.cpp b/tests/auto/corelib/text/qregexp/tst_qregexp.cpp
deleted file mode 100644
index 29ddf3673f..0000000000
--- a/tests/auto/corelib/text/qregexp/tst_qregexp.cpp
+++ /dev/null
@@ -1,1726 +0,0 @@
-
-/****************************************************************************
-**
-** Copyright (C) 2016 The Qt Company Ltd.
-** Contact: https://www.qt.io/licensing/
-**
-** This file is part of the test suite of the Qt Toolkit.
-**
-** $QT_BEGIN_LICENSE:GPL-EXCEPT$
-** Commercial License Usage
-** Licensees holding valid commercial Qt licenses may use this file in
-** accordance with the commercial license agreement provided with the
-** Software or, alternatively, in accordance with the terms contained in
-** a written agreement between you and The Qt Company. For licensing terms
-** and conditions see https://www.qt.io/terms-conditions. For further
-** information use the contact form at https://www.qt.io/contact-us.
-**
-** GNU General Public License Usage
-** Alternatively, this file may be used under the terms of the GNU
-** General Public License version 3 as published by the Free Software
-** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
-** included in the packaging of this file. Please review the following
-** information to ensure the GNU General Public License requirements will
-** be met: https://www.gnu.org/licenses/gpl-3.0.html.
-**
-** $QT_END_LICENSE$
-**
-****************************************************************************/
-
-#include <QtTest/QtTest>
-#include <qregexp.h>
-
-const int N = 1;
-
-class tst_QRegExp : public QObject
-{
- Q_OBJECT
-private slots:
- void getSetCheck();
- void indexIn_data();
- void indexIn_addMoreRows(const QByteArray &stri);
- void indexIn();
- void lastIndexIn_data();
- void lastIndexIn();
- void matchedLength();
- void wildcard_data();
- void wildcard();
- void testEscapingWildcard_data();
- void testEscapingWildcard();
- void testInvalidWildcard_data();
- void testInvalidWildcard();
- void caretAnchoredOptimization();
- void isEmpty();
- void prepareEngineOptimization();
- void swap();
- void operator_eq();
-
- void exactMatch();
- void capturedTexts();
- void staticRegExp();
- void rainersSlowRegExpCopyBug();
- void nonExistingBackReferenceBug();
-
- void reentrancy();
- void threadsafeEngineCache();
-
- void posAndCapConsistency_data();
- void posAndCapConsistency();
- void interval();
- void validityCheck_data();
- void validityCheck();
- void escapeSequences();
-
- void splitString_data();
- void splitString();
-
- void countIn();
- void containedIn();
-
- void replaceIn_data();
- void replaceIn();
- void removeIn_data();
- void removeIn();
-
- void filterList();
- void replaceInList();
-
- void datastream_data();
- void datastream();
-
- void datastream2();
-
-private:
- void readQRegExp(QDataStream *s);
- void writeQRegExp(QDataStream* dev);
-};
-
-// Testing get/set functions
-void tst_QRegExp::getSetCheck()
-{
- QRegExp obj1;
- // PatternSyntax QRegExp::patternSyntax()
- // void QRegExp::setPatternSyntax(PatternSyntax)
- obj1.setPatternSyntax(QRegExp::PatternSyntax(QRegExp::RegExp));
- QCOMPARE(QRegExp::PatternSyntax(QRegExp::RegExp), obj1.patternSyntax());
- obj1.setPatternSyntax(QRegExp::PatternSyntax(QRegExp::Wildcard));
- QCOMPARE(QRegExp::PatternSyntax(QRegExp::Wildcard), obj1.patternSyntax());
- obj1.setPatternSyntax(QRegExp::PatternSyntax(QRegExp::FixedString));
- QCOMPARE(QRegExp::PatternSyntax(QRegExp::FixedString), obj1.patternSyntax());
-}
-
-extern const char email[];
-
-void tst_QRegExp::lastIndexIn_data()
-{
- indexIn_data();
-}
-
-void tst_QRegExp::indexIn_data()
-{
- QTest::addColumn<QString>("regexpStr");
- QTest::addColumn<QString>("target");
- QTest::addColumn<int>("pos");
- QTest::addColumn<int>("len");
- QTest::addColumn<QStringList>("caps");
-
- for (int i = 0; i < N; ++i) {
- QByteArray stri;
- if (i > 0)
- stri.setNum(i);
-
- // anchors
- QTest::newRow(qPrintable(stri + "anc00")) << QString("a(?=)z") << QString("az") << 0 << 2 << QStringList();
- QTest::newRow(qPrintable(stri + "anc01")) << QString("a(?!)z") << QString("az") << -1 << -1 << QStringList();
- QTest::newRow(qPrintable(stri + "anc02")) << QString("a(?:(?=)|(?=))z") << QString("az") << 0 << 2
- << QStringList();
- QTest::newRow(qPrintable(stri + "anc03")) << QString("a(?:(?=)|(?!))z") << QString("az") << 0 << 2
- << QStringList();
- QTest::newRow(qPrintable(stri + "anc04")) << QString("a(?:(?!)|(?=))z") << QString("az") << 0 << 2
- << QStringList();
- QTest::newRow(qPrintable(stri + "anc05")) << QString("a(?:(?!)|(?!))z") << QString("az") << -1 << -1
- << QStringList();
- QTest::newRow(qPrintable(stri + "anc06")) << QString("a(?:(?=)|b)z") << QString("az") << 0 << 2
- << QStringList();
- QTest::newRow(qPrintable(stri + "anc07")) << QString("a(?:(?=)|b)z") << QString("abz") << 0 << 3
- << QStringList();
- QTest::newRow(qPrintable(stri + "anc08")) << QString("a(?:(?!)|b)z") << QString("az") << -1 << -1
- << QStringList();
- QTest::newRow(qPrintable(stri + "anc09")) << QString("a(?:(?!)|b)z") << QString("abz") << 0 << 3
- << QStringList();
- QTest::newRow(qPrintable(stri + "anc10")) << QString("a?(?=^b$)") << QString("ab") << -1 << -1
- << QStringList();
- QTest::newRow(qPrintable(stri + "anc11")) << QString("a?(?=^b$)") << QString("b") << 0 << 0
- << QStringList();
-
- // back-references
- QTest::newRow(qPrintable(stri + "bref00")) << QString("(a*)(\\1)") << QString("aaaaa") << 0 << 4
- << QStringList( QStringList() << "aa" << "aa" );
- QTest::newRow(qPrintable(stri + "bref01")) << QString("<(\\w*)>.+</\\1>") << QString("<b>blabla</b>bla</>")
- << 0 << 13 << QStringList( QStringList() << "b" );
- QTest::newRow(qPrintable(stri + "bref02")) << QString("<(\\w*)>.+</\\1>") << QString("<>blabla</b>bla</>")
- << 0 << 18 << QStringList( QStringList() << "" );
- QTest::newRow(qPrintable(stri + "bref03")) << QString("((a*\\2)\\2)") << QString("aaaa") << 0 << 4
- << QStringList( QStringList() << QString("aaaa") << "aa" );
- QTest::newRow(qPrintable(stri + "bref04")) << QString("^(aa+)\\1+$") << QString("aaaaaa") << 0 << 6
- << QStringList( QStringList() << QString("aa") );
- QTest::newRow(qPrintable(stri + "bref05")) << QString("^(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)(11)(12)(13)(14)"
- "\\14\\13\\12\\11\\10\\9\\8\\7\\6\\5\\4\\3\\2\\1")
- << QString("12345678910111213141413121110987654321") << 0 << 38
- << QStringList( QStringList() << "1" << "2" << "3" << "4" << "5" << "6"
- << "7" << "8" << "9" << "10" << "11"
- << "12" << "13" << "14");
-
- // captures
- QTest::newRow(qPrintable(stri + "cap00")) << QString("(a*)") << QString("") << 0 << 0
- << QStringList( QStringList() << QString("") );
- QTest::newRow(qPrintable(stri + "cap01")) << QString("(a*)") << QString("aaa") << 0 << 3
- << QStringList( QStringList() << "aaa" );
- QTest::newRow(qPrintable(stri + "cap02")) << QString("(a*)") << QString("baaa") << 0 << 0
- << QStringList( QStringList() << QString("") );
- QTest::newRow(qPrintable(stri + "cap03")) << QString("(a*)(a*)") << QString("aaa") << 0 << 3
- << QStringList( QStringList() << QString("aaa") << QString("") );
- QTest::newRow(qPrintable(stri + "cap04")) << QString("(a*)(b*)") << QString("aaabbb") << 0 << 6
- << QStringList( QStringList() << QString("aaa") << QString("bbb") );
- QTest::newRow(qPrintable(stri + "cap06")) << QString("(a*)a*") << QString("aaa") << 0 << 3
- << QStringList( QStringList() << QString("aaa") );
- QTest::newRow(qPrintable(stri + "cap07")) << QString("((a*a*)*)") << QString("aaa") << 0 << 3
- << QStringList( QStringList() << "aaa" << QString("aaa") );
- QTest::newRow(qPrintable(stri + "cap08")) << QString("(((a)*(b)*)*)") << QString("ababa") << 0 << 5
- << QStringList( QStringList() << QString("ababa") << QString("a") << QString("a")
- << "" );
- QTest::newRow(qPrintable(stri + "cap09")) << QString("(((a)*(b)*)c)*") << QString("") << 0 << 0
- << QStringList( QStringList() << QString("") << QString("") << QString("") << QString("") );
- QTest::newRow(qPrintable(stri + "cap10")) << QString("(((a)*(b)*)c)*") << QString("abc") << 0 << 3
- << QStringList( QStringList() << "abc" << "ab" << "a"
- << "b" );
- QTest::newRow(qPrintable(stri + "cap11")) << QString("(((a)*(b)*)c)*") << QString("abcc") << 0 << 4
- << QStringList( QStringList() << "c" << "" << "" << "" );
- QTest::newRow(qPrintable(stri + "cap12")) << QString("(((a)*(b)*)c)*") << QString("abcac") << 0 << 5
- << QStringList( QStringList() << "ac" << "a" << "a" << "" );
- QTest::newRow(qPrintable(stri + "cap13")) << QString("(to|top)?(o|polo)?(gical|o?logical)")
- << QString("topological") << 0 << 11
- << QStringList( QStringList() << "top" << "o"
- << "logical" );
- QTest::newRow(qPrintable(stri + "cap14")) << QString("(a)+") << QString("aaaa") << 0 << 4
- << QStringList( QStringList() << "a" );
-
- // concatenation
- QTest::newRow(qPrintable(stri + "cat00")) << QString("") << QString("") << 0 << 0 << QStringList();
- QTest::newRow(qPrintable(stri + "cat01")) << QString("") << QString("a") << 0 << 0 << QStringList();
- QTest::newRow(qPrintable(stri + "cat02")) << QString("a") << QString("") << -1 << -1 << QStringList();
- QTest::newRow(qPrintable(stri + "cat03")) << QString("a") << QString("a") << 0 << 1 << QStringList();
- QTest::newRow(qPrintable(stri + "cat04")) << QString("a") << QString("b") << -1 << -1 << QStringList();
- QTest::newRow(qPrintable(stri + "cat05")) << QString("b") << QString("a") << -1 << -1 << QStringList();
- QTest::newRow(qPrintable(stri + "cat06")) << QString("ab") << QString("ab") << 0 << 2 << QStringList();
- QTest::newRow(qPrintable(stri + "cat07")) << QString("ab") << QString("ba") << -1 << -1 << QStringList();
- QTest::newRow(qPrintable(stri + "cat08")) << QString("abab") << QString("abbaababab") << 4 << 4 << QStringList();
-
- indexIn_addMoreRows(stri);
- }
-}
-
-void tst_QRegExp::indexIn_addMoreRows(const QByteArray &stri)
-{
- // from Perl Cookbook
- QTest::newRow(qPrintable(stri + "cook00")) << QString("^(m*)(d?c{0,3}|c[dm])(1?x{0,3}|x[lc])(v?i{0,3}|i[vx])$")
- << QString("mmxl") << 0 << 4
- << QStringList( QStringList() << "mm" << "" << "xl"
- << "" );
- QTest::newRow(qPrintable(stri + "cook01")) << QString("(\\S+)(\\s+)(\\S+)") << QString(" a b") << 1 << 5
- << QStringList( QStringList() << "a" << " " << "b" );
- QTest::newRow(qPrintable(stri + "cook02")) << QString("(\\w+)\\s*=\\s*(.*)\\s*$") << QString(" PATH=. ") << 1
- << 7 << QStringList( QStringList() << "PATH" << ". " );
- QTest::newRow(qPrintable(stri + "cook03")) << QString(".{80,}")
- << QString("0000000011111111222222223333333344444444555"
- "5555566666666777777778888888899999999000000"
- "00aaaaaaaa")
- << 0 << 96 << QStringList();
- QTest::newRow(qPrintable(stri + "cook04")) << QString("(\\d+)/(\\d+)/(\\d+) (\\d+):(\\d+):(\\d+)")
- << QString("1978/05/24 07:30:00") << 0 << 19
- << QStringList( QStringList() << "1978" << "05" << "24"
- << "07" << "30" << "00" );
- QTest::newRow(qPrintable(stri + "cook05")) << QString("/usr/bin") << QString("/usr/local/bin:/usr/bin")
- << 15 << 8 << QStringList();
- QTest::newRow(qPrintable(stri + "cook06")) << QString("%([0-9A-Fa-f]{2})") << QString("http://%7f") << 7 << 3
- << QStringList( QStringList() << "7f" );
- QTest::newRow(qPrintable(stri + "cook07")) << QString("/\\*.*\\*/") << QString("i++; /* increment i */") << 5
- << 17 << QStringList();
- QTest::newRow(qPrintable(stri + "cook08")) << QString("^\\s+") << QString(" aaa ") << 0 << 3
- << QStringList();
- QTest::newRow(qPrintable(stri + "cook09")) << QString("\\s+$") << QString(" aaa ") << 6 << 3
- << QStringList();
- QTest::newRow(qPrintable(stri + "cook10")) << QString("^.*::") << QString("Box::cat") << 0 << 5
- << QStringList();
- QTest::newRow(qPrintable(stri + "cook11")) << QString("^([01]?\\d\\d|2[0-4]\\d|25[0-5])\\.([01]?\\"
- "d\\d|2[0-4]\\d|25[0-5])\\.([01]?\\d\\d|2[0-"
- "4]\\d|25[0-5])\\.([01]?\\d\\d|2[0-4]\\d|25["
- "0-5])$")
- << QString("255.00.40.30") << 0 << 12
- << QStringList( QStringList() << "255" << "00" << "40"
- << "30" );
- QTest::newRow(qPrintable(stri + "cook12")) << QString("^.*/") << QString(" /usr/local/bin/moc") << 0 << 16
- << QStringList();
- QTest::newRow(qPrintable(stri + "cook13")) << QString(":co#(\\d+):") << QString("bla:co#55:") << 3 << 7
- << QStringList( QStringList() << "55" );
- QTest::newRow(qPrintable(stri + "cook14")) << QString("linux") << QString("alphalinuxinunix") << 5 << 5
- << QStringList();
- QTest::newRow(qPrintable(stri + "cook15")) << QString("(\\d+\\.?\\d*|\\.\\d+)") << QString("0.0.5") << 0 << 3
- << QStringList( QStringList() << "0.0" );
-
- // mathematical trivia
- QTest::newRow(qPrintable(stri + "math00")) << QString("^(a\\1*)$") << QString("a") << 0 << 1
- << QStringList( QStringList() << "a" );
- QTest::newRow(qPrintable(stri + "math01")) << QString("^(a\\1*)$") << QString("aa") << 0 << 2
- << QStringList( QStringList() << "aa" );
- QTest::newRow(qPrintable(stri + "math02")) << QString("^(a\\1*)$") << QString("aaa") << -1 << -1
- << QStringList( QStringList() << QString() );
- QTest::newRow(qPrintable(stri + "math03")) << QString("^(a\\1*)$") << QString("aaaa") << 0 << 4
- << QStringList( QStringList() << "aaaa" );
- QTest::newRow(qPrintable(stri + "math04")) << QString("^(a\\1*)$") << QString("aaaaa") << -1 << -1
- << QStringList( QStringList() << QString() );
- QTest::newRow(qPrintable(stri + "math05")) << QString("^(a\\1*)$") << QString("aaaaaa") << -1 << -1
- << QStringList( QStringList() << QString() );
- QTest::newRow(qPrintable(stri + "math06")) << QString("^(a\\1*)$") << QString("aaaaaaa") << -1 << -1
- << QStringList( QStringList() << QString() );
- QTest::newRow(qPrintable(stri + "math07")) << QString("^(a\\1*)$") << QString("aaaaaaaa") << 0 << 8
- << QStringList( QStringList() << "aaaaaaaa" );
- QTest::newRow(qPrintable(stri + "math08")) << QString("^(a\\1*)$") << QString("aaaaaaaaa") << -1 << -1
- << QStringList( QStringList() << QString() );
- QTest::newRow(qPrintable(stri + "math09")) << QString("^a(?:a(\\1a))*$") << QString("a") << 0 << 1
- << QStringList( QStringList() << "" );
- QTest::newRow(qPrintable(stri + "math10")) << QString("^a(?:a(\\1a))*$") << QString("aaa") << 0 << 3
- << QStringList( QStringList() << "a" );
-
- QTest::newRow(qPrintable(stri + "math13")) << QString("^(?:((?:^a)?\\2\\3)(\\3\\1|(?=a$))(\\1\\2|("
- "?=a$)))*a$")
- << QString("aaa") << 0 << 3
- << QStringList( QStringList() << "a" << "a" << "" );
- QTest::newRow(qPrintable(stri + "math14")) << QString("^(?:((?:^a)?\\2\\3)(\\3\\1|(?=a$))(\\1\\2|("
- "?=a$)))*a$")
- << QString("aaaaa") << 0 << 5
- << QStringList( QStringList() << "a" << "a" << "aa" );
- QTest::newRow(qPrintable(stri + "math17")) << QString("^(?:(a(?:(\\1\\3)(\\1\\2))*(?:\\1\\3)?)|((?"
- ":(\\4(?:^a)?\\6)(\\4\\5))*(?:\\4\\6)?))$")
- << QString("aaa") << 0 << 3
- << QStringList( QStringList() << "" << "" << "" << "aaa" << "a" << "aa" );
- QTest::newRow(qPrintable(stri + "math18")) << QString("^(?:(a(?:(\\1\\3)(\\1\\2))*(?:\\1\\3)?)|((?"
- ":(\\4(?:^a)?\\6)(\\4\\5))*(?:\\4\\6)?))$")
- << QString("aaaaa") << 0 << 5
- << QStringList( QStringList() << "aaaaa" << "a" << "aaa" << "" << "" << "" );
- QTest::newRow(qPrintable(stri + "math19")) << QString("^(?:(a(?:(\\1\\3)(\\1\\2))*(?:\\1\\3)?)|((?"
- ":(\\4(?:^a)?\\6)(\\4\\5))*(?:\\4\\6)?))$")
- << QString("aaaaaaaa") << 0 << 8
- << QStringList( QStringList() << "" << "" << "" << "aaaaaaaa" << "a" << "aa" );
- QTest::newRow(qPrintable(stri + "math20")) << QString("^(?:(a(?:(\\1\\3)(\\1\\2))*(?:\\1\\3)?)|((?"
- ":(\\4(?:^a)?\\6)(\\4\\5))*(?:\\4\\6)?))$")
- << QString("aaaaaaaaa") << -1 << -1
- << QStringList( QStringList() << QString()
- << QString()
- << QString()
- << QString()
- << QString()
- << QString() );
- QTest::newRow(qPrintable(stri + "math21")) << QString("^(aa+)\\1+$") << QString("aaaaaaaaaaaa") << 0 << 12
- << QStringList( QStringList() << "aa" );
-
- static const char * const squareRegExp[] = {
- "^a(?:(\\1aa)a)*$",
- "^(\\2(\\1a))+$",
- "^((\\2a)*)\\1\\2a$",
- 0
- };
-
- int ii = 0;
-
- while ( squareRegExp[ii] != 0 ) {
- for ( int j = 0; j < 100; j++ ) {
- const QString name = QString::asprintf( "square%.1d%.2d", ii, j );
-
- QString target = "";
- target.fill( 'a', j );
-
- int pos = -1;
- int len = -1;
-
- for ( int k = 1; k * k <= j; k++ ) {
- if ( k * k == j ) {
- pos = 0;
- len = j;
- break;
- }
- }
-
- QTest::newRow( name.toLatin1() ) << QString( squareRegExp[ii] ) << target
- << pos << len << QStringList( "IGNORE ME" );
- }
- ii++;
- }
-
- // miscellaneous
- QTest::newRow(qPrintable(stri + "misc00")) << QString(email)
- << QString("email123@example.com") << 0 << 20
- << QStringList();
- QTest::newRow(qPrintable(stri + "misc01")) << QString("[0-9]*\\.[0-9]+") << QString("pi = 3.14") << 5 << 4
- << QStringList();
-
- // or operator
- QTest::newRow(qPrintable(stri + "or00")) << QString("(?:|b)") << QString("xxx") << 0 << 0 << QStringList();
- QTest::newRow(qPrintable(stri + "or01")) << QString("(?:|b)") << QString("b") << 0 << 1 << QStringList();
- QTest::newRow(qPrintable(stri + "or02")) << QString("(?:b|)") << QString("") << 0 << 0 << QStringList();
- QTest::newRow(qPrintable(stri + "or03")) << QString("(?:b|)") << QString("b") << 0 << 1 << QStringList();
- QTest::newRow(qPrintable(stri + "or04")) << QString("(?:||b||)") << QString("") << 0 << 0 << QStringList();
- QTest::newRow(qPrintable(stri + "or05")) << QString("(?:||b||)") << QString("b") << 0 << 1 << QStringList();
- QTest::newRow(qPrintable(stri + "or06")) << QString("(?:a|b)") << QString("") << -1 << -1 << QStringList();
- QTest::newRow(qPrintable(stri + "or07")) << QString("(?:a|b)") << QString("cc") << -1 << -1 << QStringList();
- QTest::newRow(qPrintable(stri + "or08")) << QString("(?:a|b)") << QString("abc") << 0 << 1 << QStringList();
- QTest::newRow(qPrintable(stri + "or09")) << QString("(?:a|b)") << QString("cba") << 1 << 1 << QStringList();
- QTest::newRow(qPrintable(stri + "or10")) << QString("(?:ab|ba)") << QString("aba") << 0 << 2
- << QStringList();
- QTest::newRow(qPrintable(stri + "or11")) << QString("(?:ab|ba)") << QString("bab") << 0 << 2
- << QStringList();
- QTest::newRow(qPrintable(stri + "or12")) << QString("(?:ab|ba)") << QString("caba") << 1 << 2
- << QStringList();
- QTest::newRow(qPrintable(stri + "or13")) << QString("(?:ab|ba)") << QString("cbab") << 1 << 2
- << QStringList();
-
- // quantifiers
- QTest::newRow(qPrintable(stri + "qua00")) << QString("((([a-j])){0,0})") << QString("") << 0 << 0
- << QStringList( QStringList() << "" << "" << "" );
- QTest::newRow(qPrintable(stri + "qua01")) << QString("((([a-j])){0,0})") << QString("a") << 0 << 0
- << QStringList( QStringList() << "" << "" << "" );
- QTest::newRow(qPrintable(stri + "qua02")) << QString("((([a-j])){0,0})") << QString("xyz") << 0 << 0
- << QStringList( QStringList() << "" << "" << "" );
- QTest::newRow(qPrintable(stri + "qua03")) << QString("((([a-j]))?)") << QString("") << 0 << 0
- << QStringList( QStringList() << "" << "" << "" );
- QTest::newRow(qPrintable(stri + "qua04")) << QString("((([a-j]))?)") << QString("a") << 0 << 1
- << QStringList( QStringList() << "a" << "a" << "a" );
- QTest::newRow(qPrintable(stri + "qua05")) << QString("((([a-j]))?)") << QString("x") << 0 << 0
- << QStringList( QStringList() << "" << "" << "" );
- QTest::newRow(qPrintable(stri + "qua06")) << QString("((([a-j]))?)") << QString("ab") << 0 << 1
- << QStringList( QStringList() << "a" << "a" << "a" );
- QTest::newRow(qPrintable(stri + "qua07")) << QString("((([a-j]))?)") << QString("xa") << 0 << 0
- << QStringList( QStringList() << "" << "" << "" );
- QTest::newRow(qPrintable(stri + "qua08")) << QString("((([a-j])){0,3})") << QString("") << 0 << 0
- << QStringList( QStringList() << "" << "" << "" );
- QTest::newRow(qPrintable(stri + "qua09")) << QString("((([a-j])){0,3})") << QString("a") << 0 << 1
- << QStringList( QStringList() << "a" << "a" << "a" );
- QTest::newRow(qPrintable(stri + "qua10")) << QString("((([a-j])){0,3})") << QString("abcd") << 0 << 3
- << QStringList( QStringList() << "abc" << "c" << "c" );
- QTest::newRow(qPrintable(stri + "qua11")) << QString("((([a-j])){0,3})") << QString("abcde") << 0 << 3
- << QStringList( QStringList() << "abc" << "c" << "c" );
- QTest::newRow(qPrintable(stri + "qua12")) << QString("((([a-j])){2,4})") << QString("a") << -1 << -1
- << QStringList( QStringList() << QString()
- << QString()
- << QString() );
- QTest::newRow(qPrintable(stri + "qua13")) << QString("((([a-j])){2,4})") << QString("ab") << 0 << 2
- << QStringList( QStringList() << "ab" << "b" << "b" );
- QTest::newRow(qPrintable(stri + "qua14")) << QString("((([a-j])){2,4})") << QString("abcd") << 0 << 4
- << QStringList( QStringList() << "abcd" << "d" << "d" );
- QTest::newRow(qPrintable(stri + "qua15")) << QString("((([a-j])){2,4})") << QString("abcdef") << 0 << 4
- << QStringList( QStringList() << "abcd" << "d" << "d" );
- QTest::newRow(qPrintable(stri + "qua16")) << QString("((([a-j])){2,4})") << QString("xaybcd") << 3 << 3
- << QStringList( QStringList() << "bcd" << "d" << "d" );
- QTest::newRow(qPrintable(stri + "qua17")) << QString("((([a-j])){0,})") << QString("abcdefgh") << 0 << 8
- << QStringList( QStringList() << "abcdefgh" << "h" << "h" );
- QTest::newRow(qPrintable(stri + "qua18")) << QString("((([a-j])){,0})") << QString("abcdefgh") << 0 << 0
- << QStringList( QStringList() << "" << "" << "" );
- QTest::newRow(qPrintable(stri + "qua19")) << QString("(1(2(3){3,4}){2,3}){1,2}") << QString("123332333") << 0
- << 9
- << QStringList( QStringList() << "123332333" << "2333"
- << "3" );
- QTest::newRow(qPrintable(stri + "qua20")) << QString("(1(2(3){3,4}){2,3}){1,2}")
- << QString("12333323333233331233332333323333") << 0 << 32
- << QStringList( QStringList() << "1233332333323333"
- << "23333" << "3" );
- QTest::newRow(qPrintable(stri + "qua21")) << QString("(1(2(3){3,4}){2,3}){1,2}") << QString("") << -1 << -1
- << QStringList( QStringList() << QString()
- << QString()
- << QString() );
- QTest::newRow(qPrintable(stri + "qua22")) << QString("(1(2(3){3,4}){2,3}){1,2}") << QString("12333") << -1
- << -1
- << QStringList( QStringList() << QString()
- << QString()
- << QString() );
- QTest::newRow(qPrintable(stri + "qua23")) << QString("(1(2(3){3,4}){2,3}){1,2}") << QString("12333233") << -1
- << -1
- << QStringList( QStringList() << QString()
- << QString()
- << QString() );
- QTest::newRow(qPrintable(stri + "qua24")) << QString("(1(2(3){3,4}){2,3}){1,2}") << QString("122333") << -1
- << -1
- << QStringList( QStringList() << QString()
- << QString()
- << QString() );
-
- // star operator
- QTest::newRow(qPrintable(stri + "star00")) << QString("(?:)*") << QString("") << 0 << 0 << QStringList();
- QTest::newRow(qPrintable(stri + "star01")) << QString("(?:)*") << QString("abc") << 0 << 0 << QStringList();
- QTest::newRow(qPrintable(stri + "star02")) << QString("(?:a)*") << QString("") << 0 << 0 << QStringList();
- QTest::newRow(qPrintable(stri + "star03")) << QString("(?:a)*") << QString("a") << 0 << 1 << QStringList();
- QTest::newRow(qPrintable(stri + "star04")) << QString("(?:a)*") << QString("aaa") << 0 << 3 << QStringList();
- QTest::newRow(qPrintable(stri + "star05")) << QString("(?:a)*") << QString("bbbbaaa") << 0 << 0
- << QStringList();
- QTest::newRow(qPrintable(stri + "star06")) << QString("(?:a)*") << QString("bbbbaaabbaaaaa") << 0 << 0
- << QStringList();
- QTest::newRow(qPrintable(stri + "star07")) << QString("(?:b)*(?:a)*") << QString("") << 0 << 0
- << QStringList();
- QTest::newRow(qPrintable(stri + "star08")) << QString("(?:b)*(?:a)*") << QString("a") << 0 << 1
- << QStringList();
- QTest::newRow(qPrintable(stri + "star09")) << QString("(?:b)*(?:a)*") << QString("aaa") << 0 << 3
- << QStringList();
- QTest::newRow(qPrintable(stri + "star10")) << QString("(?:b)*(?:a)*") << QString("bbbbaaa") << 0 << 7
- << QStringList();
- QTest::newRow(qPrintable(stri + "star11")) << QString("(?:b)*(?:a)*") << QString("bbbbaaabbaaaaa") << 0 << 7
- << QStringList();
- QTest::newRow(qPrintable(stri + "star12")) << QString("(?:a|b)*") << QString("c") << 0 << 0 << QStringList();
- QTest::newRow(qPrintable(stri + "star13")) << QString("(?:a|b)*") << QString("abac") << 0 << 3
- << QStringList();
- QTest::newRow(qPrintable(stri + "star14")) << QString("(?:a|b|)*") << QString("c") << 0 << 0
- << QStringList();
- QTest::newRow(qPrintable(stri + "star15")) << QString("(?:a|b|)*") << QString("abac") << 0 << 3
- << QStringList();
- QTest::newRow(qPrintable(stri + "star16")) << QString("(?:ab|ba|b)*") << QString("abbbababbbaaab") << 0 << 11
- << QStringList();
-}
-
-void tst_QRegExp::exactMatch()
-{
- QRegExp rx_d( "\\d" );
- QRegExp rx_s( "\\s" );
- QRegExp rx_w( "\\w" );
- QRegExp rx_D( "\\D" );
- QRegExp rx_S( "\\S" );
- QRegExp rx_W( "\\W" );
-
- for ( int i = 0; i < 65536; i++ ) {
- QChar ch( i );
- bool is_d = ( ch.category() == QChar::Number_DecimalDigit );
- bool is_s = ch.isSpace();
- bool is_w = ( ch.isLetterOrNumber()
- || ch.isMark()
- || ch == '_' );
-
- QVERIFY( rx_d.exactMatch(QString(ch)) == is_d );
- QVERIFY( rx_s.exactMatch(QString(ch)) == is_s );
- QVERIFY( rx_w.exactMatch(QString(ch)) == is_w );
- QVERIFY( rx_D.exactMatch(QString(ch)) != is_d );
- QVERIFY( rx_S.exactMatch(QString(ch)) != is_s );
- QVERIFY( rx_W.exactMatch(QString(ch)) != is_w );
- }
-}
-
-void tst_QRegExp::capturedTexts()
-{
- QRegExp rx1("a*(a*)", Qt::CaseSensitive, QRegExp::RegExp);
- rx1.exactMatch("aaa");
- QCOMPARE(rx1.matchedLength(), 3);
- QCOMPARE(rx1.cap(0), QString("aaa"));
- QCOMPARE(rx1.cap(1), QString("aaa"));
-
- QRegExp rx2("a*(a*)", Qt::CaseSensitive, QRegExp::RegExp2);
- rx2.exactMatch("aaa");
- QCOMPARE(rx2.matchedLength(), 3);
- QCOMPARE(rx2.cap(0), QString("aaa"));
- QCOMPARE(rx2.cap(1), QString(""));
-
- QRegExp rx3("(?:a|aa)(a*)", Qt::CaseSensitive, QRegExp::RegExp);
- rx3.exactMatch("aaa");
- QCOMPARE(rx3.matchedLength(), 3);
- QCOMPARE(rx3.cap(0), QString("aaa"));
- QCOMPARE(rx3.cap(1), QString("aa"));
-
- QRegExp rx4("(?:a|aa)(a*)", Qt::CaseSensitive, QRegExp::RegExp2);
- rx4.exactMatch("aaa");
- QCOMPARE(rx4.matchedLength(), 3);
- QCOMPARE(rx4.cap(0), QString("aaa"));
- QCOMPARE(rx4.cap(1), QString("a"));
-
- QRegExp rx5("(a)*(a*)", Qt::CaseSensitive, QRegExp::RegExp);
- rx5.exactMatch("aaa");
- QCOMPARE(rx5.matchedLength(), 3);
- QCOMPARE(rx5.cap(0), QString("aaa"));
- QCOMPARE(rx5.cap(1), QString("a"));
- QCOMPARE(rx5.cap(2), QString("aa"));
-
- QRegExp rx6("(a)*(a*)", Qt::CaseSensitive, QRegExp::RegExp2);
- rx6.exactMatch("aaa");
- QCOMPARE(rx6.matchedLength(), 3);
- QCOMPARE(rx6.cap(0), QString("aaa"));
- QCOMPARE(rx6.cap(1), QString("a"));
- QCOMPARE(rx6.cap(2), QString(""));
-
- QRegExp rx7("([A-Za-z_])([A-Za-z_0-9]*)");
- rx7.setCaseSensitivity(Qt::CaseSensitive);
- rx7.setPatternSyntax(QRegExp::RegExp);
- QCOMPARE(rx7.captureCount(), 2);
-
- int pos = rx7.indexIn("(10 + delta4) * 32");
- QCOMPARE(pos, 6);
- QCOMPARE(rx7.matchedLength(), 6);
- QCOMPARE(rx7.cap(0), QString("delta4"));
- QCOMPARE(rx7.cap(1), QString("d"));
- QCOMPARE(rx7.cap(2), QString("elta4"));
-}
-
-void tst_QRegExp::indexIn()
-{
- QFETCH( QString, regexpStr );
- QFETCH( QString, target );
- QFETCH( int, pos );
- QFETCH( int, len );
- QFETCH( QStringList, caps );
-
- caps.prepend( "dummy cap(0)" );
-
- {
- QRegExp rx( regexpStr );
- QVERIFY( rx.isValid() );
-
- int mypos = rx.indexIn( target );
- int mylen = rx.matchedLength();
- QStringList mycaps = rx.capturedTexts();
-
- QCOMPARE( mypos, pos );
- QCOMPARE( mylen, len );
- if ( caps.size() > 1 && caps[1] != "IGNORE ME" ) {
- QCOMPARE( mycaps.count(), caps.count() );
- for ( int i = 1; i < (int) mycaps.count(); i++ )
- QCOMPARE( mycaps[i], caps[i] );
- }
- }
-
- // same as above, but with RegExp2
- {
- QRegExp rx( regexpStr, Qt::CaseSensitive, QRegExp::RegExp2 );
- QVERIFY( rx.isValid() );
-
- int mypos = rx.indexIn( target );
- int mylen = rx.matchedLength();
- QStringList mycaps = rx.capturedTexts();
-
- QCOMPARE( mypos, pos );
- QCOMPARE( mylen, len );
- if ( caps.size() > 1 && caps[1] != "IGNORE ME" ) {
- QCOMPARE( mycaps.count(), caps.count() );
- for ( int i = 1; i < (int) mycaps.count(); i++ )
- QCOMPARE( mycaps[i], caps[i] );
- }
- }
-}
-
-void tst_QRegExp::lastIndexIn()
-{
- QFETCH( QString, regexpStr );
- QFETCH( QString, target );
- QFETCH( int, pos );
- QFETCH( int, len );
- QFETCH( QStringList, caps );
-
- caps.prepend( "dummy" );
-
- /*
- The test data was really designed for indexIn(), not
- lastIndexIn(), but it turns out that we can reuse much of that
- for lastIndexIn().
- */
-
- {
- QRegExp rx( regexpStr );
- QVERIFY( rx.isValid() );
-
- int mypos = rx.lastIndexIn( target, target.length() );
- int mylen = rx.matchedLength();
- QStringList mycaps = rx.capturedTexts();
-
- if ( mypos <= pos || pos == -1 ) {
- QCOMPARE( mypos, pos );
- QCOMPARE( mylen, len );
-
- if (caps.size() > 1 && caps[1] != "IGNORE ME") {
- QCOMPARE( mycaps.count(), caps.count() );
- for ( int i = 1; i < (int) mycaps.count(); i++ )
- QCOMPARE( mycaps[i], caps[i] );
- }
- }
- }
-
- {
- QRegExp rx( regexpStr, Qt::CaseSensitive, QRegExp::RegExp2 );
- QVERIFY( rx.isValid() );
-
- int mypos = rx.lastIndexIn( target, target.length() );
- int mylen = rx.matchedLength();
- QStringList mycaps = rx.capturedTexts();
-
- if ( mypos <= pos || pos == -1 ) {
- QCOMPARE( mypos, pos );
- QCOMPARE( mylen, len );
-
- if (caps.size() > 1 && caps[1] != "IGNORE ME") {
- QCOMPARE( mycaps.count(), caps.count() );
- for ( int i = 1; i < (int) mycaps.count(); i++ )
- QCOMPARE( mycaps[i], caps[i] );
- }
- }
- }
-}
-
-void tst_QRegExp::matchedLength()
-{
- QRegExp r1( "a+" );
- r1.exactMatch( "aaaba" );
- QCOMPARE( r1.matchedLength(), 3 );
-}
-
-const char email[] =
- "^[\\040\\t]*(?:\\([^\\\\\\x80-\\xff\\n\\015()]*(?:(?:\\\\[^\\x80-\\xff"
- "]|\\([^\\\\\\x80-\\xff\\n\\015()]*(?:\\\\[^\\x80-\\xff][^\\\\\\x80-\\x"
- "ff\\n\\015()]*)*\\))[^\\\\\\x80-\\xff\\n\\015()]*)*\\)[\\040\\t]*)*(?:"
- "(?:[^(\\040)<>@,;:\".\\\\\\[\\]\\000-\\037\\x80-\\xff]+(?![^(\\040)<>@"
- ",;:\".\\\\\\[\\]\\000-\\037\\x80-\\xff])|\"[^\\\\\\x80-\\xff\\n\\015\""
- "]*(?:\\\\[^\\x80-\\xff][^\\\\\\x80-\\xff\\n\\015\"]*)*\")[\\040\\t]*(?"
- ":\\([^\\\\\\x80-\\xff\\n\\015()]*(?:(?:\\\\[^\\x80-\\xff]|\\([^\\\\\\x"
- "80-\\xff\\n\\015()]*(?:\\\\[^\\x80-\\xff][^\\\\\\x80-\\xff\\n\\015()]*"
- ")*\\))[^\\\\\\x80-\\xff\\n\\015()]*)*\\)[\\040\\t]*)*(?:\\.[\\040\\t]*"
- "(?:\\([^\\\\\\x80-\\xff\\n\\015()]*(?:(?:\\\\[^\\x80-\\xff]|\\([^\\\\"
- "\\x80-\\xff\\n\\015()]*(?:\\\\[^\\x80-\\xff][^\\\\\\x80-\\xff\\n\\015("
- ")]*)*\\))[^\\\\\\x80-\\xff\\n\\015()]*)*\\)[\\040\\t]*)*(?:[^(\\040)<>"
- "@,;:\".\\\\\\[\\]\\000-\\037\\x80-\\xff]+(?![^(\\040)<>@,;:\".\\\\\\["
- "\\]\\000-\\037\\x80-\\xff])|\"[^\\\\\\x80-\\xff\\n\\015\"]*(?:\\\\[^\\"
- "x80-\\xff][^\\\\\\x80-\\xff\\n\\015\"]*)*\")[\\040\\t]*(?:\\([^\\\\\\x"
- "80-\\xff\\n\\015()]*(?:(?:\\\\[^\\x80-\\xff]|\\([^\\\\\\x80-\\xff\\n\\"
- "015()]*(?:\\\\[^\\x80-\\xff][^\\\\\\x80-\\xff\\n\\015()]*)*\\))[^\\\\"
- "\\x80-\\xff\\n\\015()]*)*\\)[\\040\\t]*)*)*@[\\040\\t]*(?:\\([^\\\\\\x"
- "80-\\xff\\n\\015()]*(?:(?:\\\\[^\\x80-\\xff]|\\([^\\\\\\x80-\\xff\\n\\"
- "015()]*(?:\\\\[^\\x80-\\xff][^\\\\\\x80-\\xff\\n\\015()]*)*\\))[^\\\\"
- "\\x80-\\xff\\n\\015()]*)*\\)[\\040\\t]*)*(?:[^(\\040)<>@,;:\".\\\\\\["
- "\\]\\000-\\037\\x80-\\xff]+(?![^(\\040)<>@,;:\".\\\\\\[\\]\\000-\\037"
- "\\x80-\\xff])|\\[(?:[^\\\\\\x80-\\xff\\n\\015\\[\\]]|\\\\[^\\x80-\\xff"
- "])*\\])[\\040\\t]*(?:\\([^\\\\\\x80-\\xff\\n\\015()]*(?:(?:\\\\[^\\x80"
- "-\\xff]|\\([^\\\\\\x80-\\xff\\n\\015()]*(?:\\\\[^\\x80-\\xff][^\\\\\\x"
- "80-\\xff\\n\\015()]*)*\\))[^\\\\\\x80-\\xff\\n\\015()]*)*\\)[\\040\\t]"
- "*)*(?:\\.[\\040\\t]*(?:\\([^\\\\\\x80-\\xff\\n\\015()]*(?:(?:\\\\[^\\x"
- "80-\\xff]|\\([^\\\\\\x80-\\xff\\n\\015()]*(?:\\\\[^\\x80-\\xff][^\\\\"
- "\\x80-\\xff\\n\\015()]*)*\\))[^\\\\\\x80-\\xff\\n\\015()]*)*\\)[\\040"
- "\\t]*)*(?:[^(\\040)<>@,;:\".\\\\\\[\\]\\000-\\037\\x80-\\xff]+(?![^(\\"
- "040)<>@,;:\".\\\\\\[\\]\\000-\\037\\x80-\\xff])|\\[(?:[^\\\\\\x80-\\xf"
- "f\\n\\015\\[\\]]|\\\\[^\\x80-\\xff])*\\])[\\040\\t]*(?:\\([^\\\\\\x80-"
- "\\xff\\n\\015()]*(?:(?:\\\\[^\\x80-\\xff]|\\([^\\\\\\x80-\\xff\\n\\015"
- "()]*(?:\\\\[^\\x80-\\xff][^\\\\\\x80-\\xff\\n\\015()]*)*\\))[^\\\\\\x8"
- "0-\\xff\\n\\015()]*)*\\)[\\040\\t]*)*)*|(?:[^(\\040)<>@,;:\".\\\\\\[\\"
- "]\\000-\\037\\x80-\\xff]+(?![^(\\040)<>@,;:\".\\\\\\[\\]\\000-\\037\\x"
- "80-\\xff])|\"[^\\\\\\x80-\\xff\\n\\015\"]*(?:\\\\[^\\x80-\\xff][^\\\\"
- "\\x80-\\xff\\n\\015\"]*)*\")[^()<>@,;:\".\\\\\\[\\]\\x80-\\xff\\000-\\"
- "010\\012-\\037]*(?:(?:\\([^\\\\\\x80-\\xff\\n\\015()]*(?:(?:\\\\[^\\x8"
- "0-\\xff]|\\([^\\\\\\x80-\\xff\\n\\015()]*(?:\\\\[^\\x80-\\xff][^\\\\\\"
- "x80-\\xff\\n\\015()]*)*\\))[^\\\\\\x80-\\xff\\n\\015()]*)*\\)|\"[^\\\\"
- "\\x80-\\xff\\n\\015\"]*(?:\\\\[^\\x80-\\xff][^\\\\\\x80-\\xff\\n\\015"
- "\"]*)*\")[^()<>@,;:\".\\\\\\[\\]\\x80-\\xff\\000-\\010\\012-\\037]*)*<"
- "[\\040\\t]*(?:\\([^\\\\\\x80-\\xff\\n\\015()]*(?:(?:\\\\[^\\x80-\\xff]"
- "|\\([^\\\\\\x80-\\xff\\n\\015()]*(?:\\\\[^\\x80-\\xff][^\\\\\\x80-\\xf"
- "f\\n\\015()]*)*\\))[^\\\\\\x80-\\xff\\n\\015()]*)*\\)[\\040\\t]*)*(?:@"
- "[\\040\\t]*(?:\\([^\\\\\\x80-\\xff\\n\\015()]*(?:(?:\\\\[^\\x80-\\xff]"
- "|\\([^\\\\\\x80-\\xff\\n\\015()]*(?:\\\\[^\\x80-\\xff][^\\\\\\x80-\\xf"
- "f\\n\\015()]*)*\\))[^\\\\\\x80-\\xff\\n\\015()]*)*\\)[\\040\\t]*)*(?:["
- "^(\\040)<>@,;:\".\\\\\\[\\]\\000-\\037\\x80-\\xff]+(?![^(\\040)<>@,;:"
- "\".\\\\\\[\\]\\000-\\037\\x80-\\xff])|\\[(?:[^\\\\\\x80-\\xff\\n\\015"
- "\\[\\]]|\\\\[^\\x80-\\xff])*\\])[\\040\\t]*(?:\\([^\\\\\\x80-\\xff\\n"
- "\\015()]*(?:(?:\\\\[^\\x80-\\xff]|\\([^\\\\\\x80-\\xff\\n\\015()]*(?:"
- "\\\\[^\\x80-\\xff][^\\\\\\x80-\\xff\\n\\015()]*)*\\))[^\\\\\\x80-\\xff"
- "\\n\\015()]*)*\\)[\\040\\t]*)*(?:\\.[\\040\\t]*(?:\\([^\\\\\\x80-\\xff"
- "\\n\\015()]*(?:(?:\\\\[^\\x80-\\xff]|\\([^\\\\\\x80-\\xff\\n\\015()]*("
- "?:\\\\[^\\x80-\\xff][^\\\\\\x80-\\xff\\n\\015()]*)*\\))[^\\\\\\x80-\\x"
- "ff\\n\\015()]*)*\\)[\\040\\t]*)*(?:[^(\\040)<>@,;:\".\\\\\\[\\]\\000-"
- "\\037\\x80-\\xff]+(?![^(\\040)<>@,;:\".\\\\\\[\\]\\000-\\037\\x80-\\xf"
- "f])|\\[(?:[^\\\\\\x80-\\xff\\n\\015\\[\\]]|\\\\[^\\x80-\\xff])*\\])[\\"
- "040\\t]*(?:\\([^\\\\\\x80-\\xff\\n\\015()]*(?:(?:\\\\[^\\x80-\\xff]|\\"
- "([^\\\\\\x80-\\xff\\n\\015()]*(?:\\\\[^\\x80-\\xff][^\\\\\\x80-\\xff\\"
- "n\\015()]*)*\\))[^\\\\\\x80-\\xff\\n\\015()]*)*\\)[\\040\\t]*)*)*(?:,["
- "\\040\\t]*(?:\\([^\\\\\\x80-\\xff\\n\\015()]*(?:(?:\\\\[^\\x80-\\xff]|"
- "\\([^\\\\\\x80-\\xff\\n\\015()]*(?:\\\\[^\\x80-\\xff][^\\\\\\x80-\\xff"
- "\\n\\015()]*)*\\))[^\\\\\\x80-\\xff\\n\\015()]*)*\\)[\\040\\t]*)*@[\\0"
- "40\\t]*(?:\\([^\\\\\\x80-\\xff\\n\\015()]*(?:(?:\\\\[^\\x80-\\xff]|\\("
- "[^\\\\\\x80-\\xff\\n\\015()]*(?:\\\\[^\\x80-\\xff][^\\\\\\x80-\\xff\\n"
- "\\015()]*)*\\))[^\\\\\\x80-\\xff\\n\\015()]*)*\\)[\\040\\t]*)*(?:[^(\\"
- "040)<>@,;:\".\\\\\\[\\]\\000-\\037\\x80-\\xff]+(?![^(\\040)<>@,;:\".\\"
- "\\\\[\\]\\000-\\037\\x80-\\xff])|\\[(?:[^\\\\\\x80-\\xff\\n\\015\\[\\]"
- "]|\\\\[^\\x80-\\xff])*\\])[\\040\\t]*(?:\\([^\\\\\\x80-\\xff\\n\\015()"
- "]*(?:(?:\\\\[^\\x80-\\xff]|\\([^\\\\\\x80-\\xff\\n\\015()]*(?:\\\\[^\\"
- "x80-\\xff][^\\\\\\x80-\\xff\\n\\015()]*)*\\))[^\\\\\\x80-\\xff\\n\\015"
- "()]*)*\\)[\\040\\t]*)*(?:\\.[\\040\\t]*(?:\\([^\\\\\\x80-\\xff\\n\\015"
- "()]*(?:(?:\\\\[^\\x80-\\xff]|\\([^\\\\\\x80-\\xff\\n\\015()]*(?:\\\\[^"
- "\\x80-\\xff][^\\\\\\x80-\\xff\\n\\015()]*)*\\))[^\\\\\\x80-\\xff\\n\\0"
- "15()]*)*\\)[\\040\\t]*)*(?:[^(\\040)<>@,;:\".\\\\\\[\\]\\000-\\037\\x8"
- "0-\\xff]+(?![^(\\040)<>@,;:\".\\\\\\[\\]\\000-\\037\\x80-\\xff])|\\[(?"
- ":[^\\\\\\x80-\\xff\\n\\015\\[\\]]|\\\\[^\\x80-\\xff])*\\])[\\040\\t]*("
- "?:\\([^\\\\\\x80-\\xff\\n\\015()]*(?:(?:\\\\[^\\x80-\\xff]|\\([^\\\\\\"
- "x80-\\xff\\n\\015()]*(?:\\\\[^\\x80-\\xff][^\\\\\\x80-\\xff\\n\\015()]"
- "*)*\\))[^\\\\\\x80-\\xff\\n\\015()]*)*\\)[\\040\\t]*)*)*)*:[\\040\\t]*"
- "(?:\\([^\\\\\\x80-\\xff\\n\\015()]*(?:(?:\\\\[^\\x80-\\xff]|\\([^\\\\"
- "\\x80-\\xff\\n\\015()]*(?:\\\\[^\\x80-\\xff][^\\\\\\x80-\\xff\\n\\015("
- ")]*)*\\))[^\\\\\\x80-\\xff\\n\\015()]*)*\\)[\\040\\t]*)*)?(?:[^(\\040)"
- "<>@,;:\".\\\\\\[\\]\\000-\\037\\x80-\\xff]+(?![^(\\040)<>@,;:\".\\\\\\"
- "[\\]\\000-\\037\\x80-\\xff])|\"[^\\\\\\x80-\\xff\\n\\015\"]*(?:\\\\[^"
- "\\x80-\\xff][^\\\\\\x80-\\xff\\n\\015\"]*)*\")[\\040\\t]*(?:\\([^\\\\"
- "\\x80-\\xff\\n\\015()]*(?:(?:\\\\[^\\x80-\\xff]|\\([^\\\\\\x80-\\xff\\"
- "n\\015()]*(?:\\\\[^\\x80-\\xff][^\\\\\\x80-\\xff\\n\\015()]*)*\\))[^\\"
- "\\\\x80-\\xff\\n\\015()]*)*\\)[\\040\\t]*)*(?:\\.[\\040\\t]*(?:\\([^\\"
- "\\\\x80-\\xff\\n\\015()]*(?:(?:\\\\[^\\x80-\\xff]|\\([^\\\\\\x80-\\xff"
- "\\n\\015()]*(?:\\\\[^\\x80-\\xff][^\\\\\\x80-\\xff\\n\\015()]*)*\\))[^"
- "\\\\\\x80-\\xff\\n\\015()]*)*\\)[\\040\\t]*)*(?:[^(\\040)<>@,;:\".\\\\"
- "\\[\\]\\000-\\037\\x80-\\xff]+(?![^(\\040)<>@,;:\".\\\\\\[\\]\\000-\\0"
- "37\\x80-\\xff])|\"[^\\\\\\x80-\\xff\\n\\015\"]*(?:\\\\[^\\x80-\\xff][^"
- "\\\\\\x80-\\xff\\n\\015\"]*)*\")[\\040\\t]*(?:\\([^\\\\\\x80-\\xff\\n"
- "\\015()]*(?:(?:\\\\[^\\x80-\\xff]|\\([^\\\\\\x80-\\xff\\n\\015()]*(?:"
- "\\\\[^\\x80-\\xff][^\\\\\\x80-\\xff\\n\\015()]*)*\\))[^\\\\\\x80-\\xff"
- "\\n\\015()]*)*\\)[\\040\\t]*)*)*@[\\040\\t]*(?:\\([^\\\\\\x80-\\xff\\n"
- "\\015()]*(?:(?:\\\\[^\\x80-\\xff]|\\([^\\\\\\x80-\\xff\\n\\015()]*(?:"
- "\\\\[^\\x80-\\xff][^\\\\\\x80-\\xff\\n\\015()]*)*\\))[^\\\\\\x80-\\xff"
- "\\n\\015()]*)*\\)[\\040\\t]*)*(?:[^(\\040)<>@,;:\".\\\\\\[\\]\\000-\\0"
- "37\\x80-\\xff]+(?![^(\\040)<>@,;:\".\\\\\\[\\]\\000-\\037\\x80-\\xff])"
- "|\\[(?:[^\\\\\\x80-\\xff\\n\\015\\[\\]]|\\\\[^\\x80-\\xff])*\\])[\\040"
- "\\t]*(?:\\([^\\\\\\x80-\\xff\\n\\015()]*(?:(?:\\\\[^\\x80-\\xff]|\\([^"
- "\\\\\\x80-\\xff\\n\\015()]*(?:\\\\[^\\x80-\\xff][^\\\\\\x80-\\xff\\n\\"
- "015()]*)*\\))[^\\\\\\x80-\\xff\\n\\015()]*)*\\)[\\040\\t]*)*(?:\\.[\\0"
- "40\\t]*(?:\\([^\\\\\\x80-\\xff\\n\\015()]*(?:(?:\\\\[^\\x80-\\xff]|\\("
- "[^\\\\\\x80-\\xff\\n\\015()]*(?:\\\\[^\\x80-\\xff][^\\\\\\x80-\\xff\\n"
- "\\015()]*)*\\))[^\\\\\\x80-\\xff\\n\\015()]*)*\\)[\\040\\t]*)*(?:[^(\\"
- "040)<>@,;:\".\\\\\\[\\]\\000-\\037\\x80-\\xff]+(?![^(\\040)<>@,;:\".\\"
- "\\\\[\\]\\000-\\037\\x80-\\xff])|\\[(?:[^\\\\\\x80-\\xff\\n\\015\\[\\]"
- "]|\\\\[^\\x80-\\xff])*\\])[\\040\\t]*(?:\\([^\\\\\\x80-\\xff\\n\\015()"
- "]*(?:(?:\\\\[^\\x80-\\xff]|\\([^\\\\\\x80-\\xff\\n\\015()]*(?:\\\\[^\\"
- "x80-\\xff][^\\\\\\x80-\\xff\\n\\015()]*)*\\))[^\\\\\\x80-\\xff\\n\\015"
- "()]*)*\\)[\\040\\t]*)*)*>)$";
-
-void tst_QRegExp::wildcard_data()
-{
- QTest::addColumn<QString>("rxp");
- QTest::addColumn<QString>("string");
- QTest::addColumn<int>("foundIndex");
-
- QTest::newRow( "data0" ) << QString("*.html") << QString("test.html") << 0;
- QTest::newRow( "data1" ) << QString("*.html") << QString("test.htm") << -1;
- QTest::newRow( "data2" ) << QString("bar*") << QString("foobarbaz") << 3;
- QTest::newRow( "data3" ) << QString("*") << QString("Qt Rocks!") << 0;
- QTest::newRow( "data4" ) << QString(".html") << QString("test.html") << 4;
- QTest::newRow( "data5" ) << QString(".h") << QString("test.cpp") << -1;
- QTest::newRow( "data6" ) << QString(".???l") << QString("test.html") << 4;
- QTest::newRow( "data7" ) << QString("?") << QString("test.html") << 0;
- QTest::newRow( "data8" ) << QString("?m") << QString("test.html") << 6;
- QTest::newRow( "data9" ) << QString(".h[a-z]ml") << QString("test.html") << 4;
- QTest::newRow( "data10" ) << QString(".h[A-Z]ml") << QString("test.html") << -1;
- QTest::newRow( "data11" ) << QString(".h[A-Z]ml") << QString("test.hTml") << 4;
-}
-
-void tst_QRegExp::wildcard()
-{
- QFETCH( QString, rxp );
- QFETCH( QString, string );
- QFETCH( int, foundIndex );
-
- QRegExp r( rxp );
- r.setPatternSyntax(QRegExp::WildcardUnix);
- QCOMPARE( r.indexIn( string ), foundIndex );
-}
-
-void tst_QRegExp::testEscapingWildcard_data(){
- QTest::addColumn<QString>("pattern");
- QTest::addColumn<QString>("teststring");
- QTest::addColumn<bool>("isMatching");
-
- QTest::newRow("[ Not escaped") << "[Qt;" << "[Qt;" << false;
- QTest::newRow("[ Escaped") << "\\[Qt;" << "[Qt;" << true;
-
- QTest::newRow("] Not escaped") << "]Ik;" << "]Ik;" << false;
- QTest::newRow("] Escaped") << "\\]Ip;" << "]Ip;" << true;
-
- QTest::newRow("? Not escaped valid") << "?Ou:" << ".Ou:" << true;
- QTest::newRow("? Not escaped invalid") << "?Tr;" << "Tr;" << false;
- QTest::newRow("? Escaped") << "\\?O;" << "?O;" << true;
-
- QTest::newRow("[] not escaped") << "[lL]" << "l" << true;
- QTest::newRow("[] escaped") << "\\[\\]" << "[]" << true;
-
- QTest::newRow("case [[]") << "[[abc]" << "[" << true;
- QTest::newRow("case []abc] match ]") << "[]abc]" << "]" << true;
- QTest::newRow("case []abc] match a") << "[]abc]" << "a" << true;
- QTest::newRow("case [abc] match a") << "[abc]" << "a" << true;
- QTest::newRow("case []] don't match [") << "[]abc]" << "[" << false;
- QTest::newRow("case [^]abc] match d") << "[^]abc]" << "d" << true;
- QTest::newRow("case [^]abc] don't match ]") << "[^]abc]" << "]" << false;
-
- QTest::newRow("* Not escaped with char") << "*Te;" << "12345Te;" << true;
- QTest::newRow("* Not escaped without char") << "*Ch;" << "Ch;" << true;
- QTest::newRow("* Not escaped invalid") << "*Ro;" << "o;" << false;
- QTest::newRow("* Escaped") << "\\[Cks;" << "[Cks;" << true;
-
- QTest::newRow("a true '\\' in input") << "\\Qt;" << "\\Qt;" << true;
- QTest::newRow("two true '\\' in input") << "\\\\Qt;" << "\\\\Qt;" << true;
- QTest::newRow("a '\\' at the end") << "\\\\Qt;\\" << "\\\\Qt;\\" << true;
-
- QTest::newRow("[]\\] matches ]") << "[]\\]" << "]" << true;
- QTest::newRow("[]\\] matches \\") << "[]\\]" << "\\" << true;
- QTest::newRow("[]\\] does not match [") << "[]\\]" << "[" << false;
- QTest::newRow("[]\\]a matches ]a") << "[]\\]a" << "]a" << true;
- QTest::newRow("[]\\]a matches \\a") << "[]\\]a" << "\\a" << true;
- QTest::newRow("[]\\]a does not match [a") << "[]\\]a" << "[a" << false;
-}
-
-void tst_QRegExp::testEscapingWildcard(){
- QFETCH(QString, pattern);
-
- QRegExp re(pattern);
- re.setPatternSyntax(QRegExp::WildcardUnix);
-
- QFETCH(QString, teststring);
- QFETCH(bool, isMatching);
- QCOMPARE(re.exactMatch(teststring), isMatching);
-}
-
-void tst_QRegExp::testInvalidWildcard_data(){
- QTest::addColumn<QString>("pattern");
- QTest::addColumn<bool>("isValid");
-
- QTest::newRow("valid []") << "[abc]" << true;
- QTest::newRow("invalid [") << "[abc" << false;
- QTest::newRow("ending [") << "abc[" << false;
- QTest::newRow("ending ]") << "abc]" << false;
- QTest::newRow("ending [^") << "abc[^" << false;
- QTest::newRow("ending [\\") << "abc[\\" << false;
- QTest::newRow("ending []") << "abc[]" << false;
- QTest::newRow("ending [[") << "abc[[" << false;
-}
-
-void tst_QRegExp::testInvalidWildcard(){
- QFETCH(QString, pattern);
-
- QRegExp re(pattern);
- re.setPatternSyntax(QRegExp::Wildcard);
-
- QFETCH(bool, isValid);
- QCOMPARE(re.isValid(), isValid);
-}
-
-void tst_QRegExp::caretAnchoredOptimization()
-{
- QString s = "---babnana----";
- s = QRegExp("^-*|(-*)$").replaceIn(s, "" );
- QCOMPARE(s, QLatin1String("babnana"));
-
- s = "---babnana----";
- s = QRegExp("^-*|(-{0,})$").replaceIn(s, "" );
- QCOMPARE(s, QLatin1String("babnana"));
-
- s = "---babnana----";
- s = QRegExp("^-*|(-{1,})$").replaceIn(s, "" );
- QCOMPARE(s, QLatin1String("babnana"));
-
- s = "---babnana----";
- s = QRegExp("^-*|(-+)$").replaceIn(s, "" );
- QCOMPARE(s, QLatin1String("babnana"));
-}
-
-void tst_QRegExp::isEmpty()
-{
- QRegExp rx1;
- QVERIFY(rx1.isEmpty());
-
- QRegExp rx2 = rx1;
- QVERIFY(rx2.isEmpty());
-
- rx2.setPattern("");
- QVERIFY(rx2.isEmpty());
-
- rx2.setPattern("foo");
- QVERIFY(!rx2.isEmpty());
-
- rx2.setPattern(")(");
- QVERIFY(!rx2.isEmpty());
-
- rx2.setPattern("");
- QVERIFY(rx2.isEmpty());
-
- rx2.setPatternSyntax(QRegExp::Wildcard);
- rx2.setPattern("");
- QVERIFY(rx2.isEmpty());
-}
-
-static QRegExp re("foo.*bar");
-
-void tst_QRegExp::staticRegExp()
-{
- QVERIFY(re.exactMatch("fooHARRYbar"));
- // the actual test is that a static regexp should not crash
-}
-
-void tst_QRegExp::rainersSlowRegExpCopyBug()
-{
- // this test should take an extreme amount of time if QRegExp is broken
- QRegExp original(email);
- for (int i = 0; i < 100000; ++i) {
- QRegExp copy = original;
- (void)copy.exactMatch("~");
- QRegExp copy2 = original;
- }
-}
-
-void tst_QRegExp::nonExistingBackReferenceBug()
-{
- {
- QRegExp rx("<\\5>");
- QVERIFY(rx.isValid());
- QCOMPARE(rx.indexIn("<>"), 0);
- QCOMPARE(rx.capturedTexts(), QStringList("<>"));
- }
-
- {
- QRegExp rx("<\\1>");
- QVERIFY(rx.isValid());
- QCOMPARE(rx.indexIn("<>"), 0);
- QCOMPARE(rx.capturedTexts(), QStringList("<>"));
- }
-
- {
- QRegExp rx("(?:<\\1>)\\1\\5\\4");
- QVERIFY(rx.isValid());
- QCOMPARE(rx.indexIn("<>"), 0);
- QCOMPARE(rx.capturedTexts(), QStringList("<>"));
- }
-}
-
-class Thread : public QThread
-{
-public:
- Thread(const QRegExp &rx) : rx(rx) {}
-
- void run();
-
- QRegExp rx;
-};
-
-void Thread::run()
-{
- QString str = "abc";
- for (int i = 0; i < 10; ++i)
- str += str;
- str += "abbbdekcz";
- int x;
-
- for (int j = 0; j < 10000; ++j)
- x = rx.indexIn(str);
-
- QCOMPARE(x, 3072);
-}
-
-void tst_QRegExp::reentrancy()
-{
- QRegExp rx("(ab{2,}d?e?f?[g-z]?)c");
- Thread *threads[10];
-
- for (int i = 0; i < int(sizeof(threads) / sizeof(threads[0])); ++i) {
- threads[i] = new Thread(rx);
- threads[i]->start();
- }
-
- for (int i = 0; i < int(sizeof(threads) / sizeof(threads[0])); ++i)
- threads[i]->wait();
-
- for (int i = 0; i < int(sizeof(threads) / sizeof(threads[0])); ++i)
- delete threads[i];
-}
-
-class Thread2 : public QThread
-{
-public:
- void run();
-};
-
-void Thread2::run()
-{
- QRegExp rx("(ab{2,}d?e?f?[g-z]?)c");
- QString str = "abc";
- for (int i = 0; i < 10; ++i)
- str += str;
- str += "abbbdekcz";
- int x;
-
- for (int j = 0; j < 10000; ++j)
- x = rx.indexIn(str);
-
- QCOMPARE(x, 3072);
-}
-
-// Test that multiple threads can construct equal QRegExps.
-// (In the current QRegExp design each engine instatance will share
-// the same cache key, so the threads will race for the cache entry
-// in the global cache.)
-void tst_QRegExp::threadsafeEngineCache()
-{
- Thread2 *threads[10];
-
- for (int i = 0; i < int(sizeof(threads) / sizeof(threads[0])); ++i) {
- threads[i] = new Thread2();
- threads[i]->start();
- }
-
- for (int i = 0; i < int(sizeof(threads) / sizeof(threads[0])); ++i)
- threads[i]->wait();
-
- for (int i = 0; i < int(sizeof(threads) / sizeof(threads[0])); ++i)
- delete threads[i];
-}
-
-
-void tst_QRegExp::prepareEngineOptimization()
-{
- QRegExp rx0("(f?)(?:(o?)(o?))?");
-
- QRegExp rx1(rx0);
-
- QCOMPARE(rx1.capturedTexts(), QStringList() << "" << "" << "" << "");
- QCOMPARE(rx1.matchedLength(), -1);
- QCOMPARE(rx1.matchedLength(), -1);
- QCOMPARE(rx1.captureCount(), 3);
-
- QCOMPARE(rx1.exactMatch("foo"), true);
- QCOMPARE(rx1.matchedLength(), 3);
- QCOMPARE(rx1.capturedTexts(), QStringList() << "foo" << "f" << "o" << "o");
- QCOMPARE(rx1.captureCount(), 3);
- QCOMPARE(rx1.matchedLength(), 3);
- QCOMPARE(rx1.capturedTexts(), QStringList() << "foo" << "f" << "o" << "o");
- QCOMPARE(rx1.pos(3), 2);
-
- QCOMPARE(rx1.exactMatch("foo"), true);
- QCOMPARE(rx1.captureCount(), 3);
- QCOMPARE(rx1.matchedLength(), 3);
- QCOMPARE(rx1.capturedTexts(), QStringList() << "foo" << "f" << "o" << "o");
- QCOMPARE(rx1.pos(3), 2);
-
- QRegExp rx2 = rx1;
-
- QCOMPARE(rx1.captureCount(), 3);
- QCOMPARE(rx1.matchedLength(), 3);
- QCOMPARE(rx1.capturedTexts(), QStringList() << "foo" << "f" << "o" << "o");
- QCOMPARE(rx1.pos(3), 2);
-
- QCOMPARE(rx2.captureCount(), 3);
- QCOMPARE(rx2.matchedLength(), 3);
- QCOMPARE(rx2.capturedTexts(), QStringList() << "foo" << "f" << "o" << "o");
- QCOMPARE(rx2.pos(3), 2);
-
- QCOMPARE(rx1.exactMatch("fo"), true);
- QCOMPARE(rx1.captureCount(), 3);
- QCOMPARE(rx1.matchedLength(), 2);
- QCOMPARE(rx1.capturedTexts(), QStringList() << "fo" << "f" << "o" << "");
- QCOMPARE(rx1.pos(2), 1);
-
- QRegExp rx3;
- QVERIFY(rx3.isValid());
-
- QRegExp rx4("foo", Qt::CaseInsensitive, QRegExp::RegExp);
- QVERIFY(rx4.isValid());
-
- QRegExp rx5("foo", Qt::CaseInsensitive, QRegExp::RegExp2);
- QVERIFY(rx5.isValid());
-
- QRegExp rx6("foo", Qt::CaseInsensitive, QRegExp::FixedString);
- QVERIFY(rx6.isValid());
-
- QRegExp rx7("foo", Qt::CaseInsensitive, QRegExp::Wildcard);
- QVERIFY(rx7.isValid());
-
- QRegExp rx8("][", Qt::CaseInsensitive, QRegExp::RegExp);
- QVERIFY(!rx8.isValid());
-
- QRegExp rx9("][", Qt::CaseInsensitive, QRegExp::RegExp2);
- QVERIFY(!rx9.isValid());
-
- QRegExp rx10("][", Qt::CaseInsensitive, QRegExp::Wildcard);
- QVERIFY(!rx10.isValid());
-
- QRegExp rx11("][", Qt::CaseInsensitive, QRegExp::FixedString);
- QVERIFY(rx11.isValid());
- QVERIFY(rx11.exactMatch("]["));
- QCOMPARE(rx11.matchedLength(), 2);
-
- rx11.setPatternSyntax(QRegExp::Wildcard);
- QVERIFY(!rx11.isValid());
- QCOMPARE(rx11.captureCount(), 0);
- QCOMPARE(rx11.matchedLength(), -1);
-
- rx11.setPatternSyntax(QRegExp::RegExp);
- QVERIFY(!rx11.isValid());
- QCOMPARE(rx11.captureCount(), 0);
- QCOMPARE(rx11.matchedLength(), -1);
-
- rx11.setPattern("(foo)");
- QVERIFY(rx11.isValid());
- QCOMPARE(rx11.captureCount(), 1);
- QCOMPARE(rx11.matchedLength(), -1);
-
- QCOMPARE(rx11.indexIn("ofoo"), 1);
- QCOMPARE(rx11.captureCount(), 1);
- QCOMPARE(rx11.matchedLength(), 3);
-
- rx11.setPatternSyntax(QRegExp::RegExp);
- QCOMPARE(rx11.captureCount(), 1);
- QCOMPARE(rx11.matchedLength(), 3);
-
- /*
- This behavior isn't entirely consistent with setPatter(),
- setPatternSyntax(), and setCaseSensitivity(), but I'm testing
- it here to ensure that it doesn't change subtly in future
- releases.
- */
- rx11.setMinimal(true);
- QCOMPARE(rx11.matchedLength(), 3);
- rx11.setMinimal(false);
- QCOMPARE(rx11.matchedLength(), 3);
-
- rx11.setPatternSyntax(QRegExp::Wildcard);
- QCOMPARE(rx11.captureCount(), 0);
- QCOMPARE(rx11.matchedLength(), -1);
-
- rx11.setPatternSyntax(QRegExp::RegExp);
- QCOMPARE(rx11.captureCount(), 1);
- QCOMPARE(rx11.matchedLength(), -1);
-}
-
-void tst_QRegExp::swap()
-{
- QRegExp r1(QLatin1String(".*")), r2(QLatin1String("a*"));
- r1.swap(r2);
- QCOMPARE(r1.pattern(),QLatin1String("a*"));
- QCOMPARE(r2.pattern(),QLatin1String(".*"));
-}
-
-void tst_QRegExp::operator_eq()
-{
- const int I = 2;
- const int J = 4;
- const int K = 2;
- const int ELL = 2;
- QRegExp rxtable[I * J * K * ELL];
- int n;
-
- n = 0;
- for (int i = 0; i < I; ++i) {
- for (int j = 0; j < J; ++j) {
- for (int k = 0; k < K; ++k) {
- for (int ell = 0; ell < ELL; ++ell) {
- Qt::CaseSensitivity cs = i == 0 ? Qt::CaseSensitive : Qt::CaseInsensitive;
- QRegExp::PatternSyntax syntax = QRegExp::PatternSyntax(j);
- bool minimal = k == 0;
-
- if (ell == 0) {
- QRegExp rx("foo", cs, syntax);
- rx.setMinimal(minimal);
- rxtable[n++] = rx;
- } else {
- QRegExp rx;
- rx.setPattern("bar");
- rx.setMinimal(true);
- rx.exactMatch("bar");
- rx.setCaseSensitivity(cs);
- rx.setMinimal(minimal);
- rx.setPattern("foo");
- rx.setPatternSyntax(syntax);
- rx.exactMatch("foo");
- rxtable[n++] = rx;
- }
- }
- }
- }
- }
-
- for (int i = 0; i < I * J * K * ELL; ++i) {
- for (int j = 0; j < I * J * K * ELL; ++j) {
- QCOMPARE(rxtable[i] == rxtable[j], i / ELL == j / ELL);
- QCOMPARE(rxtable[i] != rxtable[j], i / ELL != j / ELL);
- // this just happens to have no hash collisions. If at some point
- // we get collisions, restrict the test to only equal elements:
- QCOMPARE(qHash(rxtable[i]) == qHash(rxtable[j]), i / ELL == j / ELL);
- }
- }
-}
-
-// This test aims to ensure that the values returned by pos() and cap()
-// are consistent.
-void tst_QRegExp::posAndCapConsistency_data()
-{
- QTest::addColumn<QString>("reStr");
- QTest::addColumn<QString>("text");
- QTest::addColumn<int>("matchIndex");
-
- QTest::addColumn<int>("pos0");
- QTest::addColumn<int>("pos1");
- QTest::addColumn<int>("pos2");
-
- QTest::addColumn<QString>("cap0");
- QTest::addColumn<QString>("cap1");
- QTest::addColumn<QString>("cap2");
-
- QTest::newRow("no match")
- << QString("(a) (b)") << QString("b a") << -1
- << -1 << -1 << -1 << QString() << QString() << QString();
-
- QTest::newRow("both captures match")
- << QString("(a) (b)") << QString("a b") << 0
- << 0 << 0 << 2 << QString("a b") << QString("a") << QString("b");
-
- QTest::newRow("first capture matches @0")
- << QString("(a*)|(b*)") << QString("axx") << 0
- << 0 << 0 << -1 << QString("a") << QString("a") << QString();
- QTest::newRow("second capture matches @0")
- << QString("(a*)|(b*)") << QString("bxx") << 0
- << 0 << -1 << 0 << QString("b") << QString() << QString("b");
- QTest::newRow("first capture empty match @0")
- << QString("(a*)|(b*)") << QString("xx") << 0
- << 0 << -1 << -1 << QString("") << QString() << QString();
- QTest::newRow("second capture empty match @0")
- << QString("(a)|(b*)") << QString("xx") << 0
- << 0 << -1 << -1 << QString("") << QString() << QString();
-
- QTest::newRow("first capture matches @1")
- << QString("x(?:(a*)|(b*))") << QString("-xa") << 1
- << 1 << 2 << -1 << QString("xa") << QString("a") << QString();
- QTest::newRow("second capture matches @1")
- << QString("x(?:(a*)|(b*))") << QString("-xb") << 1
- << 1 << -1 << 2 << QString("xb") << QString() << QString("b");
- QTest::newRow("first capture empty match @1")
- << QString("x(?:(a*)|(b*))") << QString("-xx") << 1
- << 1 << -1 << -1 << QString("x") << QString() << QString();
- QTest::newRow("second capture empty match @1")
- << QString("x(?:(a)|(b*))") << QString("-xx") << 1
- << 1 << -1 << -1 << QString("x") << QString() << QString();
-
- QTest::newRow("first capture matches @2")
- << QString("(a)|(b)") << QString("xxa") << 2
- << 2 << 2 << -1 << QString("a") << QString("a") << QString();
- QTest::newRow("second capture matches @2")
- << QString("(a)|(b)") << QString("xxb") << 2
- << 2 << -1 << 2 << QString("b") << QString() << QString("b");
- QTest::newRow("no match - with options")
- << QString("(a)|(b)") << QString("xx") << -1
- << -1 << -1 << -1 << QString() << QString() << QString();
-
-}
-
-void tst_QRegExp::posAndCapConsistency()
-{
- QFETCH( QString, reStr );
- QFETCH( QString, text );
- QFETCH( int, matchIndex );
- QFETCH( int, pos0 );
- QFETCH( int, pos1 );
- QFETCH( int, pos2 );
- QFETCH( QString, cap0 );
- QFETCH( QString, cap1 );
- QFETCH( QString, cap2 );
-
- QRegExp re(reStr);
- QCOMPARE(re.captureCount(), 2);
- QCOMPARE(re.capturedTexts().size(), 3);
-
- QCOMPARE(re.indexIn(text), matchIndex);
-
- QCOMPARE( re.pos(0), pos0 );
- QCOMPARE( re.pos(1), pos1 );
- QCOMPARE( re.pos(2), pos2 );
-
- QCOMPARE( re.cap(0).isNull(), cap0.isNull() );
- QCOMPARE( re.cap(0), cap0 );
- QCOMPARE( re.cap(1).isNull(), cap1.isNull() );
- QCOMPARE( re.cap(1), cap1 );
- QCOMPARE( re.cap(2).isNull(), cap2.isNull() );
- QCOMPARE( re.cap(2), cap2 );
-}
-
-void tst_QRegExp::interval()
-{
- {
- QRegExp exp("a{0,1}");
- QVERIFY(exp.isValid());
- }
- {
- QRegExp exp("a{1,1}");
- QVERIFY(exp.isValid());
- }
- {
- QRegExp exp("a{1,0}");
- QVERIFY(!exp.isValid());
- }
-}
-
-void tst_QRegExp::validityCheck_data()
-{
- QTest::addColumn<QString>("pattern");
- QTest::addColumn<bool>("validity");
- QTest::newRow("validity01") << QString() << true;
- QTest::newRow("validity02") << QString("abc.*abc") << true;
- QTest::newRow("validity03") << QString("[a-z") << false;
- QTest::newRow("validity04") << QString("a(b") << false;
-}
-
-void tst_QRegExp::validityCheck()
-{
- QFETCH(QString, pattern);
-
- QRegExp rx(pattern);
- QTEST(rx.isValid(), "validity");
- QCOMPARE(rx.matchedLength(), -1);
- QCOMPARE(rx.pos(), -1);
- QCOMPARE(rx.cap(), QString(""));
-
- QRegExp rx2(rx);
- QTEST(rx2.isValid(), "validity");
- QCOMPARE(rx2.matchedLength(), -1);
- QCOMPARE(rx2.pos(), -1);
- QCOMPARE(rx2.cap(), QString(""));
-}
-
-void tst_QRegExp::escapeSequences()
-{
- QString perlSyntaxSpecialChars("0123456789afnrtvbBdDwWsSx\\|[]{}()^$?+*");
- QString w3cXmlSchema11SyntaxSpecialChars("cCiIpP"); // as well as the perl ones
- QString pattern = QLatin1String("\\?");
- for (int i = ' '; i <= 127; ++i) {
- QLatin1Char c(i);
- if (perlSyntaxSpecialChars.indexOf(c) == -1) {
- pattern[1] = c;
- QRegExp rx(pattern, Qt::CaseSensitive, QRegExp::RegExp);
- // we'll never have c == 'a' since it's a special character
- const QString s = QLatin1String("aaa") + c + QLatin1String("aaa");
- QCOMPARE(rx.indexIn(s), 3);
-
- rx.setPatternSyntax(QRegExp::RegExp2);
- QCOMPARE(rx.indexIn(s), 3);
-
- if (w3cXmlSchema11SyntaxSpecialChars.indexOf(c) == -1) {
- rx.setPatternSyntax(QRegExp::W3CXmlSchema11);
- QCOMPARE(rx.indexIn(s), 3);
- }
- }
- }
-}
-
-void tst_QRegExp::splitString_data()
-{
- QTest::addColumn<QString>("string");
- QTest::addColumn<QString>("pattern");
- QTest::addColumn<QStringList>("result");
-
- QTest::newRow("data01") << "Some text\n\twith strange whitespace."
- << "\\s+"
- << (QStringList() << "Some" << "text" << "with" << "strange" << "whitespace." );
-
- QTest::newRow("data02") << "This time, a normal English sentence."
- << "\\W+"
- << (QStringList() << "This" << "time" << "a" << "normal" << "English" << "sentence" << "");
-
- QTest::newRow("data03") << "Now: this sentence fragment."
- << "\\b"
- << (QStringList() << "" << "Now" << ": " << "this" << " " << "sentence" << " " << "fragment" << ".");
-}
-
-void tst_QRegExp::splitString()
-{
- QFETCH(QString, string);
- QFETCH(QString, pattern);
- QFETCH(QStringList, result);
- QStringList list = QRegExp(pattern).splitString(string);
- QVERIFY(list == result);
-
- QVERIFY(list == result);
-
- result.removeAll(QString());
-
- list = QRegExp(pattern).splitString(string, Qt::SkipEmptyParts);
- QVERIFY(list == result);
-}
-
-void tst_QRegExp::countIn()
-{
- QString a;
- a="ABCDEFGHIEfGEFG"; // 15 chars
- QCOMPARE(QRegExp("[FG][HI]").countIn(a),1);
- QCOMPARE(QRegExp("[G][HE]").countIn(a),2);
-}
-
-
-void tst_QRegExp::containedIn()
-{
- QString a;
- a="ABCDEFGHIEfGEFG"; // 15 chars
- QVERIFY(QRegExp("[FG][HI]").containedIn(a));
- QVERIFY(QRegExp("[G][HE]").containedIn(a));
-}
-
-void tst_QRegExp::replaceIn_data()
-{
- QTest::addColumn<QString>("string" );
- QTest::addColumn<QString>("regexp" );
- QTest::addColumn<QString>("after" );
- QTest::addColumn<QString>("result" );
-
- QTest::newRow( "rem00" ) << QString("alpha") << QString("a+") << QString("") << QString("lph");
- QTest::newRow( "rem01" ) << QString("banana") << QString("^.a") << QString("") << QString("nana");
- QTest::newRow( "rem02" ) << QString("") << QString("^.a") << QString("") << QString("");
- QTest::newRow( "rem03" ) << QString("") << QString("^.a") << QString() << QString("");
- QTest::newRow( "rem04" ) << QString() << QString("^.a") << QString("") << QString();
- QTest::newRow( "rem05" ) << QString() << QString("^.a") << QString() << QString();
-
- QTest::newRow( "rep00" ) << QString("A <i>bon mot</i>.") << QString("<i>([^<]*)</i>") << QString("\\emph{\\1}") << QString("A \\emph{bon mot}.");
- QTest::newRow( "rep01" ) << QString("banana") << QString("^.a()") << QString("\\1") << QString("nana");
- QTest::newRow( "rep02" ) << QString("banana") << QString("(ba)") << QString("\\1X\\1") << QString("baXbanana");
- QTest::newRow( "rep03" ) << QString("banana") << QString("(ba)(na)na") << QString("\\2X\\1") << QString("naXba");
-
- QTest::newRow("backref00") << QString("\\1\\2\\3\\4\\5\\6\\7\\8\\9\\A\\10\\11") << QString("\\\\[34]")
- << QString("X") << QString("\\1\\2XX\\5\\6\\7\\8\\9\\A\\10\\11");
- QTest::newRow("backref01") << QString("foo") << QString("[fo]") << QString("\\1") << QString("\\1\\1\\1");
- QTest::newRow("backref02") << QString("foo") << QString("([fo])") << QString("(\\1)") << QString("(f)(o)(o)");
- QTest::newRow("backref03") << QString("foo") << QString("([fo])") << QString("\\2") << QString("\\2\\2\\2");
- QTest::newRow("backref04") << QString("foo") << QString("([fo])") << QString("\\10") << QString("f0o0o0");
- QTest::newRow("backref05") << QString("foo") << QString("([fo])") << QString("\\11") << QString("f1o1o1");
- QTest::newRow("backref06") << QString("foo") << QString("([fo])") << QString("\\19") << QString("f9o9o9");
- QTest::newRow("backref07") << QString("foo") << QString("(f)(o+)")
- << QString("\\2\\1\\10\\20\\11\\22\\19\\29\\3")
- << QString("ooff0oo0f1oo2f9oo9\\3");
- QTest::newRow("backref08") << QString("abc") << QString("(((((((((((((([abc]))))))))))))))")
- << QString("{\\14}") << QString("{a}{b}{c}");
- QTest::newRow("backref09") << QString("abcdefghijklmn")
- << QString("(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)(l)(m)(n)")
- << QString("\\19\\18\\17\\16\\15\\14\\13\\12\\11\\10"
- "\\9\\90\\8\\80\\7\\70\\6\\60\\5\\50\\4\\40\\3\\30\\2\\20\\1")
- << QString("a9a8a7a6a5nmlkjii0hh0gg0ff0ee0dd0cc0bb0a");
- QTest::newRow("backref10") << QString("abc") << QString("((((((((((((((abc))))))))))))))")
- << QString("\\0\\01\\011") << QString("\\0\\01\\011");
- QTest::newRow("invalid") << QString("") << QString("invalid regex\\") << QString("") << QString("");
-}
-
-void tst_QRegExp::replaceIn()
-{
- QFETCH( QString, string );
- QFETCH( QString, regexp );
- QFETCH( QString, after );
-
- QString s2 = string;
- s2 = QRegExp(regexp).replaceIn(s2, after);
- QTEST( s2, "result" );
- s2 = string;
-}
-
-void tst_QRegExp::removeIn_data()
-{
- replaceIn_data();
-}
-
-void tst_QRegExp::removeIn()
-{
- QFETCH( QString, string );
- QFETCH( QString, regexp );
- QFETCH( QString, after );
-
- if ( after.length() == 0 ) {
- QString s2 = string;
- s2 = QRegExp(regexp).removeIn(s2);
- QTEST( s2, "result" );
- } else {
- QCOMPARE( 0, 0 ); // shut Qt Test
- }
-}
-
-void tst_QRegExp::filterList()
-{
- QStringList list3, list4;
- list3 << "Bill Gates" << "Joe Blow" << "Bill Clinton";
- list3 = QRegExp("[i]ll") .filterList(list3);
- list4 << "Bill Gates" << "Bill Clinton";
- QCOMPARE( list3, list4 );
-}
-
-void tst_QRegExp::replaceInList()
-{
- QStringList list3, list4;
- list3 << "alpha" << "beta" << "gamma" << "epsilon";
- list3 = QRegExp("^a").replaceIn(list3, "o");
- list4 << "olpha" << "beta" << "gamma" << "epsilon";
- QCOMPARE( list3, list4 );
-
- QStringList list5, list6;
- list5 << "Bill Clinton" << "Gates, Bill";
- list6 << "Bill Clinton" << "Bill Gates";
- list5 = QRegExp("^(.*), (.*)$").replaceIn(list5, "\\2 \\1");
- QCOMPARE( list5, list6 );
-}
-
-static QRegExp QRegExpData(int index)
-{
- switch (index) {
- case 0: return QRegExp();
- case 1: return QRegExp("");
- case 2: return QRegExp("A", Qt::CaseInsensitive);
- case 3: return QRegExp("ABCDE FGHI", Qt::CaseSensitive, QRegExp::Wildcard);
- case 4: return QRegExp("This is a long string", Qt::CaseInsensitive, QRegExp::FixedString);
- case 5: return QRegExp("And again a string with a \nCRLF", Qt::CaseInsensitive, QRegExp::RegExp);
- case 6:
- {
- QRegExp rx("abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRESTUVWXYZ 1234567890 ~`!@#$%^&*()_-+={[}]|\\:;\"'<,>.?/");
- rx.setMinimal(true);
- return rx;
- }
- }
- return QRegExp("foo");
-}
-#define MAX_QREGEXP_DATA 7
-
-void tst_QRegExp::datastream_data()
-{
- QTest::addColumn<QString>("device");
- QTest::addColumn<QString>("byteOrder");
-
- const char * const devices[] = {
- "file",
- "bytearray",
- "buffer",
- 0
- };
- for (int d=0; devices[d] != 0; d++) {
- QString device = devices[d];
- for (int b=0; b<2; b++) {
- QString byte_order = b == 0 ? "BigEndian" : "LittleEndian";
-
- QString tag = device + QLatin1Char('_') + byte_order;
- for (int e = 0; e < MAX_QREGEXP_DATA; e++) {
- QTest::newRow(qPrintable(tag + QLatin1Char('_') + QString::number(e))) << device << byte_order;
- }
- }
- }
-}
-
-static int dataIndex(const QString &tag)
-{
- int pos = tag.lastIndexOf(QLatin1Char('_'));
- if (pos >= 0) {
- int ret = 0;
- QString count = tag.mid(pos + 1);
- bool ok;
- ret = count.toInt(&ok);
- if (ok)
- return ret;
- }
- return -1;
-}
-
-void tst_QRegExp::datastream()
-{
- QFETCH(QString, device); \
-
- qRegisterMetaTypeStreamOperators<QRegExp>("QRegExp");
-
- if (device == "bytearray") { \
- QByteArray ba; \
- QDataStream sout(&ba, QIODevice::WriteOnly); \
- writeQRegExp(&sout); \
- QDataStream sin(&ba, QIODevice::ReadOnly); \
- readQRegExp(&sin); \
- } else if (device == "file") { \
- QString fileName = "qdatastream.out"; \
- QFile fOut(fileName); \
- QVERIFY(fOut.open(QIODevice::WriteOnly)); \
- QDataStream sout(&fOut); \
- writeQRegExp(&sout); \
- fOut.close(); \
- QFile fIn(fileName); \
- QVERIFY(fIn.open(QIODevice::ReadOnly)); \
- QDataStream sin(&fIn); \
- readQRegExp(&sin); \
- fIn.close(); \
- } else if (device == "buffer") { \
- QByteArray ba(10000, '\0'); \
- QBuffer bOut(&ba); \
- bOut.open(QIODevice::WriteOnly); \
- QDataStream sout(&bOut); \
- writeQRegExp(&sout); \
- bOut.close(); \
- QBuffer bIn(&ba); \
- bIn.open(QIODevice::ReadOnly); \
- QDataStream sin(&bIn); \
- readQRegExp(&sin); \
- bIn.close(); \
- }
-}
-
-static void saveQVariantFromDataStream(const QString &fileName, QDataStream::Version version)
-{
-
- QFile file(fileName);
- QVERIFY(file.open(QIODevice::ReadOnly));
- QDataStream dataFileStream(&file);
-
- QString typeName;
- dataFileStream >> typeName;
- QByteArray data = file.readAll();
- const int id = QMetaType::type(typeName.toLatin1());
-
- QBuffer buffer;
- buffer.open(QIODevice::ReadWrite);
- QDataStream stream(&buffer);
- stream.setVersion(version);
-
- QVariant constructedVariant(static_cast<QVariant::Type>(id));
- QCOMPARE(constructedVariant.userType(), id);
- stream << constructedVariant;
-
- // We are testing QVariant there is no point in testing full array.
- QCOMPARE(buffer.data().left(5), data.left(5));
-
- buffer.seek(0);
- QVariant recunstructedVariant;
- stream >> recunstructedVariant;
- QCOMPARE(recunstructedVariant.userType(), constructedVariant.userType());
-}
-
-void tst_QRegExp::datastream2()
-{
- saveQVariantFromDataStream(QLatin1String(":/data/qdatastream_4.9.bin"), QDataStream::Qt_4_9);
- saveQVariantFromDataStream(QLatin1String(":/data/qdatastream_5.0.bin"), QDataStream::Qt_5_0);
-}
-
-void tst_QRegExp::writeQRegExp(QDataStream* s)
-{
- QRegExp test(QRegExpData(dataIndex(QTest::currentDataTag())));
- *s << test;
- *s << QString("Her er det noe tekst");
- *s << test;
- *s << QString("nonempty");
- *s << test;
- *s << QVariant(test);
-}
-
-void tst_QRegExp::readQRegExp(QDataStream *s)
-{
- QRegExp R;
- QString S;
- QVariant V;
- QRegExp test(QRegExpData(dataIndex(QTest::currentDataTag())));
-
- *s >> R;
- QCOMPARE(R, test);
- *s >> S;
- QCOMPARE(S, QString("Her er det noe tekst"));
- *s >> R;
- QCOMPARE(R, test);
- *s >> S;
- QCOMPARE(S, QString("nonempty"));
- *s >> R;
- QCOMPARE(R, test);
- *s >> V;
- QCOMPARE(V.userType(), qMetaTypeId<QRegExp>());
- QCOMPARE(qvariant_cast<QRegExp>(V), test);
-}
-
-
-QTEST_APPLESS_MAIN(tst_QRegExp)
-#include "tst_qregexp.moc"
diff --git a/tests/auto/corelib/text/qstring/tst_qstring.cpp b/tests/auto/corelib/text/qstring/tst_qstring.cpp
index 376a4cb5dc..e0d5180b78 100644
--- a/tests/auto/corelib/text/qstring/tst_qstring.cpp
+++ b/tests/auto/corelib/text/qstring/tst_qstring.cpp
@@ -1631,7 +1631,7 @@ void tst_QString::lastIndexOf()
QCOMPARE(haystack.lastIndexOf(needle.toLatin1().data(), from, cs), expected);
if (from >= -1 && from < haystack.size()) {
- // unfortunately, QString and QRegExp don't have the same out of bound semantics
+ // unfortunately, QString and QRegularExpression don't have the same out of bound semantics
// I think QString is wrong -- See file log for contact information.
{
QRegularExpression::PatternOptions options = QRegularExpression::NoPatternOption;
diff --git a/tests/auto/corelib/text/text.pro b/tests/auto/corelib/text/text.pro
index 272bcb1013..0e89ea0850 100644
--- a/tests/auto/corelib/text/text.pro
+++ b/tests/auto/corelib/text/text.pro
@@ -11,7 +11,6 @@ SUBDIRS = \
qcollator \
qlatin1string \
qlocale \
- qregexp \
qregularexpression \
qstring \
qstring_no_cast_from_bytearray \
diff --git a/tests/benchmarks/corelib/text/qregexp/CMakeLists.txt b/tests/benchmarks/corelib/text/qregexp/CMakeLists.txt
deleted file mode 100644
index 1646f4778a..0000000000
--- a/tests/benchmarks/corelib/text/qregexp/CMakeLists.txt
+++ /dev/null
@@ -1,46 +0,0 @@
-# Generated from qregexp.pro.
-
-#####################################################################
-## tst_bench_qregexp Binary:
-#####################################################################
-
-qt_add_benchmark(tst_bench_qregexp
- EXCEPTIONS
- SOURCES
- main.cpp
- PUBLIC_LIBRARIES
- Qt::Test
-)
-
-# Resources:
-set(qregexp_resource_files
- "main.cpp"
-)
-
-qt_add_resource(tst_bench_qregexp "qregexp"
- PREFIX
- "/"
- FILES
- ${qregexp_resource_files}
-)
-
-
-#### Keys ignored in scope 1:.:.:qregexp.pro:<TRUE>:
-# TEMPLATE = "app"
-
-## Scopes:
-#####################################################################
-
-qt_extend_target(tst_bench_qregexp CONDITION TARGET Qt::Script AND NOT pcre
- DEFINES
- HAVE_JSC
- PUBLIC_LIBRARIES
- Qt::Script
-)
-
-qt_extend_target(tst_bench_qregexp CONDITION (NOT QNX) AND (EXISTS _ss_[QT_SYSROOT]/usr/include/boost/regex.hpp)
- DEFINES
- HAVE_BOOST
- PUBLIC_LIBRARIES
- boost_regex
-)
diff --git a/tests/benchmarks/corelib/text/qregexp/main.cpp b/tests/benchmarks/corelib/text/qregexp/main.cpp
deleted file mode 100644
index 798b23f2b0..0000000000
--- a/tests/benchmarks/corelib/text/qregexp/main.cpp
+++ /dev/null
@@ -1,615 +0,0 @@
-/****************************************************************************
-**
-** Copyright (C) 2016 The Qt Company Ltd.
-** Contact: https://www.qt.io/licensing/
-**
-** This file is part of the test suite of the Qt Toolkit.
-**
-** $QT_BEGIN_LICENSE:GPL-EXCEPT$
-** Commercial License Usage
-** Licensees holding valid commercial Qt licenses may use this file in
-** accordance with the commercial license agreement provided with the
-** Software or, alternatively, in accordance with the terms contained in
-** a written agreement between you and The Qt Company. For licensing terms
-** and conditions see https://www.qt.io/terms-conditions. For further
-** information use the contact form at https://www.qt.io/contact-us.
-**
-** GNU General Public License Usage
-** Alternatively, this file may be used under the terms of the GNU
-** General Public License version 3 as published by the Free Software
-** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
-** included in the packaging of this file. Please review the following
-** information to ensure the GNU General Public License requirements will
-** be met: https://www.gnu.org/licenses/gpl-3.0.html.
-**
-** $QT_END_LICENSE$
-**
-****************************************************************************/
-
-#include <QDebug>
-#include <QRegExp>
-#include <QString>
-#include <QFile>
-
-#include <qtest.h>
-#ifdef HAVE_BOOST
-#include <boost/regex.hpp>
-#endif
-
-#ifdef HAVE_JSC
-#include <QtScript>
-#include "pcre/pcre.h"
-#endif
-#define ZLIB_VERSION "1.2.3.4"
-
-class tst_qregexp : public QObject
-{
- Q_OBJECT
-public:
- tst_qregexp();
-private slots:
- void escape_old();
- void escape_old_data() { escape_data(); }
- void escape_new1();
- void escape_new1_data() { escape_data(); }
- void escape_new2();
- void escape_new2_data() { escape_data(); }
- void escape_new3();
- void escape_new3_data() { escape_data(); }
- void escape_new4();
- void escape_new4_data() { escape_data(); }
-/*
- JSC outperforms everything.
- Boost is less impressive then expected.
- */
- void simpleFind1();
- void rangeReplace1();
- void matchReplace1();
-
- void simpleFind2();
- void rangeReplace2();
- void matchReplace2();
-
- void simpleFindJSC();
- void rangeReplaceJSC();
- void matchReplaceJSC();
-
- void simpleFindBoost();
- void rangeReplaceBoost();
- void matchReplaceBoost();
-
-/* those apply an (incorrect) regexp on entire source
- (this main.cpp). JSC appears to handle this
- (ab)use case best. QRegExp performs extremly bad.
- */
- void horribleWrongReplace1();
- void horribleReplace1();
- void horribleReplace2();
- void horribleWrongReplace2();
- void horribleWrongReplaceJSC();
- void horribleReplaceJSC();
- void horribleWrongReplaceBoost();
- void horribleReplaceBoost();
-private:
- QString str1;
- QString str2;
- void escape_data();
-};
-
-tst_qregexp::tst_qregexp()
- :QObject()
- ,str1("We are all happy monkeys")
-{
- QFile f(":/main.cpp");
- f.open(QFile::ReadOnly);
- str2=f.readAll();
-}
-
-static void verify(const QString &quoted, const QString &expected)
-{
- if (quoted != expected)
- qDebug() << "ERROR:" << quoted << expected;
-}
-
-void tst_qregexp::escape_data()
-{
- QTest::addColumn<QString>("pattern");
- QTest::addColumn<QString>("expected");
-
- QTest::newRow("escape 0") << "Hello world" << "Hello world";
- QTest::newRow("escape 1") << "(Hello world)" << "\\(Hello world\\)";
- {
- QString s;
- for (int i = 0; i < 10; ++i)
- s += "(escape)";
- QTest::newRow("escape 10") << s << QRegExp::escape(s);
- }
- {
- QString s;
- for (int i = 0; i < 100; ++i)
- s += "(escape)";
- QTest::newRow("escape 100") << s << QRegExp::escape(s);
- }
-}
-
-void tst_qregexp::escape_old()
-{
- QFETCH(QString, pattern);
- QFETCH(QString, expected);
-
- QBENCHMARK {
- static const char meta[] = "$()*+.?[\\]^{|}";
- QString quoted = pattern;
- int i = 0;
-
- while (i < quoted.length()) {
- if (strchr(meta, quoted.at(i).toLatin1()) != 0)
- quoted.insert(i++, QLatin1Char('\\'));
- ++i;
- }
-
- verify(quoted, expected);
- }
-}
-
-void tst_qregexp::escape_new1()
-{
- QFETCH(QString, pattern);
- QFETCH(QString, expected);
-
- QBENCHMARK {
- QString quoted;
- const int count = pattern.count();
- quoted.reserve(count * 2);
- const QLatin1Char backslash('\\');
- for (int i = 0; i < count; i++) {
- switch (pattern.at(i).toLatin1()) {
- case '$':
- case '(':
- case ')':
- case '*':
- case '+':
- case '.':
- case '?':
- case '[':
- case '\\':
- case ']':
- case '^':
- case '{':
- case '|':
- case '}':
- quoted.append(backslash);
- }
- quoted.append(pattern.at(i));
- }
- verify(quoted, expected);
- }
-}
-
-void tst_qregexp::escape_new2()
-{
- QFETCH(QString, pattern);
- QFETCH(QString, expected);
-
- QBENCHMARK {
- int count = pattern.count();
- const QLatin1Char backslash('\\');
- QString quoted(count * 2, backslash);
- const QChar *patternData = pattern.data();
- QChar *quotedData = quoted.data();
- int escaped = 0;
- for ( ; --count >= 0; ++patternData) {
- const QChar c = *patternData;
- switch (c.unicode()) {
- case '$':
- case '(':
- case ')':
- case '*':
- case '+':
- case '.':
- case '?':
- case '[':
- case '\\':
- case ']':
- case '^':
- case '{':
- case '|':
- case '}':
- ++escaped;
- ++quotedData;
- }
- *quotedData = c;
- ++quotedData;
- }
- quoted.resize(pattern.size() + escaped);
-
- verify(quoted, expected);
- }
-}
-
-void tst_qregexp::escape_new3()
-{
- QFETCH(QString, pattern);
- QFETCH(QString, expected);
-
- QBENCHMARK {
- QString quoted;
- const int count = pattern.count();
- quoted.reserve(count * 2);
- const QLatin1Char backslash('\\');
- for (int i = 0; i < count; i++) {
- switch (pattern.at(i).toLatin1()) {
- case '$':
- case '(':
- case ')':
- case '*':
- case '+':
- case '.':
- case '?':
- case '[':
- case '\\':
- case ']':
- case '^':
- case '{':
- case '|':
- case '}':
- quoted += backslash;
- }
- quoted += pattern.at(i);
- }
-
- verify(quoted, expected);
- }
-}
-
-
-static inline bool needsEscaping(int c)
-{
- switch (c) {
- case '$':
- case '(':
- case ')':
- case '*':
- case '+':
- case '.':
- case '?':
- case '[':
- case '\\':
- case ']':
- case '^':
- case '{':
- case '|':
- case '}':
- return true;
- }
- return false;
-}
-
-void tst_qregexp::escape_new4()
-{
- QFETCH(QString, pattern);
- QFETCH(QString, expected);
-
- QBENCHMARK {
- const int n = pattern.size();
- const QChar *patternData = pattern.data();
- // try to prevent copy if no escape is needed
- int i = 0;
- for (int i = 0; i != n; ++i) {
- const QChar c = patternData[i];
- if (needsEscaping(c.unicode()))
- break;
- }
- if (i == n) {
- verify(pattern, expected);
- // no escaping needed, "return pattern" should be done here.
- return;
- }
- const QLatin1Char backslash('\\');
- QString quoted(n * 2, backslash);
- QChar *quotedData = quoted.data();
- for (int j = 0; j != i; ++j)
- *quotedData++ = *patternData++;
- int escaped = 0;
- for (; i != n; ++i) {
- const QChar c = *patternData;
- if (needsEscaping(c.unicode())) {
- ++escaped;
- ++quotedData;
- }
- *quotedData = c;
- ++quotedData;
- ++patternData;
- }
- quoted.resize(n + escaped);
- verify(quoted, expected);
- // "return quoted"
- }
-}
-
-
-void tst_qregexp::simpleFind1()
-{
- int roff;
- QRegExp rx("happy");
- rx.setPatternSyntax(QRegExp::RegExp);
- QBENCHMARK{
- roff = rx.indexIn(str1);
- }
- QCOMPARE(roff, 11);
-}
-
-void tst_qregexp::rangeReplace1()
-{
- QString r;
- QRegExp rx("[a-f]");
- rx.setPatternSyntax(QRegExp::RegExp);
- QBENCHMARK{
- r = QString(str1).replace(rx, "-");
- }
- QCOMPARE(r, QString("W- -r- -ll h-ppy monk-ys"));
-}
-
-void tst_qregexp::matchReplace1()
-{
- QString r;
- QRegExp rx("[^a-f]*([a-f]+)[^a-f]*");
- rx.setPatternSyntax(QRegExp::RegExp);
- QBENCHMARK{
- r = QString(str1).replace(rx, "\\1");
- }
- QCOMPARE(r, QString("eaeaae"));
-}
-
-void tst_qregexp::horribleWrongReplace1()
-{
- QString r;
- QRegExp rx(".*#""define ZLIB_VERSION \"([0-9]+)\\.([0-9]+)\\.([0-9]+)\".*");
- rx.setPatternSyntax(QRegExp::RegExp);
- QBENCHMARK{
- r = QString(str2).replace(rx, "\\1.\\2.\\3");
- }
- QCOMPARE(r, str2);
-}
-
-void tst_qregexp::horribleReplace1()
-{
- QString r;
- QRegExp rx(".*#""define ZLIB_VERSION \"([0-9]+)\\.([0-9]+)\\.([0-9]+).*");
- rx.setPatternSyntax(QRegExp::RegExp);
- QBENCHMARK{
- r = QString(str2).replace(rx, "\\1.\\2.\\3");
- }
- QCOMPARE(r, QString("1.2.3"));
-}
-
-
-void tst_qregexp::simpleFind2()
-{
- int roff;
- QRegExp rx("happy");
- rx.setPatternSyntax(QRegExp::RegExp2);
- QBENCHMARK{
- roff = rx.indexIn(str1);
- }
- QCOMPARE(roff, 11);
-}
-
-void tst_qregexp::rangeReplace2()
-{
- QString r;
- QRegExp rx("[a-f]");
- rx.setPatternSyntax(QRegExp::RegExp2);
- QBENCHMARK{
- r = QString(str1).replace(rx, "-");
- }
- QCOMPARE(r, QString("W- -r- -ll h-ppy monk-ys"));
-}
-
-void tst_qregexp::matchReplace2()
-{
- QString r;
- QRegExp rx("[^a-f]*([a-f]+)[^a-f]*");
- rx.setPatternSyntax(QRegExp::RegExp2);
- QBENCHMARK{
- r = QString(str1).replace(rx, "\\1");
- }
- QCOMPARE(r, QString("eaeaae"));
-}
-
-void tst_qregexp::horribleWrongReplace2()
-{
- QString r;
- QRegExp rx(".*#""define ZLIB_VERSION \"([0-9]+)\\.([0-9]+)\\.([0-9]+)\".*");
- rx.setPatternSyntax(QRegExp::RegExp2);
- QBENCHMARK{
- r = QString(str2).replace(rx, "\\1.\\2.\\3");
- }
- QCOMPARE(r, str2);
-}
-
-void tst_qregexp::horribleReplace2()
-{
- QString r;
- QRegExp rx(".*#""define ZLIB_VERSION \"([0-9]+)\\.([0-9]+)\\.([0-9]+).*");
- rx.setPatternSyntax(QRegExp::RegExp2);
- QBENCHMARK{
- r = QString(str2).replace(rx, "\\1.\\2.\\3");
- }
- QCOMPARE(r, QString("1.2.3"));
-}
-void tst_qregexp::simpleFindJSC()
-{
-#ifdef HAVE_JSC
- int numr;
- const char * errmsg=" ";
- QString rxs("happy");
- JSRegExp *rx = jsRegExpCompile(rxs.utf16(), rxs.length(), JSRegExpDoNotIgnoreCase, JSRegExpSingleLine, 0, &errmsg);
- QVERIFY(rx != 0);
- QString s(str1);
- int offsetVector[3];
- QBENCHMARK{
- numr = jsRegExpExecute(rx, s.utf16(), s.length(), 0, offsetVector, 3);
- }
- jsRegExpFree(rx);
- QCOMPARE(numr, 1);
- QCOMPARE(offsetVector[0], 11);
-#else
- QSKIP("JSC is not enabled for this platform");
-#endif
-}
-
-void tst_qregexp::rangeReplaceJSC()
-{
-#ifdef HAVE_JSC
- QScriptValue r;
- QScriptEngine engine;
- engine.globalObject().setProperty("s", str1);
- QScriptValue replaceFunc = engine.evaluate("(function() { return s.replace(/[a-f]/g, '-') } )");
- QVERIFY(replaceFunc.isFunction());
- QBENCHMARK{
- r = replaceFunc.call(QScriptValue());
- }
- QCOMPARE(r.toString(), QString("W- -r- -ll h-ppy monk-ys"));
-#else
- QSKIP("JSC is not enabled for this platform");
-#endif
-}
-
-void tst_qregexp::matchReplaceJSC()
-{
-#ifdef HAVE_JSC
- QScriptValue r;
- QScriptEngine engine;
- engine.globalObject().setProperty("s", str1);
- QScriptValue replaceFunc = engine.evaluate("(function() { return s.replace(/[^a-f]*([a-f]+)[^a-f]*/g, '$1') } )");
- QVERIFY(replaceFunc.isFunction());
- QBENCHMARK{
- r = replaceFunc.call(QScriptValue());
- }
- QCOMPARE(r.toString(), QString("eaeaae"));
-#else
- QSKIP("JSC is not enabled for this platform");
-#endif
-}
-
-void tst_qregexp::horribleWrongReplaceJSC()
-{
-#ifdef HAVE_JSC
- QScriptValue r;
- QScriptEngine engine;
- engine.globalObject().setProperty("s", str2);
- QScriptValue replaceFunc = engine.evaluate("(function() { return s.replace(/.*#""define ZLIB_VERSION \"([0-9]+)\\.([0-9]+)\\.([0-9]+)\".*/gm, '$1.$2.$3') } )");
- QVERIFY(replaceFunc.isFunction());
- QBENCHMARK{
- r = replaceFunc.call(QScriptValue());
- }
- QCOMPARE(r.toString(), str2);
-#else
- QSKIP("JSC is not enabled for this platform");
-#endif
-}
-
-void tst_qregexp::horribleReplaceJSC()
-{
-#ifdef HAVE_JSC
- QScriptValue r;
- QScriptEngine engine;
- // the m flag doesn't actually work here; dunno
- engine.globalObject().setProperty("s", str2.replace('\n', ' '));
- QScriptValue replaceFunc = engine.evaluate("(function() { return s.replace(/.*#""define ZLIB_VERSION \"([0-9]+)\\.([0-9]+)\\.([0-9]+).*/gm, '$1.$2.$3') } )");
- QVERIFY(replaceFunc.isFunction());
- QBENCHMARK{
- r = replaceFunc.call(QScriptValue());
- }
- QCOMPARE(r.toString(), QString("1.2.3"));
-#else
- QSKIP("JSC is not enabled for this platform");
-#endif
-}
-
-void tst_qregexp::simpleFindBoost()
-{
-#ifdef HAVE_BOOST
- int roff;
- boost::regex rx ("happy", boost::regex_constants::perl);
- std::string s = str1.toStdString();
- std::string::const_iterator start, end;
- start = s.begin();
- end = s.end();
- boost::match_flag_type flags = boost::match_default;
- QBENCHMARK{
- boost::match_results<std::string::const_iterator> what;
- regex_search(start, end, what, rx, flags);
- roff = (what[0].first)-start;
- }
- QCOMPARE(roff, 11);
-#else
- QSKIP("Boost is not enabled for this platform");
-#endif
-
-}
-
-void tst_qregexp::rangeReplaceBoost()
-{
-#ifdef HAVE_BOOST
- boost::regex pattern ("[a-f]", boost::regex_constants::perl);
- std::string s = str1.toStdString();
- std::string r;
- QBENCHMARK{
- r = boost::regex_replace (s, pattern, "-");
- }
- QCOMPARE(r, std::string("W- -r- -ll h-ppy monk-ys"));
-#else
- QSKIP("Boost is not enabled for this platform");
-#endif
-}
-
-void tst_qregexp::matchReplaceBoost()
-{
-#ifdef HAVE_BOOST
- boost::regex pattern ("[^a-f]*([a-f]+)[^a-f]*",boost::regex_constants::perl);
- std::string s = str1.toStdString();
- std::string r;
- QBENCHMARK{
- r = boost::regex_replace (s, pattern, "$1");
- }
- QCOMPARE(r, std::string("eaeaae"));
-#else
- QSKIP("Boost is not enabled for this platform");
-#endif
-}
-
-void tst_qregexp::horribleWrongReplaceBoost()
-{
-#ifdef HAVE_BOOST
- boost::regex pattern (".*#""define ZLIB_VERSION \"([0-9]+)\\.([0-9]+)\\.([0-9]+)\".*", boost::regex_constants::perl);
- std::string s = str2.toStdString();
- std::string r;
- QBENCHMARK{
- r = boost::regex_replace (s, pattern, "$1.$2.$3");
- }
- QCOMPARE(r, s);
-#else
- QSKIP("Boost is not enabled for this platform");
-#endif
-}
-
-void tst_qregexp::horribleReplaceBoost()
-{
-#ifdef HAVE_BOOST
- boost::regex pattern (".*#""define ZLIB_VERSION \"([0-9]+)\\.([0-9]+)\\.([0-9]+).*", boost::regex_constants::perl);
- std::string s = str2.toStdString();
- std::string r;
- QBENCHMARK{
- r = boost::regex_replace (s, pattern, "$1.$2.$3");
- }
- QCOMPARE(r, std::string("1.2.3"));
-#else
- QSKIP("Boost is not enabled for this platform");
-#endif
-}
-
-QTEST_MAIN(tst_qregexp)
-
-#include "main.moc"
diff --git a/tests/benchmarks/corelib/text/qregexp/qregexp.pro b/tests/benchmarks/corelib/text/qregexp/qregexp.pro
deleted file mode 100644
index c04c13060b..0000000000
--- a/tests/benchmarks/corelib/text/qregexp/qregexp.pro
+++ /dev/null
@@ -1,20 +0,0 @@
-TEMPLATE = app
-CONFIG += benchmark
-CONFIG += exceptions
-QT = core testlib
-
-TARGET = tst_bench_qregexp
-SOURCES += main.cpp
-RESOURCES += qregexp.qrc
-
-qtHaveModule(script):!pcre {
- DEFINES += HAVE_JSC
- QT += script
-}
-
-!qnx {
- exists($$[QT_SYSROOT]/usr/include/boost/regex.hpp) {
- DEFINES += HAVE_BOOST
- LIBS += -lboost_regex
- }
-}
diff --git a/tests/benchmarks/corelib/text/qregexp/qregexp.qrc b/tests/benchmarks/corelib/text/qregexp/qregexp.qrc
deleted file mode 100644
index a7fe13c035..0000000000
--- a/tests/benchmarks/corelib/text/qregexp/qregexp.qrc
+++ /dev/null
@@ -1,6 +0,0 @@
-<!DOCTYPE RCC><RCC version="1.0">
-<qresource>
- <file>main.cpp</file>
-</qresource>
-</RCC>
-