summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--dist/changes-5.0.05
-rw-r--r--doc/src/network/files-and-resources/datastreamformat.qdoc5
-rw-r--r--doc/src/snippets/code/src_corelib_tools_qregularexpression.cpp289
-rw-r--r--src/corelib/tools/qregularexpression.cpp2022
-rw-r--r--src/corelib/tools/qregularexpression.h245
-rw-r--r--src/corelib/tools/tools.pri8
-rw-r--r--tests/auto/corelib/tools/qregularexpression/.gitignore1
-rw-r--r--tests/auto/corelib/tools/qregularexpression/qregularexpression.pro4
-rw-r--r--tests/auto/corelib/tools/qregularexpression/tst_qregularexpression.cpp1150
-rw-r--r--tests/auto/corelib/tools/tools.pro1
10 files changed, 3730 insertions, 0 deletions
diff --git a/dist/changes-5.0.0 b/dist/changes-5.0.0
index 93f8543fd8..dd0a0693ed 100644
--- a/dist/changes-5.0.0
+++ b/dist/changes-5.0.0
@@ -317,6 +317,11 @@ QtCore
* QIntValidator and QDoubleValidator no longer fall back to using the C locale if
the requested locale fails to validate the input.
+* A new set of classes for doing pattern matching with Perl-compatible regular
+ expressions has been added: QRegularExpression, QRegularExpressionMatch and
+ QRegularExpressionMatchIterator. They aim to replace QRegExp with a more
+ powerful and flexible regular expression engine.
+
QtGui
-----
* Accessibility has been refactored. The hierachy of accessible objects is implemented via
diff --git a/doc/src/network/files-and-resources/datastreamformat.qdoc b/doc/src/network/files-and-resources/datastreamformat.qdoc
index 17a0044a69..8ff31371a2 100644
--- a/doc/src/network/files-and-resources/datastreamformat.qdoc
+++ b/doc/src/network/files-and-resources/datastreamformat.qdoc
@@ -298,6 +298,11 @@
\li Regular expression syntax (quint8)
\li Minimal matching (quint8)
\endlist
+ \row \li QRegularExpression
+ \li \list
+ \li The regular expression pattern (QString)
+ \li The pattern options (quint32)
+ \endlist
\row \li QRegion
\li \list
\li The size of the data, i.e. 8 + 16 * (number of rectangles) (quint32)
diff --git a/doc/src/snippets/code/src_corelib_tools_qregularexpression.cpp b/doc/src/snippets/code/src_corelib_tools_qregularexpression.cpp
new file mode 100644
index 0000000000..cab89d9c9e
--- /dev/null
+++ b/doc/src/snippets/code/src_corelib_tools_qregularexpression.cpp
@@ -0,0 +1,289 @@
+/****************************************************************************
+**
+** Copyright (C) 2012 Giuseppe D'Angelo <dangelog@gmail.com>.
+** Contact: http://www.qt-project.org/
+**
+** This file is part of the documentation of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:BSD$
+** You may use this file under the terms of the BSD license as follows:
+**
+** "Redistribution and use in source and binary forms, with or without
+** modification, are permitted provided that the following conditions are
+** met:
+** * Redistributions of source code must retain the above copyright
+** notice, this list of conditions and the following disclaimer.
+** * Redistributions in binary form must reproduce the above copyright
+** notice, this list of conditions and the following disclaimer in
+** the documentation and/or other materials provided with the
+** distribution.
+** * Neither the name of Nokia Corporation and its Subsidiary(-ies) nor
+** the names of its contributors may be used to endorse or promote
+** products derived from this software without specific prior written
+** permission.
+**
+** THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+** "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+** LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+** A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+** OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+** SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+** LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+** DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+** THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+** (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+** OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE."
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+//! [0]
+QRegularExpression re("a pattern");
+//! [0]
+
+
+//! [1]
+QRegularExpression re;
+re.setPattern("another pattern");
+//! [1]
+
+
+//! [2]
+// matches two digits followed by a space and a word
+QRegularExpression re("\\d\\d \\w+");
+
+// matches a backslash
+QRegularExpression re2("\\\\");
+//! [2]
+
+
+//! [3]
+QRegularExpression re("a third pattern");
+QString pattern = re.pattern(); // pattern == "a third pattern"
+//! [3]
+
+
+//! [4]
+// matches "Qt rocks", but also "QT rocks", "QT ROCKS", "qT rOcKs", etc.
+QRegularExpression re("Qt rocks", QRegularExpression::CaseInsensitiveOption);
+//! [4]
+
+
+//! [5]
+QRegularExpression re("^\\d+$");
+re.setPatternOptions(QRegularExpression::MultilineOption);
+// re matches any line in the subject string that contains only digits (but at least one)
+//! [5]
+
+
+//! [6]
+QRegularExpression re = QRegularExpression("^two.*words$", QRegularExpression::MultilineOption
+ | QRegularExpression::DotMatchesEverythingOption);
+
+QRegularExpression::PatternOptions options = re.patternOptions();
+// options == QRegularExpression::MultilineOption | QRegularExpression::DotMatchesEverythingOption
+//! [6]
+
+
+//! [7]
+// match two digits followed by a space and a word
+QRegularExpression re("\\d\\d \\w+");
+QRegularExpressionMatch match = re.match("abc123 def");
+bool hasMatch = match.hasMatch(); // true
+//! [7]
+
+
+//! [8]
+QRegularExpression re("\\d\\d \\w+");
+QRegularExpressionMatch match = re.match("abc123 def");
+if (match.hasMatch()) {
+ QString matched = match.captured(0); // matched == "23 def"
+ // ...
+}
+//! [8]
+
+
+//! [9]
+QRegularExpression re("\\d\\d \\w+");
+QRegularExpressionMatch match = re.match("12 abc 45 def", 1);
+if (match.hasMatch()) {
+ QString matched = match.captured(0); // matched == "45 def"
+ // ...
+}
+//! [9]
+
+
+//! [10]
+QRegularExpression re("^(\\d\\d)/(\\d\\d)/(\\d\\d\\d\\d)$");
+QRegularExpressionMatch match = re.match("08/12/1985");
+if (match.hasMatch()) {
+ QString day = re.captured(1); // day == "08"
+ QString month = re.captured(2); // month == "12"
+ QString year = re.captured(3); // year == "1985"
+ // ...
+}
+//! [10]
+
+
+//! [11]
+QRegularExpression re("abc(\\d+)def");
+QRegularExpressionMatch match = re.match("XYZabc123defXYZ");
+if (match.hasMatch()) {
+ int startOffset = re.capturedStart(1); // startOffset == 6
+ int endOffset = re.capturedEnd(1); // endOffset == 9
+ // ...
+}
+//! [11]
+
+
+//! [12]
+QRegularExpression re("^(?<date>\\d\\d)/(?<month>\\d\\d)/(?<year>\\d\\d\\d\\d)$");
+QRegularExpressionMatch match = re.match("08/12/1985");
+if (match.hasMatch()) {
+ QString date = match.captured("date"); // date == "08"
+ QString month = match.captured("month"); // month == "12"
+ QString year = match.captured("year"); // year == 1985
+}
+//! [12]
+
+
+//! [13]
+QRegularExpression re("(\\w+)");
+QRegularExpressionMatchIterator i = re.globalMatch("the quick fox");
+//! [13]
+
+
+//! [14]
+QStringList words;
+while (i.hasNext()) {
+ QRegularExpressionMatch match = i.next();
+ QString word = match.captured(1);
+ words << word;
+}
+// words contains "the", "quick", "fox"
+//! [14]
+
+
+//! [15]
+QString pattern("^(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) \\d\\d?, \\d\\d\\d\\d$");
+QRegularExpression re(pattern);
+
+QString input("Jan 21,");
+QRegularExpressionMatch match = re.match(input, 0, QRegularExpressionMatch::PartialPreferCompleteMatch);
+bool hasMatch = match.hasMatch(); // false
+bool hasPartialMatch = match.hasPartialMatch(); // true
+//! [15]
+
+
+//! [16]
+QString input("Dec 8, 1985");
+QRegularExpressionMatch match = re.match(input, 0, QRegularExpressionMatch::PartialPreferCompleteMatch);
+bool hasMatch = match.hasMatch(); // true
+bool hasPartialMatch = match.hasPartialMatch(); // false
+//! [16]
+
+
+//! [17]
+QRegularExpression re("abc\\w+X|def");
+QRegularExpressionMatch match = re.match("abcdef", 0, QRegularExpressionMatch::PartialPreferCompleteMatch);
+bool hasMatch = match.hasMatch(); // true
+bool hasPartialMatch = match.hasPartialMatch(); // false
+QString captured = match.captured(0); // captured == "def"
+//! [17]
+
+
+//! [18]
+QRegularExpression re("abc\\w+X|defY");
+QRegularExpressionMatch match = re.match("abcdef", 0, QRegularExpressionMatch::PartialPreferCompleteMatch);
+bool hasMatch = match.hasMatch(); // false
+bool hasPartialMatch = match.hasPartialMatch(); // true
+QString captured = match.captured(0); // captured == "abcdef"
+//! [18]
+
+
+//! [19]
+QRegularExpression re("abc|ab");
+QRegularExpressionMatch match = re.match("ab", 0, QRegularExpressionMatch::PartialPreferFirstMatch);
+bool hasMatch = match.hasMatch(); // false
+bool hasPartialMatch = match.hasPartialMatch(); // true
+//! [19]
+
+
+//! [20]
+QRegularExpression re("abc(def)?");
+QRegularExpressionMatch match = re.match("abc", 0, QRegularExpressionMatch::PartialPreferFirstMatch);
+bool hasMatch = match.hasMatch(); // false
+bool hasPartialMatch = match.hasPartialMatch(); // true
+//! [20]
+
+//! [21]
+QRegularExpression re("(abc)*");
+QRegularExpressionMatch match = re.match("abc", 0, QRegularExpressionMatch::PartialPreferFirstMatch);
+bool hasMatch = match.hasMatch(); // false
+bool hasPartialMatch = match.hasPartialMatch(); // true
+//! [21]
+
+//! [22]
+QRegularExpression invalidRe("(unmatched|parenthesis");
+bool isValid = invalidRe.isValid(); // false
+//! [22]
+
+//! [23]
+QRegularExpression invalidRe("(unmatched|parenthesis");
+if (!invalidRe.isValid()) {
+ QString errorString = invalidRe.errorString(); // errorString == "missing )"
+ int errorOffset = invalidRe.patternErrorOffset(); // errorOffset == 22
+ // ...
+}
+//! [23]
+
+//! [24]
+QRegularExpression re("^this pattern must match exactly$");
+//! [24]
+
+//! [25]
+QString p("a .*|pattern");
+QRegularExpression re("\\A(?:" + p + ")\\z"); // re matches exactly the pattern string p
+//! [25]
+
+//! [26]
+QString escaped = QRegularExpression::escape("a(x) = f(x) + g(x)");
+// escaped == "a\\(x\\)\\ \\=\\ f\\(x\\)\\ \\+\\ g\\(x\\)"
+//! [26]
+
+//! [27]
+QString pattern = "(" + QRegularExpression::escape(name) +
+ "|" + QRegularExpression::escape(nickname) + ")";
+QRegularExpression re(pattern);
+//! [27]
+
+//! [28]
+QRegularExpressionMatch match = re.match(...);
+for (int i = 0; i <= match.lastCapturedIndex(); ++i) {
+ QString captured = match.captured(i);
+ // ...
+}
+//! [28]
+
+//! [29]
+QRegularExpression("(\d\d) (?<name>\w+)");
+QRegularExpressionMatch match = re.match("23 Jordan");
+if (match.hasMatch()) {
+ QString number = match.captured(1); // first == "23"
+ QString name = match.captured("name"); // name == "Jordan"
+}
+//! [29]
+
+//! [30]
+// extracts the words
+QRegularExpression re("(\w+)");
+QString subject("the quick fox");
+QRegularExpressionMatchIterator i = re.globalMatch(subject);
+while (i.hasNext()) {
+ QRegularExpressionMatch match = i.next();
+ // ...
+}
+//! [30]
+
+
diff --git a/src/corelib/tools/qregularexpression.cpp b/src/corelib/tools/qregularexpression.cpp
new file mode 100644
index 0000000000..488a454aaa
--- /dev/null
+++ b/src/corelib/tools/qregularexpression.cpp
@@ -0,0 +1,2022 @@
+/****************************************************************************
+**
+** Copyright (C) 2012 Giuseppe D'Angelo <dangelog@gmail.com>.
+** Copyright (C) 2012 Nokia Corporation and/or its subsidiary(-ies).
+** Contact: http://www.qt-project.org/
+**
+** This file is part of the QtCore module of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** GNU Lesser General Public License Usage
+** This file may be used under the terms of the GNU Lesser General Public
+** License version 2.1 as published by the Free Software Foundation and
+** appearing in the file LICENSE.LGPL included in the packaging of this
+** file. Please review the following information to ensure the GNU Lesser
+** General Public License version 2.1 requirements will be met:
+** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain additional
+** rights. These rights are described in the Nokia Qt LGPL Exception
+** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
+**
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU General
+** Public License version 3.0 as published by the Free Software Foundation
+** and appearing in the file LICENSE.GPL included in the packaging of this
+** file. Please review the following information to ensure the GNU General
+** Public License version 3.0 requirements will be met:
+** http://www.gnu.org/copyleft/gpl.html.
+**
+** Other Usage
+** Alternatively, this file may be used in accordance with the terms and
+** conditions contained in a signed written agreement between you and Nokia.
+**
+**
+**
+**
+**
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#include "qregularexpression.h"
+
+#include <QtCore/qcoreapplication.h>
+#include <QtCore/qmutex.h>
+#include <QtCore/qvector.h>
+#include <QtCore/qstringlist.h>
+#include <QtCore/qdebug.h>
+
+#include <pcre.h>
+
+// after how many usages we optimize the regexp
+static const unsigned int OPTIMIZE_AFTER_USE_COUNT = 10;
+
+QT_BEGIN_NAMESPACE
+
+/*!
+ \class QRegularExpression
+ \reentrant
+
+ \brief The QRegularExpression class provides pattern matching using regular
+ expressions.
+
+ \since 5.0
+
+ \ingroup tools
+ \ingroup shared
+
+ \keyword regular expression
+
+ Regular expressions, or \e{regexps}, are a very powerful tool to handle
+ strings and texts. This is useful in many contexts, e.g.,
+
+ \table
+ \row \i Validation
+ \i A regexp can test whether a substring meets some criteria,
+ e.g. is an integer or contains no whitespace.
+ \row \i Searching
+ \i A regexp provides more powerful pattern matching than
+ simple substring matching, e.g., match one of the words
+ \e{mail}, \e{letter} or \e{correspondence}, but none of the
+ words \e{email}, \e{mailman}, \e{mailer}, \e{letterbox}, etc.
+ \row \i Search and Replace
+ \i A regexp can replace all occurrences of a substring with a
+ different substring, e.g., replace all occurrences of \e{&}
+ with \e{\&amp;} except where the \e{&} is already followed by
+ an \e{amp;}.
+ \row \i String Splitting
+ \i A regexp can be used to identify where a string should be
+ split apart, e.g. splitting tab-delimited strings.
+ \endtable
+
+ This document is by no means a complete reference to pattern matching using
+ regular expressions, and the following parts will require the reader to
+ have some basic knowledge about Perl-like regular expressions and their
+ pattern syntax.
+
+ Good references about regular expressions include:
+
+ \list
+ \o \e {Mastering Regular Expressions} (Third Edition) by Jeffrey E. F.
+ Friedl, ISBN 0-596-52812-4;
+ \o the \l{http://pcre.org/pcre.txt} {pcrepattern(3)} man page, describing
+ the pattern syntax supported by PCRE (the reference implementation of
+ Perl-compatible regular expressions);
+ \o the \l{http://perldoc.perl.org/perlre.html} {Perl's regular expression
+ documentation} and the \l{http://perldoc.perl.org/perlretut.html} {Perl's
+ regular expression tutorial}.
+ \endlist
+
+ \tableofcontents
+
+ \section1 Introduction
+
+ QRegularExpression implements Perl-compatible regular expressions. It fully
+ supports Unicode. For an overview of the regular expression syntax
+ supported by QRegularExpression, please refer to the aforementioned
+ pcrepattern(3) man page. A regular expression is made up of two things: a
+ \bold{pattern string} and a set of \bold{pattern options} that change the
+ meaning of the pattern string.
+
+ You can set the pattern string by passing a string to the QRegularExpression
+ constructor:
+
+ \snippet doc/src/snippets/code/src_corelib_tools_qregularexpression.cpp 0
+
+ This sets the pattern string to \c{a pattern}. You can also use the
+ setPattern() function to set a pattern on an existing QRegularExpression
+ object:
+
+ \snippet doc/src/snippets/code/src_corelib_tools_qregularexpression.cpp 1
+
+ Note that due to C++ literal strings rules, you must escape all backslashes
+ inside the pattern string with another backslash:
+
+ \snippet doc/src/snippets/code/src_corelib_tools_qregularexpression.cpp 2
+
+ The pattern() function returns the pattern that it's currently set for a
+ QRegularExpression object:
+
+ \snippet doc/src/snippets/code/src_corelib_tools_qregularexpression.cpp 3
+
+ \section1 Pattern options
+
+ The meaning of the pattern string can be modified by setting one or more
+ \e{pattern options}. For instance, it is possible to set a pattern to match
+ case insensitively by setting the QRegularExpression::CaseInsensitiveOption.
+
+ You can set the options by passing them to the QRegularExpression
+ constructor, as in:
+
+ \snippet doc/src/snippets/code/src_corelib_tools_qregularexpression.cpp 4
+
+ Alternatively, you can use the setPatternOptions() function on an existing
+ QRegularExpressionObject:
+
+ \snippet doc/src/snippets/code/src_corelib_tools_qregularexpression.cpp 5
+
+ It is possible to get the pattern options currently set on a
+ QRegularExpression object by using the patternOptions() function:
+
+ \snippet doc/src/snippets/code/src_corelib_tools_qregularexpression.cpp 6
+
+ Please refer to the QRegularExpression::PatternOption enum documentation for
+ more information about each pattern option.
+
+ \section1 Match type and match options
+
+ The last two arguments of the match() and the globalMatch() functions set
+ the match type and the match options. The match type is a value of the
+ QRegularExpression::MatchType enum; the "traditional" matching algorithm is
+ chosen by using the NormalMatch match type (the default). It is also
+ possible to enable partial matching of the regular expression against a
+ subject string: see the \l{partial matching} section for more details.
+
+ The match options are a set of one or more QRegularExpression::MatchOption
+ values. They change the way a specific match of a regular expression
+ against a subject string is done. Please refer to the
+ QRegularExpression::MatchOption enum documentation for more details.
+
+ \target normal matching
+ \section1 Normal matching
+
+ In order to perform a match you can simply invoke the match() function
+ passing a string to match against. We refer to this string as the
+ \e{subject string}. The result of the match() function is a
+ QRegularExpressionMatch object that can be used to inspect the results of
+ the match. For instance:
+
+ \snippet doc/src/snippets/code/src_corelib_tools_qregularexpression.cpp 7
+
+ If a match is successful, the (implicit) capturing group number 0 can be
+ used to retrieve the substring matched by the entire pattern (see also the
+ section about \l{extracting captured substrings}):
+
+ \snippet doc/src/snippets/code/src_corelib_tools_qregularexpression.cpp 8
+
+ It's also possible to start a match at an arbitrary offset inside the
+ subject string by passing the offset as an argument of the
+ match() function. In the following example \c{"12 abc"}
+ is not matched because the match is started at offset 1:
+
+ \snippet doc/src/snippets/code/src_corelib_tools_qregularexpression.cpp 9
+
+ \target extracting captured substrings
+ \section2 Extracting captured substrings
+
+ The QRegularExpressionMatch object contains also information about the
+ substrings captured by the capturing groups in the pattern string. The
+ \l{QRegularExpressionMatch::}{captured()} function will return the string
+ captured by the n-th capturing group:
+
+ \snippet doc/src/snippets/code/src_corelib_tools_qregularexpression.cpp 10
+
+ Capturing groups in the pattern are numbered starting from 1, and the
+ implicit capturing group 0 is used to capture the substring that matched
+ the entire pattern.
+
+ It's also possible to retrieve the starting and the ending offsets (inside
+ the subject string) of each captured substring, by using the
+ \l{QRegularExpressionMatch::}{capturedStart()} and the
+ \l{QRegularExpressionMatch::}{capturedEnd()} functions:
+
+ \snippet doc/src/snippets/code/src_corelib_tools_qregularexpression.cpp 11
+
+ All of these functions have an overload taking a QString as a parameter
+ in order to extract \e{named} captured substrings. For instance:
+
+ \snippet doc/src/snippets/code/src_corelib_tools_qregularexpression.cpp 12
+
+ \target global matching
+ \section1 Global matching
+
+ \e{Global matching} is useful to find all the occurrences of a given
+ regular expression inside a subject string. Suppose that we want to extract
+ all the words from a given string, where a word is a substring matching
+ the pattern \c{\w+}.
+
+ QRegularExpression::globalMatch returns a QRegularExpressionMatchIterator,
+ which is a Java-like forward iterator that can be used to iterate over the
+ results. For instance:
+
+ \snippet doc/src/snippets/code/src_corelib_tools_qregularexpression.cpp 13
+
+ Since it's a Java-like iterator, the QRegularExpressionMatchIterator will
+ point immediately before the first result. Every result is returned as a
+ QRegularExpressionMatch object. The
+ \l{QRegularExpressionMatchIterator::}{hasNext()} function will return true
+ if there's at least one more result, and
+ \l{QRegularExpressionMatchIterator::}{next()} will return the next result
+ and advance the iterator. Continuing from the previous example:
+
+ \snippet doc/src/snippets/code/src_corelib_tools_qregularexpression.cpp 14
+
+ You can also use \l{QRegularExpressionMatchIterator::}{peekNext()} to get
+ the next result without advancing the iterator.
+
+ It is possible to pass a starting offset and one or more match options to
+ the globalMatch() function, exactly like normal matching with match().
+
+ \target partial matching
+ \section1 Partial matching
+
+ A \e{partial match} is obtained when the end of the subject string is
+ reached, but more characters are needed to successfully complete the match.
+ Note that a partial match is usually much more inefficient than a normal
+ match because many optimizations of the matching algorithm cannot be
+ employed.
+
+ A partial match must be explicitly requested by specifying a match type of
+ PartialPreferCompleteMatch or PartialPreferFirstMatch when calling
+ QRegularExpression::match or QRegularExpression::globalMatch. If a partial
+ match is found, then calling the \l{QRegularExpressionMatch::}{hasMatch()}
+ function on the QRegularExpressionMatch object returned by match() will
+ return \c{false}, but \l{QRegularExpressionMatch::}{hasPartialMatch()} will return
+ \c{true}.
+
+ When a partial match is found, no captured substrings are returned, and the
+ (implicit) capturing group 0 corresponding to the whole match captures the
+ partially matched substring of the subject string.
+
+ Note that asking for a partial match can still lead to a complete match, if
+ one is found; in this case, \l{QRegularExpressionMatch::}{hasMatch()} will
+ return \c{true} and \l{QRegularExpressionMatch::}{hasPartialMatch()}
+ \c{false}. It never happens that a QRegularExpressionMatch reports both a
+ partial and a complete match.
+
+ Partial matching is mainly useful in two scenarios: validating user input
+ in real time and incremental/multi-segment matching.
+
+ \target
+ \section2 Validating user input
+
+ Suppose that we would like the user to input a date in a specific
+ format, for instance "MMM dd, yyyy". We can check the input validity with
+ a pattern like:
+
+ \c{^(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) \d\d?, \d\d\d\d$}
+
+ (This pattern doesn't catch invalid days, but let's keep it for the
+ example's purposes).
+
+ We would like to validate the input with this regular expression \e{while}
+ the user is typing it, so that we can report an error in the input as soon
+ as it is committed (for instance, the user typed the wrong key). In order
+ to do so we must distinguish three cases:
+
+ \list
+ \o the input cannot possibly match the regular expression;
+ \o the input does match the regular expression;
+ \o the input does not match the regular expression right now,
+ but it will if more charaters will be added to it.
+ \endlist
+
+ Note that these three cases represent exactly the possible states of a
+ QValidator (see the QValidator::State enum).
+
+ In particular, in the last case we want the regular expression engine to
+ report a partial match: we are successfully matching the pattern against
+ the subject string but the matching cannot continue because the end of the
+ subject is encountered. Notice, however, that the matching algorithm should
+ continue and try all possibilities, and in case a complete (non-partial)
+ match is found, then this one should be reported, and the input string
+ accepted as fully valid.
+
+ This behaviour is implemented by the PartialPreferCompleteMatch match type.
+ For instance:
+
+ \snippet doc/src/snippets/code/src_corelib_tools_qregularexpression.cpp 15
+
+ If matching the same regular expression against the subject string leads to
+ a complete match, it is reported as usual:
+
+ \snippet doc/src/snippets/code/src_corelib_tools_qregularexpression.cpp 16
+
+ Another example with a different pattern, showing the behaviour of
+ preferring a complete match over a partial one:
+
+ \snippet doc/src/snippets/code/src_corelib_tools_qregularexpression.cpp 17
+
+ In this case, the subpattern \c{abc\\w+X} partially matches the subject
+ string; however, the subpattern \c{def} matches the subject string
+ completely, and therefore a complete match is reported.
+
+ In case multiple partial matches are found when matching (but no complete
+ match), then the QRegularExpressionMatch will report the first one that it
+ is found. For instance:
+
+ \snippet doc/src/snippets/code/src_corelib_tools_qregularexpression.cpp 18
+
+ \section2 Incremental/multi-segment matching
+
+ Incremental matching is another use case of partial matching. Suppose that
+ we want to find the occurrences of a regular expression inside a large text
+ (that is, substrings matching the regular expression). In order to do so we
+ would like to "feed" the large text to the regular expression engines in
+ smaller chunks. The obvious problem is what happens if the substring that
+ matches the regular expression spans across two or more chunks.
+
+ In this case, the regular expression engine should report a partial match,
+ so that we can match again adding new data and (eventually) get a complete
+ match. This implies that the regular expression engine may assume that
+ there are other characters \e{beyond the end} of the subject string. This
+ is not to be taken literally -- the engine will never try to access
+ any character after the last one in the subject.
+
+ QRegularExpression implements this behaviour when using the
+ PartialPreferFirstMatch match type. This match type reports a partial match
+ as soon as it is found, and other match alternatives are not tried
+ (even if they could lead to a complete match). For instance:
+
+ \snippet doc/src/snippets/code/src_corelib_tools_qregularexpression.cpp 19
+
+ This happens because when matching the first branch of the alternation
+ operator a partial match is found, and therefore matching stops, without
+ trying the second branch. Another example:
+
+ \snippet doc/src/snippets/code/src_corelib_tools_qregularexpression.cpp 20
+
+ This shows what could seem a counterintuitve behaviour of quantifiers:
+ since \c{?} is greedy, then the engine tries first to continue the match
+ after having matched \c{"abc"}; but then the matching reaches the end of the
+ subject string, and therefore a partial match is reported. This is
+ even more surprising in the following example:
+
+ \snippet doc/src/snippets/code/src_corelib_tools_qregularexpression.cpp 21
+
+ It's easy to understand this behaviour if we remember that the engine
+ expects the subject string to be only a substring of the whole text we're
+ looking for a match into (that is, how we said before, that the engine
+ assumes that there are other characters beyond the end of the subject
+ string).
+
+ Since the \c{*} quantifier is greedy, then reporting a complete match could
+ be an error, because after the current subject \c{"abc"} there may be other
+ occurrences of \c{"abc"}. For instance, the complete text could have been
+ "abcabcX", and therefore the \e{right} match to report (in the complete
+ text) would have been \c{"abcabc"}; by matching only against the leading
+ \c{"abc"} we instead get a partial match.
+
+ \section1 Error handling
+
+ It is possible for a QRegularExpression object to be invalid because of
+ syntax errors in the pattern string. The isValid() function will return
+ true if the regular expression is valid, or false otherwise:
+
+ \snippet doc/src/snippets/code/src_corelib_tools_qregularexpression.cpp 22
+
+ You can get more information about the specific error by calling the
+ errorString() function; moreover, the patternErrorOffset() function
+ will return the offset inside the pattern string
+
+ \snippet doc/src/snippets/code/src_corelib_tools_qregularexpression.cpp 23
+
+ If a match is attempted with an invalid QRegularExpression, then the
+ returned QRegularExpressionMatch object will be invalid as well (that is,
+ its \l{QRegularExpressionMatch::}{isValid()} function will return false).
+ The same applies for attempting a global match.
+
+ \section1 Unsupported Perl-compatible regular expressions features
+
+ QRegularExpression does not support all the features available in
+ Perl-compatible regular expressions. The most notable one is the fact that
+ duplicated names for capturing groups are not supported, and using them can
+ lead to undefined behaviour.
+
+ This may change in a future version of Qt.
+
+ \section1 Notes for QRegExp users
+
+ The QRegularExpression class introduced in Qt 5 is a big improvement upon
+ QRegExp, in terms of APIs offered, supported pattern syntax and speed of
+ execution. The biggest difference is that QRegularExpression simply holds a
+ regular expression, and it's \e{not} modified when a match is requested.
+ Instead, a QRegularExpressionMatch object is returned, in order to check
+ the result of a match and extract the captured substring. The same applies
+ with global matching and QRegularExpressionMatchIterator.
+
+ Other differences are outlined below.
+
+ \section2 Exact matching
+
+ QRegExp::exactMatch in Qt 4 served for two purposes: it exactly matched
+ a regular expression against a subject string, and it implemented partial
+ matching. In fact, if an exact match was not found, one could still find
+ out how much of the subject string was matched by the regular expression
+ by calling QRegExp::matchedLength(). If the returned length was equal
+ to the subject string's length, then one could desume that a partial match
+ was found.
+
+ QRegularExpression supports partial matching explicitly by means of the
+ appropriate MatchType. If instead you simply want to be sure that the
+ subject string matches the regular expression exactly, you can wrap the
+ pattern between a couple of anchoring expressions. Simply
+ putting the pattern between the \c{^} and the \c{$} anchors is enough
+ in most cases:
+
+ \snippet doc/src/snippets/code/src_corelib_tools_qregularexpression.cpp 24
+
+ However, remember that the \c{$} anchor not only matches at the end of the
+ string, but also at a newline character right before the end of the string;
+ that is, the previous pattern matches against the string "this pattern must
+ match exactly\n". Also, the behaviour of both the \c{^} and the \c{$}
+ anchors changes if the MultiLineOption is set either explicitely (as a
+ pattern option) or implicitly (as a directive inside the pattern string).
+
+ Therefore, in the most general case, you should wrap the pattern between
+ the \c{\A} and the \c{\z} anchors:
+
+ \snippet doc/src/snippets/code/src_corelib_tools_qregularexpression.cpp 25
+
+ Note the usage of the non-capturing group in order to preserve the meaning
+ of the branch operator inside the pattern.
+
+ \section2 Global matching
+
+ Due to limitations of the QRegExp API it was impossible to implement global
+ matching correctly (that is, like Perl does). In particular, patterns that
+ can match 0 characters (like \c{"a*"}) are problematic.
+
+ QRegularExpression::globalMatch implements Perl global match correctly, and
+ the returned iterator can be used to examine each result.
+
+ \section2 Wildcard matching
+
+ There is no equivalent of wildcard matching in QRegularExpression.
+ Nevertheless, rewriting a regular expression in wildcard syntax to a
+ Perl-compatible regular expression is a very easy task, given the fact
+ that wildcard syntax supported by QRegExp is very simple.
+
+ \section2 Other pattern syntaxes
+
+ QRegularExpression supports only Perl-compatible regular expressions.
+
+ \section2 Minimal matching
+
+ QRegExp::setMinimal implemented minimal matching by simply reversing the
+ greediness of the quantifiers (QRegExp did not support lazy quantifiers,
+ like \c{*?}, \c{+?}, etc.). QRegularExpression instead does support greedy,
+ lazy and possessive quantifiers. The InvertedGreedinessOption
+ pattern option can be useful to emulate the effects of QRegExp::setMinimal:
+ if enabled, it inverts the greediness of quantifiers (greedy ones become
+ lazy and vice versa).
+
+ \section2 Caret modes
+
+ The AnchoredMatchOption match option can be used to emulate the
+ QRegExp::CaretAtOffset behaviour. There is no equivalent for the other
+ QRegExp::CaretMode modes.
+
+ \sa QRegularExpressionMatch, QRegularExpressionMatchIterator
+*/
+
+/*!
+ \class QRegularExpressionMatch
+ \reentrant
+
+ \brief The QRegularExpressionMatch class provides the results of a matching
+ a QRegularExpression against a string.
+
+ \since 5.0
+
+ \ingroup tools
+ \ingroup shared
+
+ \keyword regular expression match
+
+ A QRegularExpressionMatch object can be obtained by calling the
+ QRegularExpression::match() function, or as a single result of a global
+ match from a QRegularExpressionMatchIterator.
+
+ The success or the failure of a match attempt can be inspected by calling
+ the hasMatch() function. QRegularExpressionMatch also reports a successful
+ partial match through the hasPartialMatch() function.
+
+ In addition, QRegularExpressionMatch returns the substrings captured by the
+ capturing groups in the pattern string. The implicit capturing group with
+ index 0 captures the result of the whole match. The captured() function
+ returns each substring captured, either by the capturing group's index or
+ by its name:
+
+ \snippet doc/src/snippets/code/src_corelib_tools_qregularexpression.cpp 29
+
+ For each captured substring it is possible to query its starting and ending
+ offsets in the subject string by calling the capturedStart() and the
+ capturedEnd() function, respectively. The length of each captured
+ substring is available using the capturedLength() function.
+
+ The convenience function capturedTexts() will return \e{all} the captured
+ substrings at once (including the substring matched by the entire pattern)
+ in the order they have been captured by captring groups; that is,
+ \c{captured(i) == capturedTexts().at(i)}.
+
+ You can retrieve the QRegularExpression object the subject string was
+ matched against by calling the regularExpression() function; the
+ match type and the match options are available as well by calling
+ the matchType() and the matchOptions() respectively.
+
+ Please refer to the QRegularExpression documentation for more information
+ about the Qt regular expression classes.
+
+ \sa QRegularExpression
+*/
+
+/*!
+ \class QRegularExpressionMatchIterator
+ \reentrant
+
+ \brief The QRegularExpressionMatchIterator class provides an iterator on
+ the results of a global match of a QRegularExpression object against a string.
+
+ \since 5.0
+
+ \ingroup tools
+ \ingroup shared
+
+ \keyword regular expression iterator
+
+ A QRegularExpressionMatchIterator object is a forward only Java-like
+ iterator; it can be obtained by calling the
+ QRegularExpression::globalMatch() function. A new
+ QRegularExpressionMatchIterator will be positioned before the first result.
+ You can then call the hasNext() function to check if there are more
+ results available; if so, the next() function will return the next
+ result and advance the iterator.
+
+ Each result is a QRegularExpressionMatch object holding all the information
+ for that result (including captured substrings).
+
+ For instance:
+
+ \snippet doc/src/snippets/code/src_corelib_tools_qregularexpression.cpp 30
+
+ Moreover, QRegularExpressionMatchIterator offers a peekNext() function
+ to get the next result \e{without} advancing the iterator.
+
+ You can retrieve the QRegularExpression object the subject string was
+ matched against by calling the regularExpression() function; the
+ match type and the match options are available as well by calling
+ the matchType() and the matchOptions() respectively.
+
+ Please refer to the QRegularExpression documentation for more information
+ about the Qt regular expression classes.
+
+ \sa QRegularExpression, QRegularExpressionMatch
+*/
+
+
+/*!
+ \enum QRegularExpression::PatternOption
+
+ The PatternOption enum defines modifiers to the way the pattern string
+ should be interpreted, and therefore the way the pattern matches against a
+ subject string.
+
+ \value NoPatternOption
+ No pattern options are set.
+
+ \value CaseInsensitiveOption
+ The pattern should match against the subject string in a case
+ insensitive way. This option corresponds to the /i modifier in Perl
+ regular expressions.
+
+ \value DotMatchesEverythingOption
+ The dot metacharacter (\c{.}) in the pattern string is allowed to match
+ any character in the subject string, including newlines (normally, the
+ dot does not match newlines). This option corresponds to the \c{/s}
+ modifier in Perl regular expressions.
+
+ \value MultilineOption
+ The caret (\c{^}) and the dollar (\c{$}) metacharacters in the pattern
+ string are allowed to match, respectively, immediately after and
+ immediately before any newline in the subject string, as well as at the
+ very beginning and at the very end of the subject string. This option
+ corresponds to the \c{/m} modifier in Perl regular expressions.
+
+ \value ExtendedPatternSyntaxOption
+ Any whitespace in the pattern string which is not escaped and outside a
+ character class is ignored. Moreover, an unescaped sharp (\bold{#})
+ outside a character class causes all the following characters, until
+ the first newline (included), to be ignored. This can be used to
+ increase the readability of a pattern string as well as put comments
+ inside regular expressions; this is particulary useful if the pattern
+ string is loaded from a file or written by the user, because in C++
+ code it is always possible to use the rules for string literals to put
+ comments outside the pattern string. This option corresponds to the \c{/x}
+ modifier in Perl regular expressions.
+
+ \value InvertedGreedinessOption
+ The greediness of the quantifiers is inverted: \c{*}, \c{+}, \c{?},
+ \c{{m,n}}, etc. become lazy, while their lazy versions (\c{*?},
+ \c{+?}, \c{??}, \c{{m,n}?}, etc.) become greedy. There is no equivalent
+ for this option in Perl regular expressions.
+
+ \value DontCaptureOption
+ The non-named capturing groups do not capture substrings; named
+ capturing groups still work as intended, as well as the implicit
+ capturing group number 0 corresponding to the entire match. There is no
+ equivalent for this option in Perl regular expressions.
+
+ \value UseUnicodePropertiesOption
+ The meaning of the \c{\w}, \c{\d}, etc., character types, as well as
+ the meaning of their counterparts (\c{\W}, \c{\D}, etc.), is changed
+ from matching ASCII charaters only to matching any character with the
+ corresponding Unicode property. For instance, \c{\d} is changed to
+ match any character with the Unicode Nd (decimal digit) property;
+ \c{\w} to match any character with either the Unicode L (letter) or N
+ (digit) property, plus underscore, and so on. This option corresponds
+ to the \c{/u} modifier in Perl regular expressions.
+*/
+
+/*!
+ \enum QRegularExpression::MatchType
+
+ The MatchType enum defines the type of the match that should be attempted
+ against the subject string.
+
+ \value NormalMatch
+ A normal match is done.
+
+ \value PartialPreferCompleteMatch
+ The pattern string is matched partially against the subject string. If
+ a partial match is found, then it is recorded, and other matching
+ alternatives are tried as usual. If a complete match is then found,
+ then it's preferred to the partial match; in this case only the
+ complete match is reported. If instead no complete match is found (but
+ only the partial one), then the partial one is reported.
+
+ \value PartialPreferFirstMatch
+ The pattern string is matched partially against the subject string. If
+ a partial match is found, then matching stops and the partial match is
+ reported. In this case, other matching alternatives (potentially
+ leading to a complete match) are not tried. Moreover, this match type
+ assumes that the subject string only a substring of a larger text, and
+ that (in this text) there are other characters beyond the end of the
+ subject string. This can lead to surprising results; see the discussion
+ in the \l{partial matching} section for more details.
+*/
+
+/*!
+ \enum QRegularExpression::MatchOption
+
+ \value NoMatchOption
+ No match options are set.
+
+ \value AnchoredMatchOption
+ The match is constrained to start exactly at the offset passed to
+ match() in order to be successful, even if the pattern string does not
+ contain any metacharacter that anchors the match at that point.
+*/
+
+/*!
+ \internal
+*/
+static int convertToPcreOptions(QRegularExpression::PatternOptions patternOptions)
+{
+ int options = 0;
+
+ if (patternOptions & QRegularExpression::CaseInsensitiveOption)
+ options |= PCRE_CASELESS;
+ if (patternOptions & QRegularExpression::DotMatchesEverythingOption)
+ options |= PCRE_DOTALL;
+ if (patternOptions & QRegularExpression::MultilineOption)
+ options |= PCRE_MULTILINE;
+ if (patternOptions & QRegularExpression::ExtendedPatternSyntaxOption)
+ options |= PCRE_EXTENDED;
+ if (patternOptions & QRegularExpression::InvertedGreedinessOption)
+ options |= PCRE_UNGREEDY;
+ if (patternOptions & QRegularExpression::DontCaptureOption)
+ options |= PCRE_NO_AUTO_CAPTURE;
+ if (patternOptions & QRegularExpression::UseUnicodePropertiesOption)
+ options |= PCRE_UCP;
+
+ return options;
+}
+
+/*!
+ \internal
+*/
+static int convertToPcreOptions(QRegularExpression::MatchOptions matchOptions)
+{
+ int options = 0;
+
+ if (matchOptions & QRegularExpression::AnchoredMatchOption)
+ options |= PCRE_ANCHORED;
+
+ return options;
+}
+
+struct QRegularExpressionPrivate : QSharedData
+{
+ QRegularExpressionPrivate();
+ ~QRegularExpressionPrivate();
+ QRegularExpressionPrivate(const QRegularExpressionPrivate &other);
+
+ void cleanCompiledPattern();
+ void compilePattern();
+ void getPatternInfo();
+ void optimizePattern();
+
+ QRegularExpressionMatchPrivate *doMatch(const QString &subject,
+ int offset,
+ QRegularExpression::MatchType matchType,
+ QRegularExpression::MatchOptions matchOptions,
+ const QRegularExpressionMatchPrivate *previous = 0) const;
+
+ int captureIndexForName(const QString &name) const;
+
+ QString pattern;
+ QRegularExpression::PatternOptions patternOptions;
+
+ // *All* of the following members are set managed while holding this mutex,
+ // except for isDirty which is set to true by QRegularExpression setters
+ // (right after a detach happened).
+ // On the other hand, after the compilation and studying,
+ // it's safe to *use* (i.e. read) them from multiple threads at the same time.
+ // Therefore, doMatch doesn't need to lock this mutex.
+ QMutex mutex;
+
+ // The PCRE pointers are reference-counted by the QRegularExpressionPrivate
+ // objects themselves; when the private is copied (i.e. a detach happened)
+ // they are set to 0
+ pcre16 *compiledPattern;
+ pcre16_extra *studyData;
+ const char *errorString;
+ int errorOffset;
+ int capturingCount;
+ unsigned int usedCount;
+ bool usingCrLfNewlines;
+ bool isDirty;
+};
+
+struct QRegularExpressionMatchPrivate : QSharedData
+{
+ QRegularExpressionMatchPrivate(const QRegularExpression &re,
+ const QString &subject,
+ QRegularExpression::MatchType matchType,
+ QRegularExpression::MatchOptions matchOptions,
+ int capturingCount);
+
+ QRegularExpressionMatch nextMatch() const;
+
+ QRegularExpression regularExpression;
+ QString subject;
+ // the capturedOffsets vector contains pairs of (start, end) positions
+ // for each captured substring
+ QVector<int> capturedOffsets;
+
+ QRegularExpression::MatchType matchType;
+ QRegularExpression::MatchOptions matchOptions;
+
+ int capturedCount;
+
+ bool hasMatch;
+ bool hasPartialMatch;
+ bool isValid;
+};
+
+struct QRegularExpressionMatchIteratorPrivate : QSharedData
+{
+ QRegularExpressionMatchIteratorPrivate(const QRegularExpression re,
+ QRegularExpression::MatchType matchType,
+ QRegularExpression::MatchOptions matchOptions,
+ const QRegularExpressionMatch &next);
+
+ bool hasNext() const;
+ QRegularExpressionMatch next;
+ QRegularExpression regularExpression;
+ QRegularExpression::MatchType matchType;
+ QRegularExpression::MatchOptions matchOptions;
+};
+
+/*!
+ \internal
+*/
+QRegularExpression::QRegularExpression(QRegularExpressionPrivate &dd)
+ : d(&dd)
+{
+}
+
+/*!
+ \internal
+*/
+QRegularExpressionPrivate::QRegularExpressionPrivate()
+ : pattern(), patternOptions(0),
+ mutex(),
+ compiledPattern(0), studyData(0),
+ errorString(0), errorOffset(-1),
+ capturingCount(0),
+ usedCount(0),
+ usingCrLfNewlines(false),
+ isDirty(true)
+{
+}
+
+/*!
+ \internal
+*/
+QRegularExpressionPrivate::~QRegularExpressionPrivate()
+{
+ cleanCompiledPattern();
+}
+
+/*!
+ \internal
+
+ Copies the private, which means copying only the pattern and the pattern
+ options. The compiledPattern and the studyData pointers are NOT copied (we
+ do not own them any more), and in general all the members set when
+ compiling a pattern are set to default values. isDirty is set back to true
+ so that the pattern has to be recompiled again.
+*/
+QRegularExpressionPrivate::QRegularExpressionPrivate(const QRegularExpressionPrivate &other)
+ : QSharedData(other),
+ pattern(other.pattern), patternOptions(other.patternOptions),
+ mutex(),
+ compiledPattern(0), studyData(0),
+ errorString(0),
+ errorOffset(-1), capturingCount(0),
+ usedCount(0),
+ usingCrLfNewlines(false), isDirty(true)
+{
+}
+
+/*!
+ \internal
+*/
+void QRegularExpressionPrivate::cleanCompiledPattern()
+{
+ pcre16_free(compiledPattern);
+ pcre16_free_study(studyData);
+ usedCount = 0;
+ compiledPattern = 0;
+ studyData = 0;
+ usingCrLfNewlines = false;
+ errorOffset = -1;
+ capturingCount = 0;
+}
+
+/*!
+ \internal
+*/
+void QRegularExpressionPrivate::compilePattern()
+{
+ QMutexLocker lock(&mutex);
+
+ if (!isDirty)
+ return;
+
+ isDirty = false;
+ cleanCompiledPattern();
+
+ int options = convertToPcreOptions(patternOptions);
+ options |= PCRE_UTF16;
+
+ int errorCode;
+ compiledPattern = pcre16_compile2(pattern.utf16(), options,
+ &errorCode, &errorString, &errorOffset, 0);
+
+ if (!compiledPattern)
+ return;
+
+ Q_ASSERT(errorCode == 0);
+ Q_ASSERT(studyData == 0); // studying (=>optimizing) is always done later
+ errorOffset = -1;
+
+ getPatternInfo();
+}
+
+/*!
+ \internal
+*/
+void QRegularExpressionPrivate::getPatternInfo()
+{
+ Q_ASSERT(compiledPattern);
+
+ pcre16_fullinfo(compiledPattern, 0, PCRE_INFO_CAPTURECOUNT, &capturingCount);
+
+ // detect the settings for the newline
+ int patternNewlineSetting;
+ pcre16_fullinfo(compiledPattern, studyData, PCRE_INFO_OPTIONS, &patternNewlineSetting);
+ patternNewlineSetting &= PCRE_NEWLINE_CR | PCRE_NEWLINE_LF | PCRE_NEWLINE_CRLF
+ | PCRE_NEWLINE_ANY | PCRE_NEWLINE_ANYCRLF;
+ if (patternNewlineSetting == 0) {
+ // no option was specified in the regexp, grab PCRE build defaults
+ int pcreNewlineSetting;
+ pcre16_config(PCRE_CONFIG_NEWLINE, &pcreNewlineSetting);
+ switch (pcreNewlineSetting) {
+ case 13:
+ patternNewlineSetting = PCRE_NEWLINE_CR; break;
+ case 10:
+ patternNewlineSetting = PCRE_NEWLINE_LF; break;
+ case 3338: // (13<<8 | 10)
+ patternNewlineSetting = PCRE_NEWLINE_CRLF; break;
+ case -2:
+ patternNewlineSetting = PCRE_NEWLINE_ANYCRLF; break;
+ case -1:
+ patternNewlineSetting = PCRE_NEWLINE_ANY; break;
+ default:
+ qWarning("QRegularExpressionPrivate::compilePattern(): "
+ "PCRE_CONFIG_NEWLINE returned an unknown newline");
+ break;
+ }
+ }
+
+ usingCrLfNewlines = (patternNewlineSetting == PCRE_NEWLINE_CRLF) ||
+ (patternNewlineSetting == PCRE_NEWLINE_ANY) ||
+ (patternNewlineSetting == PCRE_NEWLINE_ANYCRLF);
+}
+
+/*!
+ \internal
+*/
+void QRegularExpressionPrivate::optimizePattern()
+{
+ Q_ASSERT(compiledPattern);
+
+ QMutexLocker lock(&mutex);
+
+ if (studyData || (++usedCount != OPTIMIZE_AFTER_USE_COUNT))
+ return;
+
+ int studyOptions = PCRE_STUDY_JIT_COMPILE;
+ const char *err;
+ studyData = pcre16_study(compiledPattern, studyOptions, &err);
+
+ if (!studyData && err)
+ qWarning("QRegularExpressionPrivate::optimizePattern(): pcre_study failed: %s", err);
+}
+
+/*!
+ \internal
+
+ Returns the capturing group number for the given name. Duplicated names for
+ capturing groups are not supported.
+*/
+int QRegularExpressionPrivate::captureIndexForName(const QString &name) const
+{
+ Q_ASSERT(!name.isEmpty());
+
+ int index = pcre16_get_stringnumber(compiledPattern, name.utf16());
+ if (index >= 0)
+ return index;
+
+ return -1;
+}
+
+/*!
+ \internal
+
+ Performs a match of type \a matchType on the given \a subject string with
+ options \a matchOptions and returns the QRegularExpressionMatchPrivate of
+ the result. It also advances a match if a previous result is given as \a
+ previous.
+
+ Advancing a match is a tricky algorithm. If the previous match matched a
+ non-empty string, we just do an ordinary match at the offset position.
+
+ If the previous match matched an empty string, then an anchored, non-empty
+ match is attempted at the offset position. If that succeeds, then we got
+ the next match and we can return it. Otherwise, we advance by 1 position
+ (which can be one or two code units in UTF-16!) and reattempt a "normal"
+ match. We also have the problem of detecting the current newline format: if
+ the new advanced offset is pointing to the beginning of a CRLF sequence, we
+ must advance over it.
+*/
+QRegularExpressionMatchPrivate *QRegularExpressionPrivate::doMatch(const QString &subject,
+ int offset,
+ QRegularExpression::MatchType matchType,
+ QRegularExpression::MatchOptions matchOptions,
+ const QRegularExpressionMatchPrivate *previous) const
+{
+ if (offset < 0)
+ offset += subject.length();
+
+ QRegularExpression re(*const_cast<QRegularExpressionPrivate *>(this));
+
+ if (offset < 0 || offset > subject.length())
+ return new QRegularExpressionMatchPrivate(re, subject, matchType, matchOptions, 0);
+
+ if (!compiledPattern) {
+ qWarning("QRegularExpressionPrivate::doMatch(): called on an invalid QRegularExpression object");
+ return new QRegularExpressionMatchPrivate(re, subject, matchType, matchOptions, 0);
+ }
+
+ QRegularExpressionMatchPrivate *priv = new QRegularExpressionMatchPrivate(re, subject,
+ matchType, matchOptions,
+ capturingCount);
+
+ // this is mutex protected
+ const_cast<QRegularExpressionPrivate *>(this)->optimizePattern();
+
+ int pcreOptions = convertToPcreOptions(matchOptions);
+
+ if (matchType == QRegularExpression::PartialPreferCompleteMatch)
+ pcreOptions |= PCRE_PARTIAL_SOFT;
+ else if (matchType == QRegularExpression::PartialPreferFirstMatch)
+ pcreOptions |= PCRE_PARTIAL_HARD;
+
+ bool previousMatchWasEmpty = false;
+ if (previous && previous->hasMatch &&
+ (previous->capturedOffsets.at(0) == previous->capturedOffsets.at(1))) {
+ previousMatchWasEmpty = true;
+ }
+
+ int * const captureOffsets = priv->capturedOffsets.data();
+ const int captureOffsetsCount = priv->capturedOffsets.size();
+
+ const unsigned short * const subjectUtf16 = subject.utf16();
+ const int subjectLength = subject.length();
+
+ int result;
+
+ if (!previousMatchWasEmpty) {
+ result = pcre16_exec(compiledPattern, studyData,
+ subjectUtf16, subjectLength,
+ offset, pcreOptions,
+ captureOffsets, captureOffsetsCount);
+ } else {
+ result = pcre16_exec(compiledPattern, studyData,
+ subjectUtf16, subjectLength,
+ offset, pcreOptions | PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED,
+ captureOffsets, captureOffsetsCount);
+
+ if (result == PCRE_ERROR_NOMATCH) {
+ ++offset;
+
+ if (usingCrLfNewlines
+ && offset < subjectLength
+ && subjectUtf16[offset - 1] == QLatin1Char('\r')
+ && subjectUtf16[offset] == QLatin1Char('\n')) {
+ ++offset;
+ } else if (offset < subjectLength
+ && QChar::isLowSurrogate(subjectUtf16[offset])) {
+ ++offset;
+ }
+
+ result = pcre16_exec(compiledPattern, studyData,
+ subjectUtf16, subjectLength,
+ offset, pcreOptions,
+ captureOffsets, captureOffsetsCount);
+ }
+ }
+
+#ifdef QREGULAREXPRESSION_DEBUG
+ qDebug() << "Matching" << pattern << "against" << subject
+ << offset << matchType << matchOptions << previousMatchWasEmpty
+ << "result" << result;
+#endif
+
+ // result == 0 means not enough space in captureOffsets; should never happen
+ Q_ASSERT(result != 0);
+
+ if (result > 0) {
+ // full match
+ priv->isValid = true;
+ priv->hasMatch = true;
+ priv->capturedCount = result;
+ priv->capturedOffsets.resize(result * 2);
+ } else {
+ // no match, partial match or error
+ priv->hasPartialMatch = (result == PCRE_ERROR_PARTIAL);
+ priv->isValid = (result == PCRE_ERROR_NOMATCH || result == PCRE_ERROR_PARTIAL);
+
+ if (result == PCRE_ERROR_PARTIAL) {
+ // partial match:
+ // leave the start and end capture offsets (i.e. cap(0))
+ priv->capturedCount = 1;
+ priv->capturedOffsets.resize(2);
+ } else {
+ // no match or error
+ priv->capturedCount = 0;
+ priv->capturedOffsets.clear();
+ }
+ }
+
+ return priv;
+}
+
+/*!
+ \internal
+*/
+QRegularExpressionMatchPrivate::QRegularExpressionMatchPrivate(const QRegularExpression &re,
+ const QString &subject,
+ QRegularExpression::MatchType matchType,
+ QRegularExpression::MatchOptions matchOptions,
+ int capturingCount)
+ : regularExpression(re), subject(subject),
+ matchType(matchType), matchOptions(matchOptions),
+ capturedCount(0),
+ hasMatch(false), hasPartialMatch(false), isValid(false)
+{
+ Q_ASSERT(capturingCount >= 0);
+ const int captureOffsetsCount = (capturingCount + 1) * 3;
+ capturedOffsets.resize(captureOffsetsCount);
+}
+
+
+/*!
+ \internal
+*/
+QRegularExpressionMatch QRegularExpressionMatchPrivate::nextMatch() const
+{
+ Q_ASSERT(isValid);
+ Q_ASSERT(hasMatch || hasPartialMatch);
+
+ QRegularExpressionMatchPrivate *nextPrivate = regularExpression.d->doMatch(subject,
+ capturedOffsets.at(1),
+ matchType,
+ matchOptions,
+ this);
+ return QRegularExpressionMatch(*nextPrivate);
+}
+
+/*!
+ \internal
+*/
+QRegularExpressionMatchIteratorPrivate::QRegularExpressionMatchIteratorPrivate(const QRegularExpression re,
+ QRegularExpression::MatchType matchType,
+ QRegularExpression::MatchOptions matchOptions,
+ const QRegularExpressionMatch &next)
+ : next(next),
+ regularExpression(re),
+ matchType(matchType), matchOptions(matchOptions)
+{
+}
+
+/*!
+ \internal
+*/
+bool QRegularExpressionMatchIteratorPrivate::hasNext() const
+{
+ return next.isValid() && (next.hasMatch() || next.hasPartialMatch());
+}
+
+// PUBLIC API
+
+/*!
+ Constructs a QRegularExpression object with an empty pattern and no pattern
+ options.
+
+ \sa setPattern(), setPatternOptions()
+*/
+QRegularExpression::QRegularExpression()
+ : d(new QRegularExpressionPrivate)
+{
+}
+
+/*!
+ Constructs a QRegularExpression object using the given \a pattern as
+ pattern and the \a options as the pattern options.
+
+ \sa setPattern(), setPatternOptions()
+*/
+QRegularExpression::QRegularExpression(const QString &pattern, PatternOptions options)
+ : d(new QRegularExpressionPrivate)
+{
+ d->pattern = pattern;
+ d->patternOptions = options;
+}
+
+/*!
+ Constructs a QRegularExpression object as a copy of \a re.
+
+ \sa operator=()
+*/
+QRegularExpression::QRegularExpression(const QRegularExpression &re)
+ : d(re.d)
+{
+}
+
+/*!
+ Destroys the QRegularExpression object.
+*/
+QRegularExpression::~QRegularExpression()
+{
+}
+
+/*!
+ Assigns the regular expression \a re to this object, and returns a reference
+ to the copy. Both the pattern and the pattern options are copied.
+*/
+QRegularExpression &QRegularExpression::operator=(const QRegularExpression &re)
+{
+ d = re.d;
+ return *this;
+}
+
+/*!
+ \fn void QRegularExpression::swap(QRegularExpression &other)
+
+ Swaps the regular expression \a other with this regular expression. This
+ operation is very fast and never fails.
+*/
+
+/*!
+ Returns the pattern string of the regular expression.
+
+ \sa setPattern(), patternOptions()
+*/
+QString QRegularExpression::pattern() const
+{
+ return d->pattern;
+}
+
+/*!
+ Sets the pattern string of the regular expression to \a pattern. The
+ pattern options are left unchanged.
+
+ \sa pattern(), setPatternOptions()
+*/
+void QRegularExpression::setPattern(const QString &pattern)
+{
+ d.detach();
+ d->isDirty = true;
+ d->pattern = pattern;
+}
+
+/*!
+ Returns the pattern options for the regular expression.
+
+ \sa setPatternOptions(), pattern()
+*/
+QRegularExpression::PatternOptions QRegularExpression::patternOptions() const
+{
+ return d->patternOptions;
+}
+
+/*!
+ Sets the given \a options as the pattern options of the regular expression.
+ The pattern string is left unchanged.
+
+ \sa patternOptions(), setPattern()
+*/
+void QRegularExpression::setPatternOptions(PatternOptions options)
+{
+ d.detach();
+ d->isDirty = true;
+ d->patternOptions = options;
+}
+
+/*!
+ Returns true if the regular expression is a valid regular expression (that
+ is, it contains no syntax errors, etc.), or false otherwise. Use
+ errorString() to obtain a textual description of the error.
+
+ \sa errorString(), patternErrorOffset()
+*/
+bool QRegularExpression::isValid() const
+{
+ d.data()->compilePattern();
+ return d->compiledPattern;
+}
+
+/*!
+ Returns a textual description of the error found when checking the validity
+ of the regular expression, or "no error" if no error was found.
+
+ \sa isValid(), patternErrorOffset()
+*/
+QString QRegularExpression::errorString() const
+{
+ d.data()->compilePattern();
+ if (d->errorString)
+ return QCoreApplication::translate("QRegularExpression", d->errorString, 0, QCoreApplication::UnicodeUTF8);
+ return QCoreApplication::translate("QRegularExpression", "no error", 0, QCoreApplication::UnicodeUTF8);
+}
+
+/*!
+ Returns the offset, inside the pattern string, at which an error was found
+ when checking the validity of the regular expression. If no error was
+ found, then -1 is returned.
+
+ \sa pattern(), isValid(), errorString()
+*/
+int QRegularExpression::patternErrorOffset() const
+{
+ d.data()->compilePattern();
+ return d->errorOffset;
+}
+
+/*!
+ Attempts to match the regular expression against the given \a subject
+ string, starting at the position \a offset inside the subject, using a
+ match of type \a matchType and honoring the given \a matchOptions.
+
+ The returned QRegularExpressionMatch object contains the results of the
+ match.
+
+ \sa QRegularExpressionMatch, {normal matching}
+*/
+QRegularExpressionMatch QRegularExpression::match(const QString &subject,
+ int offset,
+ MatchType matchType,
+ MatchOptions matchOptions) const
+{
+ d.data()->compilePattern();
+
+ QRegularExpressionMatchPrivate *priv = d->doMatch(subject, offset, matchType, matchOptions);
+ return QRegularExpressionMatch(*priv);
+}
+
+/*!
+ Attempts to perform a global match of the regular expression against the
+ given \a subject string, starting at the position \a offset inside the
+ subject, using a match of type \a matchType and honoring the given \a
+ matchOptions.
+
+ The returned QRegularExpressionMatchIterator is positioned before the
+ first match result (if any).
+
+ \sa QRegularExpressionMatchIterator, {global matching}
+*/
+QRegularExpressionMatchIterator QRegularExpression::globalMatch(const QString &subject,
+ int offset,
+ MatchType matchType,
+ MatchOptions matchOptions) const
+{
+ QRegularExpressionMatchIteratorPrivate *priv =
+ new QRegularExpressionMatchIteratorPrivate(*this,
+ matchType,
+ matchOptions,
+ match(subject, offset, matchType, matchOptions));
+
+ return QRegularExpressionMatchIterator(*priv);
+}
+
+/*!
+ Returns true if the regular expression is equal to \a re, or false
+ otherwise. Two QRegularExpression objects are equal if they have
+ the same pattern string and the same pattern options.
+
+ \sa operator!=()
+*/
+bool QRegularExpression::operator==(const QRegularExpression &re) const
+{
+ return (pattern() == re.pattern() && patternOptions() == re.patternOptions());
+}
+
+/*!
+ \fn bool QRegularExpression::operator!=(const QRegularExpression &re) const
+
+ Returns true if the regular expression is different from \a re, or
+ false otherwise.
+
+ \sa operator==()
+*/
+
+/*!
+ Escapes all characters of \a str so that they no longer have any special
+ meaning when used as a regular expression pattern string, and returns
+ the escaped string. For instance:
+
+ \snippet doc/src/snippets/code/src_corelib_tools_qregularexpression.cpp 26
+
+ This is very convenient in order to build patterns from arbitrary strings:
+
+ \snippet doc/src/snippets/code/src_corelib_tools_qregularexpression.cpp 27
+
+ \note This function implements Perl's quotemeta algorithm and escapes with
+ a backslash all characters in \a str, except for the characters in the
+ \c{[A-Z]}, \c{[a-z]} and \c{[0-9]} ranges, as well as the underscore
+ (\c{_}) character. The only difference with Perl is that a literal NUL
+ inside \a str is escaped with the sequence \c{"\\\\0"} (backslash +
+ \c{'0'}), instead of \c{"\\\\\\0"} (backslash + \c{NUL}).
+*/
+QString QRegularExpression::escape(const QString &str)
+{
+ QString result;
+ const int count = str.size();
+ result.reserve(count * 2);
+
+ // everything but [a-zA-Z0-9_] gets escaped,
+ // cf. perldoc -f quotemeta
+ for (int i = 0; i < count; ++i) {
+ const QChar current = str.at(i);
+
+ if (current == QChar::Null) {
+ // unlike Perl, a literal NUL must be escaped with
+ // "\\0" (backslash + 0) and not "\\\0" (backslash + NUL),
+ // because pcre16_compile uses a NUL-terminated string
+ result.append(QLatin1Char('\\'));
+ result.append(QLatin1Char('0'));
+ } else if ( (current < QLatin1Char('a') || current > QLatin1Char('z')) &&
+ (current < QLatin1Char('A') || current > QLatin1Char('Z')) &&
+ (current < QLatin1Char('0') || current > QLatin1Char('9')) &&
+ current != QLatin1Char('_') )
+ {
+ result.append(QLatin1Char('\\'));
+ result.append(current);
+ if (current.isHighSurrogate() && i < (count - 1))
+ result.append(str.at(++i));
+ } else {
+ result.append(current);
+ }
+ }
+
+ result.squeeze();
+ return result;
+}
+
+/*!
+ Destroys the match result.
+*/
+QRegularExpressionMatch::~QRegularExpressionMatch()
+{
+}
+
+/*!
+ Constructs a match result by copying the result of the given \a match.
+
+ \sa operator=()
+*/
+QRegularExpressionMatch::QRegularExpressionMatch(const QRegularExpressionMatch &match)
+ : d(match.d)
+{
+}
+
+/*!
+ Assigns the match result \a match to this object, and returns a reference
+ to the copy.
+*/
+QRegularExpressionMatch &QRegularExpressionMatch::operator=(const QRegularExpressionMatch &match)
+{
+ d = match.d;
+ return *this;
+}
+
+/*!
+ \fn void QRegularExpressionMatch::swap(QRegularExpressionMatch &other)
+
+ Swaps the match result \a other with this match result. This
+ operation is very fast and never fails.
+*/
+
+/*!
+ \internal
+*/
+QRegularExpressionMatch::QRegularExpressionMatch(QRegularExpressionMatchPrivate &dd)
+ : d(&dd)
+{
+}
+
+/*!
+ Returns the QRegularExpression object whose match() function returned this
+ object.
+
+ \sa QRegularExpression::match(), matchType(), matchOptions()
+*/
+QRegularExpression QRegularExpressionMatch::regularExpression() const
+{
+ return d->regularExpression;
+}
+
+
+/*!
+ Returns the match type that was used to get this QRegularExpressionMatch
+ object, that is, the match type that was passed to
+ QRegularExpression::match() or QRegularExpression::globalMatch().
+
+ \sa QRegularExpression::match(), regularExpression(), matchOptions()
+*/
+QRegularExpression::MatchType QRegularExpressionMatch::matchType() const
+{
+ return d->matchType;
+}
+
+/*!
+ Returns the match options that were used to get this
+ QRegularExpressionMatch object, that is, the match options that were passed
+ to QRegularExpression::match() or QRegularExpression::globalMatch().
+
+ \sa QRegularExpression::match(), regularExpression(), matchType()
+*/
+QRegularExpression::MatchOptions QRegularExpressionMatch::matchOptions() const
+{
+ return d->matchOptions;
+}
+
+/*!
+ Returns the index of the last capturing group that captured something,
+ including the implicit capturing group 0. This can be used to extract all
+ the substrings that were captured:
+
+ \snippet doc/src/snippets/code/src_corelib_tools_qregularexpression.cpp 28
+
+ Note that some of the capturing groups with an index less than
+ lastCapturedIndex() could have not matched, and therefore captured nothing.
+
+ If the regular expression did not match, this function returns -1.
+
+ \sa captured(), capturedStart(), capturedEnd(), capturedLength()
+*/
+int QRegularExpressionMatch::lastCapturedIndex() const
+{
+ return d->capturedCount - 1;
+}
+
+/*!
+ Returns the substring captured by the \a nth capturing group. If the \a nth
+ capturing group did not capture a string or doesn't exist, returns a null
+ QString.
+
+ \sa capturedRef(), lastCapturedIndex(), capturedStart(), capturedEnd(),
+ capturedLength(), QString::isNull()
+*/
+QString QRegularExpressionMatch::captured(int nth) const
+{
+ if (nth < 0 || nth > lastCapturedIndex())
+ return QString();
+
+ int start = capturedStart(nth);
+
+ if (start == -1) // didn't capture
+ return QString();
+
+ return d->subject.mid(start, capturedLength(nth));
+}
+
+/*!
+ Returns a reference to the substring captured by the \a nth capturing group.
+ If the \a nth capturing group did not capture a string or doesn't exist,
+ returns a null QStringRef.
+
+ \sa captured(), lastCapturedIndex(), capturedStart(), capturedEnd(),
+ capturedLength(), QStringRef::isNull()
+*/
+QStringRef QRegularExpressionMatch::capturedRef(int nth) const
+{
+ if (nth < 0 || nth > lastCapturedIndex())
+ return QStringRef();
+
+ int start = capturedStart(nth);
+
+ if (start == -1) // didn't capture
+ return QStringRef();
+
+ return d->subject.midRef(start, capturedLength(nth));
+}
+
+/*!
+ Returns the substring captured by the capturing group named \a name. If the
+ capturing group named \a name did not capture a string or doesn't exist,
+ returns a null QString.
+
+ \sa capturedRef(), capturedStart(), capturedEnd(), capturedLength(),
+ QString::isNull()
+*/
+QString QRegularExpressionMatch::captured(const QString &name) const
+{
+ if (name.isEmpty()) {
+ qWarning("QRegularExpressionMatch::captured: empty capturing group name passed");
+ return QString();
+ }
+ int nth = d->regularExpression.d->captureIndexForName(name);
+ if (nth == -1)
+ return QString();
+ return captured(nth);
+}
+
+/*!
+ Returns a reference to the string captured by the capturing group named \a
+ name. If the capturing group named \a name did not capture a string or
+ doesn't exist, returns a null QStringRef.
+
+ \sa captured(), capturedStart(), capturedEnd(), capturedLength(),
+ QStringRef::isNull()
+*/
+QStringRef QRegularExpressionMatch::capturedRef(const QString &name) const
+{
+ if (name.isEmpty()) {
+ qWarning("QRegularExpressionMatch::capturedRef: empty capturing group name passed");
+ return QStringRef();
+ }
+ int nth = d->regularExpression.d->captureIndexForName(name);
+ if (nth == -1)
+ return QStringRef();
+ return capturedRef(nth);
+}
+
+/*!
+ Returns a list of all strings captured by capturing groups, in the order
+ the groups themselves appear in the pattern string.
+*/
+QStringList QRegularExpressionMatch::capturedTexts() const
+{
+ QStringList texts;
+ for (int i = 0; i <= lastCapturedIndex(); ++i)
+ texts << captured(i);
+ return texts;
+}
+
+/*!
+ Returns the offset inside the subject string corresponding to the
+ starting position of the substring captured by the \a nth capturing group.
+ If the \a nth capturing group did not capture a string or doesn't exist,
+ returns -1.
+
+ \sa capturedEnd(), capturedLength(), captured()
+*/
+int QRegularExpressionMatch::capturedStart(int nth) const
+{
+ if (nth < 0 || nth > lastCapturedIndex())
+ return -1;
+
+ return d->capturedOffsets.at(nth * 2);
+}
+
+/*!
+ Returns the length of the substring captured by the \a nth capturing group.
+
+ \note This function returns 0 if the \a nth capturing group did not capture
+ a string or doesn't exist.
+
+ \sa capturedStart(), capturedEnd(), captured()
+*/
+int QRegularExpressionMatch::capturedLength(int nth) const
+{
+ // bound checking performed by these two functions
+ return capturedEnd(nth) - capturedStart(nth);
+}
+
+/*!
+ Returns the offset inside the subject string immediately after the ending
+ position of the substring captured by the \a nth capturing group. If the \a
+ nth capturing group did not capture a string or doesn't exist, returns -1.
+
+ \sa capturedStart(), capturedLength(), captured()
+*/
+int QRegularExpressionMatch::capturedEnd(int nth) const
+{
+ if (nth < 0 || nth > lastCapturedIndex())
+ return -1;
+
+ return d->capturedOffsets.at(nth * 2 + 1);
+}
+
+/*!
+ Returns the offset inside the subject string corresponding to the starting
+ position of the substring captured by the capturing group named \a name.
+ If the capturing group named \a name did not capture a string or doesn't
+ exist, returns -1.
+
+ \sa capturedEnd(), capturedLength(), captured()
+*/
+int QRegularExpressionMatch::capturedStart(const QString &name) const
+{
+ if (name.isEmpty()) {
+ qWarning("QRegularExpressionMatch::capturedStart: empty capturing group name passed");
+ return -1;
+ }
+ int nth = d->regularExpression.d->captureIndexForName(name);
+ if (nth == -1)
+ return -1;
+ return capturedStart(nth);
+}
+
+/*!
+ Returns the offset inside the subject string corresponding to the starting
+ position of the substring captured by the capturing group named \a name.
+
+ \note This function returns 0 if the capturing group named \a name did not
+ capture a string or doesn't exist.
+
+ \sa capturedStart(), capturedEnd(), captured()
+*/
+int QRegularExpressionMatch::capturedLength(const QString &name) const
+{
+ if (name.isEmpty()) {
+ qWarning("QRegularExpressionMatch::capturedLength: empty capturing group name passed");
+ return 0;
+ }
+ int nth = d->regularExpression.d->captureIndexForName(name);
+ if (nth == -1)
+ return 0;
+ return capturedLength(nth);
+}
+
+/*!
+ Returns the offset inside the subject string immediately after the ending
+ position of the substring captured by the capturing group named \a name. If
+ the capturing group named \a name did not capture a string or doesn't
+ exist, returns -1.
+
+ \sa capturedStart(), capturedLength(), captured()
+*/
+int QRegularExpressionMatch::capturedEnd(const QString &name) const
+{
+ if (name.isEmpty()) {
+ qWarning("QRegularExpressionMatch::capturedEnd: empty capturing group name passed");
+ return -1;
+ }
+ int nth = d->regularExpression.d->captureIndexForName(name);
+ if (nth == -1)
+ return -1;
+ return capturedEnd(nth);
+}
+
+/*!
+ Returns true if the regular expression matched against the subject string,
+ or false otherwise.
+
+ \sa QRegularExpression::match(), hasPartialMatch()
+*/
+bool QRegularExpressionMatch::hasMatch() const
+{
+ return d->hasMatch;
+}
+
+/*!
+ Returns true if the regular expression partially matched against the
+ subject string, or false otherwise.
+
+ \note Only a match that explicitely used the one of the partial match types
+ can yield a partial match. Still, if such a match succeeds totally, this
+ function will return false, while hasMatch() will return true.
+
+ \sa QRegularExpression::match(), QRegularExpression::MatchType, hasMatch()
+*/
+bool QRegularExpressionMatch::hasPartialMatch() const
+{
+ return d->hasPartialMatch;
+}
+
+/*!
+ Returns true if the match object was obtained as a result from the
+ QRegularExpression::match() function invoked on a valid QRegularExpression
+ object; returns false if the QRegularExpression was invalid.
+
+ \sa QRegularExpression::match(), QRegularExpression::isValid()
+*/
+bool QRegularExpressionMatch::isValid() const
+{
+ return d->isValid;
+}
+
+/*!
+ \internal
+*/
+QRegularExpressionMatchIterator::QRegularExpressionMatchIterator(QRegularExpressionMatchIteratorPrivate &dd)
+ : d(&dd)
+{
+}
+
+/*!
+ Destroys the QRegularExpressionMatchIterator object.
+*/
+QRegularExpressionMatchIterator::~QRegularExpressionMatchIterator()
+{
+}
+
+/*!
+ Constructs a QRegularExpressionMatchIterator object as a copy of \a
+ iterator.
+
+ \sa operator=()
+*/
+QRegularExpressionMatchIterator::QRegularExpressionMatchIterator(const QRegularExpressionMatchIterator &iterator)
+ : d(iterator.d)
+{
+}
+
+/*!
+ Assigns the iterator \a iterator to this object, and returns a reference to
+ the copy.
+*/
+QRegularExpressionMatchIterator &QRegularExpressionMatchIterator::operator=(const QRegularExpressionMatchIterator &iterator)
+{
+ d = iterator.d;
+ return *this;
+}
+
+/*!
+ \fn void QRegularExpressionMatchIterator::swap(QRegularExpressionMatchIterator &other)
+
+ Swaps the iterator \a other with this iterator object. This operation is
+ very fast and never fails.
+*/
+
+/*!
+ Returns true if the iterator object was obtained as a result from the
+ QRegularExpression::globalMatch() function invoked on a valid
+ QRegularExpression object; returns false if the QRegularExpression was
+ invalid.
+
+ \sa QRegularExpression::globalMatch(), QRegularExpression::isValid()
+*/
+bool QRegularExpressionMatchIterator::isValid() const
+{
+ return d->next.isValid();
+}
+
+/*!
+ Returns true if there is at least one match result ahead of the iterator;
+ otherwise it returns false.
+
+ \sa next()
+*/
+bool QRegularExpressionMatchIterator::hasNext() const
+{
+ return d->hasNext();
+}
+
+/*!
+ Returns the next match result without moving the iterator.
+
+ \note Calling this function when the iterator is at the end of the result
+ set leads to undefined results.
+*/
+QRegularExpressionMatch QRegularExpressionMatchIterator::peekNext() const
+{
+ if (!hasNext())
+ qWarning("QRegularExpressionMatchIterator::peekNext() called on an iterator already at end");
+
+ return d->next;
+}
+
+/*!
+ Returns the next match result and advances the iterator by one position.
+
+ \note Calling this function when the iterator is at the end of the result
+ set leads to undefined results.
+*/
+QRegularExpressionMatch QRegularExpressionMatchIterator::next()
+{
+ if (!hasNext()) {
+ qWarning("QRegularExpressionMatchIterator::next() called on an iterator already at end");
+ return d->next;
+ }
+
+ QRegularExpressionMatch current = d->next;
+ d->next = d->next.d.constData()->nextMatch();
+ return current;
+}
+
+/*!
+ Returns the QRegularExpression object whose globalMatch() function returned
+ this object.
+
+ \sa QRegularExpression::globalMatch(), matchType(), matchOptions()
+*/
+QRegularExpression QRegularExpressionMatchIterator::regularExpression() const
+{
+ return d->regularExpression;
+}
+
+/*!
+ Returns the match type that was used to get this
+ QRegularExpressionMatchIterator object, that is, the match type that was
+ passed to QRegularExpression::globalMatch().
+
+ \sa QRegularExpression::globalMatch(), regularExpression(), matchOptions()
+*/
+QRegularExpression::MatchType QRegularExpressionMatchIterator::matchType() const
+{
+ return d->matchType;
+}
+
+/*!
+ Returns the match options that were used to get this
+ QRegularExpressionMatchIterator object, that is, the match options that
+ were passed to QRegularExpression::globalMatch().
+
+ \sa QRegularExpression::globalMatch(), regularExpression(), matchType()
+*/
+QRegularExpression::MatchOptions QRegularExpressionMatchIterator::matchOptions() const
+{
+ return d->matchOptions;
+}
+
+#ifndef QT_NO_DATASTREAM
+/*!
+ \relates QRegularExpression
+
+ Writes the regular expression \a re to stream \a out.
+
+ \sa {Serializing Qt Data Types}
+*/
+QDataStream &operator<<(QDataStream &out, const QRegularExpression &re)
+{
+ out << re.pattern() << quint32(re.patternOptions());
+ return out;
+}
+
+/*!
+ \relates QRegularExpression
+
+ Reads a regular expression from stream \a in into \a re.
+
+ \sa {Serializing Qt Data Types}
+*/
+QDataStream &operator>>(QDataStream &in, QRegularExpression &re)
+{
+ QString pattern;
+ quint32 patternOptions;
+ in >> pattern >> patternOptions;
+ re.setPattern(pattern);
+ re.setPatternOptions(QRegularExpression::PatternOptions(patternOptions));
+ return in;
+}
+#endif
+
+#ifndef QT_NO_DEBUG_STREAM
+/*!
+ \relates QRegularExpression
+
+ Writes the regular expression \a re into the debug object \a debug for
+ debugging purposes.
+
+ \sa {Debugging Techniques}
+*/
+QDebug operator<<(QDebug debug, const QRegularExpression &re)
+{
+ debug.nospace() << "QRegularExpression(" << re.pattern() << ", " << re.patternOptions() << ")";
+ return debug.space();
+}
+
+/*!
+ \relates QRegularExpressionMatch
+
+ Writes the match object \a match into the debug object \a debug for
+ debugging purposes.
+
+ \sa {Debugging Techniques}
+*/
+QDebug operator<<(QDebug debug, const QRegularExpressionMatch &match)
+{
+ debug.nospace() << "QRegularExpressionMatch(";
+
+ if (!match.isValid()) {
+ debug << "Invalid)";
+ return debug.space();
+ }
+
+ debug << "Valid";
+
+ if (match.hasMatch()) {
+ debug << ", has match: ";
+ for (int i = 0; i <= match.lastCapturedIndex(); ++i) {
+ debug << i
+ << ":(" << match.capturedStart(i) << ", " << match.capturedEnd(i)
+ << ", " << match.captured(i) << ")";
+ if (i < match.lastCapturedIndex())
+ debug << ", ";
+ }
+ } else if (match.hasPartialMatch()) {
+ debug << ", has partial match: ("
+ << match.capturedStart(0) << ", "
+ << match.capturedEnd(0) << ", "
+ << match.captured(0) << ")";
+ } else {
+ debug << ", no match";
+ }
+
+ debug << ")";
+
+ return debug.space();
+}
+#endif
+
+QT_END_NAMESPACE
diff --git a/src/corelib/tools/qregularexpression.h b/src/corelib/tools/qregularexpression.h
new file mode 100644
index 0000000000..c9bcb1e7ba
--- /dev/null
+++ b/src/corelib/tools/qregularexpression.h
@@ -0,0 +1,245 @@
+/****************************************************************************
+**
+** Copyright (C) 2012 Giuseppe D'Angelo <dangelog@gmail.com>.
+** Contact: http://www.qt-project.org/
+**
+** This file is part of the QtCore module of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** GNU Lesser General Public License Usage
+** This file may be used under the terms of the GNU Lesser General Public
+** License version 2.1 as published by the Free Software Foundation and
+** appearing in the file LICENSE.LGPL included in the packaging of this
+** file. Please review the following information to ensure the GNU Lesser
+** General Public License version 2.1 requirements will be met:
+** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain additional
+** rights. These rights are described in the Nokia Qt LGPL Exception
+** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
+**
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU General
+** Public License version 3.0 as published by the Free Software Foundation
+** and appearing in the file LICENSE.GPL included in the packaging of this
+** file. Please review the following information to ensure the GNU General
+** Public License version 3.0 requirements will be met:
+** http://www.gnu.org/copyleft/gpl.html.
+**
+** Other Usage
+** Alternatively, this file may be used in accordance with the terms and
+** conditions contained in a signed written agreement between you and Nokia.
+**
+**
+**
+**
+**
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#ifndef QREGULAREXPRESSION_H
+#define QREGULAREXPRESSION_H
+
+#ifndef QT_NO_REGEXP
+
+#include <QtCore/qstring.h>
+#include <QtCore/qshareddata.h>
+#include <QtCore/qvariant.h>
+
+QT_BEGIN_HEADER
+
+QT_BEGIN_NAMESPACE
+
+class QRegularExpressionMatch;
+class QRegularExpressionMatchIterator;
+struct QRegularExpressionPrivate;
+
+class Q_CORE_EXPORT QRegularExpression
+{
+public:
+ enum PatternOption {
+ NoPatternOption = 0x0000,
+ CaseInsensitiveOption = 0x0001,
+ DotMatchesEverythingOption = 0x0002,
+ MultilineOption = 0x0004,
+ ExtendedPatternSyntaxOption = 0x0008,
+ InvertedGreedinessOption = 0x0010,
+ DontCaptureOption = 0x0020,
+ UseUnicodePropertiesOption = 0x0040
+ };
+ Q_DECLARE_FLAGS(PatternOptions, PatternOption)
+
+ PatternOptions patternOptions() const;
+ void setPatternOptions(PatternOptions options);
+
+ QRegularExpression();
+ explicit QRegularExpression(const QString &pattern, PatternOptions options = NoPatternOption);
+ QRegularExpression(const QRegularExpression &re);
+ ~QRegularExpression();
+ QRegularExpression &operator=(const QRegularExpression &re);
+
+#ifdef Q_COMPILER_RVALUE_REFS
+ inline QRegularExpression &operator=(QRegularExpression &&re)
+ { d.swap(re.d); return *this; }
+#endif
+
+ inline void swap(QRegularExpression &re) { d.swap(re.d); }
+
+ QString pattern() const;
+ void setPattern(const QString &pattern);
+
+ bool isValid() const;
+ int patternErrorOffset() const;
+ QString errorString() const;
+
+ enum MatchType {
+ NormalMatch = 0,
+ PartialPreferCompleteMatch,
+ PartialPreferFirstMatch
+ };
+
+ enum MatchOption {
+ NoMatchOption = 0x0000,
+ AnchoredMatchOption = 0x0001
+ };
+ Q_DECLARE_FLAGS(MatchOptions, MatchOption)
+
+ QRegularExpressionMatch match(const QString &subject,
+ int offset = 0,
+ MatchType matchType = NormalMatch,
+ MatchOptions matchOptions = NoMatchOption) const;
+
+ QRegularExpressionMatchIterator globalMatch(const QString &subject,
+ int offset = 0,
+ MatchType matchType = NormalMatch,
+ MatchOptions matchOptions = NoMatchOption) const;
+
+ static QString escape(const QString &str);
+
+ bool operator==(const QRegularExpression &re) const;
+ inline bool operator!=(const QRegularExpression &re) const { return !operator==(re); }
+
+private:
+ friend struct QRegularExpressionPrivate;
+ friend class QRegularExpressionMatch;
+ friend struct QRegularExpressionMatchPrivate;
+ friend class QRegularExpressionMatchIterator;
+
+ QRegularExpression(QRegularExpressionPrivate &dd);
+ QExplicitlySharedDataPointer<QRegularExpressionPrivate> d;
+};
+
+Q_DECLARE_OPERATORS_FOR_FLAGS(QRegularExpression::PatternOptions)
+Q_DECLARE_OPERATORS_FOR_FLAGS(QRegularExpression::MatchOptions)
+Q_DECLARE_TYPEINFO(QRegularExpression, Q_MOVABLE_TYPE);
+
+#ifndef QT_NO_DATASTREAM
+Q_CORE_EXPORT QDataStream &operator<<(QDataStream &out, const QRegularExpression &re);
+Q_CORE_EXPORT QDataStream &operator>>(QDataStream &in, QRegularExpression &re);
+#endif
+
+#ifndef QT_NO_DEBUG_STREAM
+Q_CORE_EXPORT QDebug operator<<(QDebug debug, const QRegularExpression &re);
+#endif
+
+struct QRegularExpressionMatchPrivate;
+
+class Q_CORE_EXPORT QRegularExpressionMatch
+{
+public:
+ ~QRegularExpressionMatch();
+ QRegularExpressionMatch(const QRegularExpressionMatch &match);
+ QRegularExpressionMatch &operator=(const QRegularExpressionMatch &match);
+
+#ifdef Q_COMPILER_RVALUE_REFS
+ inline QRegularExpressionMatch &operator=(QRegularExpressionMatch &&match)
+ { d.swap(match.d); return *this; }
+#endif
+ inline void swap(QRegularExpressionMatch &match) { d.swap(match.d); }
+
+ QRegularExpression regularExpression() const;
+ QRegularExpression::MatchType matchType() const;
+ QRegularExpression::MatchOptions matchOptions() const;
+
+ bool hasMatch() const;
+ bool hasPartialMatch() const;
+
+ bool isValid() const;
+
+ int lastCapturedIndex() const;
+
+ QString captured(int nth = 0) const;
+ QStringRef capturedRef(int nth = 0) const;
+
+ QString captured(const QString &name) const;
+ QStringRef capturedRef(const QString &name) const;
+
+ QStringList capturedTexts() const;
+
+ int capturedStart(int nth = 0) const;
+ int capturedLength(int nth = 0) const;
+ int capturedEnd(int nth = 0) const;
+
+ int capturedStart(const QString &name) const;
+ int capturedLength(const QString &name) const;
+ int capturedEnd(const QString &name) const;
+
+private:
+ friend class QRegularExpression;
+ friend struct QRegularExpressionMatchPrivate;
+ friend class QRegularExpressionMatchIterator;
+
+ QRegularExpressionMatch(QRegularExpressionMatchPrivate &dd);
+ QSharedDataPointer<QRegularExpressionMatchPrivate> d;
+};
+
+Q_DECLARE_TYPEINFO(QRegularExpressionMatch, Q_MOVABLE_TYPE);
+
+#ifndef QT_NO_DEBUG_STREAM
+Q_CORE_EXPORT QDebug operator<<(QDebug debug, const QRegularExpressionMatch &match);
+#endif
+
+struct QRegularExpressionMatchIteratorPrivate;
+
+class Q_CORE_EXPORT QRegularExpressionMatchIterator
+{
+public:
+ ~QRegularExpressionMatchIterator();
+ QRegularExpressionMatchIterator(const QRegularExpressionMatchIterator &iterator);
+ QRegularExpressionMatchIterator &operator=(const QRegularExpressionMatchIterator &iterator);
+#ifdef Q_COMPILER_RVALUE_REFS
+ inline QRegularExpressionMatchIterator &operator=(QRegularExpressionMatchIterator &&iterator)
+ { d.swap(iterator.d); return *this; }
+#endif
+ void swap(QRegularExpressionMatchIterator &iterator) { d.swap(iterator.d); }
+
+ bool isValid() const;
+
+ bool hasNext() const;
+ QRegularExpressionMatch next();
+ QRegularExpressionMatch peekNext() const;
+
+ QRegularExpression regularExpression() const;
+ QRegularExpression::MatchType matchType() const;
+ QRegularExpression::MatchOptions matchOptions() const;
+
+private:
+ friend class QRegularExpression;
+
+ QRegularExpressionMatchIterator(QRegularExpressionMatchIteratorPrivate &dd);
+ QSharedDataPointer<QRegularExpressionMatchIteratorPrivate> d;
+};
+
+Q_DECLARE_TYPEINFO(QRegularExpressionMatchIterator, Q_MOVABLE_TYPE);
+
+QT_END_NAMESPACE
+
+Q_DECLARE_METATYPE(QRegularExpression)
+
+QT_END_HEADER
+
+#endif // QT_NO_REGEXP
+
+#endif // QREGULAREXPRESSION_H
diff --git a/src/corelib/tools/tools.pri b/src/corelib/tools/tools.pri
index 3740975b12..250789a969 100644
--- a/src/corelib/tools/tools.pri
+++ b/src/corelib/tools/tools.pri
@@ -30,6 +30,7 @@ HEADERS += \
tools/qqueue.h \
tools/qrect.h \
tools/qregexp.h \
+ tools/qregularexpression.h \
tools/qringbuffer_p.h \
tools/qrefcount.h \
tools/qscopedpointer.h \
@@ -75,6 +76,7 @@ SOURCES += \
tools/qcontiguouscache.cpp \
tools/qrect.cpp \
tools/qregexp.cpp \
+ tools/qregularexpression.cpp \
tools/qrefcount.cpp \
tools/qshareddata.cpp \
tools/qsharedpointer.cpp \
@@ -105,6 +107,12 @@ contains(QT_CONFIG,icu) {
DEFINES += QT_USE_ICU
}
+pcre {
+ include($$PWD/../../3rdparty/pcre.pri)
+} else {
+ LIBS_PRIVATE += -lpcre16
+}
+
DEFINES += HB_EXPORT=Q_CORE_EXPORT
INCLUDEPATH += ../3rdparty/harfbuzz/src
HEADERS += ../3rdparty/harfbuzz/src/harfbuzz.h
diff --git a/tests/auto/corelib/tools/qregularexpression/.gitignore b/tests/auto/corelib/tools/qregularexpression/.gitignore
new file mode 100644
index 0000000000..4a224d26fc
--- /dev/null
+++ b/tests/auto/corelib/tools/qregularexpression/.gitignore
@@ -0,0 +1 @@
+tst_qregularexpression
diff --git a/tests/auto/corelib/tools/qregularexpression/qregularexpression.pro b/tests/auto/corelib/tools/qregularexpression/qregularexpression.pro
new file mode 100644
index 0000000000..bce6643be6
--- /dev/null
+++ b/tests/auto/corelib/tools/qregularexpression/qregularexpression.pro
@@ -0,0 +1,4 @@
+CONFIG += testcase parallel_test
+TARGET = tst_qregularexpression
+QT = core testlib
+SOURCES = tst_qregularexpression.cpp
diff --git a/tests/auto/corelib/tools/qregularexpression/tst_qregularexpression.cpp b/tests/auto/corelib/tools/qregularexpression/tst_qregularexpression.cpp
new file mode 100644
index 0000000000..93cb6823cd
--- /dev/null
+++ b/tests/auto/corelib/tools/qregularexpression/tst_qregularexpression.cpp
@@ -0,0 +1,1150 @@
+/****************************************************************************
+**
+** Copyright (C) 2012 Giuseppe D'Angelo <dangelog@gmail.com>.
+** Contact: http://www.qt-project.org/
+**
+** This file is part of the test suite of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** GNU Lesser General Public License Usage
+** This file may be used under the terms of the GNU Lesser General Public
+** License version 2.1 as published by the Free Software Foundation and
+** appearing in the file LICENSE.LGPL included in the packaging of this
+** file. Please review the following information to ensure the GNU Lesser
+** General Public License version 2.1 requirements will be met:
+** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain additional
+** rights. These rights are described in the Nokia Qt LGPL Exception
+** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
+**
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU General
+** Public License version 3.0 as published by the Free Software Foundation
+** and appearing in the file LICENSE.GPL included in the packaging of this
+** file. Please review the following information to ensure the GNU General
+** Public License version 3.0 requirements will be met:
+** http://www.gnu.org/copyleft/gpl.html.
+**
+** Other Usage
+** Alternatively, this file may be used in accordance with the terms and
+** conditions contained in a signed written agreement between you and Nokia.
+**
+**
+**
+**
+**
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#include <QtTest/QtTest>
+#include <qregularexpression.h>
+#include <qstring.h>
+#include <qlist.h>
+#include <qstringlist.h>
+#include <qhash.h>
+
+Q_DECLARE_METATYPE(QRegularExpression::PatternOptions)
+Q_DECLARE_METATYPE(QRegularExpression::MatchType)
+Q_DECLARE_METATYPE(QRegularExpression::MatchOptions)
+
+class tst_QRegularExpression : public QObject
+{
+ Q_OBJECT
+
+private slots:
+ void gettersSetters_data();
+ void gettersSetters();
+ void escape_data();
+ void escape();
+ void validity_data();
+ void validity();
+ void patternOptions_data();
+ void patternOptions();
+ void normalMatch_data();
+ void normalMatch();
+ void partialMatch_data();
+ void partialMatch();
+ void globalMatch_data();
+ void globalMatch();
+ void serialize_data();
+ void serialize();
+
+private:
+ void provideRegularExpressions();
+};
+
+struct Match
+{
+ Match()
+ {
+ clear();
+ }
+
+ void clear()
+ {
+ isValid = false;
+ hasMatch = false;
+ hasPartialMatch = false;
+ captured.clear();
+ namedCaptured.clear();
+ }
+
+ bool isValid;
+ bool hasMatch;
+ bool hasPartialMatch;
+ QStringList captured;
+ QHash<QString, QString> namedCaptured;
+};
+
+Q_DECLARE_METATYPE(Match)
+Q_DECLARE_METATYPE(QList<Match>)
+
+bool operator==(const QRegularExpressionMatch &rem, const Match &m)
+{
+ if (rem.isValid() != m.isValid)
+ return false;
+ if (!rem.isValid())
+ return true;
+ if ((rem.hasMatch() != m.hasMatch) || (rem.hasPartialMatch() != m.hasPartialMatch))
+ return false;
+ if (rem.hasMatch() || rem.hasPartialMatch()) {
+ if (rem.lastCapturedIndex() != (m.captured.size() - 1))
+ return false;
+ for (int i = 0; i <= rem.lastCapturedIndex(); ++i) {
+ QString remCaptured = rem.captured(i);
+ QString mCaptured = m.captured.at(i);
+ if (remCaptured != mCaptured
+ || remCaptured.isNull() != mCaptured.isNull()
+ || remCaptured.isEmpty() != mCaptured.isEmpty()) {
+ return false;
+ }
+ }
+
+ Q_FOREACH (const QString &name, m.namedCaptured.keys()) {
+ if (rem.captured(name) != m.namedCaptured.value(name))
+ return false;
+ }
+ }
+
+ return true;
+}
+
+bool operator==(const Match &m, const QRegularExpressionMatch &rem)
+{
+ return operator==(rem, m);
+}
+
+bool operator!=(const QRegularExpressionMatch &rem, const Match &m)
+{
+ return !operator==(rem, m);
+}
+
+bool operator!=(const Match &m, const QRegularExpressionMatch &rem)
+{
+ return !operator==(m, rem);
+}
+
+
+bool operator==(const QRegularExpressionMatchIterator &iterator, const QList<Match> &expectedMatchList)
+{
+ QRegularExpressionMatchIterator i = iterator;
+ if (i.isValid() != (!expectedMatchList.isEmpty()))
+ return false;
+
+ foreach (const Match &expectedMatch, expectedMatchList)
+ {
+ if (!i.hasNext())
+ return false;
+
+ QRegularExpressionMatch match = i.next();
+ if (match != expectedMatch)
+ return false;
+ }
+
+ if (i.hasNext())
+ return false;
+
+ return true;
+}
+
+bool operator==(const QList<Match> &expectedMatchList, const QRegularExpressionMatchIterator &iterator)
+{
+ return operator==(iterator, expectedMatchList);
+}
+
+bool operator!=(const QRegularExpressionMatchIterator &iterator, const QList<Match> &expectedMatchList)
+{
+ return !operator==(iterator, expectedMatchList);
+}
+
+bool operator!=(const QList<Match> &expectedMatchList, const QRegularExpressionMatchIterator &iterator)
+{
+ return !operator==(expectedMatchList, iterator);
+}
+
+void consistencyCheck(const QRegularExpressionMatch &match)
+{
+ QVERIFY(match.isValid() == match.regularExpression().isValid());
+
+ if (match.isValid()) {
+ QVERIFY(!(match.hasMatch() && match.hasPartialMatch()));
+
+ if (match.hasMatch() || match.hasPartialMatch()) {
+ QVERIFY(match.lastCapturedIndex() >= 0);
+ if (match.hasPartialMatch())
+ QVERIFY(match.lastCapturedIndex() == 0);
+
+ for (int i = 0; i <= match.lastCapturedIndex(); ++i) {
+ int startPos = match.capturedStart(i);
+ int endPos = match.capturedEnd(i);
+ int length = match.capturedLength(i);
+ QString captured = match.captured(i);
+ QStringRef capturedRef = match.capturedRef(i);
+
+ if (!captured.isNull()) {
+ QVERIFY(startPos >= 0);
+ QVERIFY(endPos >= 0);
+ QVERIFY(length >= 0);
+ QVERIFY(endPos >= startPos);
+ QVERIFY((endPos - startPos) == length);
+ QVERIFY(captured == capturedRef);
+ } else {
+ QVERIFY(startPos == -1);
+ QVERIFY(endPos == -1);
+ QVERIFY((endPos - startPos) == length);
+ QVERIFY(capturedRef.isNull());
+ }
+ }
+ }
+ } else {
+ QVERIFY(!match.hasMatch());
+ QVERIFY(!match.hasPartialMatch());
+ QVERIFY(match.captured(0).isNull());
+ QVERIFY(match.capturedStart(0) == -1);
+ QVERIFY(match.capturedEnd(0) == -1);
+ QVERIFY(match.capturedLength(0) == 0);
+ }
+}
+
+void consistencyCheck(const QRegularExpressionMatchIterator &iterator)
+{
+ QRegularExpressionMatchIterator i(iterator); // make a copy, we modify it
+ if (i.isValid()) {
+ while (i.hasNext()) {
+ QRegularExpressionMatch peeked = i.peekNext();
+ QRegularExpressionMatch match = i.next();
+ consistencyCheck(peeked);
+ consistencyCheck(match);
+ QVERIFY(match.isValid());
+ QVERIFY(match.hasMatch() || match.hasPartialMatch());
+ QCOMPARE(i.regularExpression(), match.regularExpression());
+ QCOMPARE(i.matchOptions(), match.matchOptions());
+ QCOMPARE(i.matchType(), match.matchType());
+
+ QVERIFY(peeked.isValid() == match.isValid());
+ QVERIFY(peeked.hasMatch() == match.hasMatch());
+ QVERIFY(peeked.hasPartialMatch() == match.hasPartialMatch());
+ QVERIFY(peeked.lastCapturedIndex() == match.lastCapturedIndex());
+ for (int i = 0; i <= peeked.lastCapturedIndex(); ++i) {
+ QVERIFY(peeked.captured(i) == match.captured(i));
+ QVERIFY(peeked.capturedStart(i) == match.capturedStart(i));
+ QVERIFY(peeked.capturedEnd(i) == match.capturedEnd(i));
+ }
+ }
+ } else {
+ QVERIFY(!i.hasNext());
+ QRegularExpressionMatch peeked = i.peekNext();
+ QRegularExpressionMatch match = i.next();
+ consistencyCheck(peeked);
+ consistencyCheck(match);
+ QVERIFY(!match.isValid());
+ QVERIFY(!peeked.isValid());
+ }
+
+}
+
+void tst_QRegularExpression::provideRegularExpressions()
+{
+ QTest::addColumn<QString>("pattern");
+ QTest::addColumn<QRegularExpression::PatternOptions>("patternOptions");
+
+ QTest::newRow("emptynull01") << QString()
+ << QRegularExpression::PatternOptions(0);
+ QTest::newRow("emptynull02") << QString()
+ << QRegularExpression::PatternOptions(QRegularExpression::CaseInsensitiveOption
+ | QRegularExpression::DotMatchesEverythingOption
+ | QRegularExpression::MultilineOption);
+ QTest::newRow("emptynull03") << ""
+ << QRegularExpression::PatternOptions(0);
+ QTest::newRow("emptynull04") << ""
+ << QRegularExpression::PatternOptions(QRegularExpression::CaseInsensitiveOption
+ | QRegularExpression::DotMatchesEverythingOption
+ | QRegularExpression::MultilineOption);
+
+ QTest::newRow("regexp01") << "a pattern"
+ << QRegularExpression::PatternOptions(0);
+ QTest::newRow("regexp02") << "^a (.*) more complicated(?<P>pattern)$"
+ << QRegularExpression::PatternOptions(0);
+ QTest::newRow("regexp03") << "(?:a) pAttErN"
+ << QRegularExpression::PatternOptions(QRegularExpression::CaseInsensitiveOption);
+ QTest::newRow("regexp04") << "a\nmultiline\npattern"
+ << QRegularExpression::PatternOptions(QRegularExpression::MultilineOption);
+ QTest::newRow("regexp05") << "an extended # IGNOREME\npattern"
+ << QRegularExpression::PatternOptions(QRegularExpression::ExtendedPatternSyntaxOption);
+ QTest::newRow("regexp06") << "a [sS]ingleline .* match"
+ << QRegularExpression::PatternOptions(QRegularExpression::DotMatchesEverythingOption);
+ QTest::newRow("regexp07") << "multiple.*options"
+ << QRegularExpression::PatternOptions(QRegularExpression::CaseInsensitiveOption
+ | QRegularExpression::DotMatchesEverythingOption
+ | QRegularExpression::MultilineOption
+ | QRegularExpression::DontCaptureOption
+ | QRegularExpression::InvertedGreedinessOption);
+
+ QTest::newRow("unicode01") << QString::fromUtf8("^s[ome] latin-1 \xc3\x80\xc3\x88\xc3\x8c\xc3\x92\xc3\x99 chars$")
+ << QRegularExpression::PatternOptions(0);
+ QTest::newRow("unicode02") << QString::fromUtf8("^s[ome] latin-1 \xc3\x80\xc3\x88\xc3\x8c\xc3\x92\xc3\x99 chars$")
+ << QRegularExpression::PatternOptions(QRegularExpression::CaseInsensitiveOption
+ | QRegularExpression::DotMatchesEverythingOption
+ | QRegularExpression::InvertedGreedinessOption);
+ QTest::newRow("unicode03") << QString::fromUtf8("Unicode \xf0\x9d\x85\x9d \xf0\x9d\x85\x9e\xf0\x9d\x85\x9f")
+ << QRegularExpression::PatternOptions(0);
+ QTest::newRow("unicode04") << QString::fromUtf8("Unicode \xf0\x9d\x85\x9d \xf0\x9d\x85\x9e\xf0\x9d\x85\x9f")
+ << QRegularExpression::PatternOptions(QRegularExpression::CaseInsensitiveOption
+ | QRegularExpression::DotMatchesEverythingOption
+ | QRegularExpression::InvertedGreedinessOption);
+}
+
+void tst_QRegularExpression::gettersSetters_data()
+{
+ provideRegularExpressions();
+}
+
+void tst_QRegularExpression::gettersSetters()
+{
+ QFETCH(QString, pattern);
+ QFETCH(QRegularExpression::PatternOptions, patternOptions);
+ {
+ QRegularExpression re;
+ re.setPattern(pattern);
+ QCOMPARE(re.pattern(), pattern);
+ QCOMPARE(re.patternOptions(), QRegularExpression::NoPatternOption);
+ }
+ {
+ QRegularExpression re;
+ re.setPatternOptions(patternOptions);
+ QCOMPARE(re.pattern(), QString());
+ QCOMPARE(re.patternOptions(), patternOptions);
+ }
+ {
+ QRegularExpression re(pattern);
+ QCOMPARE(re.pattern(), pattern);
+ QCOMPARE(re.patternOptions(), QRegularExpression::NoPatternOption);
+ }
+ {
+ QRegularExpression re(pattern, patternOptions);
+ QCOMPARE(re.pattern(), pattern);
+ QCOMPARE(re.patternOptions(), patternOptions);
+ }
+ {
+ QRegularExpression re(pattern, patternOptions);
+ QRegularExpression re2(pattern, patternOptions);
+ QVERIFY(re == re2);
+ }
+}
+
+void tst_QRegularExpression::escape_data()
+{
+ QTest::addColumn<QString>("string");
+ QTest::addColumn<QString>("escaped");
+ QTest::newRow("escape01") << "a normal pattern"
+ << "a\\ normal\\ pattern";
+
+ QTest::newRow("escape02") << "abcdefghijklmnopqrstuvzABCDEFGHIJKLMNOPQRSTUVZ1234567890_"
+ << "abcdefghijklmnopqrstuvzABCDEFGHIJKLMNOPQRSTUVZ1234567890_";
+
+ QTest::newRow("escape03") << "^\\ba\\b.*(?<NAME>reg|exp)$"
+ << "\\^\\\\ba\\\\b\\.\\*\\(\\?\\<NAME\\>reg\\|exp\\)\\$";
+
+ QString nulString("abcXabcXXabc");
+ nulString[3] = nulString[7] = nulString[8] = QChar(0, 0);
+ QTest::newRow("NUL") << nulString
+ << "abc\\0abc\\0\\0abc";
+
+ QTest::newRow("unicode01") << QString::fromUtf8("^s[ome] latin-1 \xc3\x80\xc3\x88\xc3\x8c\xc3\x92\xc3\x99 chars$")
+ << QString::fromUtf8("\\^s\\[ome\\]\\ latin\\-1\\ \\\xc3\x80\\\xc3\x88\\\xc3\x8c\\\xc3\x92\\\xc3\x99\\ chars\\$");
+ QTest::newRow("unicode02") << QString::fromUtf8("Unicode \xf0\x9d\x85\x9d \xf0\x9d\x85\x9e\xf0\x9d\x85\x9f")
+ << QString::fromUtf8("Unicode\\ \\\xf0\x9d\x85\x9d\\ \\\xf0\x9d\x85\x9e\\\xf0\x9d\x85\x9f");
+
+ QString unicodeAndNulString = QString::fromUtf8("^\xc3\x80\xc3\x88\xc3\x8cN\xc3\x92NN\xc3\x99 chars$");
+ unicodeAndNulString[4] = unicodeAndNulString[6] = unicodeAndNulString[7] = QChar(0, 0);
+ QTest::newRow("unicode03") << unicodeAndNulString
+ << QString::fromUtf8("\\^\\\xc3\x80\\\xc3\x88\\\xc3\x8c\\0\\\xc3\x92\\0\\0\\\xc3\x99\\ chars\\$");
+}
+
+void tst_QRegularExpression::escape()
+{
+ QFETCH(QString, string);
+ QFETCH(QString, escaped);
+ QCOMPARE(QRegularExpression::escape(string), escaped);
+ QRegularExpression re(escaped);
+ QCOMPARE(re.isValid(), true);
+}
+
+void tst_QRegularExpression::validity_data()
+{
+ QTest::addColumn<QString>("pattern");
+ QTest::addColumn<bool>("validity");
+
+ QTest::newRow("valid01") << "a pattern" << true;
+ QTest::newRow("valid02") << "(a|pattern)" << true;
+ QTest::newRow("valid03") << "a [pP]attern" << true;
+ QTest::newRow("valid04") << "^(?<article>a).*(?<noun>pattern)$" << true;
+ QTest::newRow("valid05") << "a \\P{Ll}attern" << true;
+
+ QTest::newRow("invalid01") << "a pattern\\" << false;
+ QTest::newRow("invalid02") << "(a|pattern" << false;
+ QTest::newRow("invalid03") << "a \\P{BLAH}attern" << false;
+
+ QString pattern;
+ // 0xD800 (high surrogate) not followed by a low surrogate
+ pattern = "abcdef";
+ pattern[3] = QChar(0x00, 0xD8);
+ QTest::newRow("invalidUnicode01") << pattern << false;
+}
+
+void tst_QRegularExpression::validity()
+{
+ QFETCH(QString, pattern);
+ QFETCH(bool, validity);
+ QRegularExpression re(pattern);
+ QCOMPARE(re.isValid(), validity);
+ if (!validity)
+ QTest::ignoreMessage(QtWarningMsg, "QRegularExpressionPrivate::doMatch(): called on an invalid QRegularExpression object");
+ QRegularExpressionMatch match = re.match("a pattern");
+ QCOMPARE(match.isValid(), validity);
+ consistencyCheck(match);
+
+ if (!validity)
+ QTest::ignoreMessage(QtWarningMsg, "QRegularExpressionPrivate::doMatch(): called on an invalid QRegularExpression object");
+ QRegularExpressionMatchIterator iterator = re.globalMatch("a pattern");
+ QCOMPARE(iterator.isValid(), validity);
+}
+
+void tst_QRegularExpression::patternOptions_data()
+{
+ QTest::addColumn<QRegularExpression>("regexp");
+ QTest::addColumn<QString>("subject");
+ QTest::addColumn<Match>("match");
+
+ // none of these would successfully match if the respective
+ // pattern option is not set
+
+ Match m;
+
+ m.clear();
+ m.isValid = true; m.hasMatch = true;
+ m.captured << QString::fromUtf8("AbC\xc3\xa0");
+ QTest::newRow("/i") << QRegularExpression(QString::fromUtf8("abc\xc3\x80"), QRegularExpression::CaseInsensitiveOption)
+ << QString::fromUtf8("AbC\xc3\xa0")
+ << m;
+
+ m.clear();
+ m.isValid = true; m.hasMatch = true;
+ m.captured << "abc123\n678def";
+ QTest::newRow("/s") << QRegularExpression("\\Aabc.*def\\z", QRegularExpression::DotMatchesEverythingOption)
+ << "abc123\n678def"
+ << m;
+
+ m.clear();
+ m.isValid = true; m.hasMatch = true;
+ m.captured << "jumped over";
+ QTest::newRow("/m") << QRegularExpression("^\\w+ \\w+$", QRegularExpression::MultilineOption)
+ << "the quick fox\njumped over\nthe lazy\ndog"
+ << m;
+
+ m.clear();
+ m.isValid = true; m.hasMatch = true;
+ m.captured << "abc 123456";
+ QTest::newRow("/x") << QRegularExpression("\\w+ # a word\n"
+ "\\ # a space\n"
+ "\\w+ # another word",
+ QRegularExpression::ExtendedPatternSyntaxOption)
+ << "abc 123456 def"
+ << m;
+
+ m.clear();
+ m.isValid = true; m.hasMatch = true;
+ m.captured << "the quick fox" << "the" << "quick fox";
+ QTest::newRow("/U") << QRegularExpression("(.+) (.+?)", QRegularExpression::InvertedGreedinessOption)
+ << "the quick fox"
+ << m;
+
+ m.clear();
+ m.isValid = true; m.hasMatch = true;
+ m.captured << "the quick fox" << "quick";
+ m.namedCaptured["named"] = "quick";
+ QTest::newRow("no cap") << QRegularExpression("(\\w+) (?<named>\\w+) (\\w+)", QRegularExpression::DontCaptureOption)
+ << "the quick fox"
+ << m;
+
+ m.clear();
+ m.isValid = true; m.hasMatch = true;
+ m.captured << QString::fromUtf8("abc\xc3\x80\xc3\xa0 12\xdb\xb1\xdb\xb2\xf0\x9d\x9f\x98")
+ << QString::fromUtf8("abc\xc3\x80\xc3\xa0")
+ << QString::fromUtf8("12\xdb\xb1\xdb\xb2\xf0\x9d\x9f\x98");
+ QTest::newRow("unicode properties") << QRegularExpression("(\\w+) (\\d+)", QRegularExpression::UseUnicodePropertiesOption)
+ << QString::fromUtf8("abc\xc3\x80\xc3\xa0 12\xdb\xb1\xdb\xb2\xf0\x9d\x9f\x98")
+ << m;
+}
+
+void tst_QRegularExpression::patternOptions()
+{
+ QFETCH(QRegularExpression, regexp);
+ QFETCH(QString, subject);
+ QFETCH(Match, match);
+
+ QRegularExpressionMatch m = regexp.match(subject);
+ consistencyCheck(m);
+ QVERIFY(m == match);
+}
+
+void tst_QRegularExpression::normalMatch_data()
+{
+ QTest::addColumn<QRegularExpression>("regexp");
+ QTest::addColumn<QString>("subject");
+ QTest::addColumn<int>("offset");
+ QTest::addColumn<QRegularExpression::MatchOptions>("matchOptions");
+ QTest::addColumn<Match>("match");
+
+ Match m;
+
+ m.clear();
+ m.isValid = true; m.hasMatch = true;
+ m.captured << "string" << "string";
+ QTest::newRow("match01") << QRegularExpression("(\\bstring\\b)")
+ << "a string"
+ << 0
+ << QRegularExpression::MatchOptions(QRegularExpression::NoMatchOption)
+ << m;
+
+ m.clear();
+ m.isValid = true; m.hasMatch = true;
+ m.captured << "a string" << "a" << "string";
+ QTest::newRow("match02") << QRegularExpression("(\\w+) (\\w+)")
+ << "a string"
+ << 0
+ << QRegularExpression::MatchOptions(QRegularExpression::NoMatchOption)
+ << m;
+
+ m.clear();
+ m.isValid = true; m.hasMatch = true;
+ m.captured << "a string" << "a" << "string";
+ m.namedCaptured["article"] = "a";
+ m.namedCaptured["noun"] = "string";
+ QTest::newRow("match03") << QRegularExpression("(?<article>\\w+) (?<noun>\\w+)")
+ << "a string"
+ << 0
+ << QRegularExpression::MatchOptions(QRegularExpression::NoMatchOption)
+ << m;
+
+ m.clear();
+ m.isValid = true; m.hasMatch = true;
+ m.captured << " string" << QString() << "string";
+ QTest::newRow("match04") << QRegularExpression("(\\w+)? (\\w+)")
+ << " string"
+ << 0
+ << QRegularExpression::MatchOptions(QRegularExpression::NoMatchOption)
+ << m;
+
+ m.clear();
+ m.isValid = true; m.hasMatch = true;
+ m.captured << " string" << QString("") << "string";
+ QTest::newRow("match05") << QRegularExpression("(\\w*) (\\w+)")
+ << " string"
+ << 0
+ << QRegularExpression::MatchOptions(QRegularExpression::NoMatchOption)
+ << m;
+
+ m.clear();
+ m.isValid = true; m.hasMatch = true;
+ m.captured << "c123def" << "c12" << "3" << "def";
+ QTest::newRow("match06") << QRegularExpression("(\\w*)(\\d+)(\\w*)")
+ << "abc123def"
+ << 2
+ << QRegularExpression::MatchOptions(QRegularExpression::NoMatchOption)
+ << m;
+
+ m.clear();
+ m.isValid = true; m.hasMatch = true;
+ m.captured << QString("");
+ QTest::newRow("match07") << QRegularExpression("\\w*")
+ << "abc123def"
+ << 9
+ << QRegularExpression::MatchOptions(QRegularExpression::NoMatchOption)
+ << m;
+
+ m.clear();
+ m.isValid = true; m.hasMatch = true;
+ m.captured << QString("a string") << QString("a string") << QString("");
+ QTest::newRow("match08") << QRegularExpression("(.*)(.*)")
+ << "a string"
+ << 0
+ << QRegularExpression::MatchOptions(QRegularExpression::NoMatchOption)
+ << m;
+
+ m.clear();
+ m.isValid = true; m.hasMatch = true;
+ m.captured << QString("a string") << QString("") << QString("a string");
+ QTest::newRow("match09") << QRegularExpression("(.*?)(.*)")
+ << "a string"
+ << 0
+ << QRegularExpression::MatchOptions(QRegularExpression::NoMatchOption)
+ << m;
+
+ // ***
+
+ m.clear();
+ m.isValid = true;
+ QTest::newRow("nomatch01") << QRegularExpression("\\d+")
+ << "a string"
+ << 0
+ << QRegularExpression::MatchOptions(QRegularExpression::NoMatchOption)
+ << m;
+
+ m.clear();
+ m.isValid = true;
+ QTest::newRow("nomatch02") << QRegularExpression("(\\w+) (\\w+)")
+ << "a string"
+ << 1
+ << QRegularExpression::MatchOptions(QRegularExpression::NoMatchOption)
+ << m;
+
+ m.clear();
+ m.isValid = true;
+ QTest::newRow("nomatch03") << QRegularExpression("\\w+")
+ << "abc123def"
+ << 9
+ << QRegularExpression::MatchOptions(QRegularExpression::NoMatchOption)
+ << m;
+
+ // ***
+
+ m.clear();
+ m.isValid = true;
+ QTest::newRow("anchoredmatch01") << QRegularExpression("\\d+")
+ << "abc123def"
+ << 0
+ << QRegularExpression::MatchOptions(QRegularExpression::AnchoredMatchOption)
+ << m;
+}
+
+
+void tst_QRegularExpression::normalMatch()
+{
+ QFETCH(QRegularExpression, regexp);
+ QFETCH(QString, subject);
+ QFETCH(int, offset);
+ QFETCH(QRegularExpression::MatchOptions, matchOptions);
+ QFETCH(Match, match);
+
+ QRegularExpressionMatch m = regexp.match(subject, offset, QRegularExpression::NormalMatch, matchOptions);
+ consistencyCheck(m);
+ QVERIFY(m == match);
+}
+
+
+void tst_QRegularExpression::partialMatch_data()
+{
+ QTest::addColumn<QRegularExpression>("regexp");
+ QTest::addColumn<QString>("subject");
+ QTest::addColumn<int>("offset");
+ QTest::addColumn<QRegularExpression::MatchType>("matchType");
+ QTest::addColumn<QRegularExpression::MatchOptions>("matchOptions");
+ QTest::addColumn<Match>("match");
+
+ Match m;
+
+ m.clear();
+ m.isValid = true; m.hasPartialMatch = true;
+ m.captured << "str";
+ QTest::newRow("softmatch01") << QRegularExpression("string")
+ << "a str"
+ << 0
+ << QRegularExpression::PartialPreferCompleteMatch
+ << QRegularExpression::MatchOptions(QRegularExpression::NoMatchOption)
+ << m;
+
+ m.clear();
+ m.isValid = true; m.hasPartialMatch = true;
+ m.captured << " str";
+ QTest::newRow("softmatch02") << QRegularExpression("\\bstring\\b")
+ << "a str"
+ << 0
+ << QRegularExpression::PartialPreferCompleteMatch
+ << QRegularExpression::MatchOptions(QRegularExpression::NoMatchOption)
+ << m;
+
+ m.clear();
+ m.isValid = true; m.hasPartialMatch = true;
+ m.captured << " str";
+ QTest::newRow("softmatch03") << QRegularExpression("(\\bstring\\b)")
+ << "a str"
+ << 0
+ << QRegularExpression::PartialPreferCompleteMatch
+ << QRegularExpression::MatchOptions(QRegularExpression::NoMatchOption)
+ << m;
+
+ m.clear();
+ m.isValid = true; m.hasPartialMatch = true;
+ m.captured << "8 Dec 19";
+ QTest::newRow("softmatch04") << QRegularExpression("^(\\d{1,2}) (\\w{3}) (\\d{4})$")
+ << "8 Dec 19"
+ << 0
+ << QRegularExpression::PartialPreferCompleteMatch
+ << QRegularExpression::MatchOptions(QRegularExpression::NoMatchOption)
+ << m;
+
+ m.clear();
+ m.isValid = true; m.hasMatch = true;
+ m.captured << "8 Dec 1985" << "8" << "Dec" << "1985";
+ QTest::newRow("softmatch05") << QRegularExpression("^(\\d{1,2}) (\\w{3}) (\\d{4})$")
+ << "8 Dec 1985"
+ << 0
+ << QRegularExpression::PartialPreferCompleteMatch
+ << QRegularExpression::MatchOptions(QRegularExpression::NoMatchOption)
+ << m;
+
+ m.clear();
+ m.isValid = true; m.hasMatch = true;
+ m.captured << "def";
+ QTest::newRow("softmatch06") << QRegularExpression("abc\\w+X|def")
+ << "abcdef"
+ << 0
+ << QRegularExpression::PartialPreferCompleteMatch
+ << QRegularExpression::MatchOptions(QRegularExpression::NoMatchOption)
+ << m;
+
+ m.clear();
+ m.isValid = true; m.hasPartialMatch = true;
+ m.captured << "abcdef";
+ QTest::newRow("softmatch07") << QRegularExpression("abc\\w+X|defY")
+ << "abcdef"
+ << 0
+ << QRegularExpression::PartialPreferCompleteMatch
+ << QRegularExpression::MatchOptions(QRegularExpression::NoMatchOption)
+ << m;
+
+ m.clear();
+ m.isValid = true; m.hasPartialMatch = true;
+ m.captured << "def";
+ QTest::newRow("softmatch08") << QRegularExpression("abc\\w+X|defY")
+ << "abcdef"
+ << 1
+ << QRegularExpression::PartialPreferCompleteMatch
+ << QRegularExpression::MatchOptions(QRegularExpression::NoMatchOption)
+ << m;
+
+ // ***
+
+ m.clear();
+ m.isValid = true; m.hasPartialMatch = true;
+ m.captured << "str";
+ QTest::newRow("hardmatch01") << QRegularExpression("string")
+ << "a str"
+ << 0
+ << QRegularExpression::PartialPreferFirstMatch
+ << QRegularExpression::MatchOptions(QRegularExpression::NoMatchOption)
+ << m;
+
+ m.clear();
+ m.isValid = true; m.hasPartialMatch = true;
+ m.captured << " str";
+ QTest::newRow("hardmatch02") << QRegularExpression("\\bstring\\b")
+ << "a str"
+ << 0
+ << QRegularExpression::PartialPreferFirstMatch
+ << QRegularExpression::MatchOptions(QRegularExpression::NoMatchOption)
+ << m;
+
+ m.clear();
+ m.isValid = true; m.hasPartialMatch = true;
+ m.captured << " str";
+ QTest::newRow("hardmatch03") << QRegularExpression("(\\bstring\\b)")
+ << "a str"
+ << 0
+ << QRegularExpression::PartialPreferFirstMatch
+ << QRegularExpression::MatchOptions(QRegularExpression::NoMatchOption)
+ << m;
+
+ m.clear();
+ m.isValid = true; m.hasPartialMatch = true;
+ m.captured << "8 Dec 19";
+ QTest::newRow("hardmatch04") << QRegularExpression("^(\\d{1,2}) (\\w{3}) (\\d{4})$")
+ << "8 Dec 19"
+ << 0
+ << QRegularExpression::PartialPreferFirstMatch
+ << QRegularExpression::MatchOptions(QRegularExpression::NoMatchOption)
+ << m;
+
+ m.clear();
+ m.isValid = true; m.hasPartialMatch = true;
+ m.captured << "8 Dec 1985";
+ QTest::newRow("hardmatch05") << QRegularExpression("^(\\d{1,2}) (\\w{3}) (\\d{4})$")
+ << "8 Dec 1985"
+ << 0
+ << QRegularExpression::PartialPreferFirstMatch
+ << QRegularExpression::MatchOptions(QRegularExpression::NoMatchOption)
+ << m;
+
+ m.clear();
+ m.isValid = true; m.hasPartialMatch = true;
+ m.captured << "abcdef";
+ QTest::newRow("hardmatch06") << QRegularExpression("abc\\w+X|def")
+ << "abcdef"
+ << 0
+ << QRegularExpression::PartialPreferFirstMatch
+ << QRegularExpression::MatchOptions(QRegularExpression::NoMatchOption)
+ << m;
+
+ m.clear();
+ m.isValid = true; m.hasPartialMatch = true;
+ m.captured << "abcdef";
+ QTest::newRow("hardmatch07") << QRegularExpression("abc\\w+X|defY")
+ << "abcdef"
+ << 0
+ << QRegularExpression::PartialPreferFirstMatch
+ << QRegularExpression::MatchOptions(QRegularExpression::NoMatchOption)
+ << m;
+
+ m.clear();
+ m.isValid = true; m.hasPartialMatch = true;
+ m.captured << "def";
+ QTest::newRow("hardmatch08") << QRegularExpression("abc\\w+X|defY")
+ << "abcdef"
+ << 1
+ << QRegularExpression::PartialPreferFirstMatch
+ << QRegularExpression::MatchOptions(QRegularExpression::NoMatchOption)
+ << m;
+
+ m.clear();
+ m.isValid = true; m.hasPartialMatch = true;
+ m.captured << "ab";
+ QTest::newRow("hardmatch09") << QRegularExpression("abc|ab")
+ << "ab"
+ << 0
+ << QRegularExpression::PartialPreferFirstMatch
+ << QRegularExpression::MatchOptions(QRegularExpression::NoMatchOption)
+ << m;
+
+ m.clear();
+ m.isValid = true; m.hasPartialMatch = true;
+ m.captured << "abc";
+ QTest::newRow("hardmatch10") << QRegularExpression("abc(def)?")
+ << "abc"
+ << 0
+ << QRegularExpression::PartialPreferFirstMatch
+ << QRegularExpression::MatchOptions(QRegularExpression::NoMatchOption)
+ << m;
+
+ m.clear();
+ m.isValid = true; m.hasPartialMatch = true;
+ m.captured << "abc";
+ QTest::newRow("hardmatch11") << QRegularExpression("(abc)*")
+ << "abc"
+ << 0
+ << QRegularExpression::PartialPreferFirstMatch
+ << QRegularExpression::MatchOptions(QRegularExpression::NoMatchOption)
+ << m;
+
+
+ // ***
+
+ m.clear();
+ m.isValid = true;
+ QTest::newRow("nomatch01") << QRegularExpression("abc\\w+X|defY")
+ << "123456"
+ << 0
+ << QRegularExpression::PartialPreferCompleteMatch
+ << QRegularExpression::MatchOptions(QRegularExpression::NoMatchOption)
+ << m;
+
+ m.clear();
+ m.isValid = true;
+ QTest::newRow("nomatch02") << QRegularExpression("abc\\w+X|defY")
+ << "123456"
+ << 0
+ << QRegularExpression::PartialPreferFirstMatch
+ << QRegularExpression::MatchOptions(QRegularExpression::NoMatchOption)
+ << m;
+
+ m.clear();
+ m.isValid = true;
+ QTest::newRow("nomatch03") << QRegularExpression("abc\\w+X|defY")
+ << "ab123"
+ << 0
+ << QRegularExpression::PartialPreferCompleteMatch
+ << QRegularExpression::MatchOptions(QRegularExpression::NoMatchOption)
+ << m;
+
+ m.clear();
+ m.isValid = true;
+ QTest::newRow("nomatch04") << QRegularExpression("abc\\w+X|defY")
+ << "ab123"
+ << 0
+ << QRegularExpression::PartialPreferFirstMatch
+ << QRegularExpression::MatchOptions(QRegularExpression::NoMatchOption)
+ << m;
+
+}
+
+void tst_QRegularExpression::partialMatch()
+{
+ QFETCH(QRegularExpression, regexp);
+ QFETCH(QString, subject);
+ QFETCH(int, offset);
+ QFETCH(QRegularExpression::MatchType, matchType);
+ QFETCH(QRegularExpression::MatchOptions, matchOptions);
+ QFETCH(Match, match);
+
+ QRegularExpressionMatch m = regexp.match(subject, offset, matchType, matchOptions);
+ consistencyCheck(m);
+ QVERIFY(m == match);
+}
+
+void tst_QRegularExpression::globalMatch_data()
+{
+ QTest::addColumn<QRegularExpression>("regexp");
+ QTest::addColumn<QString>("subject");
+ QTest::addColumn<int>("offset");
+ QTest::addColumn<QRegularExpression::MatchType>("matchType");
+ QTest::addColumn<QRegularExpression::MatchOptions>("matchOptions");
+ QTest::addColumn<QList<Match> >("matchList");
+
+ QList<Match> matchList;
+ Match m;
+
+ matchList.clear();
+ m.clear();
+ m.isValid = true; m.hasMatch = true;
+ m.captured = QStringList() << "the";
+ matchList << m;
+ m.captured = QStringList() << "quick";
+ matchList << m;
+ m.captured = QStringList() << "fox";
+ matchList << m;
+ QTest::newRow("globalmatch01") << QRegularExpression("\\w+")
+ << "the quick fox"
+ << 0
+ << QRegularExpression::NormalMatch
+ << QRegularExpression::MatchOptions(QRegularExpression::NoMatchOption)
+ << matchList;
+
+ matchList.clear();
+ m.clear();
+ m.isValid = true; m.hasMatch = true;
+ m.captured = QStringList() << "the" << "t" << "he";
+ matchList << m;
+ m.captured = QStringList() << "quick" << "q" << "uick";
+ matchList << m;
+ m.captured = QStringList() << "fox" << "f" << "ox";
+ matchList << m;
+ QTest::newRow("globalmatch02") << QRegularExpression("(\\w+?)(\\w+)")
+ << "the quick fox"
+ << 0
+ << QRegularExpression::NormalMatch
+ << QRegularExpression::MatchOptions(QRegularExpression::NoMatchOption)
+ << matchList;
+
+ matchList.clear();
+ m.clear();
+ m.isValid = true; m.hasMatch = true;
+ m.captured = QStringList() << "";
+ matchList << m;
+ m.captured = QStringList() << "c";
+ matchList << m;
+ m.captured = QStringList() << "";
+ matchList << m;
+ m.captured = QStringList() << "c";
+ matchList << m;
+ m.captured = QStringList() << "aabb";
+ matchList << m;
+ m.captured = QStringList() << "";
+ matchList << m;
+ m.captured = QStringList() << "";
+ matchList << m;
+
+ QTest::newRow("globalmatch_emptycaptures01") << QRegularExpression("a*b*|c")
+ << "ccaabbd"
+ << 0
+ << QRegularExpression::NormalMatch
+ << QRegularExpression::MatchOptions(QRegularExpression::NoMatchOption)
+ << matchList;
+
+ matchList.clear();
+ m.clear();
+ m.isValid = true; m.hasMatch = true;
+ m.captured = QStringList() << "the";
+ matchList << m;
+ m.captured = QStringList() << "";
+ matchList << m;
+ m.captured = QStringList() << "quick";
+ matchList << m;
+ m.captured = QStringList() << "";
+ matchList << m;
+ m.captured = QStringList() << "fox";
+ matchList << m;
+ m.captured = QStringList() << "";
+ matchList << m;
+
+ QTest::newRow("globalmatch_emptycaptures02") << QRegularExpression(".*")
+ << "the\nquick\nfox"
+ << 0
+ << QRegularExpression::NormalMatch
+ << QRegularExpression::MatchOptions(QRegularExpression::NoMatchOption)
+ << matchList;
+
+ matchList.clear();
+ m.clear();
+ m.isValid = true; m.hasMatch = true;
+ m.captured = QStringList() << "the";
+ matchList << m;
+ m.captured = QStringList() << "";
+ matchList << m;
+ m.captured = QStringList() << "quick";
+ matchList << m;
+ m.captured = QStringList() << "";
+ matchList << m;
+ m.captured = QStringList() << "fox";
+ matchList << m;
+ m.captured = QStringList() << "";
+ matchList << m;
+ m.captured = QStringList() << "";
+ matchList << m;
+
+ QTest::newRow("globalmatch_emptycaptures03") << QRegularExpression(".*")
+ << "the\nquick\nfox\n"
+ << 0
+ << QRegularExpression::NormalMatch
+ << QRegularExpression::MatchOptions(QRegularExpression::NoMatchOption)
+ << matchList;
+
+ matchList.clear();
+ m.clear();
+ m.isValid = true; m.hasMatch = true;
+ m.captured = QStringList() << "the";
+ matchList << m;
+ m.captured = QStringList() << "";
+ matchList << m;
+ m.captured = QStringList() << "quick";
+ matchList << m;
+ m.captured = QStringList() << "";
+ matchList << m;
+ m.captured = QStringList() << "fox";
+ matchList << m;
+ m.captured = QStringList() << "";
+ matchList << m;
+
+ QTest::newRow("globalmatch_emptycaptures04") << QRegularExpression("(*CRLF).*")
+ << "the\r\nquick\r\nfox"
+ << 0
+ << QRegularExpression::NormalMatch
+ << QRegularExpression::MatchOptions(QRegularExpression::NoMatchOption)
+ << matchList;
+
+ matchList.clear();
+ m.clear();
+ m.isValid = true; m.hasMatch = true;
+ m.captured = QStringList() << "the";
+ matchList << m;
+ m.captured = QStringList() << "";
+ matchList << m;
+ m.captured = QStringList() << "quick";
+ matchList << m;
+ m.captured = QStringList() << "";
+ matchList << m;
+ m.captured = QStringList() << "fox";
+ matchList << m;
+ m.captured = QStringList() << "";
+ matchList << m;
+ m.captured = QStringList() << "";
+ matchList << m;
+
+ QTest::newRow("globalmatch_emptycaptures05") << QRegularExpression("(*CRLF).*")
+ << "the\r\nquick\r\nfox\r\n"
+ << 0
+ << QRegularExpression::NormalMatch
+ << QRegularExpression::MatchOptions(QRegularExpression::NoMatchOption)
+ << matchList;
+
+ matchList.clear();
+ m.clear();
+ m.isValid = true; m.hasMatch = true;
+ m.captured = QStringList() << "the";
+ matchList << m;
+ m.captured = QStringList() << "";
+ matchList << m;
+ m.captured = QStringList() << "quick";
+ matchList << m;
+ m.captured = QStringList() << "";
+ matchList << m;
+ m.captured = QStringList() << "fox";
+ matchList << m;
+ m.captured = QStringList() << "";
+ matchList << m;
+ m.captured = QStringList() << "jumped";
+ matchList << m;
+ m.captured = QStringList() << "";
+ matchList << m;
+
+ QTest::newRow("globalmatch_emptycaptures06") << QRegularExpression("(*ANYCRLF).*")
+ << "the\r\nquick\nfox\rjumped"
+ << 0
+ << QRegularExpression::NormalMatch
+ << QRegularExpression::MatchOptions(QRegularExpression::NoMatchOption)
+ << matchList;
+}
+
+void tst_QRegularExpression::globalMatch()
+{
+ QFETCH(QRegularExpression, regexp);
+ QFETCH(QString, subject);
+ QFETCH(int, offset);
+ QFETCH(QRegularExpression::MatchType, matchType);
+ QFETCH(QRegularExpression::MatchOptions, matchOptions);
+ QFETCH(QList<Match>, matchList);
+
+ QRegularExpressionMatchIterator iterator = regexp.globalMatch(subject, offset, matchType, matchOptions);
+ consistencyCheck(iterator);
+ QVERIFY(iterator == matchList);
+}
+
+void tst_QRegularExpression::serialize_data()
+{
+ provideRegularExpressions();
+}
+
+void tst_QRegularExpression::serialize()
+{
+ QFETCH(QString, pattern);
+ QFETCH(QRegularExpression::PatternOptions, patternOptions);
+ QRegularExpression outRe(pattern, patternOptions);
+ QByteArray buffer;
+ {
+ QDataStream out(&buffer, QIODevice::WriteOnly);
+ out << outRe;
+ }
+ QRegularExpression inRe;
+ {
+ QDataStream in(&buffer, QIODevice::ReadOnly);
+ in >> inRe;
+ }
+ QCOMPARE(inRe, outRe);
+}
+
+QTEST_APPLESS_MAIN(tst_QRegularExpression)
+
+#include "tst_qregularexpression.moc"
+
diff --git a/tests/auto/corelib/tools/tools.pro b/tests/auto/corelib/tools/tools.pro
index 89bb3bc416..d8961559e5 100644
--- a/tests/auto/corelib/tools/tools.pro
+++ b/tests/auto/corelib/tools/tools.pro
@@ -25,6 +25,7 @@ SUBDIRS=\
qqueue \
qrect \
qregexp \
+ qregularexpression \
qringbuffer \
qscopedpointer \
qscopedvaluerollback \