summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- src/corelib/doc/snippets/code/src_corelib_text_qregularexpression.cpp | 12
-rw-r--r-- src/corelib/text/qregularexpression.cpp | 71
-rw-r--r-- src/corelib/text/qregularexpression.h | 7
-rw-r--r-- tests/auto/corelib/text/qregularexpression/tst_qregularexpression.cpp | 238
4 files changed, 245 insertions, 83 deletions
diff --git a/src/corelib/doc/snippets/code/src_corelib_text_qregularexpression.cpp b/src/corelib/doc/snippets/code/src_corelib_text_qregularexpression.cpp
index 8bf67a9e2d..3bb9727ca6 100644
--- a/src/corelib/doc/snippets/code/src_corelib_text_qregularexpression.cpp
+++ b/src/corelib/doc/snippets/code/src_corelib_text_qregularexpression.cpp
@@ -374,4 +374,16 @@ QRegularExpression re(R"(\d\d \w+)");
//! [35]
}
+{
+//! [36]
+QRegularExpression re("([a-z]+)|([A-Z]+)");
+QRegularExpressionMatch m = re.match("UPPERCASE");
+if (m.hasMatch()) {
+ qDebug() << m.hasCaptured(0); // true: group 0 is the entire match
+ qDebug() << m.hasCaptured(1); // false: the ([a-z]+) alternative did not capture
+ qDebug() << m.hasCaptured(2); // true: ([A-Z]+) captured "UPPERCASE"
+}
+//! [36]
+}
+
}
diff --git a/src/corelib/text/qregularexpression.cpp b/src/corelib/text/qregularexpression.cpp
index a4bb19d0b2..7a425254ab 100644
--- a/src/corelib/text/qregularexpression.cpp
+++ b/src/corelib/text/qregularexpression.cpp
@@ -1259,6 +1259,12 @@ void QRegularExpressionPrivate::doMatch(QRegularExpressionMatchPrivate *priv,
PCRE2_SIZE *ovector = pcre2_get_ovector_pointer_16(matchData);
qsizetype *const capturedOffsets = priv->capturedOffsets.data();
+ // We rely on the fact that capturing groups that did not
+ // capture anything have offset -1, but PCRE technically
+ // returns "PCRE2_UNSET". Test that out, better safe than
+ // sorry...
+ static_assert(qsizetype(PCRE2_UNSET) == qsizetype(-1), "Internal error: PCRE2 changed its API");
+
for (int i = 0; i < priv->capturedCount * 2; ++i)
capturedOffsets[i] = qsizetype(ovector[i]);
@@ -2178,7 +2184,7 @@ QRegularExpression::MatchOptions QRegularExpressionMatch::matchOptions() const
If the regular expression did not match, this function returns -1.
- \sa captured(), capturedStart(), capturedEnd(), capturedLength()
+ \sa hasCaptured(), captured(), capturedStart(), capturedEnd(), capturedLength()
*/
int QRegularExpressionMatch::lastCapturedIndex() const
{
@@ -2186,6 +2192,63 @@ int QRegularExpressionMatch::lastCapturedIndex() const
}
/*!
+ \fn bool QRegularExpressionMatch::hasCaptured(const QString &name) const
+ \fn bool QRegularExpressionMatch::hasCaptured(QStringView name) const
+ \since 6.3
+
+ Returns \c true if the capturing group named \a name captured something
+ in the subject string, and \c false otherwise (or if there is no
+ capturing group called \a name).
+
+ \note Some capturing groups in a regular expression may not have
+ captured anything even if the regular expression matched. This may
+ happen, for instance, if an alternation or a conditional operator is
+ used in the pattern:
+
+ \snippet code/src_corelib_text_qregularexpression.cpp 36
+
+ Similarly, a capturing group may capture a substring of length 0;
+ this function will return \c{true} for such a capturing group.
+
+ \sa captured(), hasMatch()
+*/
+bool QRegularExpressionMatch::hasCaptured(QStringView name) const
+{
+ const int nth = d->regularExpression.d->captureIndexForName(name); // resolve the group name to a numeric index
+ return hasCaptured(nth); // delegate to the index-based overload, which returns false for out-of-range indices
+}
+
+/*!
+ \since 6.3
+
+ Returns \c true if the \a nth capturing group captured something
+ in the subject string, and \c false otherwise (or if there is no
+ such capturing group).
+
+ \note The implicit capturing group number 0 captures the substring
+ matched by the entire pattern.
+
+ \note Some capturing groups in a regular expression may not have
+ captured anything even if the regular expression matched. This may
+ happen, for instance, if an alternation or a conditional operator is
+ used in the pattern:
+
+ \snippet code/src_corelib_text_qregularexpression.cpp 36
+
+ Similarly, a capturing group may capture a substring of length 0;
+ this function will return \c{true} for such a capturing group.
+
+ \sa captured(), lastCapturedIndex(), hasMatch()
+*/
+bool QRegularExpressionMatch::hasCaptured(int nth) const
+{
+ if (nth < 0 || nth > lastCapturedIndex()) // index is outside the groups recorded for this match
+ return false;
+
+ return d->capturedOffsets.at(nth * 2) != -1; // slot 2*nth holds the group's start offset; -1 marks a group that did not capture
+}
+
+/*!
Returns the substring captured by the \a nth capturing group.
If the \a nth capturing group did not capture a string, or if there is no
@@ -2218,7 +2281,7 @@ QString QRegularExpressionMatch::captured(int nth) const
*/
QStringView QRegularExpressionMatch::capturedView(int nth) const
{
- if (nth < 0 || nth > lastCapturedIndex())
+ if (!hasCaptured(nth))
return QStringView();
qsizetype start = capturedStart(nth);
@@ -2312,7 +2375,7 @@ QStringList QRegularExpressionMatch::capturedTexts() const
*/
qsizetype QRegularExpressionMatch::capturedStart(int nth) const
{
- if (nth < 0 || nth > lastCapturedIndex())
+ if (!hasCaptured(nth))
return -1;
return d->capturedOffsets.at(nth * 2);
@@ -2341,7 +2404,7 @@ qsizetype QRegularExpressionMatch::capturedLength(int nth) const
*/
qsizetype QRegularExpressionMatch::capturedEnd(int nth) const
{
- if (nth < 0 || nth > lastCapturedIndex())
+ if (!hasCaptured(nth))
return -1;
return d->capturedOffsets.at(nth * 2 + 1);
diff --git a/src/corelib/text/qregularexpression.h b/src/corelib/text/qregularexpression.h
index 570679800c..2d81d0c254 100644
--- a/src/corelib/text/qregularexpression.h
+++ b/src/corelib/text/qregularexpression.h
@@ -233,6 +233,13 @@ public:
int lastCapturedIndex() const;
+#if QT_STRINGVIEW_LEVEL < 2 // QString convenience overload, compiled only when QT_STRINGVIEW_LEVEL is below 2
+ bool hasCaptured(const QString &name) const
+ { return hasCaptured(QStringView(name)); } // forwards to the QStringView overload
+#endif
+ bool hasCaptured(QStringView name) const; // query by capturing-group name
+ bool hasCaptured(int nth) const; // query by capturing-group index
+
QString captured(int nth = 0) const;
QStringView capturedView(int nth = 0) const;
diff --git a/tests/auto/corelib/text/qregularexpression/tst_qregularexpression.cpp b/tests/auto/corelib/text/qregularexpression/tst_qregularexpression.cpp
index 48d4de0aa4..684727758c 100644
--- a/tests/auto/corelib/text/qregularexpression/tst_qregularexpression.cpp
+++ b/tests/auto/corelib/text/qregularexpression/tst_qregularexpression.cpp
@@ -37,6 +37,8 @@
#include <qregularexpression.h>
#include <qthread.h>
+#include <optional>
+
Q_DECLARE_METATYPE(QRegularExpression::PatternOptions)
Q_DECLARE_METATYPE(QRegularExpression::MatchType)
Q_DECLARE_METATYPE(QRegularExpression::MatchOptions)
@@ -92,6 +94,8 @@ private:
void provideRegularExpressions();
};
+using CapturedList = QVector<std::optional<QString>>; // expected captures by group index; std::nullopt means the group is expected not to have captured
+
struct Match
{
Match()
@@ -111,8 +115,8 @@ struct Match
bool isValid;
bool hasMatch;
bool hasPartialMatch;
- QStringList captured;
- QHash<QString, QString> namedCaptured;
+ CapturedList captured;
+ QHash<QString, std::optional<QString>> namedCaptured;
};
QT_BEGIN_NAMESPACE
Q_DECLARE_TYPEINFO(Match, Q_RELOCATABLE_TYPE);
@@ -129,27 +133,53 @@ bool operator==(const QRegularExpressionMatch &rem, const Match &m)
if ((rem.hasMatch() != m.hasMatch) || (rem.hasPartialMatch() != m.hasPartialMatch))
return false;
if (rem.hasMatch() || rem.hasPartialMatch()) {
+ if (!rem.hasCaptured(0))
+ return false;
if (rem.lastCapturedIndex() != (m.captured.size() - 1))
return false;
for (int i = 0; i <= rem.lastCapturedIndex(); ++i) {
+ auto mMaybeCaptured = m.captured.at(i);
QString remCaptured = rem.captured(i);
- QString mCaptured = m.captured.at(i);
- if (remCaptured != mCaptured
- || remCaptured.isNull() != mCaptured.isNull()
- || remCaptured.isEmpty() != mCaptured.isEmpty()) {
- return false;
+ if (!mMaybeCaptured) {
+ if (rem.hasCaptured(i))
+ return false;
+ if (!remCaptured.isNull())
+ return false;
+ } else {
+ if (!rem.hasCaptured(i))
+ return false;
+ QString mCaptured = *mMaybeCaptured;
+ if (remCaptured != mCaptured
+ || remCaptured.isNull() != mCaptured.isNull()
+ || remCaptured.isEmpty() != mCaptured.isEmpty()) {
+ return false;
+ }
}
}
for (auto it = m.namedCaptured.begin(), end = m.namedCaptured.end(); it != end; ++it) {
- const QString remCaptured = rem.captured(it.key());
- const QString mCaptured = it.value();
- if (remCaptured != mCaptured
- || remCaptured.isNull() != mCaptured.isNull()
- || remCaptured.isEmpty() != mCaptured.isEmpty()) {
- return false;
+ const QString capturedGroupName = it.key();
+ const QString remCaptured = rem.captured(capturedGroupName);
+ const auto mMaybeCaptured = it.value();
+ if (!mMaybeCaptured) {
+ if (rem.hasCaptured(capturedGroupName))
+ return false;
+ if (!remCaptured.isNull())
+ return false;
+ } else {
+ if (!rem.hasCaptured(capturedGroupName))
+ return false;
+ const auto mCaptured = *mMaybeCaptured;
+ if (remCaptured != mCaptured
+ || remCaptured.isNull() != mCaptured.isNull()
+ || remCaptured.isEmpty() != mCaptured.isEmpty()) {
+ return false;
+ }
}
}
+ } else {
+ if (rem.hasCaptured(0))
+ return false;
}
return true;
@@ -803,7 +833,7 @@ void tst_QRegularExpression::normalMatch_data()
m.clear();
m.isValid = true; m.hasMatch = true;
- m.captured << " string" << QString() << "string";
+ m.captured << " string" << std::nullopt << "string";
QTest::newRow("match04") << QRegularExpression("(\\w+)? (\\w+)")
<< " string"
<< qsizetype(0)
@@ -887,9 +917,9 @@ void tst_QRegularExpression::normalMatch_data()
m.captured << "a string" << "a" << "string";
m.namedCaptured["article"] = "a";
m.namedCaptured["noun"] = "string";
- m.namedCaptured["nonexisting1"] = QString();
- m.namedCaptured["nonexisting2"] = QString();
- m.namedCaptured["nonexisting3"] = QString();
+ m.namedCaptured["nonexisting1"] = std::nullopt;
+ m.namedCaptured["nonexisting2"] = std::nullopt;
+ m.namedCaptured["nonexisting3"] = std::nullopt;
QTest::newRow("match10") << QRegularExpression("(?<article>\\w+) (?<noun>\\w+)")
<< "a string"
<< qsizetype(0)
@@ -900,7 +930,7 @@ void tst_QRegularExpression::normalMatch_data()
m.isValid = true; m.hasMatch = true;
m.captured << "" << "";
m.namedCaptured["digits"] = ""; // empty VS null
- m.namedCaptured["nonexisting"] = QString();
+ m.namedCaptured["nonexisting"] = std::nullopt;
QTest::newRow("match11") << QRegularExpression("(?<digits>\\d*)")
<< "abcde"
<< qsizetype(0)
@@ -922,6 +952,56 @@ void tst_QRegularExpression::normalMatch_data()
// ***
m.clear();
+ m.isValid = true; m.hasMatch = true;
+ m.captured << QString() << QString();
+ QTest::newRow("capture-in-null-string")
+ << QRegularExpression("(a*)")
+ << QString()
+ << qsizetype(0)
+ << QRegularExpression::MatchOptions(QRegularExpression::NoMatchOption)
+ << m;
+
+ m.clear();
+ m.isValid = true; m.hasMatch = true;
+ m.captured << QString() << QString() << QString();
+ QTest::newRow("capture-in-null-string-2")
+ << QRegularExpression("(a*)(b*)")
+ << QString()
+ << qsizetype(0)
+ << QRegularExpression::MatchOptions(QRegularExpression::NoMatchOption)
+ << m;
+
+ m.clear();
+ m.isValid = true; m.hasMatch = true;
+ m.captured << QString();
+ QTest::newRow("no-capture-in-null-string")
+ << QRegularExpression("(a+)?")
+ << QString()
+ << qsizetype(0)
+ << QRegularExpression::MatchOptions(QRegularExpression::NoMatchOption)
+ << m;
+
+ m.clear();
+ m.isValid = true; m.hasMatch = true;
+ m.captured << "bb" << QString("") << "bb";
+ QTest::newRow("empty-capture-in-non-null-string")
+ << QRegularExpression("(a*)(b*)")
+ << QString("bbc")
+ << qsizetype(0)
+ << QRegularExpression::MatchOptions(QRegularExpression::NoMatchOption)
+ << m;
+
+ m.clear();
+ m.isValid = true; m.hasMatch = true;
+ m.captured << "bb" << std::nullopt << "bb";
+ QTest::newRow("no-capture-in-non-null-string")
+ << QRegularExpression("(a+)?(b+)?")
+ << QString("bbc")
+ << qsizetype(0)
+ << QRegularExpression::MatchOptions(QRegularExpression::NoMatchOption)
+ << m;
+
+ m.clear();
m.isValid = true;
QTest::newRow("nomatch01") << QRegularExpression("\\d+")
<< "a string"
@@ -1316,11 +1396,11 @@ void tst_QRegularExpression::globalMatch_data()
matchList.clear();
m.clear();
m.isValid = true; m.hasMatch = true;
- m.captured = QStringList() << "the";
+ m.captured = CapturedList() << "the";
matchList << m;
- m.captured = QStringList() << "quick";
+ m.captured = CapturedList() << "quick";
matchList << m;
- m.captured = QStringList() << "fox";
+ m.captured = CapturedList() << "fox";
matchList << m;
QTest::newRow("globalmatch01") << QRegularExpression("\\w+")
<< "the quick fox"
@@ -1332,11 +1412,11 @@ void tst_QRegularExpression::globalMatch_data()
matchList.clear();
m.clear();
m.isValid = true; m.hasMatch = true;
- m.captured = QStringList() << "the" << "t" << "he";
+ m.captured = CapturedList() << "the" << "t" << "he";
matchList << m;
- m.captured = QStringList() << "quick" << "q" << "uick";
+ m.captured = CapturedList() << "quick" << "q" << "uick";
matchList << m;
- m.captured = QStringList() << "fox" << "f" << "ox";
+ m.captured = CapturedList() << "fox" << "f" << "ox";
matchList << m;
QTest::newRow("globalmatch02") << QRegularExpression("(\\w+?)(\\w+)")
<< "the quick fox"
@@ -1348,13 +1428,13 @@ void tst_QRegularExpression::globalMatch_data()
matchList.clear();
m.clear();
m.isValid = true; m.hasMatch = true;
- m.captured = QStringList() << "ACA""GTG""CGA""AAA";
+ m.captured = CapturedList() << "ACA""GTG""CGA""AAA";
matchList << m;
- m.captured = QStringList() << "AAA";
+ m.captured = CapturedList() << "AAA";
matchList << m;
- m.captured = QStringList() << "AAG""GAA""AAG""AAA";
+ m.captured = CapturedList() << "AAG""GAA""AAG""AAA";
matchList << m;
- m.captured = QStringList() << "AAA";
+ m.captured = CapturedList() << "AAA";
matchList << m;
QTest::newRow("globalmatch03") << QRegularExpression("\\G(?:\\w\\w\\w)*?AAA")
<< "ACA""GTG""CGA""AAA""AAA""AAG""GAA""AAG""AAA""AAA"
@@ -1373,19 +1453,19 @@ void tst_QRegularExpression::globalMatch_data()
matchList.clear();
m.clear();
m.isValid = true; m.hasMatch = true;
- m.captured = QStringList() << "";
+ m.captured = CapturedList() << "";
matchList << m;
- m.captured = QStringList() << "c";
+ m.captured = CapturedList() << "c";
matchList << m;
- m.captured = QStringList() << "";
+ m.captured = CapturedList() << "";
matchList << m;
- m.captured = QStringList() << "c";
+ m.captured = CapturedList() << "c";
matchList << m;
- m.captured = QStringList() << "aabb";
+ m.captured = CapturedList() << "aabb";
matchList << m;
- m.captured = QStringList() << "";
+ m.captured = CapturedList() << "";
matchList << m;
- m.captured = QStringList() << "";
+ m.captured = CapturedList() << "";
matchList << m;
QTest::newRow("globalmatch_emptycaptures01") << QRegularExpression("a*b*|c")
@@ -1398,17 +1478,17 @@ void tst_QRegularExpression::globalMatch_data()
matchList.clear();
m.clear();
m.isValid = true; m.hasMatch = true;
- m.captured = QStringList() << "the";
+ m.captured = CapturedList() << "the";
matchList << m;
- m.captured = QStringList() << "";
+ m.captured = CapturedList() << "";
matchList << m;
- m.captured = QStringList() << "quick";
+ m.captured = CapturedList() << "quick";
matchList << m;
- m.captured = QStringList() << "";
+ m.captured = CapturedList() << "";
matchList << m;
- m.captured = QStringList() << "fox";
+ m.captured = CapturedList() << "fox";
matchList << m;
- m.captured = QStringList() << "";
+ m.captured = CapturedList() << "";
matchList << m;
QTest::newRow("globalmatch_emptycaptures02") << QRegularExpression(".*")
@@ -1421,19 +1501,19 @@ void tst_QRegularExpression::globalMatch_data()
matchList.clear();
m.clear();
m.isValid = true; m.hasMatch = true;
- m.captured = QStringList() << "the";
+ m.captured = CapturedList() << "the";
matchList << m;
- m.captured = QStringList() << "";
+ m.captured = CapturedList() << "";
matchList << m;
- m.captured = QStringList() << "quick";
+ m.captured = CapturedList() << "quick";
matchList << m;
- m.captured = QStringList() << "";
+ m.captured = CapturedList() << "";
matchList << m;
- m.captured = QStringList() << "fox";
+ m.captured = CapturedList() << "fox";
matchList << m;
- m.captured = QStringList() << "";
+ m.captured = CapturedList() << "";
matchList << m;
- m.captured = QStringList() << "";
+ m.captured = CapturedList() << "";
matchList << m;
QTest::newRow("globalmatch_emptycaptures03") << QRegularExpression(".*")
@@ -1446,17 +1526,17 @@ void tst_QRegularExpression::globalMatch_data()
matchList.clear();
m.clear();
m.isValid = true; m.hasMatch = true;
- m.captured = QStringList() << "the";
+ m.captured = CapturedList() << "the";
matchList << m;
- m.captured = QStringList() << "";
+ m.captured = CapturedList() << "";
matchList << m;
- m.captured = QStringList() << "quick";
+ m.captured = CapturedList() << "quick";
matchList << m;
- m.captured = QStringList() << "";
+ m.captured = CapturedList() << "";
matchList << m;
- m.captured = QStringList() << "fox";
+ m.captured = CapturedList() << "fox";
matchList << m;
- m.captured = QStringList() << "";
+ m.captured = CapturedList() << "";
matchList << m;
QTest::newRow("globalmatch_emptycaptures04") << QRegularExpression("(*CRLF).*")
@@ -1469,19 +1549,19 @@ void tst_QRegularExpression::globalMatch_data()
matchList.clear();
m.clear();
m.isValid = true; m.hasMatch = true;
- m.captured = QStringList() << "the";
+ m.captured = CapturedList() << "the";
matchList << m;
- m.captured = QStringList() << "";
+ m.captured = CapturedList() << "";
matchList << m;
- m.captured = QStringList() << "quick";
+ m.captured = CapturedList() << "quick";
matchList << m;
- m.captured = QStringList() << "";
+ m.captured = CapturedList() << "";
matchList << m;
- m.captured = QStringList() << "fox";
+ m.captured = CapturedList() << "fox";
matchList << m;
- m.captured = QStringList() << "";
+ m.captured = CapturedList() << "";
matchList << m;
- m.captured = QStringList() << "";
+ m.captured = CapturedList() << "";
matchList << m;
QTest::newRow("globalmatch_emptycaptures05") << QRegularExpression("(*CRLF).*")
@@ -1494,21 +1574,21 @@ void tst_QRegularExpression::globalMatch_data()
matchList.clear();
m.clear();
m.isValid = true; m.hasMatch = true;
- m.captured = QStringList() << "the";
+ m.captured = CapturedList() << "the";
matchList << m;
- m.captured = QStringList() << "";
+ m.captured = CapturedList() << "";
matchList << m;
- m.captured = QStringList() << "quick";
+ m.captured = CapturedList() << "quick";
matchList << m;
- m.captured = QStringList() << "";
+ m.captured = CapturedList() << "";
matchList << m;
- m.captured = QStringList() << "fox";
+ m.captured = CapturedList() << "fox";
matchList << m;
- m.captured = QStringList() << "";
+ m.captured = CapturedList() << "";
matchList << m;
- m.captured = QStringList() << "jumped";
+ m.captured = CapturedList() << "jumped";
matchList << m;
- m.captured = QStringList() << "";
+ m.captured = CapturedList() << "";
matchList << m;
QTest::newRow("globalmatch_emptycaptures06") << QRegularExpression("(*ANYCRLF).*")
@@ -1521,17 +1601,17 @@ void tst_QRegularExpression::globalMatch_data()
matchList.clear();
m.clear();
m.isValid = true; m.hasMatch = true;
- m.captured = QStringList() << "ABC";
+ m.captured = CapturedList() << "ABC";
matchList << m;
- m.captured = QStringList() << "";
+ m.captured = CapturedList() << "";
matchList << m;
- m.captured = QStringList() << "DEF";
+ m.captured = CapturedList() << "DEF";
matchList << m;
- m.captured = QStringList() << "";
+ m.captured = CapturedList() << "";
matchList << m;
- m.captured = QStringList() << "GHI";
+ m.captured = CapturedList() << "GHI";
matchList << m;
- m.captured = QStringList() << "";
+ m.captured = CapturedList() << "";
matchList << m;
QTest::newRow("globalmatch_emptycaptures07") << QRegularExpression("[\\x{0000}-\\x{FFFF}]*")
<< QString::fromUtf8("ABC""\xf0\x9d\x85\x9d""DEF""\xf0\x9d\x85\x9e""GHI")
@@ -1543,13 +1623,13 @@ void tst_QRegularExpression::globalMatch_data()
matchList.clear();
m.clear();
m.isValid = true; m.hasMatch = true;
- m.captured = QStringList() << QString::fromUtf8("ABC""\xc3\x80");
+ m.captured = CapturedList() << QString::fromUtf8("ABC""\xc3\x80");
matchList << m;
- m.captured = QStringList() << "";
+ m.captured = CapturedList() << "";
matchList << m;
- m.captured = QStringList() << QString::fromUtf8("\xc3\x80""DEF""\xc3\x80");
+ m.captured = CapturedList() << QString::fromUtf8("\xc3\x80""DEF""\xc3\x80");
matchList << m;
- m.captured = QStringList() << "";
+ m.captured = CapturedList() << "";
matchList << m;
QTest::newRow("globalmatch_emptycaptures08") << QRegularExpression("[\\x{0000}-\\x{FFFF}]*")
<< QString::fromUtf8("ABC""\xc3\x80""\xf0\x9d\x85\x9d""\xc3\x80""DEF""\xc3\x80")