diff options
4 files changed, 245 insertions, 83 deletions
diff --git a/src/corelib/doc/snippets/code/src_corelib_text_qregularexpression.cpp b/src/corelib/doc/snippets/code/src_corelib_text_qregularexpression.cpp index 8bf67a9e2d..3bb9727ca6 100644 --- a/src/corelib/doc/snippets/code/src_corelib_text_qregularexpression.cpp +++ b/src/corelib/doc/snippets/code/src_corelib_text_qregularexpression.cpp @@ -374,4 +374,16 @@ QRegularExpression re(R"(\d\d \w+)"); //! [35] } +{ +//! [36] +QRegularExpression re("([a-z]+)|([A-Z]+)"); +QRegularExpressionMatch m = re.match("UPPERCASE"); +if (m.hasMatch()) { + qDebug() << m.hasCaptured(0); // true + qDebug() << m.hasCaptured(1); // false + qDebug() << m.hasCaptured(2); // true +} +//! [36] +} + } diff --git a/src/corelib/text/qregularexpression.cpp b/src/corelib/text/qregularexpression.cpp index a4bb19d0b2..7a425254ab 100644 --- a/src/corelib/text/qregularexpression.cpp +++ b/src/corelib/text/qregularexpression.cpp @@ -1259,6 +1259,12 @@ void QRegularExpressionPrivate::doMatch(QRegularExpressionMatchPrivate *priv, PCRE2_SIZE *ovector = pcre2_get_ovector_pointer_16(matchData); qsizetype *const capturedOffsets = priv->capturedOffsets.data(); + // We rely on the fact that capturing groups that did not + // capture anything have offset -1, but PCRE technically + // returns "PCRE2_UNSET". Test that out, better safe than + // sorry... + static_assert(qsizetype(PCRE2_UNSET) == qsizetype(-1), "Internal error: PCRE2 changed its API"); + for (int i = 0; i < priv->capturedCount * 2; ++i) capturedOffsets[i] = qsizetype(ovector[i]); @@ -2178,7 +2184,7 @@ QRegularExpression::MatchOptions QRegularExpressionMatch::matchOptions() const If the regular expression did not match, this function returns -1. - \sa captured(), capturedStart(), capturedEnd(), capturedLength() + \sa hasCaptured(), captured(), capturedStart(), capturedEnd(), capturedLength() */ int QRegularExpressionMatch::lastCapturedIndex() const { @@ -2186,6 +2192,63 @@ int QRegularExpressionMatch::lastCapturedIndex() const } /*! + \fn bool QRegularExpressionMatch::hasCaptured(const QString &name) const + \fn bool QRegularExpressionMatch::hasCaptured(QStringView name) const + \since 6.3 + + Returns true if the capturing group named \a name captured something + in the subject string, and false otherwise (or if there is no + capturing group called \a name). + + \note Some capturing groups in a regular expression may not have + captured anything even if the regular expression matched. This may + happen, for instance, if a conditional operator is used in the + pattern: + + \snippet code/src_corelib_text_qregularexpression.cpp 36 + + Similarly, a capturing group may capture a substring of length 0; + this function will return \c{true} for such a capturing group. + + \sa captured(), hasMatch() +*/ +bool QRegularExpressionMatch::hasCaptured(QStringView name) const +{ + const int nth = d->regularExpression.d->captureIndexForName(name); + return hasCaptured(nth); +} + +/*! + \since 6.3 + + Returns true if the \a nth capturing group captured something + in the subject string, and false otherwise (or if there is no + such capturing group). + + \note The implicit capturing group number 0 captures the substring + matched by the entire pattern. + + \note Some capturing groups in a regular expression may not have + captured anything even if the regular expression matched. This may + happen, for instance, if a conditional operator is used in the + pattern: + + \snippet code/src_corelib_text_qregularexpression.cpp 36 + + Similarly, a capturing group may capture a substring of length 0; + this function will return \c{true} for such a capturing group. + + \sa captured(), lastCapturedIndex(), hasMatch() +*/ +bool QRegularExpressionMatch::hasCaptured(int nth) const +{ + if (nth < 0 || nth > lastCapturedIndex()) + return false; + + return d->capturedOffsets.at(nth * 2) != -1; +} + +/*! Returns the substring captured by the \a nth capturing group. If the \a nth capturing group did not capture a string, or if there is no @@ -2218,7 +2281,7 @@ QString QRegularExpressionMatch::captured(int nth) const */ QStringView QRegularExpressionMatch::capturedView(int nth) const { - if (nth < 0 || nth > lastCapturedIndex()) + if (!hasCaptured(nth)) return QStringView(); qsizetype start = capturedStart(nth); @@ -2312,7 +2375,7 @@ QStringList QRegularExpressionMatch::capturedTexts() const */ qsizetype QRegularExpressionMatch::capturedStart(int nth) const { - if (nth < 0 || nth > lastCapturedIndex()) + if (!hasCaptured(nth)) return -1; return d->capturedOffsets.at(nth * 2); @@ -2341,7 +2404,7 @@ qsizetype QRegularExpressionMatch::capturedLength(int nth) const */ qsizetype QRegularExpressionMatch::capturedEnd(int nth) const { - if (nth < 0 || nth > lastCapturedIndex()) + if (!hasCaptured(nth)) return -1; return d->capturedOffsets.at(nth * 2 + 1); diff --git a/src/corelib/text/qregularexpression.h b/src/corelib/text/qregularexpression.h index 570679800c..2d81d0c254 100644 --- a/src/corelib/text/qregularexpression.h +++ b/src/corelib/text/qregularexpression.h @@ -233,6 +233,13 @@ public: int lastCapturedIndex() const; +#if QT_STRINGVIEW_LEVEL < 2 + bool hasCaptured(const QString &name) const + { return hasCaptured(QStringView(name)); } +#endif + bool hasCaptured(QStringView name) const; + bool hasCaptured(int nth) const; + QString captured(int nth = 0) const; QStringView capturedView(int nth = 0) const; diff --git a/tests/auto/corelib/text/qregularexpression/tst_qregularexpression.cpp b/tests/auto/corelib/text/qregularexpression/tst_qregularexpression.cpp index 48d4de0aa4..684727758c 100644 --- a/tests/auto/corelib/text/qregularexpression/tst_qregularexpression.cpp +++ b/tests/auto/corelib/text/qregularexpression/tst_qregularexpression.cpp @@ -37,6 +37,8 @@ #include <qregularexpression.h> #include <qthread.h> +#include <optional> + Q_DECLARE_METATYPE(QRegularExpression::PatternOptions) Q_DECLARE_METATYPE(QRegularExpression::MatchType) Q_DECLARE_METATYPE(QRegularExpression::MatchOptions) @@ -92,6 +94,8 @@ private: void provideRegularExpressions(); }; +using CapturedList = QVector<std::optional<QString>>; + struct Match { Match() @@ -111,8 +115,8 @@ struct Match bool isValid; bool hasMatch; bool hasPartialMatch; - QStringList captured; - QHash<QString, QString> namedCaptured; + CapturedList captured; + QHash<QString, std::optional<QString>> namedCaptured; }; QT_BEGIN_NAMESPACE Q_DECLARE_TYPEINFO(Match, Q_RELOCATABLE_TYPE); @@ -129,27 +133,53 @@ bool operator==(const QRegularExpressionMatch &rem, const Match &m) if ((rem.hasMatch() != m.hasMatch) || (rem.hasPartialMatch() != m.hasPartialMatch)) return false; if (rem.hasMatch() || rem.hasPartialMatch()) { + if (!rem.hasCaptured(0)) + return false; if (rem.lastCapturedIndex() != (m.captured.size() - 1)) return false; for (int i = 0; i <= rem.lastCapturedIndex(); ++i) { + auto mMaybeCaptured = m.captured.at(i); QString remCaptured = rem.captured(i); - QString mCaptured = m.captured.at(i); - if (remCaptured != mCaptured - || remCaptured.isNull() != mCaptured.isNull() - || remCaptured.isEmpty() != mCaptured.isEmpty()) { - return false; + if (!mMaybeCaptured) { + if (rem.hasCaptured(i)) + return false; + if (!remCaptured.isNull()) + return false; + } else { + if (!rem.hasCaptured(i)) + return false; + QString mCaptured = *mMaybeCaptured; + if (remCaptured != mCaptured + || remCaptured.isNull() != mCaptured.isNull() + || remCaptured.isEmpty() != mCaptured.isEmpty()) { + return false; + } } } for (auto it = m.namedCaptured.begin(), end = m.namedCaptured.end(); it != end; ++it) { - const QString remCaptured = rem.captured(it.key()); - const QString mCaptured = it.value(); - if (remCaptured != mCaptured - || remCaptured.isNull() != mCaptured.isNull() - || remCaptured.isEmpty() != mCaptured.isEmpty()) { - return false; + const QString capturedGroupName = it.key(); + const QString remCaptured = rem.captured(capturedGroupName); + const auto mMaybeCaptured = it.value(); + if (!mMaybeCaptured) { + if (rem.hasCaptured(capturedGroupName)) + return false; + if (!remCaptured.isNull()) + return false; + } else { + if (!rem.hasCaptured(capturedGroupName)) + return false; + const auto mCaptured = *mMaybeCaptured; + if (remCaptured != mCaptured + || remCaptured.isNull() != mCaptured.isNull() + || remCaptured.isEmpty() != mCaptured.isEmpty()) { + return false; + } } } + } else { + if (rem.hasCaptured(0)) + return false; } return true; @@ -803,7 +833,7 @@ void tst_QRegularExpression::normalMatch_data() m.clear(); m.isValid = true; m.hasMatch = true; - m.captured << " string" << QString() << "string"; + m.captured << " string" << std::nullopt << "string"; QTest::newRow("match04") << QRegularExpression("(\\w+)? (\\w+)") << " string" << qsizetype(0) @@ -887,9 +917,9 @@ void tst_QRegularExpression::normalMatch_data() m.captured << "a string" << "a" << "string"; m.namedCaptured["article"] = "a"; m.namedCaptured["noun"] = "string"; - m.namedCaptured["nonexisting1"] = QString(); - m.namedCaptured["nonexisting2"] = QString(); - m.namedCaptured["nonexisting3"] = QString(); + m.namedCaptured["nonexisting1"] = std::nullopt; + m.namedCaptured["nonexisting2"] = std::nullopt; + m.namedCaptured["nonexisting3"] = std::nullopt; QTest::newRow("match10") << QRegularExpression("(?<article>\\w+) (?<noun>\\w+)") << "a string" << qsizetype(0) @@ -900,7 +930,7 @@ void tst_QRegularExpression::normalMatch_data() m.isValid = true; m.hasMatch = true; m.captured << "" << ""; m.namedCaptured["digits"] = ""; // empty VS null - m.namedCaptured["nonexisting"] = QString(); + m.namedCaptured["nonexisting"] = std::nullopt; QTest::newRow("match11") << QRegularExpression("(?<digits>\\d*)") << "abcde" << qsizetype(0) @@ -922,6 +952,56 @@ void tst_QRegularExpression::normalMatch_data() // *** m.clear(); + m.isValid = true; m.hasMatch = true; + m.captured << QString() << QString(); + QTest::newRow("capture-in-null-string") + << QRegularExpression("(a*)") + << QString() + << qsizetype(0) + << QRegularExpression::MatchOptions(QRegularExpression::NoMatchOption) + << m; + + m.clear(); + m.isValid = true; m.hasMatch = true; + m.captured << QString() << QString() << QString(); + QTest::newRow("capture-in-null-string-2") + << QRegularExpression("(a*)(b*)") + << QString() + << qsizetype(0) + << QRegularExpression::MatchOptions(QRegularExpression::NoMatchOption) + << m; + + m.clear(); + m.isValid = true; m.hasMatch = true; + m.captured << QString(); + QTest::newRow("no-capture-in-null-string") + << QRegularExpression("(a+)?") + << QString() + << qsizetype(0) + << QRegularExpression::MatchOptions(QRegularExpression::NoMatchOption) + << m; + + m.clear(); + m.isValid = true; m.hasMatch = true; + m.captured << "bb" << QString("") << "bb"; + QTest::newRow("empty-capture-in-non-null-string") + << QRegularExpression("(a*)(b*)") + << QString("bbc") + << qsizetype(0) + << QRegularExpression::MatchOptions(QRegularExpression::NoMatchOption) + << m; + + m.clear(); + m.isValid = true; m.hasMatch = true; + m.captured << "bb" << std::nullopt << "bb"; + QTest::newRow("no-capture-in-non-null-string") + << QRegularExpression("(a+)?(b+)?") + << QString("bbc") + << qsizetype(0) + << QRegularExpression::MatchOptions(QRegularExpression::NoMatchOption) + << m; + + m.clear(); m.isValid = true; QTest::newRow("nomatch01") << QRegularExpression("\\d+") << "a string" @@ -1316,11 +1396,11 @@ void tst_QRegularExpression::globalMatch_data() matchList.clear(); m.clear(); m.isValid = true; m.hasMatch = true; - m.captured = QStringList() << "the"; + m.captured = CapturedList() << "the"; matchList << m; - m.captured = QStringList() << "quick"; + m.captured = CapturedList() << "quick"; matchList << m; - m.captured = QStringList() << "fox"; + m.captured = CapturedList() << "fox"; matchList << m; QTest::newRow("globalmatch01") << QRegularExpression("\\w+") << "the quick fox" @@ -1332,11 +1412,11 @@ void tst_QRegularExpression::globalMatch_data() matchList.clear(); m.clear(); m.isValid = true; m.hasMatch = true; - m.captured = QStringList() << "the" << "t" << "he"; + m.captured = CapturedList() << "the" << "t" << "he"; matchList << m; - m.captured = QStringList() << "quick" << "q" << "uick"; + m.captured = CapturedList() << "quick" << "q" << "uick"; matchList << m; - m.captured = QStringList() << "fox" << "f" << "ox"; + m.captured = CapturedList() << "fox" << "f" << "ox"; matchList << m; QTest::newRow("globalmatch02") << QRegularExpression("(\\w+?)(\\w+)") << "the quick fox" @@ -1348,13 +1428,13 @@ void tst_QRegularExpression::globalMatch_data() matchList.clear(); m.clear(); m.isValid = true; m.hasMatch = true; - m.captured = QStringList() << "ACA""GTG""CGA""AAA"; + m.captured = CapturedList() << "ACA""GTG""CGA""AAA"; matchList << m; - m.captured = QStringList() << "AAA"; + m.captured = CapturedList() << "AAA"; matchList << m; - m.captured = QStringList() << "AAG""GAA""AAG""AAA"; + m.captured = CapturedList() << "AAG""GAA""AAG""AAA"; matchList << m; - m.captured = QStringList() << "AAA"; + m.captured = CapturedList() << "AAA"; matchList << m; QTest::newRow("globalmatch03") << QRegularExpression("\\G(?:\\w\\w\\w)*?AAA") << "ACA""GTG""CGA""AAA""AAA""AAG""GAA""AAG""AAA""AAA" @@ -1373,19 +1453,19 @@ void tst_QRegularExpression::globalMatch_data() matchList.clear(); m.clear(); m.isValid = true; m.hasMatch = true; - m.captured = QStringList() << ""; + m.captured = CapturedList() << ""; matchList << m; - m.captured = QStringList() << "c"; + m.captured = CapturedList() << "c"; matchList << m; - m.captured = QStringList() << ""; + m.captured = CapturedList() << ""; matchList << m; - m.captured = QStringList() << "c"; + m.captured = CapturedList() << "c"; matchList << m; - m.captured = QStringList() << "aabb"; + m.captured = CapturedList() << "aabb"; matchList << m; - m.captured = QStringList() << ""; + m.captured = CapturedList() << ""; matchList << m; - m.captured = QStringList() << ""; + m.captured = CapturedList() << ""; matchList << m; QTest::newRow("globalmatch_emptycaptures01") << QRegularExpression("a*b*|c") @@ -1398,17 +1478,17 @@ void tst_QRegularExpression::globalMatch_data() matchList.clear(); m.clear(); m.isValid = true; m.hasMatch = true; - m.captured = QStringList() << "the"; + m.captured = CapturedList() << "the"; matchList << m; - m.captured = QStringList() << ""; + m.captured = CapturedList() << ""; matchList << m; - m.captured = QStringList() << "quick"; + m.captured = CapturedList() << "quick"; matchList << m; - m.captured = QStringList() << ""; + m.captured = CapturedList() << ""; matchList << m; - m.captured = QStringList() << "fox"; + m.captured = CapturedList() << "fox"; matchList << m; - m.captured = QStringList() << ""; + m.captured = CapturedList() << ""; matchList << m; QTest::newRow("globalmatch_emptycaptures02") << QRegularExpression(".*") @@ -1421,19 +1501,19 @@ void tst_QRegularExpression::globalMatch_data() matchList.clear(); m.clear(); m.isValid = true; m.hasMatch = true; - m.captured = QStringList() << "the"; + m.captured = CapturedList() << "the"; matchList << m; - m.captured = QStringList() << ""; + m.captured = CapturedList() << ""; matchList << m; - m.captured = QStringList() << "quick"; + m.captured = CapturedList() << "quick"; matchList << m; - m.captured = QStringList() << ""; + m.captured = CapturedList() << ""; matchList << m; - m.captured = QStringList() << "fox"; + m.captured = CapturedList() << "fox"; matchList << m; - m.captured = QStringList() << ""; + m.captured = CapturedList() << ""; matchList << m; - m.captured = QStringList() << ""; + m.captured = CapturedList() << ""; matchList << m; QTest::newRow("globalmatch_emptycaptures03") << QRegularExpression(".*") @@ -1446,17 +1526,17 @@ void tst_QRegularExpression::globalMatch_data() matchList.clear(); m.clear(); m.isValid = true; m.hasMatch = true; - m.captured = QStringList() << "the"; + m.captured = CapturedList() << "the"; matchList << m; - m.captured = QStringList() << ""; + m.captured = CapturedList() << ""; matchList << m; - m.captured = QStringList() << "quick"; + m.captured = CapturedList() << "quick"; matchList << m; - m.captured = QStringList() << ""; + m.captured = CapturedList() << ""; matchList << m; - m.captured = QStringList() << "fox"; + m.captured = CapturedList() << "fox"; matchList << m; - m.captured = QStringList() << ""; + m.captured = CapturedList() << ""; matchList << m; QTest::newRow("globalmatch_emptycaptures04") << QRegularExpression("(*CRLF).*") @@ -1469,19 +1549,19 @@ void tst_QRegularExpression::globalMatch_data() matchList.clear(); m.clear(); m.isValid = true; m.hasMatch = true; - m.captured = QStringList() << "the"; + m.captured = CapturedList() << "the"; matchList << m; - m.captured = QStringList() << ""; + m.captured = CapturedList() << ""; matchList << m; - m.captured = QStringList() << "quick"; + m.captured = CapturedList() << "quick"; matchList << m; - m.captured = QStringList() << ""; + m.captured = CapturedList() << ""; matchList << m; - m.captured = QStringList() << "fox"; + m.captured = CapturedList() << "fox"; matchList << m; - m.captured = QStringList() << ""; + m.captured = CapturedList() << ""; matchList << m; - m.captured = QStringList() << ""; + m.captured = CapturedList() << ""; matchList << m; QTest::newRow("globalmatch_emptycaptures05") << QRegularExpression("(*CRLF).*") @@ -1494,21 +1574,21 @@ void tst_QRegularExpression::globalMatch_data() matchList.clear(); m.clear(); m.isValid = true; m.hasMatch = true; - m.captured = QStringList() << "the"; + m.captured = CapturedList() << "the"; matchList << m; - m.captured = QStringList() << ""; + m.captured = CapturedList() << ""; matchList << m; - m.captured = QStringList() << "quick"; + m.captured = CapturedList() << "quick"; matchList << m; - m.captured = QStringList() << ""; + m.captured = CapturedList() << ""; matchList << m; - m.captured = QStringList() << "fox"; + m.captured = CapturedList() << "fox"; matchList << m; - m.captured = QStringList() << ""; + m.captured = CapturedList() << ""; matchList << m; - m.captured = QStringList() << "jumped"; + m.captured = CapturedList() << "jumped"; matchList << m; - m.captured = QStringList() << ""; + m.captured = CapturedList() << ""; matchList << m; QTest::newRow("globalmatch_emptycaptures06") << QRegularExpression("(*ANYCRLF).*") @@ -1521,17 +1601,17 @@ void tst_QRegularExpression::globalMatch_data() matchList.clear(); m.clear(); m.isValid = true; m.hasMatch = true; - m.captured = QStringList() << "ABC"; + m.captured = CapturedList() << "ABC"; matchList << m; - m.captured = QStringList() << ""; + m.captured = CapturedList() << ""; matchList << m; - m.captured = QStringList() << "DEF"; + m.captured = CapturedList() << "DEF"; matchList << m; - m.captured = QStringList() << ""; + m.captured = CapturedList() << ""; matchList << m; - m.captured = QStringList() << "GHI"; + m.captured = CapturedList() << "GHI"; matchList << m; - m.captured = QStringList() << ""; + m.captured = CapturedList() << ""; matchList << m; QTest::newRow("globalmatch_emptycaptures07") << QRegularExpression("[\\x{0000}-\\x{FFFF}]*") << QString::fromUtf8("ABC""\xf0\x9d\x85\x9d""DEF""\xf0\x9d\x85\x9e""GHI") @@ -1543,13 +1623,13 @@ void tst_QRegularExpression::globalMatch_data() matchList.clear(); m.clear(); m.isValid = true; m.hasMatch = true; - m.captured = QStringList() << QString::fromUtf8("ABC""\xc3\x80"); + m.captured = CapturedList() << QString::fromUtf8("ABC""\xc3\x80"); matchList << m; - m.captured = QStringList() << ""; + m.captured = CapturedList() << ""; matchList << m; - m.captured = QStringList() << QString::fromUtf8("\xc3\x80""DEF""\xc3\x80"); + m.captured = CapturedList() << QString::fromUtf8("\xc3\x80""DEF""\xc3\x80"); matchList << m; - m.captured = QStringList() << ""; + m.captured = CapturedList() << ""; matchList << m; QTest::newRow("globalmatch_emptycaptures08") << QRegularExpression("[\\x{0000}-\\x{FFFF}]*") << QString::fromUtf8("ABC""\xc3\x80""\xf0\x9d\x85\x9d""\xc3\x80""DEF""\xc3\x80") |