diff options
author | Giuseppe D'Angelo <giuseppe.dangelo@kdab.com> | 2021-08-06 10:17:50 +0200 |
---|---|---|
committer | Giuseppe D'Angelo <giuseppe.dangelo@kdab.com> | 2021-08-25 00:06:42 +0200 |
commit | bac329a28b14ccddcdb45a4ef51cd8056eda9233 (patch) | |
tree | 06aa44b30eb36fd441be11215f73a98885d120d2 /src | |
parent | 145940e1ef8fc8334ff4603a44f7896886e646cb (diff) |
QRegularExpressionMatch: add a way to know if a capturing group captured
Checking whether a capturing group's captured string is null does not
let users distinguish between a capturing group that did not capture
anything and one that captured a null substring (say, from a null
subject string).
Perl allows for the distinction: the entries in the @- and @+ arrays are
set to values in case there is a capture, but they're undef otherwise.
PCRE2 already gives us this information in the match results' "ovector",
but it was simply not exposed to QRegularExpressionMatch users. So, expose it.
[ChangeLog][QtCore][QRegularExpressionMatch] Added the hasCaptured()
family of functions to know if a given capturing group has captured
something.
Change-Id: Ic1320933d4554e2e313c0a680be1b1b9dd95af0b
Reviewed-by: Qt CI Bot <qt_ci_bot@qt-project.org>
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
Reviewed-by: Volker Hilsheimer <volker.hilsheimer@qt.io>
Reviewed-by: Edward Welbourne <edward.welbourne@qt.io>
Diffstat (limited to 'src')
-rw-r--r-- | src/corelib/doc/snippets/code/src_corelib_text_qregularexpression.cpp | 12 | ||||
-rw-r--r-- | src/corelib/text/qregularexpression.cpp | 71 | ||||
-rw-r--r-- | src/corelib/text/qregularexpression.h | 7 |
3 files changed, 86 insertions, 4 deletions
diff --git a/src/corelib/doc/snippets/code/src_corelib_text_qregularexpression.cpp b/src/corelib/doc/snippets/code/src_corelib_text_qregularexpression.cpp index 8bf67a9e2d..3bb9727ca6 100644 --- a/src/corelib/doc/snippets/code/src_corelib_text_qregularexpression.cpp +++ b/src/corelib/doc/snippets/code/src_corelib_text_qregularexpression.cpp @@ -374,4 +374,16 @@ QRegularExpression re(R"(\d\d \w+)"); //! [35] } +{ +//! [36] +QRegularExpression re("([a-z]+)|([A-Z]+)"); +QRegularExpressionMatch m = re.match("UPPERCASE"); +if (m.hasMatch()) { + qDebug() << m.hasCaptured(0); // true + qDebug() << m.hasCaptured(1); // false + qDebug() << m.hasCaptured(2); // true +} +//! [36] +} + } diff --git a/src/corelib/text/qregularexpression.cpp b/src/corelib/text/qregularexpression.cpp index a4bb19d0b2..7a425254ab 100644 --- a/src/corelib/text/qregularexpression.cpp +++ b/src/corelib/text/qregularexpression.cpp @@ -1259,6 +1259,12 @@ void QRegularExpressionPrivate::doMatch(QRegularExpressionMatchPrivate *priv, PCRE2_SIZE *ovector = pcre2_get_ovector_pointer_16(matchData); qsizetype *const capturedOffsets = priv->capturedOffsets.data(); + // We rely on the fact that capturing groups that did not + // capture anything have offset -1, but PCRE technically + // returns "PCRE2_UNSET". Test that out, better safe than + // sorry... + static_assert(qsizetype(PCRE2_UNSET) == qsizetype(-1), "Internal error: PCRE2 changed its API"); + for (int i = 0; i < priv->capturedCount * 2; ++i) capturedOffsets[i] = qsizetype(ovector[i]); @@ -2178,7 +2184,7 @@ QRegularExpression::MatchOptions QRegularExpressionMatch::matchOptions() const If the regular expression did not match, this function returns -1. - \sa captured(), capturedStart(), capturedEnd(), capturedLength() + \sa hasCaptured(), captured(), capturedStart(), capturedEnd(), capturedLength() */ int QRegularExpressionMatch::lastCapturedIndex() const { @@ -2186,6 +2192,63 @@ int QRegularExpressionMatch::lastCapturedIndex() const } /*! 
+ \fn bool QRegularExpressionMatch::hasCaptured(const QString &name) const + \fn bool QRegularExpressionMatch::hasCaptured(QStringView name) const + \since 6.3 + + Returns true if the capturing group named \a name captured something + in the subject string, and false otherwise (or if there is no + capturing group called \a name). + + \note Some capturing groups in a regular expression may not have + captured anything even if the regular expression matched. This may + happen, for instance, if a conditional operator is used in the + pattern: + + \snippet code/src_corelib_text_qregularexpression.cpp 36 + + Similarly, a capturing group may capture a substring of length 0; + this function will return \c{true} for such a capturing group. + + \sa captured(), hasMatch() +*/ +bool QRegularExpressionMatch::hasCaptured(QStringView name) const +{ + const int nth = d->regularExpression.d->captureIndexForName(name); + return hasCaptured(nth); +} + +/*! + \since 6.3 + + Returns true if the \a nth capturing group captured something + in the subject string, and false otherwise (or if there is no + such capturing group). + + \note The implicit capturing group number 0 captures the substring + matched by the entire pattern. + + \note Some capturing groups in a regular expression may not have + captured anything even if the regular expression matched. This may + happen, for instance, if a conditional operator is used in the + pattern: + + \snippet code/src_corelib_text_qregularexpression.cpp 36 + + Similarly, a capturing group may capture a substring of length 0; + this function will return \c{true} for such a capturing group. + + \sa captured(), lastCapturedIndex(), hasMatch() +*/ +bool QRegularExpressionMatch::hasCaptured(int nth) const +{ + if (nth < 0 || nth > lastCapturedIndex()) + return false; + + return d->capturedOffsets.at(nth * 2) != -1; +} + +/*! Returns the substring captured by the \a nth capturing group. 
If the \a nth capturing group did not capture a string, or if there is no @@ -2218,7 +2281,7 @@ QString QRegularExpressionMatch::captured(int nth) const */ QStringView QRegularExpressionMatch::capturedView(int nth) const { - if (nth < 0 || nth > lastCapturedIndex()) + if (!hasCaptured(nth)) return QStringView(); qsizetype start = capturedStart(nth); @@ -2312,7 +2375,7 @@ QStringList QRegularExpressionMatch::capturedTexts() const */ qsizetype QRegularExpressionMatch::capturedStart(int nth) const { - if (nth < 0 || nth > lastCapturedIndex()) + if (!hasCaptured(nth)) return -1; return d->capturedOffsets.at(nth * 2); @@ -2341,7 +2404,7 @@ qsizetype QRegularExpressionMatch::capturedLength(int nth) const */ qsizetype QRegularExpressionMatch::capturedEnd(int nth) const { - if (nth < 0 || nth > lastCapturedIndex()) + if (!hasCaptured(nth)) return -1; return d->capturedOffsets.at(nth * 2 + 1); diff --git a/src/corelib/text/qregularexpression.h b/src/corelib/text/qregularexpression.h index 570679800c..2d81d0c254 100644 --- a/src/corelib/text/qregularexpression.h +++ b/src/corelib/text/qregularexpression.h @@ -233,6 +233,13 @@ public: int lastCapturedIndex() const; +#if QT_STRINGVIEW_LEVEL < 2 + bool hasCaptured(const QString &name) const + { return hasCaptured(QStringView(name)); } +#endif + bool hasCaptured(QStringView name) const; + bool hasCaptured(int nth) const; + QString captured(int nth = 0) const; QStringView capturedView(int nth = 0) const; |