summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorGiuseppe D'Angelo <giuseppe.dangelo@kdab.com>2021-08-06 10:17:50 +0200
committerGiuseppe D'Angelo <giuseppe.dangelo@kdab.com>2021-08-25 00:06:42 +0200
commitbac329a28b14ccddcdb45a4ef51cd8056eda9233 (patch)
tree06aa44b30eb36fd441be11215f73a98885d120d2 /src
parent145940e1ef8fc8334ff4603a44f7896886e646cb (diff)
QRegularExpressionMatch: add a way to know if a capturing group captured
Relying on the fact that a given capturing group captured a null string doesn't allow users to distinguish whether a capturing group did not capture anything, or captured a null substring (say, from a null subject string). Perl allows for the distinction: the entries in the @- and @+ arrays are set to values in case there is a capture, but they're undef otherwise. PCRE2 gives us the information already in the results "ovector", but it was simply not exposed to QRegularExpressionMatch users. So, expose it. [ChangeLog][QtCore][QRegularExpressionMatch] Added the hasCaptured() family of functions to know if a given capturing group has captured something. Change-Id: Ic1320933d4554e2e313c0a680be1b1b9dd95af0b Reviewed-by: Qt CI Bot <qt_ci_bot@qt-project.org> Reviewed-by: Thiago Macieira <thiago.macieira@intel.com> Reviewed-by: Volker Hilsheimer <volker.hilsheimer@qt.io> Reviewed-by: Edward Welbourne <edward.welbourne@qt.io>
Diffstat (limited to 'src')
-rw-r--r--src/corelib/doc/snippets/code/src_corelib_text_qregularexpression.cpp12
-rw-r--r--src/corelib/text/qregularexpression.cpp71
-rw-r--r--src/corelib/text/qregularexpression.h7
3 files changed, 86 insertions, 4 deletions
diff --git a/src/corelib/doc/snippets/code/src_corelib_text_qregularexpression.cpp b/src/corelib/doc/snippets/code/src_corelib_text_qregularexpression.cpp
index 8bf67a9e2d..3bb9727ca6 100644
--- a/src/corelib/doc/snippets/code/src_corelib_text_qregularexpression.cpp
+++ b/src/corelib/doc/snippets/code/src_corelib_text_qregularexpression.cpp
@@ -374,4 +374,16 @@ QRegularExpression re(R"(\d\d \w+)");
//! [35]
}
+{
+//! [36]
+QRegularExpression re("([a-z]+)|([A-Z]+)");
+QRegularExpressionMatch m = re.match("UPPERCASE");
+if (m.hasMatch()) {
+ qDebug() << m.hasCaptured(0); // true
+ qDebug() << m.hasCaptured(1); // false
+ qDebug() << m.hasCaptured(2); // true
+}
+//! [36]
+}
+
}
diff --git a/src/corelib/text/qregularexpression.cpp b/src/corelib/text/qregularexpression.cpp
index a4bb19d0b2..7a425254ab 100644
--- a/src/corelib/text/qregularexpression.cpp
+++ b/src/corelib/text/qregularexpression.cpp
@@ -1259,6 +1259,12 @@ void QRegularExpressionPrivate::doMatch(QRegularExpressionMatchPrivate *priv,
PCRE2_SIZE *ovector = pcre2_get_ovector_pointer_16(matchData);
qsizetype *const capturedOffsets = priv->capturedOffsets.data();
+ // We rely on the fact that capturing groups that did not
+ // capture anything have offset -1, but PCRE technically
+ // returns "PCRE2_UNSET". Test that out, better safe than
+ // sorry...
+ static_assert(qsizetype(PCRE2_UNSET) == qsizetype(-1), "Internal error: PCRE2 changed its API");
+
for (int i = 0; i < priv->capturedCount * 2; ++i)
capturedOffsets[i] = qsizetype(ovector[i]);
@@ -2178,7 +2184,7 @@ QRegularExpression::MatchOptions QRegularExpressionMatch::matchOptions() const
If the regular expression did not match, this function returns -1.
- \sa captured(), capturedStart(), capturedEnd(), capturedLength()
+ \sa hasCaptured(), captured(), capturedStart(), capturedEnd(), capturedLength()
*/
int QRegularExpressionMatch::lastCapturedIndex() const
{
@@ -2186,6 +2192,63 @@ int QRegularExpressionMatch::lastCapturedIndex() const
}
/*!
+ \fn bool QRegularExpressionMatch::hasCaptured(const QString &name) const
+ \fn bool QRegularExpressionMatch::hasCaptured(QStringView name) const
+ \since 6.3
+
+ Returns \c true if the capturing group named \a name captured something
+ in the subject string, and \c false otherwise (or if there is no
+ capturing group called \a name).
+
+ \note Some capturing groups in a regular expression may not have
+ captured anything even if the regular expression matched. This may
+ happen, for instance, if an alternation or a conditional is used in
+ the pattern:
+
+ \snippet code/src_corelib_text_qregularexpression.cpp 36
+
+ Note that a capturing group may capture a substring of length 0;
+ this function will return \c{true} for such a capturing group.
+
+ \sa captured(), hasMatch()
+*/
+bool QRegularExpressionMatch::hasCaptured(QStringView name) const
+{
+ const int nth = d->regularExpression.d->captureIndexForName(name); // NOTE(review): presumably negative for an unknown name, which hasCaptured(int) rejects -- confirm
+ return hasCaptured(nth);
+}
+
+/*!
+ \since 6.3
+
+ Returns \c true if the \a nth capturing group captured something
+ in the subject string, and \c false otherwise (or if there is no
+ such capturing group).
+
+ \note The implicit capturing group number 0 captures the substring
+ matched by the entire pattern.
+
+ \note Some capturing groups in a regular expression may not have
+ captured anything even if the regular expression matched. This may
+ happen, for instance, if an alternation or a conditional is used in
+ the pattern:
+
+ \snippet code/src_corelib_text_qregularexpression.cpp 36
+
+ Note that a capturing group may capture a substring of length 0;
+ this function will return \c{true} for such a capturing group.
+
+ \sa captured(), lastCapturedIndex(), hasMatch()
+*/
+bool QRegularExpressionMatch::hasCaptured(int nth) const
+{
+ if (nth < 0 || nth > lastCapturedIndex()) // index outside the range of known groups
+ return false;
+
+ return d->capturedOffsets.at(nth * 2) != -1; // slot 2*nth is the group's start offset; -1 marks a group that did not capture
+}
+
+/*!
Returns the substring captured by the \a nth capturing group.
If the \a nth capturing group did not capture a string, or if there is no
@@ -2218,7 +2281,7 @@ QString QRegularExpressionMatch::captured(int nth) const
*/
QStringView QRegularExpressionMatch::capturedView(int nth) const
{
- if (nth < 0 || nth > lastCapturedIndex())
+ if (!hasCaptured(nth))
return QStringView();
qsizetype start = capturedStart(nth);
@@ -2312,7 +2375,7 @@ QStringList QRegularExpressionMatch::capturedTexts() const
*/
qsizetype QRegularExpressionMatch::capturedStart(int nth) const
{
- if (nth < 0 || nth > lastCapturedIndex())
+ if (!hasCaptured(nth))
return -1;
return d->capturedOffsets.at(nth * 2);
@@ -2341,7 +2404,7 @@ qsizetype QRegularExpressionMatch::capturedLength(int nth) const
*/
qsizetype QRegularExpressionMatch::capturedEnd(int nth) const
{
- if (nth < 0 || nth > lastCapturedIndex())
+ if (!hasCaptured(nth))
return -1;
return d->capturedOffsets.at(nth * 2 + 1);
diff --git a/src/corelib/text/qregularexpression.h b/src/corelib/text/qregularexpression.h
index 570679800c..2d81d0c254 100644
--- a/src/corelib/text/qregularexpression.h
+++ b/src/corelib/text/qregularexpression.h
@@ -233,6 +233,13 @@ public:
int lastCapturedIndex() const;
+#if QT_STRINGVIEW_LEVEL < 2
+ bool hasCaptured(const QString &name) const
+ { return hasCaptured(QStringView(name)); } // forwards to the QStringView overload
+#endif // QT_STRINGVIEW_LEVEL < 2
+ bool hasCaptured(QStringView name) const;
+ bool hasCaptured(int nth) const;
+
QString captured(int nth = 0) const;
QStringView capturedView(int nth = 0) const;