From 53f9e77140a07eb8f36eeea460f13a54dad7330e Mon Sep 17 00:00:00 2001 From: Giuseppe D'Angelo Date: Sat, 19 Jan 2013 00:35:21 +0000 Subject: QRegularExpression: add method for extracting the capturing group names It may be useful to know which named capturing groups are defined in a regular expression, and for each of them, what's the corresponding index. This commit adds the needed method to QRegularExpression. Note that extracting the information doesn't happen while holding the mutex in the private -- pcre_fullinfo just reads information from the compiled pattern, so that's thread-safe. Task-number: QTBUG-29079 Change-Id: I50c00ee860f06427c2e6ea10417d5c0733cc8303 Reviewed-by: Lars Knoll Reviewed-by: Thiago Macieira --- dist/changes-5.1.0 | 5 ++ src/corelib/tools/qregularexpression.cpp | 48 +++++++++++++ src/corelib/tools/qregularexpression.h | 2 + .../qregularexpression/tst_qregularexpression.cpp | 81 ++++++++++++++++++++++ .../qregularexpression/tst_qregularexpression.h | 2 + 5 files changed, 138 insertions(+) diff --git a/dist/changes-5.1.0 b/dist/changes-5.1.0 index dffbb4df6b..8ee691055f 100644 --- a/dist/changes-5.1.0 +++ b/dist/changes-5.1.0 @@ -59,6 +59,11 @@ QtCore - QMessageAuthenticationCode * New class for hash-based message authentication code added to QtCore. + - QRegularExpression + * It's now possible to know the names of the named capturing groups + inside the pattern string, as well as the numerical index of each + named capturing group. + - QtGui diff --git a/src/corelib/tools/qregularexpression.cpp b/src/corelib/tools/qregularexpression.cpp index 503185dab5..510b7112af 100644 --- a/src/corelib/tools/qregularexpression.cpp +++ b/src/corelib/tools/qregularexpression.cpp @@ -1496,6 +1496,54 @@ int QRegularExpression::captureCount() const return d->capturingCount; } +/*! + \since 5.1 + + Returns a list of captureCount() + 1 elements, containing the names of the named + capturing groups in the pattern string. 
The list is sorted such that the + i-th element of the list is the name of the i-th capturing group, if it has + a name, or an empty string if the capturing group is unnamed. + + If the regular expression is not valid, returns an empty list. + + \sa isValid(), QRegularExpressionMatch::captured(), QString::isEmpty() +*/ +QStringList QRegularExpression::namedCaptureGroups() const +{ + if (!isValid()) // isValid() will compile the pattern + return QStringList(); + + // namedCapturingTable will point to a table of + // namedCapturingTableEntryCount entries, each one of which + // contains one ushort followed by the name, NUL terminated. + // The ushort is the numerical index of the name in the pattern. + // The length of each entry is namedCapturingTableEntrySize. + ushort *namedCapturingTable; + int namedCapturingTableEntryCount; + int namedCapturingTableEntrySize; + + pcre16_fullinfo(d->compiledPattern, 0, PCRE_INFO_NAMETABLE, &namedCapturingTable); + pcre16_fullinfo(d->compiledPattern, 0, PCRE_INFO_NAMECOUNT, &namedCapturingTableEntryCount); + pcre16_fullinfo(d->compiledPattern, 0, PCRE_INFO_NAMEENTRYSIZE, &namedCapturingTableEntrySize); + + QStringList result; + + // no QList::resize nor fill is available. The +1 is for the implicit group #0 + result.reserve(d->capturingCount + 1); + for (int i = 0; i < d->capturingCount + 1; ++i) + result.append(QString()); + + for (int i = 0; i < namedCapturingTableEntryCount; ++i) { + const ushort * const currentNamedCapturingTableRow = namedCapturingTable + + namedCapturingTableEntrySize * i; + + const int index = *currentNamedCapturingTableRow; + result[index] = QString::fromUtf16(currentNamedCapturingTableRow + 1); + } + + return result; +} + /*! Returns true if the regular expression is a valid regular expression (that is, it contains no syntax errors, etc.), or false otherwise. 
Use diff --git a/src/corelib/tools/qregularexpression.h b/src/corelib/tools/qregularexpression.h index a056b4f01b..97dbee9256 100644 --- a/src/corelib/tools/qregularexpression.h +++ b/src/corelib/tools/qregularexpression.h @@ -46,6 +46,7 @@ #ifndef QT_NO_REGULAREXPRESSION #include +#include #include #include @@ -94,6 +95,7 @@ public: QString errorString() const; int captureCount() const; + QStringList namedCaptureGroups() const; enum MatchType { NormalMatch = 0, diff --git a/tests/auto/corelib/tools/qregularexpression/tst_qregularexpression.cpp b/tests/auto/corelib/tools/qregularexpression/tst_qregularexpression.cpp index 139f831b3d..75487900d1 100644 --- a/tests/auto/corelib/tools/qregularexpression/tst_qregularexpression.cpp +++ b/tests/auto/corelib/tools/qregularexpression/tst_qregularexpression.cpp @@ -1,6 +1,7 @@ /**************************************************************************** ** ** Copyright (C) 2012 Giuseppe D'Angelo . +** Copyright (C) 2013 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com, author Giuseppe D'Angelo ** Contact: http://www.qt-project.org/legal ** ** This file is part of the test suite of the Qt Toolkit. @@ -1418,6 +1419,86 @@ void tst_QRegularExpression::captureCount() QCOMPARE(re.captureCount(), -1); } +// the comma in the template breaks QFETCH... 
+typedef QMultiHash StringToIntMap; +Q_DECLARE_METATYPE(StringToIntMap) + +void tst_QRegularExpression::captureNames_data() +{ + QTest::addColumn("pattern"); + QTest::addColumn("namedCapturesIndexMap"); + StringToIntMap map; + + QTest::newRow("captureNames01") << "a pattern" << map; + QTest::newRow("captureNames02") << "a.*pattern" << map; + QTest::newRow("captureNames03") << "(a) pattern" << map; + QTest::newRow("captureNames04") << "(a).*(pattern)" << map; + + map.clear(); + map.replace("named", 1); + QTest::newRow("captureNames05") << "a.*(?pattern)" << map; + + map.clear(); + map.replace("named", 2); + QTest::newRow("captureNames06") << "(a).*(?pattern)" << map; + + map.clear(); + map.replace("name1", 1); + map.replace("name2", 2); + QTest::newRow("captureNames07") << "(?a).*(?pattern)" << map; + + map.clear(); + map.replace("name1", 2); + map.replace("name2", 1); + QTest::newRow("captureNames08") << "(?a).*(?pattern)" << map; + + map.clear(); + map.replace("date", 1); + map.replace("month", 2); + map.replace("year", 3); + QTest::newRow("captureNames09") << "^(?\\d\\d)/(?\\d\\d)/(?\\d\\d\\d\\d)$" << map; + + map.clear(); + map.replace("date", 2); + map.replace("month", 1); + map.replace("year", 3); + QTest::newRow("captureNames10") << "^(?\\d\\d)/(?\\d\\d)/(?\\d\\d\\d\\d)$" << map; + + map.clear(); + map.replace("noun", 2); + QTest::newRow("captureNames11") << "(a)(?|(?b)|(?c))(d)" << map; + + map.clear(); + QTest::newRow("captureNames_invalid01") << "(.*" << map; + QTest::newRow("captureNames_invalid02") << "\\" << map; + QTest::newRow("captureNames_invalid03") << "(?a)|(?b))" << map; +} + +void tst_QRegularExpression::captureNames() +{ + QFETCH(QString, pattern); + QFETCH(StringToIntMap, namedCapturesIndexMap); + + const QRegularExpression re(pattern); + QStringList namedCaptureGroups = re.namedCaptureGroups(); + int namedCaptureGroupsCount = namedCaptureGroups.size(); + + QCOMPARE(namedCaptureGroupsCount, re.captureCount() + 1); + + for (int i = 0; i < 
namedCaptureGroupsCount; ++i) { + const QString &name = namedCaptureGroups.at(i); + + if (name.isEmpty()) { + QVERIFY(!namedCapturesIndexMap.contains(name)); + } else { + QVERIFY(namedCapturesIndexMap.contains(name)); + QCOMPARE(i, namedCapturesIndexMap.value(name)); + } + } + +} + void tst_QRegularExpression::pcreJitStackUsage_data() { QTest::addColumn("pattern"); diff --git a/tests/auto/corelib/tools/qregularexpression/tst_qregularexpression.h b/tests/auto/corelib/tools/qregularexpression/tst_qregularexpression.h index 6df7b80ac4..6de21f7cd0 100644 --- a/tests/auto/corelib/tools/qregularexpression/tst_qregularexpression.h +++ b/tests/auto/corelib/tools/qregularexpression/tst_qregularexpression.h @@ -72,6 +72,8 @@ private slots: void operatoreq(); void captureCount_data(); void captureCount(); + void captureNames_data(); + void captureNames(); void pcreJitStackUsage_data(); void pcreJitStackUsage(); void regularExpressionMatch_data(); -- cgit v1.2.3