diff options
Diffstat (limited to 'src/corelib/tools/qregularexpression.cpp')
-rw-r--r-- | src/corelib/tools/qregularexpression.cpp | 188 |
1 files changed, 165 insertions, 23 deletions
diff --git a/src/corelib/tools/qregularexpression.cpp b/src/corelib/tools/qregularexpression.cpp index a18d8af898..a50c7da6cc 100644 --- a/src/corelib/tools/qregularexpression.cpp +++ b/src/corelib/tools/qregularexpression.cpp @@ -1,6 +1,7 @@ /**************************************************************************** ** ** Copyright (C) 2012 Giuseppe D'Angelo <dangelog@gmail.com>. +** Copyright (C) 2012 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com, author Giuseppe D'Angelo <giuseppe.dangelo@kdab.com> ** Copyright (C) 2013 Digia Plc and/or its subsidiary(-ies). ** Contact: http://www.qt-project.org/legal ** @@ -49,6 +50,7 @@ #include <QtCore/qdebug.h> #include <QtCore/qthreadstorage.h> #include <QtCore/qglobal.h> +#include <QtCore/qatomic.h> #include <pcre.h> @@ -725,6 +727,13 @@ QT_BEGIN_NAMESPACE that (in this text) there are other characters beyond the end of the subject string. This can lead to surprising results; see the discussion in the \l{partial matching} section for more details. + + \value NoMatch + No matching is done. This value is returned as the match type by a + default constructed QRegularExpressionMatch or + QRegularExpressionMatchIterator. Using this match type is not very + useful for the user, as no matching ever happens. This enum value + has been introduced in Qt 5.1. */ /*! @@ -793,7 +802,7 @@ struct QRegularExpressionPrivate : QSharedData void cleanCompiledPattern(); void compilePattern(); void getPatternInfo(); - pcre16_extra *optimizePattern(); + void optimizePattern(); QRegularExpressionMatchPrivate *doMatch(const QString &subject, int offset, @@ -819,7 +828,7 @@ struct QRegularExpressionPrivate : QSharedData // objects themselves; when the private is copied (i.e. a detach happened) // they are set to 0 pcre16 *compiledPattern; - pcre16_extra *studyData; + QAtomicPointer<pcre16_extra> studyData; const char *errorString; int errorOffset; int capturingCount; @@ -834,7 +843,7 @@ struct QRegularExpressionMatchPrivate : QSharedData const QString &subject, QRegularExpression::MatchType matchType, QRegularExpression::MatchOptions matchOptions, - int capturingCount); + int capturingCount = 0); QRegularExpressionMatch nextMatch() const; @@ -926,10 +935,10 @@ QRegularExpressionPrivate::QRegularExpressionPrivate(const QRegularExpressionPri void QRegularExpressionPrivate::cleanCompiledPattern() { pcre16_free(compiledPattern); - pcre16_free_study(studyData); + pcre16_free_study(studyData.load()); usedCount = 0; compiledPattern = 0; - studyData = 0; + studyData.store(0); usingCrLfNewlines = false; errorOffset = -1; capturingCount = 0; @@ -959,7 +968,7 @@ void QRegularExpressionPrivate::compilePattern() return; Q_ASSERT(errorCode == 0); - Q_ASSERT(studyData == 0); // studying (=>optimizing) is always done later + Q_ASSERT(studyData.load() == 0); // studying (=>optimizing) is always done later errorOffset = -1; getPatternInfo(); @@ -971,12 +980,13 @@ void QRegularExpressionPrivate::compilePattern() void QRegularExpressionPrivate::getPatternInfo() { Q_ASSERT(compiledPattern); + Q_ASSERT(studyData.load() == 0); pcre16_fullinfo(compiledPattern, 0, PCRE_INFO_CAPTURECOUNT, &capturingCount); // detect the settings for the newline unsigned long int patternNewlineSetting; - pcre16_fullinfo(compiledPattern, studyData, PCRE_INFO_OPTIONS, &patternNewlineSetting); + pcre16_fullinfo(compiledPattern, 0, PCRE_INFO_OPTIONS, &patternNewlineSetting); patternNewlineSetting &= PCRE_NEWLINE_CR | PCRE_NEWLINE_LF | PCRE_NEWLINE_CRLF | PCRE_NEWLINE_ANY | PCRE_NEWLINE_ANYCRLF; if (patternNewlineSetting == 0) { @@ -1004,6 +1014,14 @@ void QRegularExpressionPrivate::getPatternInfo() usingCrLfNewlines = (patternNewlineSetting == PCRE_NEWLINE_CRLF) || (patternNewlineSetting == PCRE_NEWLINE_ANY) || (patternNewlineSetting == PCRE_NEWLINE_ANYCRLF); + + int hasJOptionChanged; + pcre16_fullinfo(compiledPattern, 0, PCRE_INFO_JCHANGED, &hasJOptionChanged); + if (hasJOptionChanged) { + qWarning("QRegularExpressionPrivate::getPatternInfo(): the pattern '%s'\n" + " is using the (?J) option; duplicate capturing group names are not supported by Qt", + qPrintable(pattern)); + } } @@ -1084,17 +1102,18 @@ static bool isJitEnabled() leaving studyData to NULL); but before calling pcre16_exec to perform the match, another thread performs the studying and sets studyData to something else. Although the assignment to studyData is itself atomic, the release of - the memory pointed by studyData isn't. Therefore, the current studyData - value is returned and used by doMatch. + the memory pointed by studyData isn't. Therefore, we work on a local copy + (localStudyData) before using storeRelease on studyData. In doMatch there's + the corresponding loadAcquire. */ -pcre16_extra *QRegularExpressionPrivate::optimizePattern() +void QRegularExpressionPrivate::optimizePattern() { Q_ASSERT(compiledPattern); QMutexLocker lock(&mutex); - if (studyData || (++usedCount != qt_qregularexpression_optimize_after_use_count)) - return studyData; + if (studyData.load() || (++usedCount != qt_qregularexpression_optimize_after_use_count)) + return; static const bool enableJit = isJitEnabled(); @@ -1103,15 +1122,15 @@ pcre16_extra *QRegularExpressionPrivate::optimizePattern() studyOptions |= PCRE_STUDY_JIT_COMPILE; const char *err; - studyData = pcre16_study(compiledPattern, studyOptions, &err); + pcre16_extra * const localStudyData = pcre16_study(compiledPattern, studyOptions, &err); - if (studyData && studyData->flags & PCRE_EXTRA_EXECUTABLE_JIT) - pcre16_assign_jit_stack(studyData, qtPcreCallback, 0); + if (localStudyData && localStudyData->flags & PCRE_EXTRA_EXECUTABLE_JIT) + pcre16_assign_jit_stack(localStudyData, qtPcreCallback, 0); - if (!studyData && err) + if (!localStudyData && err) qWarning("QRegularExpressionPrivate::optimizePattern(): pcre_study failed: %s", err); - return studyData; + studyData.storeRelease(localStudyData); } /*! @@ -1193,19 +1212,33 @@ QRegularExpressionMatchPrivate *QRegularExpressionPrivate::doMatch(const QString QRegularExpression re(*const_cast<QRegularExpressionPrivate *>(this)); if (offset < 0 || offset > subject.length()) - return new QRegularExpressionMatchPrivate(re, subject, matchType, matchOptions, 0); + return new QRegularExpressionMatchPrivate(re, subject, matchType, matchOptions); if (!compiledPattern) { qWarning("QRegularExpressionPrivate::doMatch(): called on an invalid QRegularExpression object"); - return new QRegularExpressionMatchPrivate(re, subject, matchType, matchOptions, 0); + return new QRegularExpressionMatchPrivate(re, subject, matchType, matchOptions); + } + + // skip optimizing and doing the actual matching if NoMatch type was requested + if (matchType == QRegularExpression::NoMatch) { + QRegularExpressionMatchPrivate *priv = new QRegularExpressionMatchPrivate(re, subject, + matchType, matchOptions); + priv->isValid = true; + return priv; } + // capturingCount doesn't include the implicit "0" capturing group QRegularExpressionMatchPrivate *priv = new QRegularExpressionMatchPrivate(re, subject, matchType, matchOptions, - capturingCount); + capturingCount + 1); // this is mutex protected - const pcre16_extra *currentStudyData = const_cast<QRegularExpressionPrivate *>(this)->optimizePattern(); + const_cast<QRegularExpressionPrivate *>(this)->optimizePattern(); + + // work with a local copy of the study data, as we are running pcre_exec + // potentially more than once, and we don't want to run call it + // with different study data + const pcre16_extra * const currentStudyData = studyData.loadAcquire(); int pcreOptions = convertToPcreOptions(matchOptions); @@ -1311,8 +1344,10 @@ QRegularExpressionMatchPrivate::QRegularExpressionMatchPrivate(const QRegularExp hasMatch(false), hasPartialMatch(false), isValid(false) { Q_ASSERT(capturingCount >= 0); - const int captureOffsetsCount = (capturingCount + 1) * 3; - capturedOffsets.resize(captureOffsetsCount); + if (capturingCount > 0) { + const int captureOffsetsCount = capturingCount * 3; + capturedOffsets.resize(captureOffsetsCount); + } } @@ -1468,6 +1503,8 @@ void QRegularExpression::setPatternOptions(PatternOptions options) Returns the number of capturing groups inside the pattern string, or -1 if the regular expression is not valid. + \note The implicit capturing group 0 is \e{not} included in the returned number. + \sa isValid() */ int QRegularExpression::captureCount() const @@ -1478,6 +1515,71 @@ int QRegularExpression::captureCount() const } /*! + \since 5.1 + + Returns a list of captureCount() + 1 elements, containing the names of the + named capturing groups in the pattern string. The list is sorted such that + the element of the list at position \c{i} is the name of the \c{i}-th + capturing group, if it has a name, or an empty string if that capturing + group is unnamed. + + For instance, given the regular expression + + \code + (?<day>\d\d)-(?<month>\d\d)-(?<year>\d\d\d\d) (\w+) (?<name>\w+) + \endcode + + namedCaptureGroups() will return the following list: + + \code + ("", "day", "month", "year", "", "name") + \endcode + + which corresponds to the fact that the capturing group #0 (corresponding to + the whole match) has no name, the capturing group #1 has name "day", the + capturing group #2 has name "month", etc. + + If the regular expression is not valid, returns an empty list. + + \sa isValid(), QRegularExpressionMatch::captured(), QString::isEmpty() +*/ +QStringList QRegularExpression::namedCaptureGroups() const +{ + if (!isValid()) // isValid() will compile the pattern + return QStringList(); + + // namedCapturingTable will point to a table of + // namedCapturingTableEntryCount entries, each one of which + // contains one ushort followed by the name, NUL terminated. + // The ushort is the numerical index of the name in the pattern. + // The length of each entry is namedCapturingTableEntrySize. + ushort *namedCapturingTable; + int namedCapturingTableEntryCount; + int namedCapturingTableEntrySize; + + pcre16_fullinfo(d->compiledPattern, 0, PCRE_INFO_NAMETABLE, &namedCapturingTable); + pcre16_fullinfo(d->compiledPattern, 0, PCRE_INFO_NAMECOUNT, &namedCapturingTableEntryCount); + pcre16_fullinfo(d->compiledPattern, 0, PCRE_INFO_NAMEENTRYSIZE, &namedCapturingTableEntrySize); + + QStringList result; + + // no QList::resize nor fill is available. The +1 is for the implicit group #0 + result.reserve(d->capturingCount + 1); + for (int i = 0; i < d->capturingCount + 1; ++i) + result.append(QString()); + + for (int i = 0; i < namedCapturingTableEntryCount; ++i) { + const ushort * const currentNamedCapturingTableRow = namedCapturingTable + + namedCapturingTableEntrySize * i; + + const int index = *currentNamedCapturingTableRow; + result[index] = QString::fromUtf16(currentNamedCapturingTableRow + 1); + } + + return result; +} + +/*! Returns true if the regular expression is a valid regular expression (that is, it contains no syntax errors, etc.), or false otherwise. Use errorString() to obtain a textual description of the error. @@ -1639,6 +1741,26 @@ QString QRegularExpression::escape(const QString &str) } /*! + \since 5.1 + + Constructs a valid, empty QRegularExpressionMatch object. The regular + expression is set to a default-constructed one; the match type to + QRegularExpression::NoMatch and the match options to + QRegularExpression::NoMatchOption. + + The object will report no match through the hasMatch() and the + hasPartialMatch() member functions. +*/ +QRegularExpressionMatch::QRegularExpressionMatch() + : d(new QRegularExpressionMatchPrivate(QRegularExpression(), + QString(), + QRegularExpression::NoMatch, + QRegularExpression::NoMatchOption)) +{ + d->isValid = true; +} + +/*! Destroys the match result. */ QRegularExpressionMatch::~QRegularExpressionMatch() @@ -1982,6 +2104,26 @@ QRegularExpressionMatchIterator::QRegularExpressionMatchIterator(QRegularExpress } /*! + \since 5.1 + + Constructs an empty, valid QRegularExpressionMatchIterator object. The + regular expression is set to a default-constructed one; the match type to + QRegularExpression::NoMatch and the match options to + QRegularExpression::NoMatchOption. + + Invoking the hasNext() member function on the constructed object will + return false, as the iterator is not iterating on a valid sequence of + matches. +*/ +QRegularExpressionMatchIterator::QRegularExpressionMatchIterator() + : d(new QRegularExpressionMatchIteratorPrivate(QRegularExpression(), + QRegularExpression::NoMatch, + QRegularExpression::NoMatchOption, + QRegularExpressionMatch())) +{ +} + +/*! Destroys the QRegularExpressionMatchIterator object. */ QRegularExpressionMatchIterator::~QRegularExpressionMatchIterator() |