diff options
Diffstat (limited to 'src/corelib/tools/qregularexpression.cpp')
-rw-r--r-- | src/corelib/tools/qregularexpression.cpp | 156 |
1 files changed, 121 insertions, 35 deletions
diff --git a/src/corelib/tools/qregularexpression.cpp b/src/corelib/tools/qregularexpression.cpp index 33894927fe..cbc5a0486e 100644 --- a/src/corelib/tools/qregularexpression.cpp +++ b/src/corelib/tools/qregularexpression.cpp @@ -2,8 +2,8 @@ ** ** Copyright (C) 2012 Giuseppe D'Angelo <dangelog@gmail.com>. ** Copyright (C) 2012 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com, author Giuseppe D'Angelo <giuseppe.dangelo@kdab.com> -** Copyright (C) 2014 Digia Plc and/or its subsidiary(-ies). -** Contact: http://www.qt-project.org/legal +** Copyright (C) 2015 The Qt Company Ltd. +** Contact: http://www.qt.io/licensing/ ** ** This file is part of the QtCore module of the Qt Toolkit. ** @@ -12,9 +12,9 @@ ** Licensees holding valid commercial Qt licenses may use this file in ** accordance with the commercial license agreement provided with the ** Software or, alternatively, in accordance with the terms contained in -** a written agreement between you and Digia. For licensing terms and -** conditions see http://qt.digia.com/licensing. For further information -** use the contact form at http://qt.digia.com/contact-us. +** a written agreement between you and The Qt Company. For licensing terms +** and conditions see http://www.qt.io/terms-conditions. For further +** information use the contact form at http://www.qt.io/contact-us. ** ** GNU Lesser General Public License Usage ** Alternatively, this file may be used under the terms of the GNU Lesser @@ -25,8 +25,8 @@ ** requirements will be met: https://www.gnu.org/licenses/lgpl.html and ** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. ** -** In addition, as a special exception, Digia gives you certain additional -** rights. These rights are described in the Digia Qt LGPL Exception +** As a special exception, The Qt Company gives you certain additional +** rights. These rights are described in The Qt Company LGPL Exception ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package. ** ** $QT_END_LICENSE$ @@ -835,6 +835,8 @@ struct QRegularExpressionPrivate : QSharedData }; QRegularExpressionMatchPrivate *doMatch(const QString &subject, + int subjectStartPos, + int subjectLength, int offset, QRegularExpression::MatchType matchType, QRegularExpression::MatchOptions matchOptions, @@ -872,6 +874,8 @@ struct QRegularExpressionMatchPrivate : QSharedData { QRegularExpressionMatchPrivate(const QRegularExpression &re, const QString &subject, + int subjectStart, + int subjectLength, QRegularExpression::MatchType matchType, QRegularExpression::MatchOptions matchOptions, int capturingCount = 0); @@ -884,6 +888,9 @@ struct QRegularExpressionMatchPrivate : QSharedData // for each captured substring QVector<int> capturedOffsets; + const int subjectStart; + const int subjectLength; + const QRegularExpression::MatchType matchType; const QRegularExpression::MatchOptions matchOptions; @@ -1219,14 +1226,21 @@ static int pcre16SafeExec(const pcre16 *code, const pcre16_extra *extra, /*! \internal - Performs a match of type \a matchType on the given \a subject string with - options \a matchOptions and returns the QRegularExpressionMatchPrivate of - the result. It also advances a match if a previous result is given as \a + Performs a match on the substring of the given \a subject string, + substring which starts from \a subjectStart and up to + (but not including) \a subjectStart + \a subjectLength. The match + will be of type \a matchType and using the options \a matchOptions; + the matching \a offset is relative the substring, + and if negative, it's taken as an offset from the end of the substring. + + It also advances a match if a previous result is given as \a previous. The \a subject string goes a Unicode validity check if \a checkSubjectString is CheckSubjectString and the match options don't include DontCheckSubjectStringMatchOption (PCRE doesn't like illegal UTF-16 sequences). + Returns the QRegularExpressionMatchPrivate of the result. + Advancing a match is a tricky algorithm. If the previous match matched a non-empty string, we just do an ordinary match at the offset position. @@ -1239,6 +1253,8 @@ static int pcre16SafeExec(const pcre16 *code, const pcre16_extra *extra, must advance over it. */ QRegularExpressionMatchPrivate *QRegularExpressionPrivate::doMatch(const QString &subject, + int subjectStart, + int subjectLength, int offset, QRegularExpression::MatchType matchType, QRegularExpression::MatchOptions matchOptions, @@ -1246,21 +1262,22 @@ QRegularExpressionMatchPrivate *QRegularExpressionPrivate::doMatch(const QString const QRegularExpressionMatchPrivate *previous) const { if (offset < 0) - offset += subject.length(); + offset += subjectLength; QRegularExpression re(*const_cast<QRegularExpressionPrivate *>(this)); - if (offset < 0 || offset > subject.length()) - return new QRegularExpressionMatchPrivate(re, subject, matchType, matchOptions); + if (offset < 0 || offset > subjectLength) + return new QRegularExpressionMatchPrivate(re, subject, subjectStart, subjectLength, matchType, matchOptions); if (!compiledPattern) { qWarning("QRegularExpressionPrivate::doMatch(): called on an invalid QRegularExpression object"); - return new QRegularExpressionMatchPrivate(re, subject, matchType, matchOptions); + return new QRegularExpressionMatchPrivate(re, subject, subjectStart, subjectLength, matchType, matchOptions); } // skip optimizing and doing the actual matching if NoMatch type was requested if (matchType == QRegularExpression::NoMatch) { QRegularExpressionMatchPrivate *priv = new QRegularExpressionMatchPrivate(re, subject, + subjectStart, subjectLength, matchType, matchOptions); priv->isValid = true; return priv; @@ -1268,6 +1285,7 @@ QRegularExpressionMatchPrivate *QRegularExpressionPrivate::doMatch(const QString // capturingCount doesn't include the implicit "0" capturing group QRegularExpressionMatchPrivate *priv = new QRegularExpressionMatchPrivate(re, subject, + subjectStart, subjectLength, matchType, matchOptions, capturingCount + 1); @@ -1307,45 +1325,49 @@ QRegularExpressionMatchPrivate *QRegularExpressionPrivate::doMatch(const QString int * const captureOffsets = priv->capturedOffsets.data(); const int captureOffsetsCount = priv->capturedOffsets.size(); + int realOffset = offset + subjectStart; + const int realSubjectLength = subjectLength + subjectStart; + const unsigned short * const subjectUtf16 = subject.utf16(); - const int subjectLength = subject.length(); int result; if (!previousMatchWasEmpty) { result = pcre16SafeExec(compiledPattern, currentStudyData, - subjectUtf16, subjectLength, - offset, pcreOptions, + subjectUtf16, realSubjectLength, + realOffset, pcreOptions, captureOffsets, captureOffsetsCount); } else { result = pcre16SafeExec(compiledPattern, currentStudyData, - subjectUtf16, subjectLength, - offset, pcreOptions | PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED, + subjectUtf16, realSubjectLength, + realOffset, pcreOptions | PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED, captureOffsets, captureOffsetsCount); if (result == PCRE_ERROR_NOMATCH) { - ++offset; + ++realOffset; if (usingCrLfNewlines - && offset < subjectLength - && subjectUtf16[offset - 1] == QLatin1Char('\r') - && subjectUtf16[offset] == QLatin1Char('\n')) { - ++offset; - } else if (offset < subjectLength - && QChar::isLowSurrogate(subjectUtf16[offset])) { - ++offset; + && realOffset < realSubjectLength + && subjectUtf16[realOffset - 1] == QLatin1Char('\r') + && subjectUtf16[realOffset] == QLatin1Char('\n')) { + ++realOffset; + } else if (realOffset < realSubjectLength + && QChar::isLowSurrogate(subjectUtf16[realOffset])) { + ++realOffset; } result = pcre16SafeExec(compiledPattern, currentStudyData, - subjectUtf16, subjectLength, - offset, pcreOptions, + subjectUtf16, realSubjectLength, + realOffset, pcreOptions, captureOffsets, captureOffsetsCount); } } #ifdef QREGULAREXPRESSION_DEBUG qDebug() << "Matching" << pattern << "against" << subject - << offset << matchType << matchOptions << previousMatchWasEmpty + << "starting at" << subjectStart << "len" << subjectLength << "real len" << realSubjectLength + << "offset" << offset << "real offset" << realOffset + << matchType << matchOptions << previousMatchWasEmpty << "result" << result; #endif @@ -1383,10 +1405,13 @@ QRegularExpressionMatchPrivate *QRegularExpressionPrivate::doMatch(const QString */ QRegularExpressionMatchPrivate::QRegularExpressionMatchPrivate(const QRegularExpression &re, const QString &subject, + int subjectStart, + int subjectLength, QRegularExpression::MatchType matchType, QRegularExpression::MatchOptions matchOptions, int capturingCount) : regularExpression(re), subject(subject), + subjectStart(subjectStart), subjectLength(subjectLength), matchType(matchType), matchOptions(matchOptions), capturedCount(0), hasMatch(false), hasPartialMatch(false), isValid(false) @@ -1412,6 +1437,8 @@ QRegularExpressionMatch QRegularExpressionMatchPrivate::nextMatch() const // then that subject was already checked at least once (when this object // was created, or when the object that created this one was created, etc.) QRegularExpressionMatchPrivate *nextPrivate = regularExpression.d->doMatch(subject, + subjectStart, + subjectLength, capturedOffsets.at(1), matchType, matchOptions, @@ -1684,7 +1711,33 @@ QRegularExpressionMatch QRegularExpression::match(const QString &subject, { d.data()->compilePattern(); - QRegularExpressionMatchPrivate *priv = d->doMatch(subject, offset, matchType, matchOptions); + QRegularExpressionMatchPrivate *priv = d->doMatch(subject, 0, subject.length(), offset, matchType, matchOptions); + return QRegularExpressionMatch(*priv); +} + +/*! + \since 5.5 + \overload + + Attempts to match the regular expression against the given \a subjectRef + string reference, starting at the position \a offset inside the subject, using a + match of type \a matchType and honoring the given \a matchOptions. + + The returned QRegularExpressionMatch object contains the results of the + match. + + \sa QRegularExpressionMatch, {normal matching} +*/ +QRegularExpressionMatch QRegularExpression::match(const QStringRef &subjectRef, + int offset, + MatchType matchType, + MatchOptions matchOptions) const +{ + d.data()->compilePattern(); + + const QString subject = subjectRef.string() ? *subjectRef.string() : QString(); + + QRegularExpressionMatchPrivate *priv = d->doMatch(subject, subjectRef.position(), subjectRef.length(), offset, matchType, matchOptions); return QRegularExpressionMatch(*priv); } @@ -1714,6 +1767,34 @@ QRegularExpressionMatchIterator QRegularExpression::globalMatch(const QString &s } /*! + \since 5.5 + \overload + + Attempts to perform a global match of the regular expression against the + given \a subjectRef string reference, starting at the position \a offset inside the + subject, using a match of type \a matchType and honoring the given \a + matchOptions. + + The returned QRegularExpressionMatchIterator is positioned before the + first match result (if any). + + \sa QRegularExpressionMatchIterator, {global matching} +*/ +QRegularExpressionMatchIterator QRegularExpression::globalMatch(const QStringRef &subjectRef, + int offset, + MatchType matchType, + MatchOptions matchOptions) const +{ + QRegularExpressionMatchIteratorPrivate *priv = + new QRegularExpressionMatchIteratorPrivate(*this, + matchType, + matchOptions, + match(subjectRef, offset, matchType, matchOptions)); + + return QRegularExpressionMatchIterator(*priv); +} + +/*! \since 5.4 Forces an immediate optimization of the pattern, including @@ -1823,6 +1904,8 @@ QString QRegularExpression::escape(const QString &str) QRegularExpressionMatch::QRegularExpressionMatch() : d(new QRegularExpressionMatchPrivate(QRegularExpression(), QString(), + 0, + 0, QRegularExpression::NoMatch, QRegularExpression::NoMatchOption)) { @@ -2361,8 +2444,9 @@ QDataStream &operator>>(QDataStream &in, QRegularExpression &re) */ QDebug operator<<(QDebug debug, const QRegularExpression &re) { + QDebugStateSaver saver(debug); debug.nospace() << "QRegularExpression(" << re.pattern() << ", " << re.patternOptions() << ")"; - return debug.space(); + return debug; } /*! @@ -2375,6 +2459,7 @@ QDebug operator<<(QDebug debug, const QRegularExpression &re) */ QDebug operator<<(QDebug debug, QRegularExpression::PatternOptions patternOptions) { + QDebugStateSaver saver(debug); QByteArray flags; if (patternOptions == QRegularExpression::NoPatternOption) { @@ -2404,7 +2489,7 @@ QDebug operator<<(QDebug debug, QRegularExpression::PatternOptions patternOption debug.nospace() << "QRegularExpression::PatternOptions(" << flags << ")"; - return debug.space(); + return debug; } /*! \relates QRegularExpressionMatch @@ -2416,11 +2501,12 @@ QDebug operator<<(QDebug debug, QRegularExpression::PatternOptions patternOption */ QDebug operator<<(QDebug debug, const QRegularExpressionMatch &match) { + QDebugStateSaver saver(debug); debug.nospace() << "QRegularExpressionMatch("; if (!match.isValid()) { debug << "Invalid)"; - return debug.space(); + return debug; } debug << "Valid"; @@ -2445,7 +2531,7 @@ QDebug operator<<(QDebug debug, const QRegularExpressionMatch &match) debug << ")"; - return debug.space(); + return debug; } #endif |