summaryrefslogtreecommitdiffstats
path: root/src/corelib/tools/qregularexpression.cpp
diff options
context:
space:
mode:
authorGiuseppe D'Angelo <giuseppe.dangelo@kdab.com>2015-08-17 11:28:16 +0200
committerGiuseppe D'Angelo <giuseppe.dangelo@kdab.com>2015-08-18 14:18:48 +0000
commitee15bef3ea24a78e5fd4708c0a1cebee2e4fac5d (patch)
treecf1322ef5c14b2ae77602509fff582561dba9503 /src/corelib/tools/qregularexpression.cpp
parent4738b450d26b6cdaed6f9f11f0dc22b842c124df (diff)
QRegularExpression: fix matching over QStringRefs
Playing with the offset argument of pcre_exec is not equivalent to adjusting the pointer to the subject string. In particular, PCRE can go behind the offset to check for lookbehinds or "transition" metacharacters (\b, \B, etc.). This made the code that deals with QStringRefs not matching in behavior with the corresponding code dealing with QStrings. For instance, QString subject("Miss"); QRegularExpression re("(?<=M)iss"); re.match(subject.mid(1)); // doesn't match re.match(subject.midRef(1)); // matches!!! Instead, actually adjust the pointer to the subject string so that the behavior is identical. A broken test that relied on the equivalence is also removed. Change-Id: If96333241ef59621d7f5a6a170ebd0a186844874 Reviewed-by: Volker Krause <volker.krause@kdab.com> Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
Diffstat (limited to 'src/corelib/tools/qregularexpression.cpp')
-rw-r--r--src/corelib/tools/qregularexpression.cpp45
1 files changed, 21 insertions, 24 deletions
diff --git a/src/corelib/tools/qregularexpression.cpp b/src/corelib/tools/qregularexpression.cpp
index 2e3c2ca79f..070e68154f 100644
--- a/src/corelib/tools/qregularexpression.cpp
+++ b/src/corelib/tools/qregularexpression.cpp
@@ -1,7 +1,7 @@
/****************************************************************************
**
-** Copyright (C) 2012 Giuseppe D'Angelo <dangelog@gmail.com>.
-** Copyright (C) 2012 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com, author Giuseppe D'Angelo <giuseppe.dangelo@kdab.com>
+** Copyright (C) 2015 Giuseppe D'Angelo <dangelog@gmail.com>.
+** Copyright (C) 2015 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com, author Giuseppe D'Angelo <giuseppe.dangelo@kdab.com>
** Copyright (C) 2015 The Qt Company Ltd.
** Contact: http://www.qt.io/licensing/
**
@@ -1325,48 +1325,45 @@ QRegularExpressionMatchPrivate *QRegularExpressionPrivate::doMatch(const QString
int * const captureOffsets = priv->capturedOffsets.data();
const int captureOffsetsCount = priv->capturedOffsets.size();
- int realOffset = offset + subjectStart;
- const int realSubjectLength = subjectLength + subjectStart;
-
- const unsigned short * const subjectUtf16 = subject.utf16();
+ const unsigned short * const subjectUtf16 = subject.utf16() + subjectStart;
int result;
if (!previousMatchWasEmpty) {
result = pcre16SafeExec(compiledPattern, currentStudyData,
- subjectUtf16, realSubjectLength,
- realOffset, pcreOptions,
+ subjectUtf16, subjectLength,
+ offset, pcreOptions,
captureOffsets, captureOffsetsCount);
} else {
result = pcre16SafeExec(compiledPattern, currentStudyData,
- subjectUtf16, realSubjectLength,
- realOffset, pcreOptions | PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED,
+ subjectUtf16, subjectLength,
+ offset, pcreOptions | PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED,
captureOffsets, captureOffsetsCount);
if (result == PCRE_ERROR_NOMATCH) {
- ++realOffset;
+ ++offset;
if (usingCrLfNewlines
- && realOffset < realSubjectLength
- && subjectUtf16[realOffset - 1] == QLatin1Char('\r')
- && subjectUtf16[realOffset] == QLatin1Char('\n')) {
- ++realOffset;
- } else if (realOffset < realSubjectLength
- && QChar::isLowSurrogate(subjectUtf16[realOffset])) {
- ++realOffset;
+ && offset < subjectLength
+ && subjectUtf16[offset - 1] == QLatin1Char('\r')
+ && subjectUtf16[offset] == QLatin1Char('\n')) {
+ ++offset;
+ } else if (offset < subjectLength
+ && QChar::isLowSurrogate(subjectUtf16[offset])) {
+ ++offset;
}
result = pcre16SafeExec(compiledPattern, currentStudyData,
- subjectUtf16, realSubjectLength,
- realOffset, pcreOptions,
+ subjectUtf16, subjectLength,
+ offset, pcreOptions,
captureOffsets, captureOffsetsCount);
}
}
#ifdef QREGULAREXPRESSION_DEBUG
qDebug() << "Matching" << pattern << "against" << subject
- << "starting at" << subjectStart << "len" << subjectLength << "real len" << realSubjectLength
- << "offset" << offset << "real offset" << realOffset
+ << "starting at" << subjectStart << "len" << subjectLength
+ << "offset" << offset
<< matchType << matchOptions << previousMatchWasEmpty
<< "result" << result;
#endif
@@ -2041,7 +2038,7 @@ QString QRegularExpressionMatch::captured(int nth) const
if (start == -1) // didn't capture
return QString();
- return d->subject.mid(start, capturedLength(nth));
+ return d->subject.mid(start + d->subjectStart, capturedLength(nth));
}
/*!
@@ -2062,7 +2059,7 @@ QStringRef QRegularExpressionMatch::capturedRef(int nth) const
if (start == -1) // didn't capture
return QStringRef();
- return d->subject.midRef(start, capturedLength(nth));
+ return d->subject.midRef(start + d->subjectStart, capturedLength(nth));
}
/*!