diff options
author | Giuseppe D'Angelo <dangelog@gmail.com> | 2012-02-12 01:04:16 +0000 |
---|---|---|
committer | Qt by Nokia <qt-info@nokia.com> | 2012-03-15 00:37:56 +0100 |
commit | c005c75080d6e40ac9fd8d458183aae32def9984 (patch) | |
tree | 8763db1cd8decf3638857537913285c2a2526879 /src/corelib/tools/qstring.cpp | |
parent | 2c944d6b9dd7d750835cca491aac60825cd7d1ed (diff) |
QRegularExpression: support for QString overloads
Added support for QString overloads taking a QRegularExpression.
Change-Id: I8608ab0b66e5fdd2e966992e1072cf1ef7883c8e
Reviewed-by: Lars Knoll <lars.knoll@nokia.com>
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
Diffstat (limited to 'src/corelib/tools/qstring.cpp')
-rw-r--r-- | src/corelib/tools/qstring.cpp | 434 |
1 files changed, 403 insertions, 31 deletions
diff --git a/src/corelib/tools/qstring.cpp b/src/corelib/tools/qstring.cpp index 584502f713..878403fe75 100644 --- a/src/corelib/tools/qstring.cpp +++ b/src/corelib/tools/qstring.cpp @@ -41,6 +41,7 @@ #include "qstringlist.h" #include "qregexp.h" +#include "qregularexpression.h" #include "qunicodetables_p.h" #ifndef QT_NO_TEXTCODEC #include <qtextcodec.h> @@ -1741,6 +1742,18 @@ QString &QString::remove(QChar ch, Qt::CaseSensitivity cs) */ /*! + \fn QString &QString::remove(const QRegularExpression &re) + \since 5.0 + + Removes every occurrence of the regular expression \a re in the + string, and returns a reference to the string. For example: + + \snippet doc/src/snippets/qstring/main.cpp 96 + + \sa indexOf(), lastIndexOf(), replace() +*/ + +/*! \fn QString &QString::replace(int position, int n, const QString &after) Replaces \a n characters beginning at index \a position with @@ -2923,6 +2936,138 @@ QString& QString::replace(const QRegExp &rx, const QString &after) } #endif +#ifndef QT_NO_REGEXP +#ifndef QT_BOOTSTRAPPED +/*! + \overload replace() + \since 5.0 + + Replaces every occurrence of the regular expression \a re in the + string with \a after. Returns a reference to the string. For + example: + + \snippet doc/src/snippets/qstring/main.cpp 87 + + For regular expressions containing capturing groups, + occurrences of \bold{\\1}, \bold{\\2}, ..., in \a after are replaced + with the string captured by the corresponding capturing group. 
+ + \snippet doc/src/snippets/qstring/main.cpp 88 + + \sa indexOf(), lastIndexOf(), remove(), QRegularExpression, QRegularExpressionMatch +*/ +QString &QString::replace(const QRegularExpression &re, const QString &after) +{ + if (!re.isValid()) { + qWarning("QString::replace: invalid QRegularExpresssion object"); + return *this; + } + + const QString copy(*this); + QRegularExpressionMatchIterator iterator = re.globalMatch(copy); + if (!iterator.hasNext()) // no matches at all + return *this; + + realloc(); + + int numCaptures = re.captureCount(); + + // 1. build the backreferences vector, holding where the backreferences + // are in the replacement string + QVector<QStringCapture> backReferences; + const int al = after.length(); + const QChar *ac = after.unicode(); + + for (int i = 0; i < al - 1; i++) { + if (ac[i] == QLatin1Char('\\')) { + int no = ac[i + 1].digitValue(); + if (no > 0 && no <= numCaptures) { + QStringCapture backReference; + backReference.pos = i; + backReference.len = 2; + + if (i < al - 2) { + int secondDigit = ac[i + 2].digitValue(); + if (secondDigit != -1 && ((no * 10) + secondDigit) <= numCaptures) { + no = (no * 10) + secondDigit; + ++backReference.len; + } + } + + backReference.no = no; + backReferences.append(backReference); + } + } + } + + // 2. iterate on the matches. 
For every match, copy in chunks + // - the part before the match + // - the after string, with the proper replacements for the backreferences + + int newLength = 0; // length of the new string, with all the replacements + int lastEnd = 0; + QVector<QStringRef> chunks; + while (iterator.hasNext()) { + QRegularExpressionMatch match = iterator.next(); + int len; + // add the part before the match + len = match.capturedStart() - lastEnd; + if (len > 0) { + chunks << copy.midRef(lastEnd, len); + newLength += len; + } + + lastEnd = 0; + // add the after string, with replacements for the backreferences + foreach (const QStringCapture &backReference, backReferences) { + // part of "after" before the backreference + len = backReference.pos - lastEnd; + if (len > 0) { + chunks << after.midRef(lastEnd, len); + newLength += len; + } + + // backreference itself + len = match.capturedLength(backReference.no); + if (len > 0) { + chunks << copy.midRef(match.capturedStart(backReference.no), len); + newLength += len; + } + + lastEnd = backReference.pos + backReference.len; + } + + // add the last part of the after string + len = after.length() - lastEnd; + if (len > 0) { + chunks << after.midRef(lastEnd, len); + newLength += len; + } + + lastEnd = match.capturedEnd(); + } + + // 3. trailing string after the last match + if (copy.length() > lastEnd) { + chunks << copy.midRef(lastEnd); + newLength += copy.length() - lastEnd; + } + + // 4. assemble the chunks together + resize(newLength); + int i = 0; + QChar *uc = data(); + foreach (const QStringRef &chunk, chunks) { + int len = chunk.length(); + memcpy(uc + i, chunk.unicode(), len * sizeof(QChar)); + i += len; + } + + return *this; +} +#endif // QT_BOOTSTRAPPED +#endif // QT_NO_REGEXP + /*! Returns the number of (potentially overlapping) occurrences of the string \a str in this string. @@ -3122,6 +3267,118 @@ int QString::count(const QRegExp& rx) const } #endif // QT_NO_REGEXP +#ifndef QT_NO_REGEXP +#ifndef QT_BOOTSTRAPPED +/*! 
+ \overload indexOf() + \since 5.0 + + Returns the index position of the first match of the regular + expression \a re in the string, searching forward from index + position \a from. Returns -1 if \a re didn't match anywhere. + + Example: + + \snippet doc/src/snippets/qstring/main.cpp 93 +*/ +int QString::indexOf(const QRegularExpression& re, int from) const +{ + if (!re.isValid()) { + qWarning("QString::indexOf: invalid QRegularExpresssion object"); + return -1; + } + + QRegularExpressionMatch match = re.match(*this, from); + if (match.hasMatch()) + return match.capturedStart(); + + return -1; +} + +/*! + \overload lastIndexOf() + \since 5.0 + + Returns the index position of the last match of the regular + expression \a re in the string, which starts before the index + position \a from. Returns -1 if \a re didn't match anywhere. + + Example: + + \snippet doc/src/snippets/qstring/main.cpp 94 +*/ +int QString::lastIndexOf(const QRegularExpression &re, int from) const +{ + if (!re.isValid()) { + qWarning("QString::lastIndexOf: invalid QRegularExpresssion object"); + return -1; + } + + int endpos = (from < 0) ? (size() + from + 1) : (from + 1); + + QRegularExpressionMatchIterator iterator = re.globalMatch(*this); + int lastIndex = -1; + while (iterator.hasNext()) { + QRegularExpressionMatch match = iterator.next(); + int start = match.capturedStart(); + if (start < endpos) + lastIndex = start; + else + break; + } + + return lastIndex; +} + +/*! \overload contains() + \since 5.0 + + Returns true if the regular expression \a re matches somewhere in + this string; otherwise returns false. +*/ +bool QString::contains(const QRegularExpression &re) const +{ + if (!re.isValid()) { + qWarning("QString::contains: invalid QRegularExpresssion object"); + return false; + } + QRegularExpressionMatch match = re.match(*this); + return match.hasMatch(); +} + +/*! + \overload count() + \since 5.0 + + Returns the number of times the regular expression \a re matches + in the string. 
+ + This function counts overlapping matches, so in the example + below, there are four instances of "ana" or "ama": + + \snippet doc/src/snippets/qstring/main.cpp 95 +*/ +int QString::count(const QRegularExpression &re) const +{ + if (!re.isValid()) { + qWarning("QString::count: invalid QRegularExpresssion object"); + return 0; + } + int count = 0; + int index = -1; + int len = length(); + while (index < len - 1) { + QRegularExpressionMatch match = re.match(*this, index + 1); + if (!match.hasMatch()) + break; + index = match.capturedStart(); + count++; + } + return count; +} +#endif // QT_BOOTSTRAPPED +#endif // QT_NO_REGEXP + /*! \fn int QString::count() const \overload count() @@ -3249,6 +3506,49 @@ public: QString string; }; +static QString extractSections(const QList<qt_section_chunk> &sections, + int start, + int end, + QString::SectionFlags flags) +{ + if (start < 0) + start += sections.count(); + if (end < 0) + end += sections.count(); + + QString ret; + int x = 0; + int first_i = start, last_i = end; + for (int i = 0; x <= end && i < sections.size(); ++i) { + const qt_section_chunk &section = sections.at(i); + const bool empty = (section.length == section.string.length()); + if (x >= start) { + if (x == start) + first_i = i; + if (x == end) + last_i = i; + if (x != start) + ret += section.string; + else + ret += section.string.mid(section.length); + } + if (!empty || !(flags & QString::SectionSkipEmpty)) + x++; + } + + if ((flags & QString::SectionIncludeLeadingSep) && first_i < sections.size()) { + const qt_section_chunk &section = sections.at(first_i); + ret.prepend(section.string.left(section.length)); + } + + if ((flags & QString::SectionIncludeTrailingSep) && last_i+1 <= sections.size()-1) { + const qt_section_chunk &section = sections.at(last_i+1); + ret += section.string.left(section.length); + } + + return ret; +} + /*! 
\overload section() @@ -3282,41 +3582,57 @@ QString QString::section(const QRegExp &reg, int start, int end, SectionFlags fl } sections.append(qt_section_chunk(last_len, QString(uc + last_m, n - last_m))); - if(start < 0) - start += sections.count(); - if(end < 0) - end += sections.count(); + return extractSections(sections, start, end, flags); +} +#endif - QString ret; - int x = 0; - int first_i = start, last_i = end; - for (int i = 0; x <= end && i < sections.size(); ++i) { - const qt_section_chunk &section = sections.at(i); - const bool empty = (section.length == section.string.length()); - if (x >= start) { - if(x == start) - first_i = i; - if(x == end) - last_i = i; - if(x != start) - ret += section.string; - else - ret += section.string.mid(section.length); - } - if (!empty || !(flags & SectionSkipEmpty)) - x++; - } - if((flags & SectionIncludeLeadingSep) && first_i < sections.size()) { - const qt_section_chunk &section = sections.at(first_i); - ret.prepend(section.string.left(section.length)); +#ifndef QT_NO_REGEXP +#ifndef QT_BOOTSTRAPPED +/*! + \overload section() + \since 5.0 + + This string is treated as a sequence of fields separated by the + regular expression, \a re. + + \snippet doc/src/snippets/qstring/main.cpp 89 + + \warning Using this QRegularExpression version is much more expensive than + the overloaded string and character versions. 
+ + \sa split() simplified() +*/ +QString QString::section(const QRegularExpression &re, int start, int end, SectionFlags flags) const +{ + if (!re.isValid()) { + qWarning("QString::section: invalid QRegularExpression object"); + return QString(); } - if((flags & SectionIncludeTrailingSep) && last_i+1 <= sections.size()-1) { - const qt_section_chunk §ion = sections.at(last_i+1); - ret += section.string.left(section.length); + + const QChar *uc = unicode(); + if (!uc) + return QString(); + + QRegularExpression sep(re); + if (flags & SectionCaseInsensitiveSeps) + sep.setPatternOptions(sep.patternOptions() | QRegularExpression::CaseInsensitiveOption); + + QList<qt_section_chunk> sections; + int n = length(), m = 0, last_m = 0, last_len = 0; + QRegularExpressionMatchIterator iterator = sep.globalMatch(*this); + while (iterator.hasNext()) { + QRegularExpressionMatch match = iterator.next(); + m = match.capturedStart(); + sections.append(qt_section_chunk(last_len, QString(uc + last_m, m - last_m))); + last_m = m; + last_len = match.capturedLength(); } - return ret; + sections.append(qt_section_chunk(last_len, QString(uc + last_m, n - last_m))); + + return extractSections(sections, start, end, flags); } -#endif +#endif // QT_BOOTSTRAPPED +#endif // QT_NO_REGEXP /*! Returns a substring that contains the \a n leftmost characters @@ -6077,6 +6393,62 @@ QStringList QString::split(const QRegExp &rx, SplitBehavior behavior) const } #endif +#ifndef QT_NO_REGEXP +#ifndef QT_BOOTSTRAPPED +/*! + \overload + \since 5.0 + + Splits the string into substrings wherever the regular expression + \a re matches, and returns the list of those strings. If \a re + does not match anywhere in the string, split() returns a + single-element list containing this string. 
+ + Here's an example where we extract the words in a sentence + using one or more whitespace characters as the separator: + + \snippet doc/src/snippets/qstring/main.cpp 90 + + Here's a similar example, but this time we use any sequence of + non-word characters as the separator: + + \snippet doc/src/snippets/qstring/main.cpp 91 + + Here's a third example where we use a zero-length assertion, + \bold{\\b} (word boundary), to split the string into an + alternating sequence of non-word and word tokens: + + \snippet doc/src/snippets/qstring/main.cpp 92 + + \sa QStringList::join(), section() +*/ +QStringList QString::split(const QRegularExpression &re, SplitBehavior behavior) const +{ + QStringList list; + if (!re.isValid()) { + qWarning("QString::split: invalid QRegularExpression object"); + return list; + } + + int start = 0; + int end = 0; + QRegularExpressionMatchIterator iterator = re.globalMatch(*this); + while (iterator.hasNext()) { + QRegularExpressionMatch match = iterator.next(); + end = match.capturedStart(); + if (start != end || behavior == KeepEmptyParts) + list.append(mid(start, end - start)); + start = match.capturedEnd(); + } + + if (start != size() || behavior == KeepEmptyParts) + list.append(mid(start)); + + return list; +} +#endif // QT_BOOTSTRAPPED +#endif // QT_NO_REGEXP + /*! \enum QString::NormalizationForm |