diff options
Diffstat (limited to 'src/corelib/tools/qstring.cpp')
-rw-r--r-- | src/corelib/tools/qstring.cpp | 514 |
1 files changed, 227 insertions, 287 deletions
diff --git a/src/corelib/tools/qstring.cpp b/src/corelib/tools/qstring.cpp index 29b546770c..76069225eb 100644 --- a/src/corelib/tools/qstring.cpp +++ b/src/corelib/tools/qstring.cpp @@ -70,6 +70,7 @@ #include "qchar.cpp" #include "qstringmatcher.cpp" #include "qstringiterator_p.h" +#include "qstringalgorithms_p.h" #include "qthreadstorage.h" #ifdef Q_OS_WIN @@ -731,12 +732,30 @@ inline char qToLower(char ch) const QString::Null QString::null = { }; /*! + \macro QT_RESTRICTED_CAST_FROM_ASCII + \relates QString + + Defining this macro disables most automatic conversions from source + literals and 8-bit data to unicode QStrings, but allows the use of + the \c{QChar(char)} and \c{QString(const char (&ch)[N]} constructors, + and the \c{QString::operator=(const char (&ch)[N])} assignment operator + giving most of the type-safety benefits of QT_NO_CAST_FROM_ASCII + but does not require user code to wrap character and string literals + with QLatin1Char, QLatin1String or similar. + + Using this macro together with source strings outside the 7-bit range, + non-literals, or literals with embedded NUL characters is undefined. + + \sa QT_NO_CAST_FROM_ASCII, QT_NO_CAST_TO_ASCII +*/ + +/*! \macro QT_NO_CAST_FROM_ASCII \relates QString Disables automatic conversions from 8-bit strings (char *) to unicode QStrings - \sa QT_NO_CAST_TO_ASCII, QT_NO_CAST_FROM_BYTEARRAY + \sa QT_NO_CAST_TO_ASCII, QT_RESTRICTED_CAST_FROM_ASCII, QT_NO_CAST_FROM_BYTEARRAY */ /*! @@ -745,7 +764,7 @@ const QString::Null QString::null = { }; disables automatic conversion from QString to 8-bit strings (char *) - \sa QT_NO_CAST_FROM_ASCII, QT_NO_CAST_FROM_BYTEARRAY + \sa QT_NO_CAST_FROM_ASCII, QT_RESTRICTED_CAST_FROM_ASCII, QT_NO_CAST_FROM_BYTEARRAY */ /*! @@ -759,7 +778,7 @@ const QString::Null QString::null = { }; Note: This only works for compilers that support warnings for deprecated API. 
- \sa QT_NO_CAST_TO_ASCII, QT_NO_CAST_FROM_ASCII + \sa QT_NO_CAST_TO_ASCII, QT_NO_CAST_FROM_ASCII, QT_RESTRICTED_CAST_FROM_ASCII */ /*! @@ -993,6 +1012,9 @@ const QString::Null QString::null = { }; \list \li \c QT_NO_CAST_FROM_ASCII disables automatic conversions from C string literals and pointers to Unicode. + \li \c QT_RESTRICTED_CAST_FROM_ASCII allows automatic conversions + from C characters and character arrays, but disables automatic + conversions from character pointers to Unicode. \li \c QT_NO_CAST_TO_ASCII disables automatic conversion from QString to C strings. \endlist @@ -1310,6 +1332,12 @@ const QString::Null QString::null = { }; can be useful if you want to ensure that all user-visible strings go through QObject::tr(), for example. + \note Defining QT_RESTRICTED_CAST_FROM_ASCII also disables + this constructor, but enables a \c{QString(const char (&ch)[N])} + constructor instead. Using non-literal input, or input with + embedded NUL characters, or non-7-bit characters is undefined + in this case. + \sa fromLatin1(), fromLocal8Bit(), fromUtf8() */ @@ -1656,8 +1684,11 @@ void QString::resize(int size) void QString::reallocData(uint alloc, bool grow) { - if (grow) + if (grow) { + if (alloc > (uint(MaxAllocSize) - sizeof(Data)) / sizeof(QChar)) + qBadAlloc(); alloc = qAllocMore(alloc * sizeof(QChar), sizeof(Data)) / sizeof(QChar); + } if (d->ref.isShared() || IS_RAW_DATA(d)) { Data::AllocationOptions allocOptions(d->capacityReserved ? Data::CapacityReserved : 0); @@ -1748,10 +1779,11 @@ QString &QString::operator=(const QString &other) Assigns \a str to this string. The const char pointer is converted to Unicode using the fromUtf8() function. - You can disable this operator by defining \c - QT_NO_CAST_FROM_ASCII when you compile your applications. 
This - can be useful if you want to ensure that all user-visible strings + You can disable this operator by defining \c QT_NO_CAST_FROM_ASCII + or \c QT_RESTRICTED_CAST_FROM_ASCII when you compile your applications. + This can be useful if you want to ensure that all user-visible strings go through QObject::tr(), for example. + */ /*! \fn QString &QString::operator=(char ch) @@ -3664,14 +3696,39 @@ int QString::count(const QRegExp& rx) const */ int QString::indexOf(const QRegularExpression& re, int from) const { + return indexOf(re, from, Q_NULLPTR); +} + +/*! + \overload + \since 5.5 + + Returns the index position of the first match of the regular + expression \a re in the string, searching forward from index + position \a from. Returns -1 if \a re didn't match anywhere. + + If the match is successful and \a rmatch is not a null pointer, it also + writes the results of the match into the QRegularExpressionMatch object + pointed to by \a rmatch. + + Example: + + \snippet qstring/main.cpp 97 +*/ +int QString::indexOf(const QRegularExpression &re, int from, QRegularExpressionMatch *rmatch) const +{ if (!re.isValid()) { qWarning("QString::indexOf: invalid QRegularExpression object"); return -1; } QRegularExpressionMatch match = re.match(*this, from); - if (match.hasMatch()) - return match.capturedStart(); + if (match.hasMatch()) { + const int ret = match.capturedStart(); + if (rmatch) + *rmatch = qMove(match); + return ret; + } return -1; } @@ -3690,22 +3747,45 @@ int QString::indexOf(const QRegularExpression& re, int from) const */ int QString::lastIndexOf(const QRegularExpression &re, int from) const { + return lastIndexOf(re, from, Q_NULLPTR); +} + +/*! + \overload + \since 5.5 + + Returns the index position of the last match of the regular + expression \a re in the string, which starts before the index + position \a from. Returns -1 if \a re didn't match anywhere. 
+ + If the match is successful and \a rmatch is not a null pointer, it also + writes the results of the match into the QRegularExpressionMatch object + pointed to by \a rmatch. + + Example: + + \snippet qstring/main.cpp 98 +*/ +int QString::lastIndexOf(const QRegularExpression &re, int from, QRegularExpressionMatch *rmatch) const +{ if (!re.isValid()) { qWarning("QString::lastIndexOf: invalid QRegularExpression object"); return -1; } int endpos = (from < 0) ? (size() + from + 1) : (from + 1); - QRegularExpressionMatchIterator iterator = re.globalMatch(*this); int lastIndex = -1; while (iterator.hasNext()) { QRegularExpressionMatch match = iterator.next(); int start = match.capturedStart(); - if (start < endpos) + if (start < endpos) { lastIndex = start; - else + if (rmatch) + *rmatch = qMove(match); + } else { break; + } } return lastIndex; @@ -3719,12 +3799,7 @@ int QString::lastIndexOf(const QRegularExpression &re, int from) const */ bool QString::contains(const QRegularExpression &re) const { - if (!re.isValid()) { - qWarning("QString::contains: invalid QRegularExpression object"); - return false; - } - QRegularExpressionMatch match = re.match(*this); - return match.hasMatch(); + return contains(re, Q_NULLPTR); } /*! 
@@ -3744,13 +3819,13 @@ bool QString::contains(const QRegularExpression &re) const bool QString::contains(const QRegularExpression &re, QRegularExpressionMatch *match) const { if (!re.isValid()) { - qWarning("QString::contains: invalid QRegularExpresssion object"); + qWarning("QString::contains: invalid QRegularExpression object"); return false; } QRegularExpressionMatch m = re.match(*this); bool hasMatch = m.hasMatch(); if (hasMatch && match) - *match = m; + *match = qMove(m); return hasMatch; } @@ -3912,12 +3987,14 @@ QString QString::section(const QString &sep, int start, int end, SectionFlags fl #if !(defined(QT_NO_REGEXP) && defined(QT_NO_REGULAREXPRESSION)) class qt_section_chunk { public: - qt_section_chunk(int l, QString s) { length = l; string = s; } + qt_section_chunk() {} + qt_section_chunk(int l, QString s) : length(l), string(qMove(s)) {} int length; QString string; }; +Q_DECLARE_TYPEINFO(qt_section_chunk, Q_MOVABLE_TYPE); -static QString extractSections(const QList<qt_section_chunk> &sections, +static QString extractSections(const QVector<qt_section_chunk> &sections, int start, int end, QString::SectionFlags flags) @@ -4003,7 +4080,7 @@ QString QString::section(const QRegExp &reg, int start, int end, SectionFlags fl sep.setCaseSensitivity((flags & SectionCaseInsensitiveSeps) ? 
Qt::CaseInsensitive : Qt::CaseSensitive); - QList<qt_section_chunk> sections; + QVector<qt_section_chunk> sections; int n = length(), m = 0, last_m = 0, last_len = 0; while ((m = sep.indexIn(*this, m)) != -1) { sections.append(qt_section_chunk(last_len, QString(uc + last_m, m - last_m))); @@ -4048,7 +4125,7 @@ QString QString::section(const QRegularExpression &re, int start, int end, Secti if (flags & SectionCaseInsensitiveSeps) sep.setPatternOptions(sep.patternOptions() | QRegularExpression::CaseInsensitiveOption); - QList<qt_section_chunk> sections; + QVector<qt_section_chunk> sections; int n = length(), m = 0, last_m = 0, last_len = 0; QRegularExpressionMatchIterator iterator = sep.globalMatch(*this); while (iterator.hasNext()) { @@ -4626,6 +4703,8 @@ QString& QString::setUnicode(const QChar *unicode, int size) */ /*! + \fn QString QString::simplified() const + Returns a string that has whitespace removed from the start and the end, and that has each sequence of internal whitespace replaced with a single space. @@ -4640,83 +4719,19 @@ QString& QString::setUnicode(const QChar *unicode, int size) \sa trimmed() */ -QString QString::simplified() const +QString QString::simplified_helper(const QString &str) { - if (d->size == 0) - return *this; - - const QChar * const start = reinterpret_cast<QChar *>(d->data()); - const QChar *from = start; - const QChar *fromEnd = start + d->size; - forever { - QChar ch = *from; - if (!ch.isSpace()) - break; - if (++from == fromEnd) { - // All-whitespace string - QStringDataPtr empty = { Data::allocate(0) }; - return QString(empty); - } - } - // This loop needs no underflow check, as we already determined that - // the string contains non-whitespace. If the string has exactly one - // non-whitespace, it will be checked twice - we can live with that. - while (fromEnd[-1].isSpace()) - fromEnd--; - // The rest of the function depends on the fact that we already know - // that the last character in the source is no whitespace. 
- const QChar *copyFrom = from; - int copyCount; - forever { - if (++from == fromEnd) { - // Only leading and/or trailing whitespace, if any at all - return mid(copyFrom - start, from - copyFrom); - } - QChar ch = *from; - if (!ch.isSpace()) - continue; - if (ch != QLatin1Char(' ')) { - copyCount = from - copyFrom; - break; - } - ch = *++from; - if (ch.isSpace()) { - copyCount = from - copyFrom - 1; - break; - } - } - // 'from' now points at the non-trailing whitespace which made the - // string not simplified in the first place. 'copyCount' is the number - // of already simplified characters - at least one, obviously - - // without a trailing space. - QString result((fromEnd - from) + copyCount, Qt::Uninitialized); - QChar *to = reinterpret_cast<QChar *>(result.d->data()); - ::memcpy(to, copyFrom, copyCount * 2); - to += copyCount; - fromEnd--; - QChar ch; - forever { - *to++ = QLatin1Char(' '); - do { - ch = *++from; - } while (ch.isSpace()); - if (from == fromEnd) - break; - do { - *to++ = ch; - ch = *++from; - if (from == fromEnd) - goto done; - } while (!ch.isSpace()); + return QStringAlgorithms<const QString>::simplified_helper(str); +} - } - done: - *to++ = ch; - result.truncate(to - reinterpret_cast<QChar *>(result.d->data())); - return result; +QString QString::simplified_helper(QString &str) +{ + return QStringAlgorithms<QString>::simplified_helper(str); } /*! + \fn QString QString::trimmed() const + Returns a string that has whitespace removed from the start and the end. 
@@ -4732,27 +4747,14 @@ QString QString::simplified() const \sa simplified() */ -QString QString::trimmed() const +QString QString::trimmed_helper(const QString &str) { - if (d->size == 0) - return *this; - const QChar *s = (const QChar*)d->data(); - if (!s->isSpace() && !s[d->size-1].isSpace()) - return *this; - int start = 0; - int end = d->size - 1; - while (start<=end && s[start].isSpace()) // skip white space from start - start++; - if (start <= end) { // only white space - while (end && s[end].isSpace()) // skip white space from end - end--; - } - int l = end - start + 1; - if (l <= 0) { - QStringDataPtr empty = { Data::allocate(0) }; - return QString(empty); - } - return QString(s + start, l); + return QStringAlgorithms<const QString>::trimmed_helper(str); +} + +QString QString::trimmed_helper(QString &str) +{ + return QStringAlgorithms<QString>::trimmed_helper(str); } /*! \fn const QChar QString::at(int position) const @@ -5519,6 +5521,8 @@ QString QString::rightJustified(int width, QChar fill, bool truncate) const } /*! + \fn QString QString::toLower() const + Returns a lowercase copy of the string. 
\snippet qstring/main.cpp 75 @@ -5529,132 +5533,118 @@ QString QString::rightJustified(int width, QChar fill, bool truncate) const \sa toUpper(), QLocale::toLower() */ -QString QString::toLower() const +namespace QUnicodeTables { +struct LowercaseTraits { - const ushort *p = d->data(); - if (!p) - return *this; + static signed short caseDiff(const Properties *prop) + { return prop->lowerCaseDiff; } + static bool caseSpecial(const Properties *prop) + { return prop->lowerCaseSpecial; } +}; - const ushort *e = p + d->size; - // this avoids out of bounds check in the loop - while (e != p && QChar::isHighSurrogate(*(e - 1))) - --e; +struct UppercaseTraits +{ + static signed short caseDiff(const Properties *prop) + { return prop->upperCaseDiff; } + static bool caseSpecial(const Properties *prop) + { return prop->upperCaseSpecial; } +}; - const QUnicodeTables::Properties *prop; - while (p != e) { - if (QChar::isHighSurrogate(*p) && QChar::isLowSurrogate(p[1])) { - ushort high = *p++; - prop = qGetProp(QChar::surrogateToUcs4(high, *p)); +struct CasefoldTraits +{ + static signed short caseDiff(const Properties *prop) + { return prop->caseFoldDiff; } + static bool caseSpecial(const Properties *prop) + { return prop->caseFoldSpecial; } +}; + +template <typename Traits, typename T> +#ifdef Q_CC_MSVC +__declspec(noinline) +#elif defined(Q_CC_GNU) +__attribute__((noinline)) +#endif +static QString detachAndConvertCase(T &str, QStringIterator it) +{ + QString s = qMove(str); // will copy if T is const QString + QChar *pp = s.begin() + it.index(); // will detach if necessary + uint uc = it.nextUnchecked(); + forever { + const QUnicodeTables::Properties *prop = qGetProp(uc); + signed short caseDiff = Traits::caseDiff(prop); + + if (Q_UNLIKELY(Traits::caseSpecial(prop))) { + // slow path + const ushort *specialCase = specialCaseMap + caseDiff; + ushort length = *specialCase++; + int pos = pp - s.constBegin(); + s.replace(pos, 1, reinterpret_cast<const QChar *>(specialCase), length); 
+ pp = const_cast<QChar *>(s.constBegin()) + pos + length; + } else if (QChar::requiresSurrogates(uc)) { + *pp++ = QChar::highSurrogate(uc + caseDiff); + *pp++ = QChar::lowSurrogate(uc + caseDiff); } else { - prop = qGetProp(*p); + *pp++ = QChar(uc + caseDiff); } - if (prop->lowerCaseDiff) { - if (QChar::isLowSurrogate(*p)) - --p; // safe; diff is 0 for surrogates - QString s(d->size, Qt::Uninitialized); - memcpy(s.d->data(), d->data(), (p - d->data())*sizeof(ushort)); - ushort *pp = s.d->data() + (p - d->data()); - while (p != e) { - if (QChar::isHighSurrogate(*p) && QChar::isLowSurrogate(p[1])) { - *pp = *p++; - prop = qGetProp(QChar::surrogateToUcs4(*pp++, *p)); - } else { - prop = qGetProp(*p); - } - if (prop->lowerCaseSpecial) { - const ushort *specialCase = specialCaseMap + prop->lowerCaseDiff; - ushort length = *specialCase++; - int pos = pp - s.d->data(); - s.resize(s.d->size + length - 1); - pp = s.d->data() + pos; - while (length--) - *pp++ = *specialCase++; - } else { - *pp++ = *p + prop->lowerCaseDiff; - } - ++p; - } - - // this restores high surrogate parts eaten above, if any - while (e != d->data() + d->size) - *pp++ = *e++; + if (!it.hasNext()) return s; - } - ++p; + + uc = it.nextUnchecked(); } - return *this; } -/*! - Returns the case folded equivalent of the string. For most Unicode - characters this is the same as toLower(). 
-*/ -QString QString::toCaseFolded() const +template <typename Traits, typename T> +static QString convertCase(T &str) { - const ushort *p = d->data(); - if (!p) - return *this; + const QChar *p = str.constBegin(); + const QChar *e = p + str.size(); - const ushort *e = p + d->size; // this avoids out of bounds check in the loop - while (e != p && QChar::isHighSurrogate(*(e - 1))) + while (e != p && e[-1].isHighSurrogate()) --e; const QUnicodeTables::Properties *prop; - while (p != e) { - if (QChar::isHighSurrogate(*p) && QChar::isLowSurrogate(p[1])) { - ushort high = *p++; - prop = qGetProp(QChar::surrogateToUcs4(high, *p)); - } else { - prop = qGetProp(*p); - } - if (prop->caseFoldDiff) { - if (QChar::isLowSurrogate(*p)) - --p; // safe; diff is 0 for surrogates - QString s(d->size, Qt::Uninitialized); - memcpy(s.d->data(), d->data(), (p - d->data())*sizeof(ushort)); - ushort *pp = s.d->data() + (p - d->data()); - while (p != e) { - if (QChar::isHighSurrogate(*p) && QChar::isLowSurrogate(p[1])) { - *pp = *p++; - prop = qGetProp(QChar::surrogateToUcs4(*pp++, *p)); - } else { - prop = qGetProp(*p); - } - if (prop->caseFoldSpecial) { - const ushort *specialCase = specialCaseMap + prop->caseFoldDiff; - ushort length = *specialCase++; -#if 0 - int pos = pp - s.d->data; - s.resize(s.d->size + length - 1); - pp = s.d->data + pos; - while (length--) - *pp++ = *specialCase++; -#else - //### we currently don't support full case foldings - Q_ASSERT(length == 1); - Q_UNUSED(length) - *pp++ = *specialCase; -#endif - } else { - *pp++ = *p + prop->caseFoldDiff; - } - ++p; - } + QStringIterator it(p, e); + for ( ; it.hasNext(); it.advanceUnchecked()) { + prop = qGetProp(it.peekNextUnchecked()); + if (Traits::caseDiff(prop)) + return detachAndConvertCase<Traits>(str, it); + } + return qMove(str); +} +} // namespace QUnicodeTables - // this restores high surrogate parts eaten above, if any - while (e != d->data() + d->size) - *pp++ = *e++; +QString QString::toLower_helper(const 
QString &str) +{ + return QUnicodeTables::convertCase<QUnicodeTables::LowercaseTraits>(str); +} - return s; - } - ++p; - } - return *this; +QString QString::toLower_helper(QString &str) +{ + return QUnicodeTables::convertCase<QUnicodeTables::LowercaseTraits>(str); } /*! + \fn QString QString::toCaseFolded() const + + Returns the case folded equivalent of the string. For most Unicode + characters this is the same as toLower(). +*/ + +QString QString::toCaseFolded_helper(const QString &str) +{ + return QUnicodeTables::convertCase<QUnicodeTables::CasefoldTraits>(str); +} + +QString QString::toCaseFolded_helper(QString &str) +{ + return QUnicodeTables::convertCase<QUnicodeTables::CasefoldTraits>(str); +} + +/*! + \fn QString QString::toUpper() const + Returns an uppercase copy of the string. \snippet qstring/main.cpp 81 @@ -5664,63 +5654,18 @@ QString QString::toCaseFolded() const \sa toLower(), QLocale::toLower() */ -QString QString::toUpper() const -{ - const ushort *p = d->data(); - if (!p) - return *this; - const ushort *e = p + d->size; - // this avoids out of bounds check in the loop - while (e != p && QChar::isHighSurrogate(*(e - 1))) - --e; - - const QUnicodeTables::Properties *prop; - while (p != e) { - if (QChar::isHighSurrogate(*p) && QChar::isLowSurrogate(p[1])) { - ushort high = *p++; - prop = qGetProp(QChar::surrogateToUcs4(high, *p)); - } else { - prop = qGetProp(*p); - } - if (prop->upperCaseDiff) { - if (QChar::isLowSurrogate(*p)) - --p; // safe; diff is 0 for surrogates - QString s(d->size, Qt::Uninitialized); - memcpy(s.d->data(), d->data(), (p - d->data())*sizeof(ushort)); - ushort *pp = s.d->data() + (p - d->data()); - while (p != e) { - if (QChar::isHighSurrogate(*p) && QChar::isLowSurrogate(p[1])) { - *pp = *p++; - prop = qGetProp(QChar::surrogateToUcs4(*pp++, *p)); - } else { - prop = qGetProp(*p); - } - if (prop->upperCaseSpecial) { - const ushort *specialCase = specialCaseMap + prop->upperCaseDiff; - ushort length = *specialCase++; - int pos = 
pp - s.d->data(); - s.resize(s.d->size + length - 1); - pp = s.d->data() + pos; - while (length--) - *pp++ = *specialCase++; - } else { - *pp++ = *p + prop->upperCaseDiff; - } - ++p; - } - - // this restores high surrogate parts eaten above, if any - while (e != d->data() + d->size) - *pp++ = *e++; +QString QString::toUpper_helper(const QString &str) +{ + return QUnicodeTables::convertCase<QUnicodeTables::UppercaseTraits>(str); +} - return s; - } - ++p; - } - return *this; +QString QString::toUpper_helper(QString &str) +{ + return QUnicodeTables::convertCase<QUnicodeTables::UppercaseTraits>(str); } + // ### Qt 6: Consider whether this function shouldn't be removed See task 202871. /*! Safely builds a formatted string from the format string \a cformat @@ -9843,20 +9788,15 @@ QVector<uint> QStringRef::toUcs4() const */ QStringRef QStringRef::trimmed() const { - if (m_size == 0 || m_string == 0) + const QChar *begin = cbegin(); + const QChar *end = cend(); + QStringAlgorithms<const QStringRef>::trimmed_helper_positions(begin, end); + if (begin == cbegin() && end == cend()) return *this; - const QChar *s = m_string->constData() + m_position; - int start = 0; - int end = m_size - 1; - while (start <= end && s[start].isSpace()) // skip white space from start - start++; - if (start <= end) { // only white space - while (end && s[end].isSpace()) // skip white space from end - end--; - } - int l = end - start + 1; - Q_ASSERT(l >= 0); - return QStringRef(m_string, m_position + start, l); + if (begin == end) + return QStringRef(); + int position = m_position + (begin - cbegin()); + return QStringRef(m_string, position, end - begin); } /*! |