From 3888f5a251d6230cc290ec0ada211a6b45307615 Mon Sep 17 00:00:00 2001 From: Thiago Macieira Date: Tue, 29 Jul 2014 16:53:34 -0700 Subject: Unify and refactor QString::to{Upper,Lower,CaseFolded} This unifies the code for those three functions in one refactored template function, using QStringIterator. I don't think there's any loss of performance by doing that refactoring -- this is based on my reading of the disassembly, without running any benchmarks. Change-Id: I5893c6ed47462c473886c722a21577b1e8a23841 Reviewed-by: Marc Mutz --- src/corelib/tools/qstring.cpp | 237 ++++++++++++---------------------- src/corelib/tools/qstringiterator_p.h | 5 + 2 files changed, 84 insertions(+), 158 deletions(-) diff --git a/src/corelib/tools/qstring.cpp b/src/corelib/tools/qstring.cpp index 23c75f4e05..23329224e7 100644 --- a/src/corelib/tools/qstring.cpp +++ b/src/corelib/tools/qstring.cpp @@ -5510,6 +5510,8 @@ QString QString::rightJustified(int width, QChar fill, bool truncate) const } /*! + \fn QString QString::toLower() const + Returns a lowercase copy of the string. 
\snippet qstring/main.cpp 75 @@ -5520,129 +5522,100 @@ QString QString::rightJustified(int width, QChar fill, bool truncate) const \sa toUpper(), QLocale::toLower() */ -QString QString::toLower() const +namespace QUnicodeTables { +struct LowercaseTraits { - const ushort *p = d->data(); - if (!p) - return *this; + static signed short caseDiff(const Properties *prop) + { return prop->lowerCaseDiff; } + static bool caseSpecial(const Properties *prop) + { return prop->lowerCaseSpecial; } +}; - const ushort *e = p + d->size; - // this avoids out of bounds check in the loop - while (e != p && QChar::isHighSurrogate(*(e - 1))) - --e; +struct UppercaseTraits +{ + static signed short caseDiff(const Properties *prop) + { return prop->upperCaseDiff; } + static bool caseSpecial(const Properties *prop) + { return prop->upperCaseSpecial; } +}; - const QUnicodeTables::Properties *prop; - while (p != e) { - if (QChar::isHighSurrogate(*p) && QChar::isLowSurrogate(p[1])) { - ushort high = *p++; - prop = qGetProp(QChar::surrogateToUcs4(high, *p)); +struct CasefoldTraits +{ + static signed short caseDiff(const Properties *prop) + { return prop->caseFoldDiff; } + static bool caseSpecial(const Properties *prop) + { return prop->caseFoldSpecial; } +}; + +template <typename Traits> +#ifdef Q_CC_MSVC +__declspec(noinline) +#elif defined(Q_CC_GNU) +__attribute__((noinline)) +#endif +static QString detachAndConvertCase(const QString &str, QStringIterator it) +{ + QString s(str); + QChar *pp = s.begin() + it.index(); + uint uc = it.nextUnchecked(); + forever { + const QUnicodeTables::Properties *prop = qGetProp(uc); + signed short caseDiff = Traits::caseDiff(prop); + + if (Q_UNLIKELY(Traits::caseSpecial(prop))) { + // slow path + const ushort *specialCase = specialCaseMap + caseDiff; + ushort length = *specialCase++; + int pos = pp - s.constBegin(); + s.replace(pos, 1, reinterpret_cast<const QChar *>(specialCase), length); + pp = const_cast<QChar *>(s.constBegin()) + pos + length; + } else if (QChar::requiresSurrogates(uc)) { + *pp++ 
= QChar::highSurrogate(uc + caseDiff); + *pp++ = QChar::lowSurrogate(uc + caseDiff); } else { - prop = qGetProp(*p); + *pp++ = QChar(uc + caseDiff); } - if (prop->lowerCaseDiff) { - if (QChar::isLowSurrogate(*p)) - --p; // safe; diff is 0 for surrogates - QString s(d->size, Qt::Uninitialized); - memcpy(s.d->data(), d->data(), (p - d->data())*sizeof(ushort)); - ushort *pp = s.d->data() + (p - d->data()); - while (p != e) { - if (QChar::isHighSurrogate(*p) && QChar::isLowSurrogate(p[1])) { - *pp = *p++; - prop = qGetProp(QChar::surrogateToUcs4(*pp++, *p)); - } else { - prop = qGetProp(*p); - } - if (prop->lowerCaseSpecial) { - const ushort *specialCase = specialCaseMap + prop->lowerCaseDiff; - ushort length = *specialCase++; - int pos = pp - s.d->data(); - s.resize(s.d->size + length - 1); - pp = s.d->data() + pos; - while (length--) - *pp++ = *specialCase++; - } else { - *pp++ = *p + prop->lowerCaseDiff; - } - ++p; - } - - // this restores high surrogate parts eaten above, if any - while (e != d->data() + d->size) - *pp++ = *e++; + if (!it.hasNext()) return s; - } - ++p; + + uc = it.nextUnchecked(); } - return *this; } -/*! - Returns the case folded equivalent of the string. For most Unicode - characters this is the same as toLower(). 
-*/ -QString QString::toCaseFolded() const +template <typename Traits> +static inline QString convertCase(const QString &str) { - const ushort *p = d->data(); - if (!p) - return *this; + const QChar *p = str.constBegin(); + const QChar *e = p + str.size(); - const ushort *e = p + d->size; // this avoids out of bounds check in the loop - while (e != p && QChar::isHighSurrogate(*(e - 1))) + while (e != p && e[-1].isHighSurrogate()) --e; const QUnicodeTables::Properties *prop; - while (p != e) { - if (QChar::isHighSurrogate(*p) && QChar::isLowSurrogate(p[1])) { - ushort high = *p++; - prop = qGetProp(QChar::surrogateToUcs4(high, *p)); - } else { - prop = qGetProp(*p); - } - if (prop->caseFoldDiff) { - if (QChar::isLowSurrogate(*p)) - --p; // safe; diff is 0 for surrogates - QString s(d->size, Qt::Uninitialized); - memcpy(s.d->data(), d->data(), (p - d->data())*sizeof(ushort)); - ushort *pp = s.d->data() + (p - d->data()); - while (p != e) { - if (QChar::isHighSurrogate(*p) && QChar::isLowSurrogate(p[1])) { - *pp = *p++; - prop = qGetProp(QChar::surrogateToUcs4(*pp++, *p)); - } else { - prop = qGetProp(*p); - } - if (prop->caseFoldSpecial) { - const ushort *specialCase = specialCaseMap + prop->caseFoldDiff; - ushort length = *specialCase++; -#if 0 - int pos = pp - s.d->data; - s.resize(s.d->size + length - 1); - pp = s.d->data + pos; - while (length--) - *pp++ = *specialCase++; -#else - //### we currently don't support full case foldings - Q_ASSERT(length == 1); - Q_UNUSED(length) - *pp++ = *specialCase; -#endif - } else { - *pp++ = *p + prop->caseFoldDiff; - } - ++p; - } + QStringIterator it(p, e); + for ( ; it.hasNext(); it.advanceUnchecked()) { + prop = qGetProp(it.peekNextUnchecked()); + if (Traits::caseDiff(prop)) + return detachAndConvertCase<Traits>(str, it); + } + return str; +} +} // namespace QUnicodeTables - // this restores high surrogate parts eaten above, if any - while (e != d->data() + d->size) - *pp++ = *e++; +QString QString::toLower() const +{ + return 
QUnicodeTables::convertCase<QUnicodeTables::LowercaseTraits>(*this); +} - return s; - } - ++p; - } - return *this; +/*! + Returns the case folded equivalent of the string. For most Unicode + characters this is the same as toLower(). +*/ +QString QString::toCaseFolded() const +{ + return QUnicodeTables::convertCase<QUnicodeTables::CasefoldTraits>(*this); } /*! @@ -5657,59 +5630,7 @@ QString QString::toCaseFolded() const */ QString QString::toUpper() const { - const ushort *p = d->data(); - if (!p) - return *this; - - const ushort *e = p + d->size; - // this avoids out of bounds check in the loop - while (e != p && QChar::isHighSurrogate(*(e - 1))) - --e; - - const QUnicodeTables::Properties *prop; - while (p != e) { - if (QChar::isHighSurrogate(*p) && QChar::isLowSurrogate(p[1])) { - ushort high = *p++; - prop = qGetProp(QChar::surrogateToUcs4(high, *p)); - } else { - prop = qGetProp(*p); - } - if (prop->upperCaseDiff) { - if (QChar::isLowSurrogate(*p)) - --p; // safe; diff is 0 for surrogates - QString s(d->size, Qt::Uninitialized); - memcpy(s.d->data(), d->data(), (p - d->data())*sizeof(ushort)); - ushort *pp = s.d->data() + (p - d->data()); - while (p != e) { - if (QChar::isHighSurrogate(*p) && QChar::isLowSurrogate(p[1])) { - *pp = *p++; - prop = qGetProp(QChar::surrogateToUcs4(*pp++, *p)); - } else { - prop = qGetProp(*p); - } - if (prop->upperCaseSpecial) { - const ushort *specialCase = specialCaseMap + prop->upperCaseDiff; - ushort length = *specialCase++; - int pos = pp - s.d->data(); - s.resize(s.d->size + length - 1); - pp = s.d->data() + pos; - while (length--) - *pp++ = *specialCase++; - } else { - *pp++ = *p + prop->upperCaseDiff; - } - ++p; - } - - // this restores high surrogate parts eaten above, if any - while (e != d->data() + d->size) - *pp++ = *e++; - - return s; - } - ++p; - } - return *this; + return QUnicodeTables::convertCase<QUnicodeTables::UppercaseTraits>(*this); } // ### Qt 6: Consider whether this function shouldn't be removed See task 202871. 
diff --git a/src/corelib/tools/qstringiterator_p.h b/src/corelib/tools/qstringiterator_p.h index c3986f0477..1e231792a2 100644 --- a/src/corelib/tools/qstringiterator_p.h +++ b/src/corelib/tools/qstringiterator_p.h @@ -71,6 +71,11 @@ public: return pos; } + inline int index() const + { + return pos - i; + } + inline void setPosition(QString::const_iterator position) { Q_ASSERT_X(i <= position && position <= e, Q_FUNC_INFO, "position out of bounds"); -- cgit v1.2.3