diff options
author | João Abecasis <joao.abecasis@nokia.com> | 2012-02-29 00:57:12 +0100 |
---|---|---|
committer | João Abecasis <joao.abecasis@nokia.com> | 2012-02-29 00:58:13 +0100 |
commit | c4ad58ed2252d5ed9f448a5c068ab33dce4cadd9 (patch) | |
tree | 7e9802171d7b4c641c2de1ef781023cab5d7d14c /src/corelib/tools/qstring.cpp | |
parent | 7da3a61b5fd5cc726f8fd62691aa5f84c7929800 (diff) | |
parent | fa1b9070af66edb81b2a3735c1951f78b22bd666 (diff) |
Merge remote-tracking branch 'gerrit/master' into containers
Change-Id: I97ba222435ff50a9e5422e6f2c73e4bb8d1b865c
Diffstat (limited to 'src/corelib/tools/qstring.cpp')
-rw-r--r-- | src/corelib/tools/qstring.cpp | 232 |
1 files changed, 109 insertions, 123 deletions
diff --git a/src/corelib/tools/qstring.cpp b/src/corelib/tools/qstring.cpp index be6f48808c..d0c2dd7148 100644 --- a/src/corelib/tools/qstring.cpp +++ b/src/corelib/tools/qstring.cpp @@ -98,10 +98,6 @@ QT_BEGIN_NAMESPACE -#ifndef QT_NO_TEXTCODEC -QTextCodec *QString::codecForCStrings; -#endif - #ifdef QT_USE_ICU // qlocale_icu.cpp extern bool qt_ucol_strcoll(const QChar *source, int sourceLength, const QChar *target, int targetLength, int *result); @@ -473,9 +469,8 @@ const QString::Null QString::null = { }; \snippet doc/src/snippets/qstring/main.cpp 0 QString converts the \c{const char *} data into Unicode using the - fromAscii() function. By default, fromAscii() treats character - above 128 as Latin-1 characters, but this can be changed by - calling QTextCodec::setCodecForCStrings(). + fromAscii() function. fromAscii() treats ordinals above 128 as Latin-1 + characters. In all of the QString functions that take \c{const char *} parameters, the \c{const char *} is interpreted as a classic @@ -613,9 +608,7 @@ const QString::Null QString::null = { }; toLatin1(), toUtf8(), and toLocal8Bit(). \list - \o toAscii() returns an 8-bit string encoded using the codec - specified by QTextCodec::codecForCStrings (by default, that is - Latin 1). + \o toAscii() returns a Latin-1 (ISO 8859-1) encoded 8-bit string. \o toLatin1() returns a Latin-1 (ISO 8859-1) encoded 8-bit string. \o toUtf8() returns a UTF-8 encoded 8-bit string. UTF-8 is a superset of US-ASCII (ANSI X3.4-1986) that supports the entire @@ -723,11 +716,11 @@ const QString::Null QString::null = { }; \section1 More Efficient String Construction Many strings are known at compile time. But the trivial - constructor QString("Hello"), will convert the string literal - to a QString using the codecForCStrings(). To avoid this one - can use the QStringLiteral macro to directly create the required - data at compile time. Constructing a QString out of the literal - does then not cause any overhead at runtime. + constructor QString("Hello"), will copy the contents of the string, + treating the contents as Latin-1. To avoid this one can use the + QStringLiteral macro to directly create the required data at compile + time. Constructing a QString out of the literal does then not cause + any overhead at runtime. A slightly less efficient way is to use QLatin1String. This class wraps a C string literal, precalculates it length at compile time and can @@ -3631,9 +3624,7 @@ QByteArray QString::toLatin1() const /*! Returns an 8-bit representation of the string as a QByteArray. - If a codec has been set using QTextCodec::setCodecForCStrings(), - it is used to convert Unicode to 8-bit char; otherwise this - function does the same as toLatin1(). + This function does the same as toLatin1(). Note that, despite the name, this function does not necessarily return an US-ASCII (ANSI X3.4-1986) string and its result may not be US-ASCII compatible. @@ -3642,10 +3633,6 @@ QByteArray QString::toLatin1() const */ QByteArray QString::toAscii() const { -#ifndef QT_NO_TEXTCODEC - if (codecForCStrings) - return codecForCStrings->fromUnicode(*this); -#endif // QT_NO_TEXTCODEC return toLatin1(); } @@ -3779,23 +3766,6 @@ QString::Data *QString::fromLatin1_helper(const char *str, int size) QString::Data *QString::fromAscii_helper(const char *str, int size) { -#ifndef QT_NO_TEXTCODEC - if (codecForCStrings) { - Data *d; - if (!str) { - d = const_cast<Data *>(&shared_null.str); - } else if (size == 0 || (!*str && size < 0)) { - d = const_cast<Data *>(&shared_empty.str); - } else { - if (size < 0) - size = qstrlen(str); - QString s = codecForCStrings->toUnicode(str, size); - d = s.d; - d->ref.ref(); - } - return d; - } -#endif return fromLatin1_helper(str, size); } @@ -3844,11 +3814,7 @@ QString QString::fromLocal8Bit_helper(const char *str, int size) If \a size is -1 (default), it is taken to be strlen(\a str). - Note that, despite the name, this function actually uses the codec - defined by QTextCodec::setCodecForCStrings() to convert \a str to - Unicode. Depending on the codec, it may not accept valid US-ASCII (ANSI - X3.4-1986) input. If no codec has been set, this function does the same - as fromLatin1(). + This function does the same as fromLatin1(). \sa toAscii(), fromLatin1(), fromUtf8(), fromLocal8Bit() */ @@ -4050,6 +4016,7 @@ QString QString::simplified() const if (from == fromEnd) goto done; } while (!ch.isSpace()); + } done: *to++ = ch; @@ -4870,42 +4837,51 @@ QString QString::toLower() const const ushort *p = d->data(); if (!p) return *this; - if (!d->size) - return *this; - - const ushort *e = d->data() + d->size; - // this avoids one out of bounds check in the loop - if (QChar(*p).isLowSurrogate()) - ++p; + const ushort *e = p + d->size; + // this avoids out of bounds check in the loop + while (e != p && QChar::isHighSurrogate(*(e - 1))) + --e; + const QUnicodeTables::Properties *prop; while (p != e) { - uint c = *p; - if (QChar(c).isLowSurrogate() && QChar(*(p - 1)).isHighSurrogate()) - c = QChar::surrogateToUcs4(*(p - 1), c); - const QUnicodeTables::Properties *prop = qGetProp(c); - if (prop->lowerCaseDiff || prop->lowerCaseSpecial) { + if (QChar::isHighSurrogate(*p) && QChar::isLowSurrogate(p[1])) { + ushort high = *p++; + prop = qGetProp(QChar::surrogateToUcs4(high, *p)); + } else { + prop = qGetProp(*p); + } + if (prop->lowerCaseDiff) { + if (QChar::isLowSurrogate(*p)) + --p; // safe; diff is 0 for surrogates QString s(d->size, Qt::Uninitialized); memcpy(s.d->data(), d->data(), (p - d->data())*sizeof(ushort)); ushort *pp = s.d->data() + (p - d->data()); - while (p < e) { - uint c = *p; - if (QChar(c).isLowSurrogate() && QChar(*(p - 1)).isHighSurrogate()) - c = QChar::surrogateToUcs4(*(p - 1), c); - prop = qGetProp(c); + while (p != e) { + if (QChar::isHighSurrogate(*p) && QChar::isLowSurrogate(p[1])) { + *pp = *p++; + prop = qGetProp(QChar::surrogateToUcs4(*pp++, *p)); + } else { + prop = qGetProp(*p); + } if (prop->lowerCaseSpecial) { + const ushort *specialCase = specialCaseMap + prop->lowerCaseDiff; + ushort length = *specialCase++; int pos = pp - s.d->data(); - s.resize(s.d->size + SPECIAL_CASE_MAX_LEN); + s.resize(s.d->size + length - 1); pp = s.d->data() + pos; - const ushort *specialCase = specialCaseMap + prop->lowerCaseDiff; - while (*specialCase) + while (length--) *pp++ = *specialCase++; } else { *pp++ = *p + prop->lowerCaseDiff; } ++p; } - s.truncate(pp - s.d->data()); + + // this restores high surrogate parts eaten above, if any + while (e != d->data() + d->size) + *pp++ = *e++; + return s; } ++p; @@ -4919,31 +4895,51 @@ QString QString::toLower() const */ QString QString::toCaseFolded() const { - if (!d->size) - return *this; - const ushort *p = d->data(); if (!p) return *this; - const ushort *e = d->data() + d->size; + const ushort *e = p + d->size; + // this avoids out of bounds check in the loop + while (e != p && QChar::isHighSurrogate(*(e - 1))) + --e; - uint last = 0; - while (p < e) { - ushort folded = foldCase(*p, last); - if (folded != *p) { - QString s(*this); - s.detach(); + const QUnicodeTables::Properties *prop; + while (p != e) { + if (QChar::isHighSurrogate(*p) && QChar::isLowSurrogate(p[1])) { + ushort high = *p++; + prop = qGetProp(QChar::surrogateToUcs4(high, *p)); + } else { + prop = qGetProp(*p); + } + if (prop->caseFoldDiff) { + if (QChar::isLowSurrogate(*p)) + --p; // safe; diff is 0 for surrogates + QString s(d->size, Qt::Uninitialized); + memcpy(s.d->data(), d->data(), (p - d->data())*sizeof(ushort)); ushort *pp = s.d->data() + (p - d->data()); - const ushort *ppe = s.d->data() + s.d->size; - last = pp > s.d->data() ? *(pp - 1) : 0; - while (pp < ppe) { - *pp = foldCase(*pp, last); - ++pp; + while (p != e) { + if (QChar::isHighSurrogate(*p) && QChar::isLowSurrogate(p[1])) { + *pp = *p++; + prop = qGetProp(QChar::surrogateToUcs4(*pp++, *p)); + } else { + prop = qGetProp(*p); + } + if (prop->caseFoldSpecial) { + //### we currently don't support full case foldings + } else { + *pp++ = *p + prop->caseFoldDiff; + } + ++p; } + + // this restores high surrogate parts eaten above, if any + while (e != d->data() + d->size) + *pp++ = *e++; + return s; } - p++; + ++p; } return *this; } @@ -4958,48 +4954,56 @@ QString QString::toCaseFolded() const \sa toLower(), QLocale::toLower() */ - QString QString::toUpper() const { const ushort *p = d->data(); if (!p) return *this; - if (!d->size) - return *this; - const ushort *e = d->data() + d->size; - - // this avoids one out of bounds check in the loop - if (QChar(*p).isLowSurrogate()) - ++p; + const ushort *e = p + d->size; + // this avoids out of bounds check in the loop + while (e != p && QChar::isHighSurrogate(*(e - 1))) + --e; + const QUnicodeTables::Properties *prop; while (p != e) { - uint c = *p; - if (QChar(c).isLowSurrogate() && QChar(*(p - 1)).isHighSurrogate()) - c = QChar::surrogateToUcs4(*(p - 1), c); - const QUnicodeTables::Properties *prop = qGetProp(c); - if (prop->upperCaseDiff || prop->upperCaseSpecial) { + if (QChar::isHighSurrogate(*p) && QChar::isLowSurrogate(p[1])) { + ushort high = *p++; + prop = qGetProp(QChar::surrogateToUcs4(high, *p)); + } else { + prop = qGetProp(*p); + } + if (prop->upperCaseDiff) { + if (QChar::isLowSurrogate(*p)) + --p; // safe; diff is 0 for surrogates QString s(d->size, Qt::Uninitialized); memcpy(s.d->data(), d->data(), (p - d->data())*sizeof(ushort)); ushort *pp = s.d->data() + (p - d->data()); - while (p < e) { - uint c = *p; - if (QChar(c).isLowSurrogate() && QChar(*(p - 1)).isHighSurrogate()) - c = QChar::surrogateToUcs4(*(p - 1), c); - prop = qGetProp(c); + while (p != e) { + if (QChar::isHighSurrogate(*p) && QChar::isLowSurrogate(p[1])) { + *pp = *p++; + prop = qGetProp(QChar::surrogateToUcs4(*pp++, *p)); + } else { + prop = qGetProp(*p); + } if (prop->upperCaseSpecial) { + const ushort *specialCase = specialCaseMap + prop->upperCaseDiff; + ushort length = *specialCase++; int pos = pp - s.d->data(); - s.resize(s.d->size + SPECIAL_CASE_MAX_LEN); + s.resize(s.d->size + length - 1); pp = s.d->data() + pos; - const ushort *specialCase = specialCaseMap + prop->upperCaseDiff; - while (*specialCase) + while (length--) *pp++ = *specialCase++; } else { *pp++ = *p + prop->upperCaseDiff; } ++p; } - s.truncate(pp - s.d->data()); + + // this restores high surrogate parts eaten above, if any + while (e != d->data() + d->size) + *pp++ = *e++; + return s; } ++p; @@ -5079,19 +5083,8 @@ QString &QString::vsprintf(const char* cformat, va_list ap) const char *c = cformat; for (;;) { // Copy non-escape chars to result -#ifndef QT_NO_TEXTCODEC - int i = 0; - while (*(c + i) != '\0' && *(c + i) != '%') - ++i; - if (codecForCStrings) - result.append(codecForCStrings->toUnicode(c, i)); - else - result.append(fromLatin1(c, i)); - c += i; -#else while (*c != '\0' && *c != '%') result.append(QLatin1Char(*c++)); -#endif if (*c == '\0') break; @@ -7003,8 +6996,7 @@ bool QString::isRightToLeft() const This operator is mostly useful to pass a QString to a function that accepts a std::string object. - If the QString contains Unicode characters that the - QTextCodec::codecForCStrings() codec cannot handle, using this operator + If the QString contains non-Latin1 Unicode characters, using this can lead to loss of information. This operator is only available if Qt is configured with STL @@ -8673,9 +8665,7 @@ QByteArray QStringRef::toLatin1() const Returns an 8-bit representation of the string as a QByteArray. - If a codec has been set using QTextCodec::setCodecForCStrings(), - it is used to convert Unicode to 8-bit char; otherwise this - function does the same as toLatin1(). + This function does the same as toLatin1(). Note that, despite the name, this function does not necessarily return an US-ASCII (ANSI X3.4-1986) string and its result may not be US-ASCII compatible. @@ -8684,10 +8674,6 @@ QByteArray QStringRef::toLatin1() const */ QByteArray QStringRef::toAscii() const { -#ifndef QT_NO_TEXTCODEC - if (QString::codecForCStrings) - return QString::codecForCStrings->fromUnicode(unicode(), length()); -#endif // QT_NO_TEXTCODEC return toLatin1(); } |