Merge remote-tracking branch 'gerrit/master' into containers

Change-Id: I97ba222435ff50a9e5422e6f2c73e4bb8d1b865c
author: João Abecasis <joao.abecasis@nokia.com> 2012-02-29 00:57:12 +0100
committer: João Abecasis <joao.abecasis@nokia.com> 2012-02-29 00:58:13 +0100
commit: c4ad58ed2252d5ed9f448a5c068ab33dce4cadd9 (patch)
tree: 7e9802171d7b4c641c2de1ef781023cab5d7d14c /src/corelib/tools/qstring.cpp
parent: 7da3a61b5fd5cc726f8fd62691aa5f84c7929800 (diff)
parent: fa1b9070af66edb81b2a3735c1951f78b22bd666 (diff)
1 files changed, 109 insertions, 123 deletions
diff --git a/src/corelib/tools/qstring.cpp b/src/corelib/tools/qstring.cpp
index be6f48808c..d0c2dd7148 100644
--- a/src/corelib/tools/qstring.cpp
+++ b/src/corelib/tools/qstring.cpp
@@ -98,10 +98,6 @@
 
 QT_BEGIN_NAMESPACE
 
-#ifndef QT_NO_TEXTCODEC
-QTextCodec *QString::codecForCStrings;
-#endif
-
 #ifdef QT_USE_ICU
 // qlocale_icu.cpp
 extern bool qt_ucol_strcoll(const QChar *source, int sourceLength, const QChar *target, int targetLength, int *result);
@@ -473,9 +469,8 @@ const QString::Null QString::null = { };
     \snippet doc/src/snippets/qstring/main.cpp 0
 
     QString converts the \c{const char *} data into Unicode using the
-    fromAscii() function. By default, fromAscii() treats character
-    above 128 as Latin-1 characters, but this can be changed by
-    calling QTextCodec::setCodecForCStrings().
+    fromAscii() function. fromAscii() treats ordinals above 127 as Latin-1
+    characters.
 
     In all of the QString functions that take \c{const char *}
     parameters, the \c{const char *} is interpreted as a classic
@@ -613,9 +608,7 @@ const QString::Null QString::null = { };
     toLatin1(), toUtf8(), and toLocal8Bit().
 
     \list
-    \o toAscii() returns an 8-bit string encoded using the codec
-       specified by QTextCodec::codecForCStrings (by default, that is
-       Latin 1).
+    \o toAscii() returns a Latin-1 (ISO 8859-1) encoded 8-bit string.
     \o toLatin1() returns a Latin-1 (ISO 8859-1) encoded 8-bit string.
     \o toUtf8() returns a UTF-8 encoded 8-bit string. UTF-8 is a
        superset of US-ASCII (ANSI X3.4-1986) that supports the entire
@@ -723,11 +716,11 @@ const QString::Null QString::null = { };
     \section1 More Efficient String Construction
 
     Many strings are known at compile time. But the trivial
-    constructor QString("Hello"), will convert the string literal
-    to a QString using the codecForCStrings(). To avoid this one
-    can use the QStringLiteral macro to directly create the required
-    data at compile time. Constructing a QString out of the literal
-    does then not cause any overhead at runtime.
+    constructor QString("Hello") will copy the contents of the string,
+    treating the contents as Latin-1. To avoid this, one can use the
+    QStringLiteral macro to directly create the required data at compile
+    time. Constructing a QString out of the literal then does not cause
+    any overhead at runtime.
 
     A slightly less efficient way is to use QLatin1String. This class wraps
     a C string literal, precalculates it length at compile time and can
@@ -3631,9 +3624,7 @@ QByteArray QString::toLatin1() const
 /*!
     Returns an 8-bit representation of the string as a QByteArray.
 
-    If a codec has been set using QTextCodec::setCodecForCStrings(),
-    it is used to convert Unicode to 8-bit char; otherwise this
-    function does the same as toLatin1().
+    This function does the same as toLatin1().
 
     Note that, despite the name, this function does not necessarily return an US-ASCII
     (ANSI X3.4-1986) string and its result may not be US-ASCII compatible.
@@ -3642,10 +3633,6 @@ QByteArray QString::toLatin1() const
 */
 QByteArray QString::toAscii() const
 {
-#ifndef QT_NO_TEXTCODEC
-    if (codecForCStrings)
-        return codecForCStrings->fromUnicode(*this);
-#endif // QT_NO_TEXTCODEC
     return toLatin1();
 }
 
@@ -3779,23 +3766,6 @@ QString::Data *QString::fromLatin1_helper(const char *str, int size)
 
 QString::Data *QString::fromAscii_helper(const char *str, int size)
 {
-#ifndef QT_NO_TEXTCODEC
-    if (codecForCStrings) {
-        Data *d;
-        if (!str) {
-            d = const_cast<Data *>(&shared_null.str);
-        } else if (size == 0 || (!*str && size < 0)) {
-            d = const_cast<Data *>(&shared_empty.str);
-        } else {
-            if (size < 0)
-                size = qstrlen(str);
-            QString s = codecForCStrings->toUnicode(str, size);
-            d = s.d;
-            d->ref.ref();
-        }
-        return d;
-    }
-#endif
     return fromLatin1_helper(str, size);
 }
 
@@ -3844,11 +3814,7 @@ QString QString::fromLocal8Bit_helper(const char *str, int size)
     If \a size is -1 (default), it is taken to be strlen(\a
     str).
 
-    Note that, despite the name, this function actually uses the codec
-    defined by QTextCodec::setCodecForCStrings() to convert \a str to
-    Unicode. Depending on the codec, it may not accept valid US-ASCII (ANSI
-    X3.4-1986) input. If no codec has been set, this function does the same
-    as fromLatin1().
+    This function does the same as fromLatin1().
 
     \sa toAscii(), fromLatin1(), fromUtf8(), fromLocal8Bit()
 */
@@ -4050,6 +4016,7 @@ QString QString::simplified() const
             if (from == fromEnd)
                 goto done;
         } while (!ch.isSpace());
+
     }
   done:
     *to++ = ch;
@@ -4870,42 +4837,51 @@ QString QString::toLower() const
     const ushort *p = d->data();
     if (!p)
         return *this;
-    if (!d->size)
-        return *this;
-
-    const ushort *e = d->data() + d->size;
 
-    // this avoids one out of bounds check in the loop
-    if (QChar(*p).isLowSurrogate())
-        ++p;
+    const ushort *e = p + d->size;
+    // this avoids out of bounds check in the loop
+    while (e != p && QChar::isHighSurrogate(*(e - 1)))
+        --e;
 
+    const QUnicodeTables::Properties *prop;
     while (p != e) {
-        uint c = *p;
-        if (QChar(c).isLowSurrogate() && QChar(*(p - 1)).isHighSurrogate())
-            c = QChar::surrogateToUcs4(*(p - 1), c);
-        const QUnicodeTables::Properties *prop = qGetProp(c);
-        if (prop->lowerCaseDiff || prop->lowerCaseSpecial) {
+        if (QChar::isHighSurrogate(*p) && QChar::isLowSurrogate(p[1])) {
+            ushort high = *p++;
+            prop = qGetProp(QChar::surrogateToUcs4(high, *p));
+        } else {
+            prop = qGetProp(*p);
+        }
+        if (prop->lowerCaseDiff) {
+            if (QChar::isLowSurrogate(*p))
+                --p; // safe; diff is 0 for surrogates
             QString s(d->size, Qt::Uninitialized);
             memcpy(s.d->data(), d->data(), (p - d->data())*sizeof(ushort));
             ushort *pp = s.d->data() + (p - d->data());
-            while (p < e) {
-                uint c = *p;
-                if (QChar(c).isLowSurrogate() && QChar(*(p - 1)).isHighSurrogate())
-                    c = QChar::surrogateToUcs4(*(p - 1), c);
-                prop = qGetProp(c);
+            while (p != e) {
+                if (QChar::isHighSurrogate(*p) && QChar::isLowSurrogate(p[1])) {
+                    *pp = *p++;
+                    prop = qGetProp(QChar::surrogateToUcs4(*pp++, *p));
+                } else {
+                    prop = qGetProp(*p);
+                }
                 if (prop->lowerCaseSpecial) {
+                    const ushort *specialCase = specialCaseMap + prop->lowerCaseDiff;
+                    ushort length = *specialCase++;
                     int pos = pp - s.d->data();
-                    s.resize(s.d->size + SPECIAL_CASE_MAX_LEN);
+                    s.resize(s.d->size + length - 1);
                     pp = s.d->data() + pos;
-                    const ushort *specialCase = specialCaseMap + prop->lowerCaseDiff;
-                    while (*specialCase)
+                    while (length--)
                         *pp++ = *specialCase++;
                 } else {
                     *pp++ = *p + prop->lowerCaseDiff;
                 }
                 ++p;
             }
-            s.truncate(pp - s.d->data());
+
+            // this restores high surrogate parts eaten above, if any
+            while (e != d->data() + d->size)
+                *pp++ = *e++;
+
             return s;
         }
         ++p;
@@ -4919,31 +4895,51 @@ QString QString::toLower() const
 */
 QString QString::toCaseFolded() const
 {
-    if (!d->size)
-        return *this;
-
     const ushort *p = d->data();
     if (!p)
         return *this;
 
-    const ushort *e = d->data() + d->size;
+    const ushort *e = p + d->size;
+    // this avoids out of bounds check in the loop
+    while (e != p && QChar::isHighSurrogate(*(e - 1)))
+        --e;
 
-    uint last = 0;
-    while (p < e) {
-        ushort folded = foldCase(*p, last);
-        if (folded != *p) {
-            QString s(*this);
-            s.detach();
+    const QUnicodeTables::Properties *prop;
+    while (p != e) {
+        if (QChar::isHighSurrogate(*p) && QChar::isLowSurrogate(p[1])) {
+            ushort high = *p++;
+            prop = qGetProp(QChar::surrogateToUcs4(high, *p));
+        } else {
+            prop = qGetProp(*p);
+        }
+        if (prop->caseFoldDiff) {
+            if (QChar::isLowSurrogate(*p))
+                --p; // safe; diff is 0 for surrogates
+            QString s(d->size, Qt::Uninitialized);
+            memcpy(s.d->data(), d->data(), (p - d->data())*sizeof(ushort));
             ushort *pp = s.d->data() + (p - d->data());
-            const ushort *ppe = s.d->data() + s.d->size;
-            last = pp > s.d->data() ? *(pp - 1) : 0;
-            while (pp < ppe) {
-                *pp = foldCase(*pp, last);
-                ++pp;
+            while (p != e) {
+                if (QChar::isHighSurrogate(*p) && QChar::isLowSurrogate(p[1])) {
+                    *pp = *p++;
+                    prop = qGetProp(QChar::surrogateToUcs4(*pp++, *p));
+                } else {
+                    prop = qGetProp(*p);
+                }
+                if (prop->caseFoldSpecial) {
+                    //### we currently don't support full case foldings
+                } else {
+                    *pp++ = *p + prop->caseFoldDiff;
+                }
+                ++p;
             }
+
+            // this restores high surrogate parts eaten above, if any
+            while (e != d->data() + d->size)
+                *pp++ = *e++;
+
             return s;
         }
-        p++;
+        ++p;
     }
     return *this;
 }
@@ -4958,48 +4954,56 @@ QString QString::toCaseFolded() const
 
     \sa toLower(), QLocale::toLower()
 */
-
 QString QString::toUpper() const
 {
     const ushort *p = d->data();
     if (!p)
         return *this;
-    if (!d->size)
-        return *this;
 
-    const ushort *e = d->data() + d->size;
-
-    // this avoids one out of bounds check in the loop
-    if (QChar(*p).isLowSurrogate())
-        ++p;
+    const ushort *e = p + d->size;
+    // this avoids out of bounds check in the loop
+    while (e != p && QChar::isHighSurrogate(*(e - 1)))
+        --e;
 
+    const QUnicodeTables::Properties *prop;
     while (p != e) {
-        uint c = *p;
-        if (QChar(c).isLowSurrogate() && QChar(*(p - 1)).isHighSurrogate())
-            c = QChar::surrogateToUcs4(*(p - 1), c);
-        const QUnicodeTables::Properties *prop = qGetProp(c);
-        if (prop->upperCaseDiff || prop->upperCaseSpecial) {
+        if (QChar::isHighSurrogate(*p) && QChar::isLowSurrogate(p[1])) {
+            ushort high = *p++;
+            prop = qGetProp(QChar::surrogateToUcs4(high, *p));
+        } else {
+            prop = qGetProp(*p);
+        }
+        if (prop->upperCaseDiff) {
+            if (QChar::isLowSurrogate(*p))
+                --p; // safe; diff is 0 for surrogates
             QString s(d->size, Qt::Uninitialized);
             memcpy(s.d->data(), d->data(), (p - d->data())*sizeof(ushort));
             ushort *pp = s.d->data() + (p - d->data());
-            while (p < e) {
-                uint c = *p;
-                if (QChar(c).isLowSurrogate() && QChar(*(p - 1)).isHighSurrogate())
-                    c = QChar::surrogateToUcs4(*(p - 1), c);
-                prop = qGetProp(c);
+            while (p != e) {
+                if (QChar::isHighSurrogate(*p) && QChar::isLowSurrogate(p[1])) {
+                    *pp = *p++;
+                    prop = qGetProp(QChar::surrogateToUcs4(*pp++, *p));
+                } else {
+                    prop = qGetProp(*p);
+                }
                 if (prop->upperCaseSpecial) {
+                    const ushort *specialCase = specialCaseMap + prop->upperCaseDiff;
+                    ushort length = *specialCase++;
                     int pos = pp - s.d->data();
-                    s.resize(s.d->size + SPECIAL_CASE_MAX_LEN);
+                    s.resize(s.d->size + length - 1);
                     pp = s.d->data() + pos;
-                    const ushort *specialCase = specialCaseMap + prop->upperCaseDiff;
-                    while (*specialCase)
+                    while (length--)
                         *pp++ = *specialCase++;
                 } else {
                     *pp++ = *p + prop->upperCaseDiff;
                 }
                 ++p;
             }
-            s.truncate(pp - s.d->data());
+
+            // this restores high surrogate parts eaten above, if any
+            while (e != d->data() + d->size)
+                *pp++ = *e++;
+
             return s;
         }
         ++p;
@@ -5079,19 +5083,8 @@ QString &QString::vsprintf(const char* cformat, va_list ap)
     const char *c = cformat;
     for (;;) {
         // Copy non-escape chars to result
-#ifndef QT_NO_TEXTCODEC
-        int i = 0;
-        while (*(c + i) != '\0' && *(c + i) != '%')
-            ++i;
-        if (codecForCStrings)
-            result.append(codecForCStrings->toUnicode(c, i));
-        else
-            result.append(fromLatin1(c, i));
-        c += i;
-#else
         while (*c != '\0' && *c != '%')
             result.append(QLatin1Char(*c++));
-#endif
 
         if (*c == '\0')
             break;
@@ -7003,8 +6996,7 @@ bool QString::isRightToLeft() const
     This operator is mostly useful to pass a QString to a function
     that accepts a std::string object.
 
-    If the QString contains Unicode characters that the
-    QTextCodec::codecForCStrings() codec cannot handle, using this operator
+    If the QString contains non-Latin1 Unicode characters, using this operator
     can lead to loss of information.
 
     This operator is only available if Qt is configured with STL
@@ -8673,9 +8665,7 @@ QByteArray QStringRef::toLatin1() const
 
     Returns an 8-bit representation of the string as a QByteArray.
 
-    If a codec has been set using QTextCodec::setCodecForCStrings(),
-    it is used to convert Unicode to 8-bit char; otherwise this
-    function does the same as toLatin1().
+    This function does the same as toLatin1().
 
     Note that, despite the name, this function does not necessarily return an US-ASCII
     (ANSI X3.4-1986) string and its result may not be US-ASCII compatible.
@@ -8684,10 +8674,6 @@ QByteArray QStringRef::toLatin1() const
 */
 QByteArray QStringRef::toAscii() const
 {
-#ifndef QT_NO_TEXTCODEC
-    if (QString::codecForCStrings)
-        return QString::codecForCStrings->fromUnicode(unicode(), length());
-#endif // QT_NO_TEXTCODEC
     return toLatin1();
 }
author	João Abecasis <joao.abecasis@nokia.com>	2012-02-29 00:57:12 +0100
committer	João Abecasis <joao.abecasis@nokia.com>	2012-02-29 00:58:13 +0100
commit	c4ad58ed2252d5ed9f448a5c068ab33dce4cadd9 (patch)
tree	7e9802171d7b4c641c2de1ef781023cab5d7d14c /src/corelib/tools/qstring.cpp
parent	7da3a61b5fd5cc726f8fd62691aa5f84c7929800 (diff)
parent	fa1b9070af66edb81b2a3735c1951f78b22bd666 (diff)