summary | refs | log | tree | commit | diff | stats
path: root/src/corelib/tools/qstring.cpp
diff options
context:
space:
mode:
author: João Abecasis <joao.abecasis@nokia.com> 2012-02-29 00:57:12 +0100
committer: João Abecasis <joao.abecasis@nokia.com> 2012-02-29 00:58:13 +0100
commit: c4ad58ed2252d5ed9f448a5c068ab33dce4cadd9 (patch)
tree: 7e9802171d7b4c641c2de1ef781023cab5d7d14c /src/corelib/tools/qstring.cpp
parent: 7da3a61b5fd5cc726f8fd62691aa5f84c7929800 (diff)
parent: fa1b9070af66edb81b2a3735c1951f78b22bd666 (diff)
Merge remote-tracking branch 'gerrit/master' into containers
Diffstat (limited to 'src/corelib/tools/qstring.cpp')
-rw-r--r-- src/corelib/tools/qstring.cpp 232
1 files changed, 109 insertions, 123 deletions
diff --git a/src/corelib/tools/qstring.cpp b/src/corelib/tools/qstring.cpp
index be6f48808c..d0c2dd7148 100644
--- a/src/corelib/tools/qstring.cpp
+++ b/src/corelib/tools/qstring.cpp
@@ -98,10 +98,6 @@
QT_BEGIN_NAMESPACE
-#ifndef QT_NO_TEXTCODEC
-QTextCodec *QString::codecForCStrings;
-#endif
-
#ifdef QT_USE_ICU
// qlocale_icu.cpp
extern bool qt_ucol_strcoll(const QChar *source, int sourceLength, const QChar *target, int targetLength, int *result);
@@ -473,9 +469,8 @@ const QString::Null QString::null = { };
\snippet doc/src/snippets/qstring/main.cpp 0
QString converts the \c{const char *} data into Unicode using the
- fromAscii() function. By default, fromAscii() treats character
- above 128 as Latin-1 characters, but this can be changed by
- calling QTextCodec::setCodecForCStrings().
+ fromAscii() function. fromAscii() treats ordinals above 127 as Latin-1
+ characters.
In all of the QString functions that take \c{const char *}
parameters, the \c{const char *} is interpreted as a classic
@@ -613,9 +608,7 @@ const QString::Null QString::null = { };
toLatin1(), toUtf8(), and toLocal8Bit().
\list
- \o toAscii() returns an 8-bit string encoded using the codec
- specified by QTextCodec::codecForCStrings (by default, that is
- Latin 1).
+ \o toAscii() returns a Latin-1 (ISO 8859-1) encoded 8-bit string.
\o toLatin1() returns a Latin-1 (ISO 8859-1) encoded 8-bit string.
\o toUtf8() returns a UTF-8 encoded 8-bit string. UTF-8 is a
superset of US-ASCII (ANSI X3.4-1986) that supports the entire
@@ -723,11 +716,11 @@ const QString::Null QString::null = { };
\section1 More Efficient String Construction
Many strings are known at compile time. But the trivial
- constructor QString("Hello"), will convert the string literal
- to a QString using the codecForCStrings(). To avoid this one
- can use the QStringLiteral macro to directly create the required
- data at compile time. Constructing a QString out of the literal
- does then not cause any overhead at runtime.
+ constructor QString("Hello") will copy the contents of the string,
+ treating the contents as Latin-1. To avoid this, one can use the
+ QStringLiteral macro to directly create the required data at compile
+ time. Constructing a QString out of the literal then does not cause
+ any overhead at runtime.
A slightly less efficient way is to use QLatin1String. This class wraps
a C string literal, precalculates its length at compile time and can
@@ -3631,9 +3624,7 @@ QByteArray QString::toLatin1() const
/*!
Returns an 8-bit representation of the string as a QByteArray.
- If a codec has been set using QTextCodec::setCodecForCStrings(),
- it is used to convert Unicode to 8-bit char; otherwise this
- function does the same as toLatin1().
+ This function does the same as toLatin1().
Note that, despite the name, this function does not necessarily return a US-ASCII
(ANSI X3.4-1986) string and its result may not be US-ASCII compatible.
@@ -3642,10 +3633,6 @@ QByteArray QString::toLatin1() const
*/
QByteArray QString::toAscii() const
{
-#ifndef QT_NO_TEXTCODEC
- if (codecForCStrings)
- return codecForCStrings->fromUnicode(*this);
-#endif // QT_NO_TEXTCODEC
return toLatin1();
}
@@ -3779,23 +3766,6 @@ QString::Data *QString::fromLatin1_helper(const char *str, int size)
QString::Data *QString::fromAscii_helper(const char *str, int size)
{
-#ifndef QT_NO_TEXTCODEC
- if (codecForCStrings) {
- Data *d;
- if (!str) {
- d = const_cast<Data *>(&shared_null.str);
- } else if (size == 0 || (!*str && size < 0)) {
- d = const_cast<Data *>(&shared_empty.str);
- } else {
- if (size < 0)
- size = qstrlen(str);
- QString s = codecForCStrings->toUnicode(str, size);
- d = s.d;
- d->ref.ref();
- }
- return d;
- }
-#endif
return fromLatin1_helper(str, size);
}
@@ -3844,11 +3814,7 @@ QString QString::fromLocal8Bit_helper(const char *str, int size)
If \a size is -1 (default), it is taken to be strlen(\a
str).
- Note that, despite the name, this function actually uses the codec
- defined by QTextCodec::setCodecForCStrings() to convert \a str to
- Unicode. Depending on the codec, it may not accept valid US-ASCII (ANSI
- X3.4-1986) input. If no codec has been set, this function does the same
- as fromLatin1().
+ This function does the same as fromLatin1().
\sa toAscii(), fromLatin1(), fromUtf8(), fromLocal8Bit()
*/
@@ -4050,6 +4016,7 @@ QString QString::simplified() const
if (from == fromEnd)
goto done;
} while (!ch.isSpace());
+
}
done:
*to++ = ch;
@@ -4870,42 +4837,51 @@ QString QString::toLower() const
const ushort *p = d->data();
if (!p)
return *this;
- if (!d->size)
- return *this;
-
- const ushort *e = d->data() + d->size;
- // this avoids one out of bounds check in the loop
- if (QChar(*p).isLowSurrogate())
- ++p;
+ const ushort *e = p + d->size;
+ // this avoids out of bounds check in the loop
+ while (e != p && QChar::isHighSurrogate(*(e - 1)))
+ --e;
+ const QUnicodeTables::Properties *prop;
while (p != e) {
- uint c = *p;
- if (QChar(c).isLowSurrogate() && QChar(*(p - 1)).isHighSurrogate())
- c = QChar::surrogateToUcs4(*(p - 1), c);
- const QUnicodeTables::Properties *prop = qGetProp(c);
- if (prop->lowerCaseDiff || prop->lowerCaseSpecial) {
+ if (QChar::isHighSurrogate(*p) && QChar::isLowSurrogate(p[1])) {
+ ushort high = *p++;
+ prop = qGetProp(QChar::surrogateToUcs4(high, *p));
+ } else {
+ prop = qGetProp(*p);
+ }
+ if (prop->lowerCaseDiff) {
+ if (QChar::isLowSurrogate(*p))
+ --p; // safe; diff is 0 for surrogates
QString s(d->size, Qt::Uninitialized);
memcpy(s.d->data(), d->data(), (p - d->data())*sizeof(ushort));
ushort *pp = s.d->data() + (p - d->data());
- while (p < e) {
- uint c = *p;
- if (QChar(c).isLowSurrogate() && QChar(*(p - 1)).isHighSurrogate())
- c = QChar::surrogateToUcs4(*(p - 1), c);
- prop = qGetProp(c);
+ while (p != e) {
+ if (QChar::isHighSurrogate(*p) && QChar::isLowSurrogate(p[1])) {
+ *pp = *p++;
+ prop = qGetProp(QChar::surrogateToUcs4(*pp++, *p));
+ } else {
+ prop = qGetProp(*p);
+ }
if (prop->lowerCaseSpecial) {
+ const ushort *specialCase = specialCaseMap + prop->lowerCaseDiff;
+ ushort length = *specialCase++;
int pos = pp - s.d->data();
- s.resize(s.d->size + SPECIAL_CASE_MAX_LEN);
+ s.resize(s.d->size + length - 1);
pp = s.d->data() + pos;
- const ushort *specialCase = specialCaseMap + prop->lowerCaseDiff;
- while (*specialCase)
+ while (length--)
*pp++ = *specialCase++;
} else {
*pp++ = *p + prop->lowerCaseDiff;
}
++p;
}
- s.truncate(pp - s.d->data());
+
+ // this restores high surrogate parts eaten above, if any
+ while (e != d->data() + d->size)
+ *pp++ = *e++;
+
return s;
}
++p;
@@ -4919,31 +4895,51 @@ QString QString::toLower() const
*/
QString QString::toCaseFolded() const
{
- if (!d->size)
- return *this;
-
const ushort *p = d->data();
if (!p)
return *this;
- const ushort *e = d->data() + d->size;
+ const ushort *e = p + d->size;
+ // this avoids out of bounds check in the loop
+ while (e != p && QChar::isHighSurrogate(*(e - 1)))
+ --e;
- uint last = 0;
- while (p < e) {
- ushort folded = foldCase(*p, last);
- if (folded != *p) {
- QString s(*this);
- s.detach();
+ const QUnicodeTables::Properties *prop;
+ while (p != e) {
+ if (QChar::isHighSurrogate(*p) && QChar::isLowSurrogate(p[1])) {
+ ushort high = *p++;
+ prop = qGetProp(QChar::surrogateToUcs4(high, *p));
+ } else {
+ prop = qGetProp(*p);
+ }
+ if (prop->caseFoldDiff) {
+ if (QChar::isLowSurrogate(*p))
+ --p; // safe; diff is 0 for surrogates
+ QString s(d->size, Qt::Uninitialized);
+ memcpy(s.d->data(), d->data(), (p - d->data())*sizeof(ushort));
ushort *pp = s.d->data() + (p - d->data());
- const ushort *ppe = s.d->data() + s.d->size;
- last = pp > s.d->data() ? *(pp - 1) : 0;
- while (pp < ppe) {
- *pp = foldCase(*pp, last);
- ++pp;
+ while (p != e) {
+ if (QChar::isHighSurrogate(*p) && QChar::isLowSurrogate(p[1])) {
+ *pp = *p++;
+ prop = qGetProp(QChar::surrogateToUcs4(*pp++, *p));
+ } else {
+ prop = qGetProp(*p);
+ }
+ if (prop->caseFoldSpecial) {
+ //### we currently don't support full case foldings
+ } else {
+ *pp++ = *p + prop->caseFoldDiff;
+ }
+ ++p;
}
+
+ // this restores high surrogate parts eaten above, if any
+ while (e != d->data() + d->size)
+ *pp++ = *e++;
+
return s;
}
- p++;
+ ++p;
}
return *this;
}
@@ -4958,48 +4954,56 @@ QString QString::toCaseFolded() const
\sa toLower(), QLocale::toLower()
*/
-
QString QString::toUpper() const
{
const ushort *p = d->data();
if (!p)
return *this;
- if (!d->size)
- return *this;
- const ushort *e = d->data() + d->size;
-
- // this avoids one out of bounds check in the loop
- if (QChar(*p).isLowSurrogate())
- ++p;
+ const ushort *e = p + d->size;
+ // this avoids out of bounds check in the loop
+ while (e != p && QChar::isHighSurrogate(*(e - 1)))
+ --e;
+ const QUnicodeTables::Properties *prop;
while (p != e) {
- uint c = *p;
- if (QChar(c).isLowSurrogate() && QChar(*(p - 1)).isHighSurrogate())
- c = QChar::surrogateToUcs4(*(p - 1), c);
- const QUnicodeTables::Properties *prop = qGetProp(c);
- if (prop->upperCaseDiff || prop->upperCaseSpecial) {
+ if (QChar::isHighSurrogate(*p) && QChar::isLowSurrogate(p[1])) {
+ ushort high = *p++;
+ prop = qGetProp(QChar::surrogateToUcs4(high, *p));
+ } else {
+ prop = qGetProp(*p);
+ }
+ if (prop->upperCaseDiff) {
+ if (QChar::isLowSurrogate(*p))
+ --p; // safe; diff is 0 for surrogates
QString s(d->size, Qt::Uninitialized);
memcpy(s.d->data(), d->data(), (p - d->data())*sizeof(ushort));
ushort *pp = s.d->data() + (p - d->data());
- while (p < e) {
- uint c = *p;
- if (QChar(c).isLowSurrogate() && QChar(*(p - 1)).isHighSurrogate())
- c = QChar::surrogateToUcs4(*(p - 1), c);
- prop = qGetProp(c);
+ while (p != e) {
+ if (QChar::isHighSurrogate(*p) && QChar::isLowSurrogate(p[1])) {
+ *pp = *p++;
+ prop = qGetProp(QChar::surrogateToUcs4(*pp++, *p));
+ } else {
+ prop = qGetProp(*p);
+ }
if (prop->upperCaseSpecial) {
+ const ushort *specialCase = specialCaseMap + prop->upperCaseDiff;
+ ushort length = *specialCase++;
int pos = pp - s.d->data();
- s.resize(s.d->size + SPECIAL_CASE_MAX_LEN);
+ s.resize(s.d->size + length - 1);
pp = s.d->data() + pos;
- const ushort *specialCase = specialCaseMap + prop->upperCaseDiff;
- while (*specialCase)
+ while (length--)
*pp++ = *specialCase++;
} else {
*pp++ = *p + prop->upperCaseDiff;
}
++p;
}
- s.truncate(pp - s.d->data());
+
+ // this restores high surrogate parts eaten above, if any
+ while (e != d->data() + d->size)
+ *pp++ = *e++;
+
return s;
}
++p;
@@ -5079,19 +5083,8 @@ QString &QString::vsprintf(const char* cformat, va_list ap)
const char *c = cformat;
for (;;) {
// Copy non-escape chars to result
-#ifndef QT_NO_TEXTCODEC
- int i = 0;
- while (*(c + i) != '\0' && *(c + i) != '%')
- ++i;
- if (codecForCStrings)
- result.append(codecForCStrings->toUnicode(c, i));
- else
- result.append(fromLatin1(c, i));
- c += i;
-#else
while (*c != '\0' && *c != '%')
result.append(QLatin1Char(*c++));
-#endif
if (*c == '\0')
break;
@@ -7003,8 +6996,7 @@ bool QString::isRightToLeft() const
This operator is mostly useful to pass a QString to a function
that accepts a std::string object.
- If the QString contains Unicode characters that the
- QTextCodec::codecForCStrings() codec cannot handle, using this operator
+ If the QString contains non-Latin1 Unicode characters, using this operator
can lead to loss of information.
This operator is only available if Qt is configured with STL
@@ -8673,9 +8665,7 @@ QByteArray QStringRef::toLatin1() const
Returns an 8-bit representation of the string as a QByteArray.
- If a codec has been set using QTextCodec::setCodecForCStrings(),
- it is used to convert Unicode to 8-bit char; otherwise this
- function does the same as toLatin1().
+ This function does the same as toLatin1().
Note that, despite the name, this function does not necessarily return a US-ASCII
(ANSI X3.4-1986) string and its result may not be US-ASCII compatible.
@@ -8684,10 +8674,6 @@ QByteArray QStringRef::toLatin1() const
*/
QByteArray QStringRef::toAscii() const
{
-#ifndef QT_NO_TEXTCODEC
- if (QString::codecForCStrings)
- return QString::codecForCStrings->fromUnicode(unicode(), length());
-#endif // QT_NO_TEXTCODEC
return toLatin1();
}