diff options
Diffstat (limited to 'src/corelib/tools/qstring.cpp')
-rw-r--r-- | src/corelib/tools/qstring.cpp | 1169 |
1 files changed, 607 insertions, 562 deletions
diff --git a/src/corelib/tools/qstring.cpp b/src/corelib/tools/qstring.cpp index d22d808a12..50f616a010 100644 --- a/src/corelib/tools/qstring.cpp +++ b/src/corelib/tools/qstring.cpp @@ -1,6 +1,7 @@ /**************************************************************************** ** ** Copyright (C) 2013 Digia Plc and/or its subsidiary(-ies). +** Copyright (C) 2013 Intel Corporation ** Contact: http://www.qt-project.org/legal ** ** This file is part of the QtCore module of the Qt Toolkit. @@ -53,6 +54,7 @@ #include <qlist.h> #include "qlocale.h" #include "qlocale_p.h" +#include "qstringbuilder.h" #include "qstringmatcher.h" #include "qvarlengtharray.h" #include "qtools_p.h" @@ -75,6 +77,7 @@ #include "qchar.cpp" #include "qstringmatcher.cpp" +#include "qstringiterator_p.h" #ifdef Q_OS_WIN # include <qt_windows.h> @@ -101,6 +104,43 @@ QT_BEGIN_NAMESPACE +/* + * Note on the use of SIMD in qstring.cpp: + * + * Several operations with strings are improved with the use of SIMD code, + * since they are repetitive. For MIPS, we have hand-written assembly code + * outside of qstring.cpp targeting MIPS DSP and MIPS DSPr2. For ARM and for + * x86, we can only use intrinsics and therefore everything is contained in + * qstring.cpp. We need to use intrinsics only for those platforms due to the + * different compilers and toolchains used, which have different syntax for + * assembly sources. + * + * ** SSE notes: ** + * + * Whenever multiple alternatives are equivalent or near so, we prefer the one + * using instructions from SSE2, since SSE2 is guaranteed to be enabled for all + * 64-bit builds and we enable it for 32-bit builds by default. Use of higher + * SSE versions should be done when there's a clear performance benefit and + * requires fallback code to SSE2, if it exists. + * + * Performance measurement in the past shows that most strings are short in + * size and, therefore, do not benefit from alignment prologues. 
That is, + * trying to find a 16-byte-aligned boundary to operate on is often more + * expensive than executing the unaligned operation directly. In addition, note + * that the QString private data is designed so that the data is stored on + * 16-byte boundaries if the system malloc() returns 16-byte aligned pointers + * on its own (64-bit glibc on Linux does; 32-bit glibc on Linux returns them + * 50% of the time), so skipping the alignment prologue is actually optimizing + * for the common case. + */ + +#if defined(__mips_dsp) +// From qstring_mips_dsp_asm.S +extern "C" void qt_fromlatin1_mips_asm_unroll4 (ushort*, const char*, uint); +extern "C" void qt_fromlatin1_mips_asm_unroll8 (ushort*, const char*, uint); +extern "C" void qt_toLatin1_mips_dsp_asm(uchar *dst, const ushort *src, int length); +#endif + // internal int qFindString(const QChar *haystack, int haystackLen, int from, const QChar *needle, int needleLen, Qt::CaseSensitivity cs); @@ -124,6 +164,209 @@ static inline bool qt_ends_with(const QChar *haystack, int haystackLen, static inline bool qt_ends_with(const QChar *haystack, int haystackLen, QLatin1String needle, Qt::CaseSensitivity cs); +#ifdef Q_COMPILER_LAMBDA +namespace { +template <uint MaxCount> struct UnrollTailLoop +{ + template <typename RetType, typename Functor1, typename Functor2> + static inline RetType exec(int count, RetType returnIfExited, Functor1 loopCheck, Functor2 returnIfFailed, int i = 0) + { + /* equivalent to: + * while (count--) { + * if (loopCheck(i)) + * return returnIfFailed(i); + * } + * return returnIfExited; + */ + + if (!count) + return returnIfExited; + + bool check = loopCheck(i); + if (check) { + const RetType &retval = returnIfFailed(i); + return retval; + } + + return UnrollTailLoop<MaxCount - 1>::exec(count - 1, returnIfExited, loopCheck, returnIfFailed, i + 1); + } + + template <typename Functor> + static inline void exec(int count, Functor code) + { + /* equivalent to: + * for (int i = 0; i < count; ++i) + * 
code(i); + */ + exec(count, 0, [=](int i) -> bool { code(i); return false; }, [](int) { return 0; }); + } +}; +template <> template <typename RetType, typename Functor1, typename Functor2> +inline RetType UnrollTailLoop<0>::exec(int, RetType returnIfExited, Functor1, Functor2, int) +{ + return returnIfExited; +} +} +#endif + +// conversion between Latin 1 and UTF-16 +static void qt_from_latin1(ushort *dst, const char *str, size_t size) +{ + /* SIMD: + * Unpacking with SSE has been shown to improve performance on recent CPUs + * The same method gives no improvement with NEON. + */ +#if defined(__SSE2__) + const char *e = str + size; + qptrdiff offset = 0; + + // we're going to read str[offset..offset+15] (16 bytes) + for ( ; str + offset + 15 < e; offset += 16) { + const __m128i nullMask = _mm_set1_epi32(0); + const __m128i chunk = _mm_loadu_si128((__m128i*)(str + offset)); // load + + // unpack the first 8 bytes, padding with zeros + const __m128i firstHalf = _mm_unpacklo_epi8(chunk, nullMask); + _mm_storeu_si128((__m128i*)(dst + offset), firstHalf); // store + + // unpack the last 8 bytes, padding with zeros + const __m128i secondHalf = _mm_unpackhi_epi8 (chunk, nullMask); + _mm_storeu_si128((__m128i*)(dst + offset + 8), secondHalf); // store + } + + size = size % 16; + dst += offset; + str += offset; +# ifdef Q_COMPILER_LAMBDA + return UnrollTailLoop<15>::exec(size, [=](int i) { dst[i] = (uchar)str[i]; }); +# endif +#endif +#if defined(__mips_dsp) + if (size > 20) + qt_fromlatin1_mips_asm_unroll8(dst, str, size); + else + qt_fromlatin1_mips_asm_unroll4(dst, str, size); +#else + while (size--) + *dst++ = (uchar)*str++; +#endif +} + +#if defined(__SSE2__) +static inline __m128i mergeQuestionMarks(__m128i chunk) +{ + const __m128i questionMark = _mm_set1_epi16('?'); + +# ifdef __SSE4_2__ + // compare the unsigned shorts for the range 0x0100-0xFFFF + // note on the use of _mm_cmpestrm: + // The MSDN documentation online 
(http://technet.microsoft.com/en-us/library/bb514080.aspx) + // says for range search the following: + // For each character c in a, determine whether b0 <= c <= b1 or b2 <= c <= b3 + // + // However, all examples on the Internet, including from Intel + // (see http://software.intel.com/en-us/articles/xml-parsing-accelerator-with-intel-streaming-simd-extensions-4-intel-sse4/) + // put the range to be searched first + // + // Disassembly and instruction-level debugging with GCC and ICC show + // that they are doing the right thing. Inverting the arguments in the + // instruction does cause a bunch of test failures. + + const int mode = _SIDD_UWORD_OPS | _SIDD_CMP_RANGES | _SIDD_UNIT_MASK; + const __m128i rangeMatch = _mm_cvtsi32_si128(0xffff0100); + const __m128i offLimitMask = _mm_cmpestrm(rangeMatch, 2, chunk, 8, mode); + + // replace the non-Latin 1 characters in the chunk with question marks + chunk = _mm_blendv_epi8(chunk, questionMark, offLimitMask); +# else + // SSE has no compare instruction for unsigned comparison. + // The variables must be shifted + 0x8000 to be compared + const __m128i signedBitOffset = _mm_set1_epi16(short(0x8000)); + const __m128i thresholdMask = _mm_set1_epi16(short(0xff + 0x8000)); + + const __m128i signedChunk = _mm_add_epi16(chunk, signedBitOffset); + const __m128i offLimitMask = _mm_cmpgt_epi16(signedChunk, thresholdMask); + +# ifdef __SSE4_1__ + // replace the non-Latin 1 characters in the chunk with question marks + chunk = _mm_blendv_epi8(chunk, questionMark, offLimitMask); +# else + // offLimitQuestionMark contains '?' 
for each 16 bits that was off-limit + // the 16 bits that were correct contains zeros + const __m128i offLimitQuestionMark = _mm_and_si128(offLimitMask, questionMark); + + // correctBytes contains the bytes that were in limit + // the 16 bits that were off limits contains zeros + const __m128i correctBytes = _mm_andnot_si128(offLimitMask, chunk); + + // merge offLimitQuestionMark and correctBytes to have the result + chunk = _mm_or_si128(correctBytes, offLimitQuestionMark); +# endif +# endif + return chunk; +} +#endif + +static void qt_to_latin1(uchar *dst, const ushort *src, int length) +{ +#if defined(__SSE2__) + uchar *e = dst + length; + qptrdiff offset = 0; + + // we're going to write to dst[offset..offset+15] (16 bytes) + for ( ; dst + offset + 15 < e; offset += 16) { + __m128i chunk1 = _mm_loadu_si128((__m128i*)(src + offset)); // load + chunk1 = mergeQuestionMarks(chunk1); + + __m128i chunk2 = _mm_loadu_si128((__m128i*)(src + offset + 8)); // load + chunk2 = mergeQuestionMarks(chunk2); + + // pack the two vectors to 16 x 8bits elements + const __m128i result = _mm_packus_epi16(chunk1, chunk2); + _mm_storeu_si128((__m128i*)(dst + offset), result); // store + } + + length = length % 16; + dst += offset; + src += offset; + +# ifdef Q_COMPILER_LAMBDA + return UnrollTailLoop<15>::exec(length, [=](int i) { dst[i] = (src[i]>0xff) ? '?' : (uchar) src[i]; }); +# endif +#elif defined(__ARM_NEON__) + // Refer to the documentation of the SSE2 implementation + // this uses exactly the same method as for SSE except: + // 1) neon has unsigned comparison + // 2) packing is done to 64 bits (8 x 8bits component). 
+ if (length >= 16) { + const int chunkCount = length >> 3; // divided by 8 + const uint16x8_t questionMark = vdupq_n_u16('?'); // set + const uint16x8_t thresholdMask = vdupq_n_u16(0xff); // set + for (int i = 0; i < chunkCount; ++i) { + uint16x8_t chunk = vld1q_u16((uint16_t *)src); // load + src += 8; + + const uint16x8_t offLimitMask = vcgtq_u16(chunk, thresholdMask); // chunk > thresholdMask + const uint16x8_t offLimitQuestionMark = vandq_u16(offLimitMask, questionMark); // offLimitMask & questionMark + const uint16x8_t correctBytes = vbicq_u16(chunk, offLimitMask); // !offLimitMask & chunk + chunk = vorrq_u16(correctBytes, offLimitQuestionMark); // correctBytes | offLimitQuestionMark + const uint8x8_t result = vmovn_u16(chunk); // narrowing move->packing + vst1_u8(dst, result); // store + dst += 8; + } + length = length % 8; + } +#endif +#if defined(__mips_dsp) + qt_toLatin1_mips_dsp_asm(dst, src, length); +#else + while (length--) { + *dst++ = (*src>0xff) ? '?' : (uchar) *src; + ++src; + } +#endif +} + // Unicode case-insensitive comparison static int ucstricmp(const ushort *a, const ushort *ae, const ushort *b, const ushort *be) { @@ -205,83 +448,39 @@ static int ucstrncmp(const QChar *a, const QChar *b, int l) l); } #endif // __mips_dsp - while (l-- && *a == *b) - a++,b++; - if (l==-1) - return 0; - return a->unicode() - b->unicode(); -} - -// Unicode case-sensitive comparison -static int ucstrcmp(const QChar *a, int alen, const QChar *b, int blen) -{ - if (a == b && alen == blen) +#ifdef __SSE2__ + const char *ptr = reinterpret_cast<const char*>(a); + qptrdiff distance = reinterpret_cast<const char*>(b) - ptr; + a += l & ~7; + b += l & ~7; + l &= 7; + + // we're going to read ptr[0..15] (16 bytes) + for ( ; ptr + 15 < reinterpret_cast<const char *>(a); ptr += 16) { + __m128i a_data = _mm_loadu_si128((__m128i*)ptr); + __m128i b_data = _mm_loadu_si128((__m128i*)(ptr + distance)); + __m128i result = _mm_cmpeq_epi16(a_data, b_data); + uint mask = 
~_mm_movemask_epi8(result); + if (ushort(mask)) { + // found a different byte + uint idx = uint(_bit_scan_forward(mask)); + return reinterpret_cast<const QChar *>(ptr + idx)->unicode() + - reinterpret_cast<const QChar *>(ptr + distance + idx)->unicode(); + } + } +# ifdef Q_COMPILER_LAMBDA + const auto &lambda = [=](int i) -> int { + return reinterpret_cast<const QChar *>(ptr)[i].unicode() + - reinterpret_cast<const QChar *>(ptr + distance)[i].unicode(); + }; + return UnrollTailLoop<7>::exec(l, 0, lambda, lambda); +# endif +#endif + if (!l) return 0; - int l = qMin(alen, blen); - int cmp = ucstrncmp(a, b, l); - return cmp ? cmp : (alen-blen); -} - -// Unicode case-insensitive compare two same-sized strings -static int ucstrnicmp(const ushort *a, const ushort *b, int l) -{ - return ucstricmp(a, a + l, b, b + l); -} - -// Benchmarking indicates that doing memcmp is much slower than -// executing the comparison ourselves. -// -// The profiling was done on a population of calls to qMemEquals, generated -// during a run of the demo browser. 
The profile of the data (32-bit x86 -// Linux) was: -// -// total number of comparisons: 21353 -// longest string compared: 95 -// average comparison length: 14.8786 -// cache-line crosses: 5661 (13.3%) -// alignment histogram: -// 0xXXX0 = 512 (1.2%) strings, 0 (0.0%) of which same-aligned -// 0xXXX2 = 15087 (35.3%) strings, 5145 (34.1%) of which same-aligned -// 0xXXX4 = 525 (1.2%) strings, 0 (0.0%) of which same-aligned -// 0xXXX6 = 557 (1.3%) strings, 6 (1.1%) of which same-aligned -// 0xXXX8 = 509 (1.2%) strings, 0 (0.0%) of which same-aligned -// 0xXXXa = 24358 (57.0%) strings, 9901 (40.6%) of which same-aligned -// 0xXXXc = 557 (1.3%) strings, 0 (0.0%) of which same-aligned -// 0xXXXe = 601 (1.4%) strings, 15 (2.5%) of which same-aligned -// total = 42706 (100%) strings, 15067 (35.3%) of which same-aligned -// -// 92% of the strings have alignment of 2 or 10, which is due to malloc on -// 32-bit Linux returning values aligned to 8 bytes, and offsetof(array, QString::Data) == 18. -// -// The profile on 64-bit will be different since offsetof(array, QString::Data) == 26. 
-// -// The benchmark results were, for a Core-i7 @ 2.67 GHz 32-bit, compiled with -O3 -funroll-loops: -// 16-bit loads only: 872,301 CPU ticks [Qt 4.5 / memcmp] -// 32- and 16-bit loads: 773,362 CPU ticks [Qt 4.6] -// SSE2 "movdqu" 128-bit loads: 618,736 CPU ticks -// SSE3 "lddqu" 128-bit loads: 619,954 CPU ticks -// SSSE3 "palignr" corrections: 852,147 CPU ticks -// SSE4.2 "pcmpestrm": 738,702 CPU ticks -// -// The same benchmark on an Atom N450 @ 1.66 GHz, is: -// 16-bit loads only: 2,185,882 CPU ticks -// 32- and 16-bit loads: 1,805,060 CPU ticks -// SSE2 "movdqu" 128-bit loads: 2,529,843 CPU ticks -// SSE3 "lddqu" 128-bit loads: 2,514,858 CPU ticks -// SSSE3 "palignr" corrections: 2,160,325 CPU ticks -// SSE4.2 not available -// -// The conclusion we reach is that alignment the SSE2 unaligned code can gain -// 20% improvement in performance in some systems, but suffers a penalty due -// to the unaligned loads on others. - -static bool qMemEquals(const quint16 *a, const quint16 *b, int length) -{ - if (a == b || !length) - return true; union { - const quint16 *w; + const QChar *w; const quint32 *d; quintptr value; } sa, sb; @@ -295,8 +494,8 @@ static bool qMemEquals(const quint16 *a, const quint16 *b, int length) // both addresses are not aligned to 4-bytes boundaries // compare the first character if (*sa.w != *sb.w) - return false; - --length; + return sa.w->unicode() - sb.w->unicode(); + --l; ++sa.w; ++sb.w; @@ -305,23 +504,128 @@ static bool qMemEquals(const quint16 *a, const quint16 *b, int length) // both addresses are 4-bytes aligned // do a fast 32-bit comparison - const quint32 *e = sa.d + (length >> 1); + const quint32 *e = sa.d + (l >> 1); for ( ; sa.d != e; ++sa.d, ++sb.d) { - if (*sa.d != *sb.d) - return false; + if (*sa.d != *sb.d) { + if (*sa.w != *sb.w) + return sa.w->unicode() - sb.w->unicode(); + return sa.w[1].unicode() - sb.w[1].unicode(); + } } // do we have a tail? - return (length & 1) ? *sa.w == *sb.w : true; + return (l & 1) ? 
sa.w->unicode() - sb.w->unicode() : 0; } else { // one of the addresses isn't 4-byte aligned but the other is - const quint16 *e = sa.w + length; + const QChar *e = sa.w + l; for ( ; sa.w != e; ++sa.w, ++sb.w) { if (*sa.w != *sb.w) - return false; + return sa.w->unicode() - sb.w->unicode(); } } - return true; + return 0; +} + +static int ucstrncmp(const QChar *a, const uchar *c, int l) +{ + const ushort *uc = reinterpret_cast<const ushort *>(a); + const ushort *e = uc + l; + +#ifdef __SSE2__ + __m128i nullmask = _mm_setzero_si128(); + qptrdiff offset = 0; + + // we're going to read uc[offset..offset+15] (32 bytes) + // and c[offset..offset+15] (16 bytes) + for ( ; uc + offset + 15 < e; offset += 16) { + // similar to fromLatin1_helper: + // load Latin 1 data and expand to UTF-16 + __m128i chunk = _mm_loadu_si128((__m128i*)(c + offset)); + __m128i firstHalf = _mm_unpacklo_epi8(chunk, nullmask); + __m128i secondHalf = _mm_unpackhi_epi8(chunk, nullmask); + + // load UTF-16 data and compare + __m128i ucdata1 = _mm_loadu_si128((__m128i*)(uc + offset)); + __m128i ucdata2 = _mm_loadu_si128((__m128i*)(uc + offset + 8)); + __m128i result1 = _mm_cmpeq_epi16(firstHalf, ucdata1); + __m128i result2 = _mm_cmpeq_epi16(secondHalf, ucdata2); + + uint mask = ~(_mm_movemask_epi8(result1) | _mm_movemask_epi8(result2) << 16); + if (mask) { + // found a different character + uint idx = uint(_bit_scan_forward(mask)); + return uc[offset + idx / 2] - c[offset + idx / 2]; + } + } + + // we'll read uc[offset..offset+7] (16 bytes) and c[offset-8..offset+7] (16 bytes) + if (uc + offset + 7 < e) { + // same, but we'll throw away half the data + __m128i chunk = _mm_loadu_si128((__m128i*)(c + offset - 8)); + __m128i secondHalf = _mm_unpackhi_epi8(chunk, nullmask); + + __m128i ucdata = _mm_loadu_si128((__m128i*)(uc + offset)); + __m128i result = _mm_cmpeq_epi16(secondHalf, ucdata); + uint mask = ~_mm_movemask_epi8(result); + if (ushort(mask)) { + // found a different character + uint idx = 
uint(_bit_scan_forward(mask)); + return uc[offset + idx / 2] - c[offset + idx / 2]; + } + + // still matched + offset += 8; + } + + // reset uc and c + uc += offset; + c += offset; + +# ifdef Q_COMPILER_LAMBDA + const auto &lambda = [=](int i) { return uc[i] - ushort(c[i]); }; + return UnrollTailLoop<7>::exec(e - uc, 0, lambda, lambda); +# endif +#endif + + while (uc < e) { + int diff = *uc - *c; + if (diff) + return diff; + uc++, c++; + } + + return 0; +} + +// Unicode case-sensitive comparison +static int ucstrcmp(const QChar *a, int alen, const QChar *b, int blen) +{ + if (a == b && alen == blen) + return 0; + int l = qMin(alen, blen); + int cmp = ucstrncmp(a, b, l); + return cmp ? cmp : (alen-blen); +} + +// Unicode case-insensitive compare two same-sized strings +static int ucstrnicmp(const ushort *a, const ushort *b, int l) +{ + return ucstricmp(a, a + l, b, b + l); +} + +static bool qMemEquals(const quint16 *a, const quint16 *b, int length) +{ + if (a == b || !length) + return true; + + return ucstrncmp(reinterpret_cast<const QChar *>(a), reinterpret_cast<const QChar *>(b), length) == 0; +} + +static int ucstrcmp(const QChar *a, int alen, const uchar *b, int blen) +{ + int l = qMin(alen, blen); + int cmp = ucstrncmp(a, b, l); + return cmp ? cmp : (alen-blen); } /*! 
@@ -340,14 +644,38 @@ static int findChar(const QChar *str, int len, QChar ch, int from, if (from < 0) from = qMax(from + len, 0); if (from < len) { - const ushort *n = s + from - 1; + const ushort *n = s + from; const ushort *e = s + len; if (cs == Qt::CaseSensitive) { +#ifdef __SSE2__ + __m128i mch = _mm_set1_epi32(c | (c << 16)); + + // we're going to read n[0..7] (16 bytes) + for (const ushort *next = n + 8; next <= e; n = next, next += 8) { + __m128i data = _mm_loadu_si128((__m128i*)n); + __m128i result = _mm_cmpeq_epi16(data, mch); + uint mask = _mm_movemask_epi8(result); + if (ushort(mask)) { + // found a match + // same as: return n - s + _bit_scan_forward(mask) / 2 + return (reinterpret_cast<const char *>(n) - reinterpret_cast<const char *>(s) + + _bit_scan_forward(mask)) >> 1; + } + } + +# ifdef Q_COMPILER_LAMBDA + return UnrollTailLoop<7>::exec(e - n, -1, + [=](int i) { return n[i] == c; }, + [=](int i) { return n - s + i; }); +# endif +#endif + --n; while (++n != e) if (*n == c) return n - s; } else { c = foldCase(c); + --n; while (++n != e) if (foldCase(*n) == c) return n - s; @@ -1014,21 +1342,13 @@ const QString::Null QString::null = { }; int QString::toUcs4_helper(const ushort *uc, int length, uint *out) { - int i = 0; - const ushort *const e = uc + length; - while (uc < e) { - uint u = *uc; - if (QChar::isHighSurrogate(u) && uc + 1 < e) { - ushort low = uc[1]; - if (QChar::isLowSurrogate(low)) { - ++uc; - u = QChar::surrogateToUcs4(u, low); - } - } - out[i++] = u; - ++uc; - } - return i; + int count = 0; + + QStringIterator i(reinterpret_cast<const QChar *>(uc), reinterpret_cast<const QChar *>(uc + length)); + while (i.hasNext()) + out[count++] = i.next(); + + return count; } /*! 
\fn int QString::toWCharArray(wchar_t *array) const @@ -1463,7 +1783,7 @@ QString &QString::operator=(QChar ch) */ QString &QString::insert(int i, QLatin1String str) { - const uchar *s = (const uchar *)str.latin1(); + const char *s = str.latin1(); if (i < 0 || !s || !(*s)) return *this; @@ -1471,8 +1791,7 @@ QString &QString::insert(int i, QLatin1String str) expand(qMax(d->size, i) + len - 1); ::memmove(d->data() + i + len, d->data() + i, (d->size - i - len) * sizeof(QChar)); - for (int j = 0; j < len; ++j) - d->data()[i + j] = s[j]; + qt_from_latin1(d->data() + i, s, uint(len)); return *this; } @@ -1584,14 +1903,14 @@ QString &QString::append(const QChar *str, int len) */ QString &QString::append(QLatin1String str) { - const uchar *s = (const uchar *)str.latin1(); + const char *s = str.latin1(); if (s) { int len = str.size(); if (d->ref.isShared() || uint(d->size + len) + 1u > d->alloc) reallocData(uint(d->size + len) + 1u, true); ushort *i = d->data() + d->size; - while ((*i++ = *s++)) - ; + qt_from_latin1(i, s, uint(len)); + i[len] = '\0'; d->size += len; } return *this; @@ -2098,13 +2417,11 @@ QString& QString::replace(QChar before, QChar after, Qt::CaseSensitivity cs) QString &QString::replace(QLatin1String before, QLatin1String after, Qt::CaseSensitivity cs) { int alen = after.size(); - QVarLengthArray<ushort> a(alen); - for (int i = 0; i < alen; ++i) - a[i] = (uchar)after.latin1()[i]; int blen = before.size(); + QVarLengthArray<ushort> a(alen); QVarLengthArray<ushort> b(blen); - for (int i = 0; i < blen; ++i) - b[i] = (uchar)before.latin1()[i]; + qt_from_latin1(a.data(), after.latin1(), alen); + qt_from_latin1(b.data(), before.latin1(), blen); return replace((const QChar *)b.data(), blen, (const QChar *)a.data(), alen, cs); } @@ -2124,8 +2441,7 @@ QString &QString::replace(QLatin1String before, const QString &after, Qt::CaseSe { int blen = before.size(); QVarLengthArray<ushort> b(blen); - for (int i = 0; i < blen; ++i) - b[i] = (uchar)before.latin1()[i]; + 
qt_from_latin1(b.data(), before.latin1(), blen); return replace((const QChar *)b.data(), blen, after.constData(), after.d->size, cs); } @@ -2145,8 +2461,7 @@ QString &QString::replace(const QString &before, QLatin1String after, Qt::CaseSe { int alen = after.size(); QVarLengthArray<ushort> a(alen); - for (int i = 0; i < alen; ++i) - a[i] = (uchar)after.latin1()[i]; + qt_from_latin1(a.data(), after.latin1(), alen); return replace(before.constData(), before.d->size, (const QChar *)a.data(), alen, cs); } @@ -2166,8 +2481,7 @@ QString &QString::replace(QChar c, QLatin1String after, Qt::CaseSensitivity cs) { int alen = after.size(); QVarLengthArray<ushort> a(alen); - for (int i = 0; i < alen; ++i) - a[i] = (uchar)after.latin1()[i]; + qt_from_latin1(a.data(), after.latin1(), alen); return replace(&c, 1, (const QChar *)a.data(), alen, cs); } @@ -2201,17 +2515,7 @@ bool QString::operator==(QLatin1String other) const if (!other.size()) return isEmpty(); - const ushort *uc = d->data(); - const ushort *e = uc + d->size; - const uchar *c = (uchar *)other.latin1(); - - while (uc < e) { - if (*uc != *c) - return false; - ++uc; - ++c; - } - return true; + return compare_helper(data(), size(), other, Qt::CaseSensitive) == 0; } /*! \fn bool QString::operator==(const QByteArray &other) const @@ -2265,16 +2569,7 @@ bool QString::operator<(QLatin1String other) const if (!c || *c == 0) return false; - const ushort *uc = d->data(); - const ushort *e = uc + qMin(d->size, other.size()); - - while (uc < e) { - if (*uc != *c) - break; - ++uc; - ++c; - } - return (uc == e ? d->size < other.size() : *uc < *c); + return compare_helper(data(), size(), other, Qt::CaseSensitive) < 0; } /*! 
\fn bool QString::operator<(const QByteArray &other) const @@ -2367,16 +2662,7 @@ bool QString::operator>(QLatin1String other) const if (!c || *c == '\0') return !isEmpty(); - const ushort *uc = d->data(); - const ushort *e = uc + qMin(d->size, other.size()); - - while (uc < e) { - if (*uc != *c) - break; - ++uc; - ++c; - } - return (uc == e) ? d->size > other.size() : *uc > *c; + return compare_helper(data(), size(), other, Qt::CaseSensitive) > 0; } /*! \fn bool QString::operator>(const QByteArray &other) const @@ -2763,8 +3049,7 @@ int QString::lastIndexOf(QLatin1String str, int from, Qt::CaseSensitivity cs) co from = delta; QVarLengthArray<ushort> s(sl); - for (int i = 0; i < sl; ++i) - s[i] = str.latin1()[i]; + qt_from_latin1(s.data(), str.latin1(), sl); return lastIndexOfHelper(d->data(), from, s.data(), sl, cs); } @@ -3172,6 +3457,15 @@ int QString::count(const QStringRef &str, Qt::CaseSensitivity cs) const \sa indexOf(), count() */ +/*! \fn bool QString::contains(QLatin1String str, Qt::CaseSensitivity cs = Qt::CaseSensitive) const + \since 5.3 + + \overload contains() + + Returns \c true if this string contains an occurrence of the latin-1 string + \a str; otherwise returns \c false. +*/ + /*! 
\fn bool QString::contains(QChar ch, Qt::CaseSensitivity cs = Qt::CaseSensitive) const \overload contains() @@ -3895,131 +4189,58 @@ bool QString::endsWith(QChar c, Qt::CaseSensitivity cs) const : foldCase(d->data()[d->size - 1]) == foldCase(c.unicode())); } - -#if defined(__SSE2__) -static inline __m128i mergeQuestionMarks(__m128i chunk) +QByteArray QString::toLatin1_helper(const QString &string) { - const __m128i questionMark = _mm_set1_epi16('?'); - -# ifdef __SSE4_2__ - // compare the unsigned shorts for the range 0x0100-0xFFFF - // note on the use of _mm_cmpestrm: - // The MSDN documentation online (http://technet.microsoft.com/en-us/library/bb514080.aspx) - // says for range search the following: - // For each character c in a, determine whether b0 <= c <= b1 or b2 <= c <= b3 - // - // However, all examples on the Internet, including from Intel - // (see http://software.intel.com/en-us/articles/xml-parsing-accelerator-with-intel-streaming-simd-extensions-4-intel-sse4/) - // put the range to be searched first - // - // Disassembly and instruction-level debugging with GCC and ICC show - // that they are doing the right thing. Inverting the arguments in the - // instruction does cause a bunch of test failures. - - const int mode = _SIDD_UWORD_OPS | _SIDD_CMP_RANGES | _SIDD_UNIT_MASK; - const __m128i rangeMatch = _mm_cvtsi32_si128(0xffff0100); - const __m128i offLimitMask = _mm_cmpestrm(rangeMatch, 2, chunk, 8, mode); - - // replace the non-Latin 1 characters in the chunk with question marks - chunk = _mm_blendv_epi8(chunk, questionMark, offLimitMask); -# else - // SSE has no compare instruction for unsigned comparison. 
- // The variables must be shiffted + 0x8000 to be compared - const __m128i signedBitOffset = _mm_set1_epi16(short(0x8000)); - const __m128i thresholdMask = _mm_set1_epi16(short(0xff + 0x8000)); - - const __m128i signedChunk = _mm_add_epi16(chunk, signedBitOffset); - const __m128i offLimitMask = _mm_cmpgt_epi16(signedChunk, thresholdMask); + if (Q_UNLIKELY(string.isNull())) + return QByteArray(); -# ifdef __SSE4_1__ - // replace the non-Latin 1 characters in the chunk with question marks - chunk = _mm_blendv_epi8(chunk, questionMark, offLimitMask); -# else - // offLimitQuestionMark contains '?' for each 16 bits that was off-limit - // the 16 bits that were correct contains zeros - const __m128i offLimitQuestionMark = _mm_and_si128(offLimitMask, questionMark); + return toLatin1_helper(string.constData(), string.length()); +} - // correctBytes contains the bytes that were in limit - // the 16 bits that were off limits contains zeros - const __m128i correctBytes = _mm_andnot_si128(offLimitMask, chunk); +QByteArray QString::toLatin1_helper(const QChar *data, int length) +{ + QByteArray ba(length, Qt::Uninitialized); - // merge offLimitQuestionMark and correctBytes to have the result - chunk = _mm_or_si128(correctBytes, offLimitQuestionMark); -# endif -# endif - return chunk; + // since we own the only copy, we're going to const_cast the constData; + // that avoids an unnecessary call to detach() and expansion code that will never get used + qt_to_latin1(reinterpret_cast<uchar *>(const_cast<char *>(ba.constData())), + reinterpret_cast<const ushort *>(data), length); + return ba; } -#endif - -#if defined(__mips_dsp) -extern "C" void qt_toLatin1_mips_dsp_asm(uchar *dst, const ushort *src, int length); -#endif -static QByteArray toLatin1_helper(const QChar *data, int length) +QByteArray QString::toLatin1_helper_inplace(QString &s) { - QByteArray ba; - if (length) { - ba.resize(length); - const ushort *src = reinterpret_cast<const ushort *>(data); - uchar *dst = (uchar*) 
ba.data(); -#if defined(__SSE2__) - if (length >= 16) { - const int chunkCount = length >> 4; // divided by 16 + if (!s.isDetached()) + return s.toLatin1(); - for (int i = 0; i < chunkCount; ++i) { - __m128i chunk1 = _mm_loadu_si128((__m128i*)src); // load - chunk1 = mergeQuestionMarks(chunk1); - src += 8; + // We can return our own buffer to the caller. + // Conversion to Latin-1 always shrinks the buffer by half. + const ushort *data = reinterpret_cast<const ushort *>(s.constData()); + uint length = s.size(); - __m128i chunk2 = _mm_loadu_si128((__m128i*)src); // load - chunk2 = mergeQuestionMarks(chunk2); - src += 8; + // Swap the d pointers. + // Kids, avert your eyes. Don't try this at home. + QArrayData *ba_d = s.d; - // pack the two vector to 16 x 8bits elements - const __m128i result = _mm_packus_epi16(chunk1, chunk2); + // multiply the allocated capacity by sizeof(ushort) + ba_d->alloc *= sizeof(ushort); - _mm_storeu_si128((__m128i*)dst, result); // store - dst += 16; - } - length = length % 16; - } -#elif defined(__ARM_NEON__) - // Refer to the documentation of the SSE2 implementation - // this use eactly the same method as for SSE except: - // 1) neon has unsigned comparison - // 2) packing is done to 64 bits (8 x 8bits component). 
- if (length >= 16) { - const int chunkCount = length >> 3; // divided by 8 - const uint16x8_t questionMark = vdupq_n_u16('?'); // set - const uint16x8_t thresholdMask = vdupq_n_u16(0xff); // set - for (int i = 0; i < chunkCount; ++i) { - uint16x8_t chunk = vld1q_u16((uint16_t *)src); // load - src += 8; - - const uint16x8_t offLimitMask = vcgtq_u16(chunk, thresholdMask); // chunk > thresholdMask - const uint16x8_t offLimitQuestionMark = vandq_u16(offLimitMask, questionMark); // offLimitMask & questionMark - const uint16x8_t correctBytes = vbicq_u16(chunk, offLimitMask); // !offLimitMask & chunk - chunk = vorrq_u16(correctBytes, offLimitQuestionMark); // correctBytes | offLimitQuestionMark - const uint8x8_t result = vmovn_u16(chunk); // narrowing move->packing - vst1_u8(dst, result); // store - dst += 8; - } - length = length % 8; - } -#endif -#if defined(__mips_dsp) - qt_toLatin1_mips_dsp_asm(dst, src, length); -#else - while (length--) { - *dst++ = (*src>0xff) ? '?' : (uchar) *src; - ++src; - } -#endif - } - return ba; + // reset ourselves to QString() + s.d = QString().d; + + // do the in-place conversion + uchar *dst = reinterpret_cast<uchar *>(ba_d->data()); + qt_to_latin1(dst, data, length); + dst[length] = '\0'; + + QByteArrayDataPtr badptr = { ba_d }; + return QByteArray(badptr); } + /*! + \fn QByteArray QString::toLatin1() const + Returns a Latin-1 representation of the string as a QByteArray. The returned byte array is undefined if the string contains non-Latin1 @@ -4028,10 +4249,6 @@ static QByteArray toLatin1_helper(const QChar *data, int length) \sa fromLatin1(), toUtf8(), toLocal8Bit(), QTextCodec */ -QByteArray QString::toLatin1() const -{ - return toLatin1_helper(unicode(), length()); -} /*! 
\fn QByteArray QString::toAscii() const @@ -4046,19 +4263,9 @@ QByteArray QString::toLatin1() const \sa fromAscii(), toLatin1(), toUtf8(), toLocal8Bit(), QTextCodec */ -#if !defined(Q_OS_MAC) && defined(Q_OS_UNIX) && !defined(QT_USE_ICU) -static QByteArray toLocal8Bit_helper(const QChar *data, int length) -{ -#ifndef QT_NO_TEXTCODEC - QTextCodec *localeCodec = QTextCodec::codecForLocale(); - if (localeCodec) - return localeCodec->fromUnicode(data, length); -#endif // QT_NO_TEXTCODEC - return toLatin1_helper(data, length); -} -#endif - /*! + \fn QByteArray QString::toLocal8Bit() const + Returns the local 8-bit representation of the string as a QByteArray. The returned byte array is undefined if the string contains characters not supported by the local 8-bit encoding. @@ -4073,17 +4280,21 @@ static QByteArray toLocal8Bit_helper(const QChar *data, int length) \sa fromLocal8Bit(), toLatin1(), toUtf8(), QTextCodec */ -QByteArray QString::toLocal8Bit() const + +QByteArray QString::toLocal8Bit_helper(const QChar *data, int size) { #ifndef QT_NO_TEXTCODEC QTextCodec *localeCodec = QTextCodec::codecForLocale(); if (localeCodec) - return localeCodec->fromUnicode(*this); + return localeCodec->fromUnicode(data, size); #endif // QT_NO_TEXTCODEC - return toLatin1(); + return toLatin1_helper(data, size); } + /*! + \fn QByteArray QString::toUtf8() const + Returns a UTF-8 representation of the string as a QByteArray. UTF-8 is a Unicode codec and can represent all characters in a Unicode @@ -4099,12 +4310,13 @@ QByteArray QString::toLocal8Bit() const \sa fromUtf8(), toLatin1(), toLocal8Bit(), QTextCodec */ -QByteArray QString::toUtf8() const + +QByteArray QString::toUtf8_helper(const QString &str) { - if (isNull()) + if (str.isNull()) return QByteArray(); - return QUtf8::convertFromUnicode(constData(), length(), 0); + return QUtf8::convertFromUnicode(str.constData(), str.length()); } /*! 
@@ -4112,8 +4324,12 @@ QByteArray QString::toUtf8() const Returns a UCS-4/UTF-32 representation of the string as a QVector<uint>. - UCS-4 is a Unicode codec and is lossless. All characters from this string - can be encoded in UCS-4. The vector is not null terminated. + UCS-4 is a Unicode codec and therefore it is lossless. All characters from + this string will be encoded in UCS-4. Any invalid sequence of code units in + this string is replaced by the Unicode's replacement character + (QChar::ReplacementCharacter, which corresponds to \c{U+FFFD}). + + The returned vector is not NUL terminated. \sa fromUtf8(), toUtf8(), toLatin1(), toLocal8Bit(), QTextCodec, fromUcs4(), toWCharArray() */ @@ -4126,12 +4342,6 @@ QVector<uint> QString::toUcs4() const return v; } -#if defined(__mips_dsp) -// From qstring_mips_dsp_asm.S -extern "C" void qt_fromlatin1_mips_asm_unroll4 (ushort*, const char*, uint); -extern "C" void qt_fromlatin1_mips_asm_unroll8 (ushort*, const char*, uint); -#endif - QString::Data *QString::fromLatin1_helper(const char *str, int size) { Data *d; @@ -4147,40 +4357,8 @@ QString::Data *QString::fromLatin1_helper(const char *str, int size) d->size = size; d->data()[size] = '\0'; ushort *dst = d->data(); - /* SIMD: - * Unpacking with SSE has been shown to improve performance on recent CPUs - * The same method gives no improvement with NEON. 
- */ -#if defined(__SSE2__) - if (size >= 16) { - int chunkCount = size >> 4; // divided by 16 - const __m128i nullMask = _mm_set1_epi32(0); - for (int i = 0; i < chunkCount; ++i) { - const __m128i chunk = _mm_loadu_si128((__m128i*)str); // load - str += 16; - - // unpack the first 8 bytes, padding with zeros - const __m128i firstHalf = _mm_unpacklo_epi8(chunk, nullMask); - _mm_storeu_si128((__m128i*)dst, firstHalf); // store - dst += 8; - - // unpack the last 8 bytes, padding with zeros - const __m128i secondHalf = _mm_unpackhi_epi8 (chunk, nullMask); - _mm_storeu_si128((__m128i*)dst, secondHalf); // store - dst += 8; - } - size = size % 16; - } -#endif -#if defined(__mips_dsp) - if (size > 20) - qt_fromlatin1_mips_asm_unroll8(dst, str, size); - else - qt_fromlatin1_mips_asm_unroll4(dst, str, size); -#else - while (size--) - *dst++ = (uchar)*str++; -#endif + + qt_from_latin1(dst, str, uint(size)); } return d; } @@ -4305,7 +4483,7 @@ QString QString::fromUtf8_helper(const char *str, int size) return QString(); Q_ASSERT(size != -1); - return QUtf8::convertToUnicode(str, size, 0); + return QUtf8::convertToUnicode(str, size); } /*! 
@@ -5039,22 +5217,7 @@ int QString::compare_helper(const QChar *data1, int length1, QLatin1String s2, return length1; if (cs == Qt::CaseSensitive) { - const ushort *e = uc + length1; - if (s2.size() < length1) - e = uc + s2.size(); - while (uc < e) { - int diff = *uc - *c; - if (diff) - return diff; - uc++, c++; - } - - if (uc == uce) { - if (c == (const uchar *)s2.latin1() + s2.size()) - return 0; - return -1; - } - return 1; + return ucstrcmp(data1, length1, c, s2.size()); } else { return ucstricmp(uc, uce, c, c + s2.size()); } @@ -5144,7 +5307,11 @@ int QString::localeAwareCompare_helper(const QChar *data1, int length1, return ucstrcmp(data1, length1, data2, length2); #if defined(Q_OS_WIN32) || defined(Q_OS_WINCE) +#ifndef Q_OS_WINRT int res = CompareString(GetUserDefaultLCID(), 0, (wchar_t*)data1, length1, (wchar_t*)data2, length2); +#else + int res = CompareStringEx(LOCALE_NAME_USER_DEFAULT, 0, (LPCWSTR)data1, length1, (LPCWSTR)data2, length2, NULL, NULL, 0); +#endif switch (res) { case CSTR_LESS_THAN: @@ -5553,8 +5720,6 @@ QString &QString::sprintf(const char *cformat, ...) 
QString &QString::vsprintf(const char* cformat, va_list ap) { - const QLocale locale(QLocale::C); - if (!cformat || !*cformat) { // Qt 1.x compat *this = fromLatin1(""); @@ -5594,12 +5759,12 @@ QString &QString::vsprintf(const char* cformat, va_list ap) bool no_more_flags = false; do { switch (*c) { - case '#': flags |= QLocalePrivate::Alternate; break; - case '0': flags |= QLocalePrivate::ZeroPadded; break; - case '-': flags |= QLocalePrivate::LeftAdjusted; break; - case ' ': flags |= QLocalePrivate::BlankBeforePositive; break; - case '+': flags |= QLocalePrivate::AlwaysShowSign; break; - case '\'': flags |= QLocalePrivate::ThousandsGroup; break; + case '#': flags |= QLocaleData::Alternate; break; + case '0': flags |= QLocaleData::ZeroPadded; break; + case '-': flags |= QLocaleData::LeftAdjusted; break; + case ' ': flags |= QLocaleData::BlankBeforePositive; break; + case '+': flags |= QLocaleData::AlwaysShowSign; break; + case '\'': flags |= QLocaleData::ThousandsGroup; break; default: no_more_flags = true; break; } @@ -5731,7 +5896,7 @@ QString &QString::vsprintf(const char* cformat, va_list ap) case lm_t: i = va_arg(ap, int); break; default: i = 0; break; } - subst = locale.d->longLongToString(i, precision, 10, width, flags); + subst = QLocaleData::c()->longLongToString(i, precision, 10, width, flags); ++c; break; } @@ -5751,7 +5916,7 @@ QString &QString::vsprintf(const char* cformat, va_list ap) } if (qIsUpper(*c)) - flags |= QLocalePrivate::CapitalEorX; + flags |= QLocaleData::CapitalEorX; int base = 10; switch (qToLower(*c)) { @@ -5763,7 +5928,7 @@ QString &QString::vsprintf(const char* cformat, va_list ap) base = 16; break; default: break; } - subst = locale.d->unsLongLongToString(u, precision, base, width, flags); + subst = QLocaleData::c()->unsLongLongToString(u, precision, base, width, flags); ++c; break; } @@ -5782,17 +5947,17 @@ QString &QString::vsprintf(const char* cformat, va_list ap) d = va_arg(ap, double); if (qIsUpper(*c)) - flags |= 
QLocalePrivate::CapitalEorX; + flags |= QLocaleData::CapitalEorX; - QLocalePrivate::DoubleForm form = QLocalePrivate::DFDecimal; + QLocaleData::DoubleForm form = QLocaleData::DFDecimal; switch (qToLower(*c)) { - case 'e': form = QLocalePrivate::DFExponent; break; + case 'e': form = QLocaleData::DFExponent; break; case 'a': // not supported - decimal form used instead - case 'f': form = QLocalePrivate::DFDecimal; break; - case 'g': form = QLocalePrivate::DFSignificantDigits; break; + case 'f': form = QLocaleData::DFDecimal; break; + case 'g': form = QLocaleData::DFSignificantDigits; break; default: break; } - subst = locale.d->doubleToString(d, precision, form, width, flags); + subst = QLocaleData::c()->doubleToString(d, precision, form, width, flags); ++c; break; } @@ -5825,8 +5990,8 @@ QString &QString::vsprintf(const char* cformat, va_list ap) #else quint64 i = reinterpret_cast<unsigned long>(arg); #endif - flags |= QLocalePrivate::Alternate; - subst = locale.d->unsLongLongToString(i, precision, 16, width, flags); + flags |= QLocaleData::Alternate; + subst = QLocaleData::c()->unsLongLongToString(i, precision, 16, width, flags); ++c; break; } @@ -5868,7 +6033,7 @@ QString &QString::vsprintf(const char* cformat, va_list ap) continue; } - if (flags & QLocalePrivate::LeftAdjusted) + if (flags & QLocaleData::LeftAdjusted) result.append(subst.leftJustified(width)); else result.append(subst.rightJustified(width)); @@ -5903,17 +6068,22 @@ QString &QString::vsprintf(const char* cformat, va_list ap) qint64 QString::toLongLong(bool *ok, int base) const { + return toIntegral_helper<qlonglong>(constData(), size(), ok, base); +} + +qlonglong QString::toIntegral_helper(const QChar *data, int len, bool *ok, int base) +{ #if defined(QT_CHECK_RANGE) if (base != 0 && (base < 2 || base > 36)) { - qWarning("QString::toLongLong: Invalid base (%d)", base); + qWarning("QString::toULongLong: Invalid base (%d)", base); base = 10; } #endif - QLocale c_locale(QLocale::C); - return 
c_locale.d->stringToLongLong(*this, base, ok, QLocalePrivate::FailOnGroupSeparators); + return QLocaleData::c()->stringToLongLong(data, len, base, ok, QLocaleData::FailOnGroupSeparators); } + /*! Returns the string converted to an \c{unsigned long long} using base \a base, which is 10 by default and must be between 2 and 36, or 0. @@ -5938,6 +6108,11 @@ qint64 QString::toLongLong(bool *ok, int base) const quint64 QString::toULongLong(bool *ok, int base) const { + return toIntegral_helper<qulonglong>(constData(), size(), ok, base); +} + +qulonglong QString::toIntegral_helper(const QChar *data, uint len, bool *ok, int base) +{ #if defined(QT_CHECK_RANGE) if (base != 0 && (base < 2 || base > 36)) { qWarning("QString::toULongLong: Invalid base (%d)", base); @@ -5945,8 +6120,7 @@ quint64 QString::toULongLong(bool *ok, int base) const } #endif - QLocale c_locale(QLocale::C); - return c_locale.d->stringToUnsLongLong(*this, base, ok, QLocalePrivate::FailOnGroupSeparators); + return QLocaleData::c()->stringToUnsLongLong(data, len, base, ok, QLocaleData::FailOnGroupSeparators); } /*! @@ -5975,13 +6149,7 @@ quint64 QString::toULongLong(bool *ok, int base) const long QString::toLong(bool *ok, int base) const { - qint64 v = toLongLong(ok, base); - if (v < LONG_MIN || v > LONG_MAX) { - if (ok) - *ok = false; - v = 0; - } - return (long)v; + return toIntegral_helper<long>(constData(), size(), ok, base); } /*! 
@@ -6010,13 +6178,7 @@ long QString::toLong(bool *ok, int base) const ulong QString::toULong(bool *ok, int base) const { - quint64 v = toULongLong(ok, base); - if (v > ULONG_MAX) { - if (ok) - *ok = false; - v = 0; - } - return (ulong)v; + return toIntegral_helper<ulong>(constData(), size(), ok, base); } @@ -6044,13 +6206,7 @@ ulong QString::toULong(bool *ok, int base) const int QString::toInt(bool *ok, int base) const { - qint64 v = toLongLong(ok, base); - if (v < INT_MIN || v > INT_MAX) { - if (ok) - *ok = false; - v = 0; - } - return v; + return toIntegral_helper<int>(constData(), size(), ok, base); } /*! @@ -6077,13 +6233,7 @@ int QString::toInt(bool *ok, int base) const uint QString::toUInt(bool *ok, int base) const { - quint64 v = toULongLong(ok, base); - if (v > UINT_MAX) { - if (ok) - *ok = false; - v = 0; - } - return (uint)v; + return toIntegral_helper<uint>(constData(), size(), ok, base); } /*! @@ -6110,13 +6260,7 @@ uint QString::toUInt(bool *ok, int base) const short QString::toShort(bool *ok, int base) const { - long v = toLongLong(ok, base); - if (v < SHRT_MIN || v > SHRT_MAX) { - if (ok) - *ok = false; - v = 0; - } - return (short)v; + return toIntegral_helper<short>(constData(), size(), ok, base); } /*! @@ -6143,13 +6287,7 @@ short QString::toShort(bool *ok, int base) const ushort QString::toUShort(bool *ok, int base) const { - ulong v = toULongLong(ok, base); - if (v > USHRT_MAX) { - if (ok) - *ok = false; - v = 0; - } - return (ushort)v; + return toIntegral_helper<ushort>(constData(), size(), ok, base); } @@ -6184,8 +6322,7 @@ ushort QString::toUShort(bool *ok, int base) const double QString::toDouble(bool *ok) const { - QLocale c_locale(QLocale::C); - return c_locale.d->stringToDouble(*this, ok, QLocalePrivate::FailOnGroupSeparators); + return QLocaleData::c()->stringToDouble(constData(), size(), ok, QLocaleData::FailOnGroupSeparators); } /*! 
@@ -6204,27 +6341,9 @@ double QString::toDouble(bool *ok) const \sa number(), toDouble(), toInt(), QLocale::toFloat() */ -#define QT_MAX_FLOAT 3.4028234663852886e+38 - float QString::toFloat(bool *ok) const { - bool myOk; - double d = toDouble(&myOk); - if (!myOk) { - if (ok != 0) - *ok = false; - return 0.0; - } - if (qIsInf(d)) - return float(d); - if (d > QT_MAX_FLOAT || d < -QT_MAX_FLOAT) { - if (ok != 0) - *ok = false; - return 0.0; - } - if (ok != 0) - *ok = true; - return float(d); + return QLocaleData::convertDoubleToFloat(toDouble(ok), ok); } /*! \fn QString &QString::setNum(int n, int base) @@ -6268,8 +6387,7 @@ QString &QString::setNum(qlonglong n, int base) base = 10; } #endif - QLocale locale(QLocale::C); - *this = locale.d->longLongToString(n, -1, base); + *this = QLocaleData::c()->longLongToString(n, -1, base); return *this; } @@ -6284,8 +6402,7 @@ QString &QString::setNum(qulonglong n, int base) base = 10; } #endif - QLocale locale(QLocale::C); - *this = locale.d->unsLongLongToString(n, -1, base); + *this = QLocaleData::c()->unsLongLongToString(n, -1, base); return *this; } @@ -6317,22 +6434,22 @@ QString &QString::setNum(qulonglong n, int base) QString &QString::setNum(double n, char f, int prec) { - QLocalePrivate::DoubleForm form = QLocalePrivate::DFDecimal; + QLocaleData::DoubleForm form = QLocaleData::DFDecimal; uint flags = 0; if (qIsUpper(f)) - flags = QLocalePrivate::CapitalEorX; + flags = QLocaleData::CapitalEorX; f = qToLower(f); switch (f) { case 'f': - form = QLocalePrivate::DFDecimal; + form = QLocaleData::DFDecimal; break; case 'e': - form = QLocalePrivate::DFExponent; + form = QLocaleData::DFExponent; break; case 'g': - form = QLocalePrivate::DFSignificantDigits; + form = QLocaleData::DFSignificantDigits; break; default: #if defined(QT_CHECK_RANGE) @@ -6341,8 +6458,7 @@ QString &QString::setNum(double n, char f, int prec) break; } - QLocale locale(QLocale::C); - *this = locale.d->doubleToString(n, prec, form, -1, flags); + *this = 
QLocaleData::c()->doubleToString(n, prec, form, -1, flags); return *this; } @@ -7140,20 +7256,20 @@ QString QString::arg(qlonglong a, int fieldWidth, int base, QChar fillChar) cons return *this; } - unsigned flags = QLocalePrivate::NoFlags; + unsigned flags = QLocaleData::NoFlags; if (fillChar == QLatin1Char('0')) - flags = QLocalePrivate::ZeroPadded; + flags = QLocaleData::ZeroPadded; QString arg; if (d.occurrences > d.locale_occurrences) - arg = QLocale::c().d->longLongToString(a, -1, base, fieldWidth, flags); + arg = QLocaleData::c()->longLongToString(a, -1, base, fieldWidth, flags); QString locale_arg; if (d.locale_occurrences > 0) { QLocale locale; if (!(locale.numberOptions() & QLocale::OmitGroupSeparator)) - flags |= QLocalePrivate::ThousandsGroup; - locale_arg = locale.d->longLongToString(a, -1, base, fieldWidth, flags); + flags |= QLocaleData::ThousandsGroup; + locale_arg = locale.d->m_data->longLongToString(a, -1, base, fieldWidth, flags); } return replaceArgEscapes(*this, d, fieldWidth, arg, locale_arg, fillChar); @@ -7184,20 +7300,20 @@ QString QString::arg(qulonglong a, int fieldWidth, int base, QChar fillChar) con return *this; } - unsigned flags = QLocalePrivate::NoFlags; + unsigned flags = QLocaleData::NoFlags; if (fillChar == QLatin1Char('0')) - flags = QLocalePrivate::ZeroPadded; + flags = QLocaleData::ZeroPadded; QString arg; if (d.occurrences > d.locale_occurrences) - arg = QLocale::c().d->unsLongLongToString(a, -1, base, fieldWidth, flags); + arg = QLocaleData::c()->unsLongLongToString(a, -1, base, fieldWidth, flags); QString locale_arg; if (d.locale_occurrences > 0) { QLocale locale; if (!(locale.numberOptions() & QLocale::OmitGroupSeparator)) - flags |= QLocalePrivate::ThousandsGroup; - locale_arg = locale.d->unsLongLongToString(a, -1, base, fieldWidth, flags); + flags |= QLocaleData::ThousandsGroup; + locale_arg = locale.d->m_data->unsLongLongToString(a, -1, base, fieldWidth, flags); } return replaceArgEscapes(*this, d, fieldWidth, arg, 
locale_arg, fillChar); @@ -7296,24 +7412,24 @@ QString QString::arg(double a, int fieldWidth, char fmt, int prec, QChar fillCha return *this; } - unsigned flags = QLocalePrivate::NoFlags; + unsigned flags = QLocaleData::NoFlags; if (fillChar == QLatin1Char('0')) - flags = QLocalePrivate::ZeroPadded; + flags = QLocaleData::ZeroPadded; if (qIsUpper(fmt)) - flags |= QLocalePrivate::CapitalEorX; + flags |= QLocaleData::CapitalEorX; fmt = qToLower(fmt); - QLocalePrivate::DoubleForm form = QLocalePrivate::DFDecimal; + QLocaleData::DoubleForm form = QLocaleData::DFDecimal; switch (fmt) { case 'f': - form = QLocalePrivate::DFDecimal; + form = QLocaleData::DFDecimal; break; case 'e': - form = QLocalePrivate::DFExponent; + form = QLocaleData::DFExponent; break; case 'g': - form = QLocalePrivate::DFSignificantDigits; + form = QLocaleData::DFSignificantDigits; break; default: #if defined(QT_CHECK_RANGE) @@ -7324,15 +7440,15 @@ QString QString::arg(double a, int fieldWidth, char fmt, int prec, QChar fillCha QString arg; if (d.occurrences > d.locale_occurrences) - arg = QLocale::c().d->doubleToString(a, prec, form, fieldWidth, flags); + arg = QLocaleData::c()->doubleToString(a, prec, form, fieldWidth, flags); QString locale_arg; if (d.locale_occurrences > 0) { QLocale locale; if (!(locale.numberOptions() & QLocale::OmitGroupSeparator)) - flags |= QLocalePrivate::ThousandsGroup; - locale_arg = locale.d->doubleToString(a, prec, form, fieldWidth, flags); + flags |= QLocaleData::ThousandsGroup; + locale_arg = locale.d->m_data->doubleToString(a, prec, form, fieldWidth, flags); } return replaceArgEscapes(*this, d, fieldWidth, arg, locale_arg, fillChar); @@ -8254,19 +8370,10 @@ bool operator==(QLatin1String s1, const QStringRef &s2) if (s1.size() != s2.size()) return false; - const ushort *uc = reinterpret_cast<const ushort *>(s2.unicode()); - const ushort *e = uc + s2.size(); const uchar *c = reinterpret_cast<const uchar *>(s1.latin1()); if (!c) return s2.isEmpty(); - - while (*c) { - 
if (uc == e || *uc != *c) - return false; - ++uc; - ++c; - } - return (uc == e); + return ucstrncmp(s2.unicode(), c, s2.size()) == 0; } /*! @@ -8854,8 +8961,7 @@ int QStringRef::lastIndexOf(QLatin1String str, int from, Qt::CaseSensitivity cs) from = delta; QVarLengthArray<ushort> s(sl); - for (int i = 0; i < sl; ++i) - s[i] = str.latin1()[i]; + qt_from_latin1(s.data(), str.latin1(), sl); return lastIndexOfHelper(reinterpret_cast<const ushort*>(unicode()), from, s.data(), sl, cs); } @@ -9193,8 +9299,7 @@ static inline int qt_find_latin1_string(const QChar *haystack, int size, const char *latin1 = needle.latin1(); int len = needle.size(); QVarLengthArray<ushort> s(len); - for (int i = 0; i < len; ++i) - s[i] = latin1[i]; + qt_from_latin1(s.data(), latin1, len); return qFindString(haystack, size, from, reinterpret_cast<const QChar*>(s.constData()), len, cs); @@ -9238,9 +9343,7 @@ static inline bool qt_starts_with(const QChar *haystack, int haystackLen, const ushort *data = reinterpret_cast<const ushort*>(haystack); const uchar *latin = reinterpret_cast<const uchar*>(needle.latin1()); if (cs == Qt::CaseSensitive) { - for (int i = 0; i < slen; ++i) - if (data[i] != latin[i]) - return false; + return ucstrncmp(haystack, latin, slen) == 0; } else { for (int i = 0; i < slen; ++i) if (foldCase(data[i]) != foldCase((ushort)latin[i])) @@ -9290,9 +9393,7 @@ static inline bool qt_ends_with(const QChar *haystack, int haystackLen, const uchar *latin = reinterpret_cast<const uchar*>(needle.latin1()); const ushort *data = reinterpret_cast<const ushort*>(haystack); if (cs == Qt::CaseSensitive) { - for (int i = 0; i < slen; i++) - if (data[pos+i] != latin[i]) - return false; + return ucstrncmp(haystack + pos, latin, slen) == 0; } else { for (int i = 0; i < slen; i++) if (foldCase(data[pos+i]) != foldCase((ushort)latin[i])) @@ -9314,7 +9415,7 @@ static inline bool qt_ends_with(const QChar *haystack, int haystackLen, */ QByteArray QStringRef::toLatin1() const { - return 
toLatin1_helper(unicode(), length()); + return QString::toLatin1_helper(unicode(), length()); } /*! @@ -9390,8 +9491,12 @@ QByteArray QStringRef::toUtf8() const Returns a UCS-4/UTF-32 representation of the string as a QVector<uint>. - UCS-4 is a Unicode codec and is lossless. All characters from this string - can be encoded in UCS-4. + UCS-4 is a Unicode codec and therefore it is lossless. All characters from + this string will be encoded in UCS-4. Any invalid sequence of code units in + this string is replaced by the Unicode's replacement character + (QChar::ReplacementCharacter, which corresponds to \c{U+FFFD}). + + The returned vector is not NUL terminated. \sa toUtf8(), toLatin1(), toLocal8Bit(), QTextCodec */ @@ -9458,15 +9563,7 @@ QStringRef QStringRef::trimmed() const qint64 QStringRef::toLongLong(bool *ok, int base) const { -#if defined(QT_CHECK_RANGE) - if (base != 0 && (base < 2 || base > 36)) { - qWarning("QString::toLongLong: Invalid base (%d)", base); - base = 10; - } -#endif - - QLocale c_locale(QLocale::C); - return c_locale.d->stringToLongLong(*this, base, ok, QLocalePrivate::FailOnGroupSeparators); + return QString::toIntegral_helper<qint64>(constData(), size(), ok, base); } /*! @@ -9491,15 +9588,7 @@ qint64 QStringRef::toLongLong(bool *ok, int base) const quint64 QStringRef::toULongLong(bool *ok, int base) const { -#if defined(QT_CHECK_RANGE) - if (base != 0 && (base < 2 || base > 36)) { - qWarning("QString::toULongLong: Invalid base (%d)", base); - base = 10; - } -#endif - - QLocale c_locale(QLocale::C); - return c_locale.d->stringToUnsLongLong(*this, base, ok, QLocalePrivate::FailOnGroupSeparators); + return QString::toIntegral_helper<quint64>(constData(), size(), ok, base); } /*! 
@@ -9526,13 +9615,7 @@ quint64 QStringRef::toULongLong(bool *ok, int base) const long QStringRef::toLong(bool *ok, int base) const { - qint64 v = toLongLong(ok, base); - if (v < LONG_MIN || v > LONG_MAX) { - if (ok) - *ok = false; - v = 0; - } - return long(v); + return QString::toIntegral_helper<long>(constData(), size(), ok, base); } /*! @@ -9559,13 +9642,7 @@ long QStringRef::toLong(bool *ok, int base) const ulong QStringRef::toULong(bool *ok, int base) const { - quint64 v = toULongLong(ok, base); - if (v > ULONG_MAX) { - if (ok) - *ok = false; - v = 0; - } - return ulong(v); + return QString::toIntegral_helper<ulong>(constData(), size(), ok, base); } @@ -9591,13 +9668,7 @@ ulong QStringRef::toULong(bool *ok, int base) const int QStringRef::toInt(bool *ok, int base) const { - qint64 v = toLongLong(ok, base); - if (v < INT_MIN || v > INT_MAX) { - if (ok) - *ok = false; - v = 0; - } - return int(v); + return QString::toIntegral_helper<int>(constData(), size(), ok, base); } /*! @@ -9622,13 +9693,7 @@ int QStringRef::toInt(bool *ok, int base) const uint QStringRef::toUInt(bool *ok, int base) const { - quint64 v = toULongLong(ok, base); - if (v > UINT_MAX) { - if (ok) - *ok = false; - v = 0; - } - return uint(v); + return QString::toIntegral_helper<uint>(constData(), size(), ok, base); } /*! @@ -9653,13 +9718,7 @@ uint QStringRef::toUInt(bool *ok, int base) const short QStringRef::toShort(bool *ok, int base) const { - long v = toLongLong(ok, base); - if (v < SHRT_MIN || v > SHRT_MAX) { - if (ok) - *ok = false; - v = 0; - } - return short(v); + return QString::toIntegral_helper<short>(constData(), size(), ok, base); } /*! 
@@ -9684,13 +9743,7 @@ short QStringRef::toShort(bool *ok, int base) const ushort QStringRef::toUShort(bool *ok, int base) const { - ulong v = toULongLong(ok, base); - if (v > USHRT_MAX) { - if (ok) - *ok = false; - v = 0; - } - return ushort(v); + return QString::toIntegral_helper<ushort>(constData(), size(), ok, base); } @@ -9716,8 +9769,7 @@ ushort QStringRef::toUShort(bool *ok, int base) const double QStringRef::toDouble(bool *ok) const { - QLocale c_locale(QLocale::C); - return c_locale.d->stringToDouble(*this, ok, QLocalePrivate::FailOnGroupSeparators); + return QLocaleData::c()->stringToDouble(constData(), size(), ok, QLocaleData::FailOnGroupSeparators); } /*! @@ -9736,23 +9788,7 @@ double QStringRef::toDouble(bool *ok) const float QStringRef::toFloat(bool *ok) const { - bool myOk; - double d = toDouble(&myOk); - if (!myOk) { - if (ok != 0) - *ok = false; - return 0.0; - } - if (qIsInf(d)) - return float(d); - if (d > QT_MAX_FLOAT || d < -QT_MAX_FLOAT) { - if (ok != 0) - *ok = false; - return 0.0; - } - if (ok) - *ok = true; - return float(d); + return QLocaleData::convertDoubleToFloat(toDouble(ok), ok); } /*! @@ -9848,4 +9884,13 @@ QString QString::toHtmlEscaped() const \endlist */ + +/*! + \internal + */ +void QAbstractConcatenable::appendLatin1To(const char *a, int len, QChar *out) +{ + qt_from_latin1(reinterpret_cast<ushort *>(out), a, uint(len)); +} + QT_END_NAMESPACE |