summaryrefslogtreecommitdiffstats
path: root/src/corelib/tools/qstring.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/corelib/tools/qstring.cpp')
-rw-r--r--src/corelib/tools/qstring.cpp825
1 files changed, 482 insertions, 343 deletions
diff --git a/src/corelib/tools/qstring.cpp b/src/corelib/tools/qstring.cpp
index 2c505ef033..f5e25f1de9 100644
--- a/src/corelib/tools/qstring.cpp
+++ b/src/corelib/tools/qstring.cpp
@@ -1,6 +1,7 @@
/****************************************************************************
**
** Copyright (C) 2013 Digia Plc and/or its subsidiary(-ies).
+** Copyright (C) 2013 Intel Corporation
** Contact: http://www.qt-project.org/legal
**
** This file is part of the QtCore module of the Qt Toolkit.
@@ -101,6 +102,43 @@
QT_BEGIN_NAMESPACE
+/*
+ * Note on the use of SIMD in qstring.cpp:
+ *
+ * Several operations with strings are improved with the use of SIMD code,
+ * since they are repetitive. For MIPS, we have hand-written assembly code
+ * outside of qstring.cpp targeting MIPS DSP and MIPS DSPr2. For ARM and for
+ * x86, we can only use intrinsics and therefore everything is contained in
+ * qstring.cpp. We need to use intrinsics only for those platforms due to the
+ * different compilers and toolchains used, which have different syntax for
+ * assembly sources.
+ *
+ * ** SSE notes: **
+ *
+ * Whenever multiple alternatives are equivalent or near so, we prefer the one
+ * using instructions from SSE2, since SSE2 is guaranteed to be enabled for all
+ * 64-bit builds and we enable it for 32-bit builds by default. Use of higher
+ * SSE versions should be done when there's a clear performance benefit and
+ * requires fallback code to SSE2, if it exists.
+ *
+ * Performance measurement in the past shows that most strings are short in
+ * size and, therefore, do not benefit from alignment prologues. That is,
+ * trying to find a 16-byte-aligned boundary to operate on is often more
+ * expensive than executing the unaligned operation directly. In addition, note
+ * that the QString private data is designed so that the data is stored on
+ * 16-byte boundaries if the system malloc() returns 16-byte aligned pointers
+ * on its own (64-bit glibc on Linux does; 32-bit glibc on Linux returns them
+ * 50% of the time), so skipping the alignment prologue is actually optimizing
+ * for the common case.
+ */
+
+#if defined(__mips_dsp)
+// From qstring_mips_dsp_asm.S
+extern "C" void qt_fromlatin1_mips_asm_unroll4 (ushort*, const char*, uint);
+extern "C" void qt_fromlatin1_mips_asm_unroll8 (ushort*, const char*, uint);
+extern "C" void qt_toLatin1_mips_dsp_asm(uchar *dst, const ushort *src, int length);
+#endif
+
// internal
int qFindString(const QChar *haystack, int haystackLen, int from,
const QChar *needle, int needleLen, Qt::CaseSensitivity cs);
@@ -124,6 +162,194 @@ static inline bool qt_ends_with(const QChar *haystack, int haystackLen,
static inline bool qt_ends_with(const QChar *haystack, int haystackLen,
QLatin1String needle, Qt::CaseSensitivity cs);
+#ifdef Q_COMPILER_LAMBDA
+namespace {
+template <uint MaxCount> struct UnrollTailLoop
+{
+ template <typename RetType, typename Functor1, typename Functor2>
+ static inline RetType exec(int count, RetType returnIfExited, Functor1 loopCheck, Functor2 returnIfFailed, int i = 0)
+ {
+ /* equivalent to (provided count <= MaxCount):
+ * for ( ; count--; ++i) {
+ * if (loopCheck(i))
+ * return returnIfFailed(i);
+ * }
+ * return returnIfExited;
+ */
+
+ if (!count)
+ return returnIfExited;
+
+ bool check = loopCheck(i);
+ if (check) {
+ const RetType &retval = returnIfFailed(i);
+ return retval;
+ }
+
+ return UnrollTailLoop<MaxCount - 1>::exec(count - 1, returnIfExited, loopCheck, returnIfFailed, i + 1);
+ }
+};
+template <> template <typename RetType, typename Functor1, typename Functor2>
+inline RetType UnrollTailLoop<0>::exec(int, RetType returnIfExited, Functor1, Functor2, int)
+{
+ return returnIfExited;
+}
+}
+#endif
+
+// conversion between Latin 1 and UTF-16
+static void qt_from_latin1(ushort *dst, const char *str, size_t size)
+{
+ /* SIMD:
+ * Unpacking with SSE has been shown to improve performance on recent CPUs
+ * The same method gives no improvement with NEON.
+ */
+#if defined(__SSE2__)
+ if (size >= 16) {
+ int chunkCount = size >> 4; // divided by 16
+ const __m128i nullMask = _mm_set1_epi32(0);
+ for (int i = 0; i < chunkCount; ++i) {
+ const __m128i chunk = _mm_loadu_si128((__m128i*)str); // load
+ str += 16;
+
+ // unpack the first 8 bytes, padding with zeros
+ const __m128i firstHalf = _mm_unpacklo_epi8(chunk, nullMask);
+ _mm_storeu_si128((__m128i*)dst, firstHalf); // store
+ dst += 8;
+
+ // unpack the last 8 bytes, padding with zeros
+ const __m128i secondHalf = _mm_unpackhi_epi8 (chunk, nullMask);
+ _mm_storeu_si128((__m128i*)dst, secondHalf); // store
+ dst += 8;
+ }
+ size = size % 16;
+ }
+#endif
+#if defined(__mips_dsp)
+ if (size > 20)
+ qt_fromlatin1_mips_asm_unroll8(dst, str, size);
+ else
+ qt_fromlatin1_mips_asm_unroll4(dst, str, size);
+#else
+ while (size--)
+ *dst++ = (uchar)*str++;
+#endif
+}
+
+#if defined(__SSE2__)
+static inline __m128i mergeQuestionMarks(__m128i chunk)
+{
+ const __m128i questionMark = _mm_set1_epi16('?');
+
+# ifdef __SSE4_2__
+ // compare the unsigned shorts for the range 0x0100-0xFFFF
+ // note on the use of _mm_cmpestrm:
+ // The MSDN documentation online (http://technet.microsoft.com/en-us/library/bb514080.aspx)
+ // says for range search the following:
+ // For each character c in a, determine whether b0 <= c <= b1 or b2 <= c <= b3
+ //
+ // However, all examples on the Internet, including from Intel
+ // (see http://software.intel.com/en-us/articles/xml-parsing-accelerator-with-intel-streaming-simd-extensions-4-intel-sse4/)
+ // put the range to be searched first
+ //
+ // Disassembly and instruction-level debugging with GCC and ICC show
+ // that they are doing the right thing. Inverting the arguments in the
+ // instruction does cause a bunch of test failures.
+
+ const int mode = _SIDD_UWORD_OPS | _SIDD_CMP_RANGES | _SIDD_UNIT_MASK;
+ const __m128i rangeMatch = _mm_cvtsi32_si128(0xffff0100);
+ const __m128i offLimitMask = _mm_cmpestrm(rangeMatch, 2, chunk, 8, mode);
+
+ // replace the non-Latin 1 characters in the chunk with question marks
+ chunk = _mm_blendv_epi8(chunk, questionMark, offLimitMask);
+# else
+ // SSE has no compare instruction for unsigned comparison.
+ // The values must be shifted by 0x8000 (adding it flips the sign bit) to be compared
+ const __m128i signedBitOffset = _mm_set1_epi16(short(0x8000));
+ const __m128i thresholdMask = _mm_set1_epi16(short(0xff + 0x8000));
+
+ const __m128i signedChunk = _mm_add_epi16(chunk, signedBitOffset);
+ const __m128i offLimitMask = _mm_cmpgt_epi16(signedChunk, thresholdMask);
+
+# ifdef __SSE4_1__
+ // replace the non-Latin 1 characters in the chunk with question marks
+ chunk = _mm_blendv_epi8(chunk, questionMark, offLimitMask);
+# else
+ // offLimitQuestionMark contains '?' for each 16 bits that was off-limit
+ // the 16 bits that were correct contains zeros
+ const __m128i offLimitQuestionMark = _mm_and_si128(offLimitMask, questionMark);
+
+ // correctBytes contains the bytes that were in limit
+ // the 16 bits that were off limits contains zeros
+ const __m128i correctBytes = _mm_andnot_si128(offLimitMask, chunk);
+
+ // merge offLimitQuestionMark and correctBytes to have the result
+ chunk = _mm_or_si128(correctBytes, offLimitQuestionMark);
+# endif
+# endif
+ return chunk;
+}
+#endif
+
+static void qt_to_latin1(uchar *dst, const ushort *src, int length)
+{
+ if (length) {
+#if defined(__SSE2__)
+ if (length >= 16) {
+ const int chunkCount = length >> 4; // divided by 16
+
+ for (int i = 0; i < chunkCount; ++i) {
+ __m128i chunk1 = _mm_loadu_si128((__m128i*)src); // load
+ chunk1 = mergeQuestionMarks(chunk1);
+ src += 8;
+
+ __m128i chunk2 = _mm_loadu_si128((__m128i*)src); // load
+ chunk2 = mergeQuestionMarks(chunk2);
+ src += 8;
+
+ // pack the two vector to 16 x 8bits elements
+ const __m128i result = _mm_packus_epi16(chunk1, chunk2);
+
+ _mm_storeu_si128((__m128i*)dst, result); // store
+ dst += 16;
+ }
+ length = length % 16;
+ }
+#elif defined(__ARM_NEON__)
+ // Refer to the documentation of the SSE2 implementation
+ // this uses exactly the same method as for SSE except:
+ // 1) neon has unsigned comparison
+ // 2) packing is done to 64 bits (8 x 8bits component).
+ if (length >= 16) {
+ const int chunkCount = length >> 3; // divided by 8
+ const uint16x8_t questionMark = vdupq_n_u16('?'); // set
+ const uint16x8_t thresholdMask = vdupq_n_u16(0xff); // set
+ for (int i = 0; i < chunkCount; ++i) {
+ uint16x8_t chunk = vld1q_u16((uint16_t *)src); // load
+ src += 8;
+
+ const uint16x8_t offLimitMask = vcgtq_u16(chunk, thresholdMask); // chunk > thresholdMask
+ const uint16x8_t offLimitQuestionMark = vandq_u16(offLimitMask, questionMark); // offLimitMask & questionMark
+ const uint16x8_t correctBytes = vbicq_u16(chunk, offLimitMask); // !offLimitMask & chunk
+ chunk = vorrq_u16(correctBytes, offLimitQuestionMark); // correctBytes | offLimitQuestionMark
+ const uint8x8_t result = vmovn_u16(chunk); // narrowing move->packing
+ vst1_u8(dst, result); // store
+ dst += 8;
+ }
+ length = length % 8;
+ }
+#endif
+#if defined(__mips_dsp)
+ qt_toLatin1_mips_dsp_asm(dst, src, length);
+#else
+ while (length--) {
+ *dst++ = (*src>0xff) ? '?' : (uchar) *src;
+ ++src;
+ }
+#endif
+ }
+}
+
// Unicode case-insensitive comparison
static int ucstricmp(const ushort *a, const ushort *ae, const ushort *b, const ushort *be)
{
@@ -205,83 +431,39 @@ static int ucstrncmp(const QChar *a, const QChar *b, int l)
l);
}
#endif // __mips_dsp
- while (l-- && *a == *b)
- a++,b++;
- if (l==-1)
- return 0;
- return a->unicode() - b->unicode();
-}
-
-// Unicode case-sensitive comparison
-static int ucstrcmp(const QChar *a, int alen, const QChar *b, int blen)
-{
- if (a == b && alen == blen)
+#ifdef __SSE2__
+ const char *ptr = reinterpret_cast<const char*>(a);
+ qptrdiff distance = reinterpret_cast<const char*>(b) - ptr;
+ a += l & ~7;
+ b += l & ~7;
+ l &= 7;
+
+ // we're going to read ptr[0..15] (16 bytes)
+ for ( ; ptr + 15 < reinterpret_cast<const char *>(a); ptr += 16) {
+ __m128i a_data = _mm_loadu_si128((__m128i*)ptr);
+ __m128i b_data = _mm_loadu_si128((__m128i*)(ptr + distance));
+ __m128i result = _mm_cmpeq_epi16(a_data, b_data);
+ uint mask = ~_mm_movemask_epi8(result);
+ if (ushort(mask)) {
+ // found a different byte
+ uint idx = uint(_bit_scan_forward(mask));
+ return reinterpret_cast<const QChar *>(ptr + idx)->unicode()
+ - reinterpret_cast<const QChar *>(ptr + distance + idx)->unicode();
+ }
+ }
+# ifdef Q_COMPILER_LAMBDA
+ const auto &lambda = [=](int i) -> int {
+ return reinterpret_cast<const QChar *>(ptr)[i].unicode()
+ - reinterpret_cast<const QChar *>(ptr + distance)[i].unicode();
+ };
+ return UnrollTailLoop<7>::exec(l, 0, lambda, lambda);
+# endif
+#endif
+ if (!l)
return 0;
- int l = qMin(alen, blen);
- int cmp = ucstrncmp(a, b, l);
- return cmp ? cmp : (alen-blen);
-}
-
-// Unicode case-insensitive compare two same-sized strings
-static int ucstrnicmp(const ushort *a, const ushort *b, int l)
-{
- return ucstricmp(a, a + l, b, b + l);
-}
-
-// Benchmarking indicates that doing memcmp is much slower than
-// executing the comparison ourselves.
-//
-// The profiling was done on a population of calls to qMemEquals, generated
-// during a run of the demo browser. The profile of the data (32-bit x86
-// Linux) was:
-//
-// total number of comparisons: 21353
-// longest string compared: 95
-// average comparison length: 14.8786
-// cache-line crosses: 5661 (13.3%)
-// alignment histogram:
-// 0xXXX0 = 512 (1.2%) strings, 0 (0.0%) of which same-aligned
-// 0xXXX2 = 15087 (35.3%) strings, 5145 (34.1%) of which same-aligned
-// 0xXXX4 = 525 (1.2%) strings, 0 (0.0%) of which same-aligned
-// 0xXXX6 = 557 (1.3%) strings, 6 (1.1%) of which same-aligned
-// 0xXXX8 = 509 (1.2%) strings, 0 (0.0%) of which same-aligned
-// 0xXXXa = 24358 (57.0%) strings, 9901 (40.6%) of which same-aligned
-// 0xXXXc = 557 (1.3%) strings, 0 (0.0%) of which same-aligned
-// 0xXXXe = 601 (1.4%) strings, 15 (2.5%) of which same-aligned
-// total = 42706 (100%) strings, 15067 (35.3%) of which same-aligned
-//
-// 92% of the strings have alignment of 2 or 10, which is due to malloc on
-// 32-bit Linux returning values aligned to 8 bytes, and offsetof(array, QString::Data) == 18.
-//
-// The profile on 64-bit will be different since offsetof(array, QString::Data) == 26.
-//
-// The benchmark results were, for a Core-i7 @ 2.67 GHz 32-bit, compiled with -O3 -funroll-loops:
-// 16-bit loads only: 872,301 CPU ticks [Qt 4.5 / memcmp]
-// 32- and 16-bit loads: 773,362 CPU ticks [Qt 4.6]
-// SSE2 "movdqu" 128-bit loads: 618,736 CPU ticks
-// SSE3 "lddqu" 128-bit loads: 619,954 CPU ticks
-// SSSE3 "palignr" corrections: 852,147 CPU ticks
-// SSE4.2 "pcmpestrm": 738,702 CPU ticks
-//
-// The same benchmark on an Atom N450 @ 1.66 GHz, is:
-// 16-bit loads only: 2,185,882 CPU ticks
-// 32- and 16-bit loads: 1,805,060 CPU ticks
-// SSE2 "movdqu" 128-bit loads: 2,529,843 CPU ticks
-// SSE3 "lddqu" 128-bit loads: 2,514,858 CPU ticks
-// SSSE3 "palignr" corrections: 2,160,325 CPU ticks
-// SSE4.2 not available
-//
-// The conclusion we reach is that alignment the SSE2 unaligned code can gain
-// 20% improvement in performance in some systems, but suffers a penalty due
-// to the unaligned loads on others.
-
-static bool qMemEquals(const quint16 *a, const quint16 *b, int length)
-{
- if (a == b || !length)
- return true;
union {
- const quint16 *w;
+ const QChar *w;
const quint32 *d;
quintptr value;
} sa, sb;
@@ -295,8 +477,8 @@ static bool qMemEquals(const quint16 *a, const quint16 *b, int length)
// both addresses are not aligned to 4-bytes boundaries
// compare the first character
if (*sa.w != *sb.w)
- return false;
- --length;
+ return sa.w->unicode() - sb.w->unicode();
+ --l;
++sa.w;
++sb.w;
@@ -305,23 +487,128 @@ static bool qMemEquals(const quint16 *a, const quint16 *b, int length)
// both addresses are 4-bytes aligned
// do a fast 32-bit comparison
- const quint32 *e = sa.d + (length >> 1);
+ const quint32 *e = sa.d + (l >> 1);
for ( ; sa.d != e; ++sa.d, ++sb.d) {
- if (*sa.d != *sb.d)
- return false;
+ if (*sa.d != *sb.d) {
+ if (*sa.w != *sb.w)
+ return sa.w->unicode() - sb.w->unicode();
+ return sa.w[1].unicode() - sb.w[1].unicode();
+ }
}
// do we have a tail?
- return (length & 1) ? *sa.w == *sb.w : true;
+ return (l & 1) ? sa.w->unicode() - sb.w->unicode() : 0;
} else {
// one of the addresses isn't 4-byte aligned but the other is
- const quint16 *e = sa.w + length;
+ const QChar *e = sa.w + l;
for ( ; sa.w != e; ++sa.w, ++sb.w) {
if (*sa.w != *sb.w)
- return false;
+ return sa.w->unicode() - sb.w->unicode();
}
}
- return true;
+ return 0;
+}
+
+// Case-sensitive comparison of the first l UTF-16 code units at a against the
+// first l Latin 1 bytes at c. Returns 0 if all l positions are equal;
+// otherwise returns the difference (UTF-16 unit minus Latin 1 byte) at the
+// first position where they differ.
+static int ucstrncmp(const QChar *a, const uchar *c, int l)
+{
+    const ushort *uc = reinterpret_cast<const ushort *>(a);
+    const ushort *e = uc + l;
+
+#ifdef __SSE2__
+    __m128i nullmask = _mm_setzero_si128();
+    qptrdiff offset = 0;
+
+    // we're going to read uc[offset..offset+15] (32 bytes)
+    // and c[offset..offset+15] (16 bytes)
+    for ( ; uc + offset + 15 < e; offset += 16) {
+        // similar to fromLatin1_helper:
+        // load Latin 1 data and expand to UTF-16
+        __m128i chunk = _mm_loadu_si128((__m128i*)(c + offset));
+        __m128i firstHalf = _mm_unpacklo_epi8(chunk, nullmask);
+        __m128i secondHalf = _mm_unpackhi_epi8(chunk, nullmask);
+
+        // load UTF-16 data and compare
+        __m128i ucdata1 = _mm_loadu_si128((__m128i*)(uc + offset));
+        __m128i ucdata2 = _mm_loadu_si128((__m128i*)(uc + offset + 8));
+        __m128i result1 = _mm_cmpeq_epi16(firstHalf, ucdata1);
+        __m128i result2 = _mm_cmpeq_epi16(secondHalf, ucdata2);
+
+        // one mask bit per byte: bits 0-15 cover the first 8 characters,
+        // bits 16-31 the last 8; a set bit (after inversion) is a mismatch
+        uint mask = ~(_mm_movemask_epi8(result1) | _mm_movemask_epi8(result2) << 16);
+        if (mask) {
+            // found a different character
+            uint idx = uint(_bit_scan_forward(mask));
+            return uc[offset + idx / 2] - c[offset + idx / 2];
+        }
+    }
+
+    // we'll read uc[offset..offset+7] (16 bytes) and c[offset..offset+7] (8 bytes)
+    if (uc + offset + 7 < e) {
+        // Use an 8-byte load for the Latin 1 data. A 16-byte load from
+        // c + offset - 8 would read before the start of the buffer whenever
+        // offset is still 0 here, that is, when 8 <= l < 16.
+        __m128i chunk = _mm_loadl_epi64((const __m128i*)(c + offset));
+        __m128i expanded = _mm_unpacklo_epi8(chunk, nullmask);
+
+        __m128i ucdata = _mm_loadu_si128((__m128i*)(uc + offset));
+        __m128i result = _mm_cmpeq_epi16(expanded, ucdata);
+        uint mask = ~_mm_movemask_epi8(result);
+        if (ushort(mask)) {
+            // found a different character
+            uint idx = uint(_bit_scan_forward(mask));
+            return uc[offset + idx / 2] - c[offset + idx / 2];
+        }
+
+        // still matched
+        offset += 8;
+    }
+
+    // reset uc and c
+    uc += offset;
+    c += offset;
+
+#  ifdef Q_COMPILER_LAMBDA
+    // at most 7 characters remain; the same lambda serves as both the
+    // "differs?" check and the value to return on a mismatch
+    const auto &lambda = [=](int i) { return uc[i] - ushort(c[i]); };
+    return UnrollTailLoop<7>::exec(e - uc, 0, lambda, lambda);
+#  endif
+#endif
+
+    // scalar comparison of whatever is left (everything, without SSE2)
+    while (uc < e) {
+        int diff = *uc - *c;
+        if (diff)
+            return diff;
+        uc++, c++;
+    }
+
+    return 0;
+}
+
+// Unicode case-sensitive comparison
+static int ucstrcmp(const QChar *a, int alen, const QChar *b, int blen)
+{
+ if (a == b && alen == blen)
+ return 0;
+ int l = qMin(alen, blen);
+ int cmp = ucstrncmp(a, b, l);
+ return cmp ? cmp : (alen-blen);
+}
+
+// Unicode case-insensitive compare two same-sized strings
+static int ucstrnicmp(const ushort *a, const ushort *b, int l)
+{
+ return ucstricmp(a, a + l, b, b + l);
+}
+
+static bool qMemEquals(const quint16 *a, const quint16 *b, int length)
+{
+ if (a == b || !length)
+ return true;
+
+ return ucstrncmp(reinterpret_cast<const QChar *>(a), reinterpret_cast<const QChar *>(b), length) == 0;
+}
+
+static int ucstrcmp(const QChar *a, int alen, const uchar *b, int blen)
+{
+ int l = qMin(alen, blen);
+ int cmp = ucstrncmp(a, b, l);
+ return cmp ? cmp : (alen-blen);
}
/*!
@@ -340,14 +627,38 @@ static int findChar(const QChar *str, int len, QChar ch, int from,
if (from < 0)
from = qMax(from + len, 0);
if (from < len) {
- const ushort *n = s + from - 1;
+ const ushort *n = s + from;
const ushort *e = s + len;
if (cs == Qt::CaseSensitive) {
+#ifdef __SSE2__
+ __m128i mch = _mm_set1_epi32(c | (c << 16));
+
+ // we're going to read n[0..7] (16 bytes)
+ for (const ushort *next = n + 8; next <= e; n = next, next += 8) {
+ __m128i data = _mm_loadu_si128((__m128i*)n);
+ __m128i result = _mm_cmpeq_epi16(data, mch);
+ uint mask = _mm_movemask_epi8(result);
+ if (ushort(mask)) {
+ // found a match
+ // same as: return n - s + _bit_scan_forward(mask) / 2
+ return (reinterpret_cast<const char *>(n) - reinterpret_cast<const char *>(s)
+ + _bit_scan_forward(mask)) >> 1;
+ }
+ }
+
+# ifdef Q_COMPILER_LAMBDA
+ return UnrollTailLoop<7>::exec(e - n, -1,
+ [=](int i) { return n[i] == c; },
+ [=](int i) { return n - s + i; });
+# endif
+#endif
+ --n;
while (++n != e)
if (*n == c)
return n - s;
} else {
c = foldCase(c);
+ --n;
while (++n != e)
if (foldCase(*n) == c)
return n - s;
@@ -1463,7 +1774,7 @@ QString &QString::operator=(QChar ch)
*/
QString &QString::insert(int i, QLatin1String str)
{
- const uchar *s = (const uchar *)str.latin1();
+ const char *s = str.latin1();
if (i < 0 || !s || !(*s))
return *this;
@@ -1471,8 +1782,7 @@ QString &QString::insert(int i, QLatin1String str)
expand(qMax(d->size, i) + len - 1);
::memmove(d->data() + i + len, d->data() + i, (d->size - i - len) * sizeof(QChar));
- for (int j = 0; j < len; ++j)
- d->data()[i + j] = s[j];
+ qt_from_latin1(d->data() + i, s, uint(len));
return *this;
}
@@ -1584,14 +1894,14 @@ QString &QString::append(const QChar *str, int len)
*/
QString &QString::append(QLatin1String str)
{
- const uchar *s = (const uchar *)str.latin1();
+ const char *s = str.latin1();
if (s) {
int len = str.size();
if (d->ref.isShared() || uint(d->size + len) + 1u > d->alloc)
reallocData(uint(d->size + len) + 1u, true);
ushort *i = d->data() + d->size;
- while ((*i++ = *s++))
- ;
+ qt_from_latin1(i, s, uint(len));
+ i[len] = '\0';
d->size += len;
}
return *this;
@@ -2098,13 +2408,11 @@ QString& QString::replace(QChar before, QChar after, Qt::CaseSensitivity cs)
QString &QString::replace(QLatin1String before, QLatin1String after, Qt::CaseSensitivity cs)
{
int alen = after.size();
- QVarLengthArray<ushort> a(alen);
- for (int i = 0; i < alen; ++i)
- a[i] = (uchar)after.latin1()[i];
int blen = before.size();
+ QVarLengthArray<ushort> a(alen);
QVarLengthArray<ushort> b(blen);
- for (int i = 0; i < blen; ++i)
- b[i] = (uchar)before.latin1()[i];
+ qt_from_latin1(a.data(), after.latin1(), alen);
+ qt_from_latin1(b.data(), before.latin1(), blen);
return replace((const QChar *)b.data(), blen, (const QChar *)a.data(), alen, cs);
}
@@ -2124,8 +2432,7 @@ QString &QString::replace(QLatin1String before, const QString &after, Qt::CaseSe
{
int blen = before.size();
QVarLengthArray<ushort> b(blen);
- for (int i = 0; i < blen; ++i)
- b[i] = (uchar)before.latin1()[i];
+ qt_from_latin1(b.data(), before.latin1(), blen);
return replace((const QChar *)b.data(), blen, after.constData(), after.d->size, cs);
}
@@ -2145,8 +2452,7 @@ QString &QString::replace(const QString &before, QLatin1String after, Qt::CaseSe
{
int alen = after.size();
QVarLengthArray<ushort> a(alen);
- for (int i = 0; i < alen; ++i)
- a[i] = (uchar)after.latin1()[i];
+ qt_from_latin1(a.data(), after.latin1(), alen);
return replace(before.constData(), before.d->size, (const QChar *)a.data(), alen, cs);
}
@@ -2166,8 +2472,7 @@ QString &QString::replace(QChar c, QLatin1String after, Qt::CaseSensitivity cs)
{
int alen = after.size();
QVarLengthArray<ushort> a(alen);
- for (int i = 0; i < alen; ++i)
- a[i] = (uchar)after.latin1()[i];
+ qt_from_latin1(a.data(), after.latin1(), alen);
return replace(&c, 1, (const QChar *)a.data(), alen, cs);
}
@@ -2201,17 +2506,7 @@ bool QString::operator==(QLatin1String other) const
if (!other.size())
return isEmpty();
- const ushort *uc = d->data();
- const ushort *e = uc + d->size;
- const uchar *c = (uchar *)other.latin1();
-
- while (uc < e) {
- if (*uc != *c)
- return false;
- ++uc;
- ++c;
- }
- return true;
+ return compare_helper(data(), size(), other, Qt::CaseSensitive) == 0;
}
/*! \fn bool QString::operator==(const QByteArray &other) const
@@ -2265,16 +2560,7 @@ bool QString::operator<(QLatin1String other) const
if (!c || *c == 0)
return false;
- const ushort *uc = d->data();
- const ushort *e = uc + qMin(d->size, other.size());
-
- while (uc < e) {
- if (*uc != *c)
- break;
- ++uc;
- ++c;
- }
- return (uc == e ? d->size < other.size() : *uc < *c);
+ return compare_helper(data(), size(), other, Qt::CaseSensitive) < 0;
}
/*! \fn bool QString::operator<(const QByteArray &other) const
@@ -2367,16 +2653,7 @@ bool QString::operator>(QLatin1String other) const
if (!c || *c == '\0')
return !isEmpty();
- const ushort *uc = d->data();
- const ushort *e = uc + qMin(d->size, other.size());
-
- while (uc < e) {
- if (*uc != *c)
- break;
- ++uc;
- ++c;
- }
- return (uc == e) ? d->size > other.size() : *uc > *c;
+ return compare_helper(data(), size(), other, Qt::CaseSensitive) > 0;
}
/*! \fn bool QString::operator>(const QByteArray &other) const
@@ -2763,8 +3040,7 @@ int QString::lastIndexOf(QLatin1String str, int from, Qt::CaseSensitivity cs) co
from = delta;
QVarLengthArray<ushort> s(sl);
- for (int i = 0; i < sl; ++i)
- s[i] = str.latin1()[i];
+ qt_from_latin1(s.data(), str.latin1(), sl);
return lastIndexOfHelper(d->data(), from, s.data(), sl, cs);
}
@@ -3172,6 +3448,15 @@ int QString::count(const QStringRef &str, Qt::CaseSensitivity cs) const
\sa indexOf(), count()
*/
+/*! \fn bool QString::contains(QLatin1String str, Qt::CaseSensitivity cs = Qt::CaseSensitive) const
+ \since 5.3
+
+ \overload contains()
+
+ Returns \c true if this string contains an occurrence of the latin-1 string
+ \a str; otherwise returns \c false.
+*/
+
/*! \fn bool QString::contains(QChar ch, Qt::CaseSensitivity cs = Qt::CaseSensitive) const
\overload contains()
@@ -3895,131 +4180,58 @@ bool QString::endsWith(QChar c, Qt::CaseSensitivity cs) const
: foldCase(d->data()[d->size - 1]) == foldCase(c.unicode()));
}
-
-#if defined(__SSE2__)
-static inline __m128i mergeQuestionMarks(__m128i chunk)
+QByteArray QString::toLatin1_helper(const QString &string)
{
- const __m128i questionMark = _mm_set1_epi16('?');
-
-# ifdef __SSE4_2__
- // compare the unsigned shorts for the range 0x0100-0xFFFF
- // note on the use of _mm_cmpestrm:
- // The MSDN documentation online (http://technet.microsoft.com/en-us/library/bb514080.aspx)
- // says for range search the following:
- // For each character c in a, determine whether b0 <= c <= b1 or b2 <= c <= b3
- //
- // However, all examples on the Internet, including from Intel
- // (see http://software.intel.com/en-us/articles/xml-parsing-accelerator-with-intel-streaming-simd-extensions-4-intel-sse4/)
- // put the range to be searched first
- //
- // Disassembly and instruction-level debugging with GCC and ICC show
- // that they are doing the right thing. Inverting the arguments in the
- // instruction does cause a bunch of test failures.
-
- const int mode = _SIDD_UWORD_OPS | _SIDD_CMP_RANGES | _SIDD_UNIT_MASK;
- const __m128i rangeMatch = _mm_cvtsi32_si128(0xffff0100);
- const __m128i offLimitMask = _mm_cmpestrm(rangeMatch, 2, chunk, 8, mode);
-
- // replace the non-Latin 1 characters in the chunk with question marks
- chunk = _mm_blendv_epi8(chunk, questionMark, offLimitMask);
-# else
- // SSE has no compare instruction for unsigned comparison.
- // The variables must be shiffted + 0x8000 to be compared
- const __m128i signedBitOffset = _mm_set1_epi16(short(0x8000));
- const __m128i thresholdMask = _mm_set1_epi16(short(0xff + 0x8000));
-
- const __m128i signedChunk = _mm_add_epi16(chunk, signedBitOffset);
- const __m128i offLimitMask = _mm_cmpgt_epi16(signedChunk, thresholdMask);
+ if (Q_UNLIKELY(string.isNull()))
+ return QByteArray();
-# ifdef __SSE4_1__
- // replace the non-Latin 1 characters in the chunk with question marks
- chunk = _mm_blendv_epi8(chunk, questionMark, offLimitMask);
-# else
- // offLimitQuestionMark contains '?' for each 16 bits that was off-limit
- // the 16 bits that were correct contains zeros
- const __m128i offLimitQuestionMark = _mm_and_si128(offLimitMask, questionMark);
+ return toLatin1_helper(string.constData(), string.length());
+}
- // correctBytes contains the bytes that were in limit
- // the 16 bits that were off limits contains zeros
- const __m128i correctBytes = _mm_andnot_si128(offLimitMask, chunk);
+QByteArray QString::toLatin1_helper(const QChar *data, int length)
+{
+ QByteArray ba(length, Qt::Uninitialized);
- // merge offLimitQuestionMark and correctBytes to have the result
- chunk = _mm_or_si128(correctBytes, offLimitQuestionMark);
-# endif
-# endif
- return chunk;
+ // since we own the only copy, we're going to const_cast the constData;
+ // that avoids an unnecessary call to detach() and expansion code that will never get used
+ qt_to_latin1(reinterpret_cast<uchar *>(const_cast<char *>(ba.constData())),
+ reinterpret_cast<const ushort *>(data), length);
+ return ba;
}
-#endif
-
-#if defined(__mips_dsp)
-extern "C" void qt_toLatin1_mips_dsp_asm(uchar *dst, const ushort *src, int length);
-#endif
-static QByteArray toLatin1_helper(const QChar *data, int length)
+QByteArray QString::toLatin1_helper_inplace(QString &s)
{
- QByteArray ba;
- if (length) {
- ba.resize(length);
- const ushort *src = reinterpret_cast<const ushort *>(data);
- uchar *dst = (uchar*) ba.data();
-#if defined(__SSE2__)
- if (length >= 16) {
- const int chunkCount = length >> 4; // divided by 16
+ if (!s.isDetached())
+ return s.toLatin1();
- for (int i = 0; i < chunkCount; ++i) {
- __m128i chunk1 = _mm_loadu_si128((__m128i*)src); // load
- chunk1 = mergeQuestionMarks(chunk1);
- src += 8;
+ // We can return our own buffer to the caller.
+ // Conversion to Latin-1 always shrinks the buffer by half.
+ const ushort *data = reinterpret_cast<const ushort *>(s.constData());
+ uint length = s.size();
- __m128i chunk2 = _mm_loadu_si128((__m128i*)src); // load
- chunk2 = mergeQuestionMarks(chunk2);
- src += 8;
+ // Swap the d pointers.
+ // Kids, avert your eyes. Don't try this at home.
+ QArrayData *ba_d = s.d;
- // pack the two vector to 16 x 8bits elements
- const __m128i result = _mm_packus_epi16(chunk1, chunk2);
+ // multiply the allocated capacity by sizeof(ushort)
+ ba_d->alloc *= sizeof(ushort);
- _mm_storeu_si128((__m128i*)dst, result); // store
- dst += 16;
- }
- length = length % 16;
- }
-#elif defined(__ARM_NEON__)
- // Refer to the documentation of the SSE2 implementation
- // this use eactly the same method as for SSE except:
- // 1) neon has unsigned comparison
- // 2) packing is done to 64 bits (8 x 8bits component).
- if (length >= 16) {
- const int chunkCount = length >> 3; // divided by 8
- const uint16x8_t questionMark = vdupq_n_u16('?'); // set
- const uint16x8_t thresholdMask = vdupq_n_u16(0xff); // set
- for (int i = 0; i < chunkCount; ++i) {
- uint16x8_t chunk = vld1q_u16((uint16_t *)src); // load
- src += 8;
+ // reset ourselves to QString()
+ s.d = QString().d;
- const uint16x8_t offLimitMask = vcgtq_u16(chunk, thresholdMask); // chunk > thresholdMask
- const uint16x8_t offLimitQuestionMark = vandq_u16(offLimitMask, questionMark); // offLimitMask & questionMark
- const uint16x8_t correctBytes = vbicq_u16(chunk, offLimitMask); // !offLimitMask & chunk
- chunk = vorrq_u16(correctBytes, offLimitQuestionMark); // correctBytes | offLimitQuestionMark
- const uint8x8_t result = vmovn_u16(chunk); // narrowing move->packing
- vst1_u8(dst, result); // store
- dst += 8;
- }
- length = length % 8;
- }
-#endif
-#if defined(__mips_dsp)
- qt_toLatin1_mips_dsp_asm(dst, src, length);
-#else
- while (length--) {
- *dst++ = (*src>0xff) ? '?' : (uchar) *src;
- ++src;
- }
-#endif
- }
- return ba;
+ // do the in-place conversion
+ uchar *dst = reinterpret_cast<uchar *>(ba_d->data());
+ qt_to_latin1(dst, data, length);
+ dst[length] = '\0';
+
+ QByteArrayDataPtr badptr = { ba_d };
+ return QByteArray(badptr);
}
+
/*!
+ \fn QByteArray QString::toLatin1() const
+
Returns a Latin-1 representation of the string as a QByteArray.
The returned byte array is undefined if the string contains non-Latin1
@@ -4028,10 +4240,6 @@ static QByteArray toLatin1_helper(const QChar *data, int length)
\sa fromLatin1(), toUtf8(), toLocal8Bit(), QTextCodec
*/
-QByteArray QString::toLatin1() const
-{
- return toLatin1_helper(unicode(), length());
-}
/*!
\fn QByteArray QString::toAscii() const
@@ -4046,19 +4254,9 @@ QByteArray QString::toLatin1() const
\sa fromAscii(), toLatin1(), toUtf8(), toLocal8Bit(), QTextCodec
*/
-#if !defined(Q_OS_MAC) && defined(Q_OS_UNIX) && !defined(QT_USE_ICU)
-static QByteArray toLocal8Bit_helper(const QChar *data, int length)
-{
-#ifndef QT_NO_TEXTCODEC
- QTextCodec *localeCodec = QTextCodec::codecForLocale();
- if (localeCodec)
- return localeCodec->fromUnicode(data, length);
-#endif // QT_NO_TEXTCODEC
- return toLatin1_helper(data, length);
-}
-#endif
-
/*!
+ \fn QByteArray QString::toLocal8Bit() const
+
Returns the local 8-bit representation of the string as a
QByteArray. The returned byte array is undefined if the string
contains characters not supported by the local 8-bit encoding.
@@ -4073,17 +4271,21 @@ static QByteArray toLocal8Bit_helper(const QChar *data, int length)
\sa fromLocal8Bit(), toLatin1(), toUtf8(), QTextCodec
*/
-QByteArray QString::toLocal8Bit() const
+
+QByteArray QString::toLocal8Bit_helper(const QChar *data, int size)
{
#ifndef QT_NO_TEXTCODEC
QTextCodec *localeCodec = QTextCodec::codecForLocale();
if (localeCodec)
- return localeCodec->fromUnicode(*this);
+ return localeCodec->fromUnicode(data, size);
#endif // QT_NO_TEXTCODEC
- return toLatin1();
+ return toLatin1_helper(data, size);
}
+
/*!
+ \fn QByteArray QString::toUtf8() const
+
Returns a UTF-8 representation of the string as a QByteArray.
UTF-8 is a Unicode codec and can represent all characters in a Unicode
@@ -4099,12 +4301,13 @@ QByteArray QString::toLocal8Bit() const
\sa fromUtf8(), toLatin1(), toLocal8Bit(), QTextCodec
*/
-QByteArray QString::toUtf8() const
+
+QByteArray QString::toUtf8_helper(const QString &str)
{
- if (isNull())
+ if (str.isNull())
return QByteArray();
- return QUtf8::convertFromUnicode(constData(), length(), 0);
+ return QUtf8::convertFromUnicode(str.constData(), str.length());
}
/*!
@@ -4126,12 +4329,6 @@ QVector<uint> QString::toUcs4() const
return v;
}
-#if defined(__mips_dsp)
-// From qstring_mips_dsp_asm.S
-extern "C" void qt_fromlatin1_mips_asm_unroll4 (ushort*, const char*, uint);
-extern "C" void qt_fromlatin1_mips_asm_unroll8 (ushort*, const char*, uint);
-#endif
-
QString::Data *QString::fromLatin1_helper(const char *str, int size)
{
Data *d;
@@ -4147,40 +4344,8 @@ QString::Data *QString::fromLatin1_helper(const char *str, int size)
d->size = size;
d->data()[size] = '\0';
ushort *dst = d->data();
- /* SIMD:
- * Unpacking with SSE has been shown to improve performance on recent CPUs
- * The same method gives no improvement with NEON.
- */
-#if defined(__SSE2__)
- if (size >= 16) {
- int chunkCount = size >> 4; // divided by 16
- const __m128i nullMask = _mm_set1_epi32(0);
- for (int i = 0; i < chunkCount; ++i) {
- const __m128i chunk = _mm_loadu_si128((__m128i*)str); // load
- str += 16;
- // unpack the first 8 bytes, padding with zeros
- const __m128i firstHalf = _mm_unpacklo_epi8(chunk, nullMask);
- _mm_storeu_si128((__m128i*)dst, firstHalf); // store
- dst += 8;
-
- // unpack the last 8 bytes, padding with zeros
- const __m128i secondHalf = _mm_unpackhi_epi8 (chunk, nullMask);
- _mm_storeu_si128((__m128i*)dst, secondHalf); // store
- dst += 8;
- }
- size = size % 16;
- }
-#endif
-#if defined(__mips_dsp)
- if (size > 20)
- qt_fromlatin1_mips_asm_unroll8(dst, str, size);
- else
- qt_fromlatin1_mips_asm_unroll4(dst, str, size);
-#else
- while (size--)
- *dst++ = (uchar)*str++;
-#endif
+ qt_from_latin1(dst, str, uint(size));
}
return d;
}
@@ -4305,7 +4470,7 @@ QString QString::fromUtf8_helper(const char *str, int size)
return QString();
Q_ASSERT(size != -1);
- return QUtf8::convertToUnicode(str, size, 0);
+ return QUtf8::convertToUnicode(str, size);
}
/*!
@@ -5039,22 +5204,7 @@ int QString::compare_helper(const QChar *data1, int length1, QLatin1String s2,
return length1;
if (cs == Qt::CaseSensitive) {
- const ushort *e = uc + length1;
- if (s2.size() < length1)
- e = uc + s2.size();
- while (uc < e) {
- int diff = *uc - *c;
- if (diff)
- return diff;
- uc++, c++;
- }
-
- if (uc == uce) {
- if (c == (const uchar *)s2.latin1() + s2.size())
- return 0;
- return -1;
- }
- return 1;
+ return ucstrcmp(data1, length1, c, s2.size());
} else {
return ucstricmp(uc, uce, c, c + s2.size());
}
@@ -5144,7 +5294,11 @@ int QString::localeAwareCompare_helper(const QChar *data1, int length1,
return ucstrcmp(data1, length1, data2, length2);
#if defined(Q_OS_WIN32) || defined(Q_OS_WINCE)
+#ifndef Q_OS_WINRT
int res = CompareString(GetUserDefaultLCID(), 0, (wchar_t*)data1, length1, (wchar_t*)data2, length2);
+#else
+ int res = CompareStringEx(LOCALE_NAME_USER_DEFAULT, 0, (LPCWSTR)data1, length1, (LPCWSTR)data2, length2, NULL, NULL, 0);
+#endif
switch (res) {
case CSTR_LESS_THAN:
@@ -8254,19 +8408,10 @@ bool operator==(QLatin1String s1, const QStringRef &s2)
if (s1.size() != s2.size())
return false;
- const ushort *uc = reinterpret_cast<const ushort *>(s2.unicode());
- const ushort *e = uc + s2.size();
const uchar *c = reinterpret_cast<const uchar *>(s1.latin1());
if (!c)
return s2.isEmpty();
-
- while (*c) {
- if (uc == e || *uc != *c)
- return false;
- ++uc;
- ++c;
- }
- return (uc == e);
+ return ucstrncmp(s2.unicode(), c, s2.size()) == 0;
}
/*!
@@ -8854,8 +8999,7 @@ int QStringRef::lastIndexOf(QLatin1String str, int from, Qt::CaseSensitivity cs)
from = delta;
QVarLengthArray<ushort> s(sl);
- for (int i = 0; i < sl; ++i)
- s[i] = str.latin1()[i];
+ qt_from_latin1(s.data(), str.latin1(), sl);
return lastIndexOfHelper(reinterpret_cast<const ushort*>(unicode()), from, s.data(), sl, cs);
}
@@ -9193,8 +9337,7 @@ static inline int qt_find_latin1_string(const QChar *haystack, int size,
const char *latin1 = needle.latin1();
int len = needle.size();
QVarLengthArray<ushort> s(len);
- for (int i = 0; i < len; ++i)
- s[i] = latin1[i];
+ qt_from_latin1(s.data(), latin1, len);
return qFindString(haystack, size, from,
reinterpret_cast<const QChar*>(s.constData()), len, cs);
@@ -9238,9 +9381,7 @@ static inline bool qt_starts_with(const QChar *haystack, int haystackLen,
const ushort *data = reinterpret_cast<const ushort*>(haystack);
const uchar *latin = reinterpret_cast<const uchar*>(needle.latin1());
if (cs == Qt::CaseSensitive) {
- for (int i = 0; i < slen; ++i)
- if (data[i] != latin[i])
- return false;
+ return ucstrncmp(haystack, latin, slen) == 0;
} else {
for (int i = 0; i < slen; ++i)
if (foldCase(data[i]) != foldCase((ushort)latin[i]))
@@ -9290,9 +9431,7 @@ static inline bool qt_ends_with(const QChar *haystack, int haystackLen,
const uchar *latin = reinterpret_cast<const uchar*>(needle.latin1());
const ushort *data = reinterpret_cast<const ushort*>(haystack);
if (cs == Qt::CaseSensitive) {
- for (int i = 0; i < slen; i++)
- if (data[pos+i] != latin[i])
- return false;
+ return ucstrncmp(haystack + pos, latin, slen) == 0;
} else {
for (int i = 0; i < slen; i++)
if (foldCase(data[pos+i]) != foldCase((ushort)latin[i]))
@@ -9314,7 +9453,7 @@ static inline bool qt_ends_with(const QChar *haystack, int haystackLen,
*/
QByteArray QStringRef::toLatin1() const
{
- return toLatin1_helper(unicode(), length());
+ return QString::toLatin1_helper(unicode(), length());
}
/*!