summaryrefslogtreecommitdiffstats
path: root/src/corelib
diff options
context:
space:
mode:
authorThiago Macieira <thiago.macieira@intel.com>2014-01-16 14:34:59 -0800
committerThe Qt Project <gerrit-noreply@qt-project.org>2014-01-31 21:51:34 +0100
commit17678bee8912b33219bbc1e4e5ce2b82f61744b2 (patch)
treebbfb4381a1c7bd94811f371cd7989bd4370bcd77 /src/corelib
parent1f6ae7444b38752657ddfcc00affc67031d03d6d (diff)
Move the code that converts from UTF-16 to Latin1 to the top
Just so it's closer to the code that does the inverse. Change-Id: Iecaab20adad2cb7f4a55818c687aecc76f0523e7 Reviewed-by: Lars Knoll <lars.knoll@digia.com> Reviewed-by: Frederik Gladhorn <frederik.gladhorn@digia.com>
Diffstat (limited to 'src/corelib')
-rw-r--r--src/corelib/tools/qstring.cpp240
1 files changed, 118 insertions, 122 deletions
diff --git a/src/corelib/tools/qstring.cpp b/src/corelib/tools/qstring.cpp
index bf6e792588..390a65aa23 100644
--- a/src/corelib/tools/qstring.cpp
+++ b/src/corelib/tools/qstring.cpp
@@ -136,6 +136,7 @@ QT_BEGIN_NAMESPACE
// From qstring_mips_dsp_asm.S
extern "C" void qt_fromlatin1_mips_asm_unroll4 (ushort*, const char*, uint);
extern "C" void qt_fromlatin1_mips_asm_unroll8 (ushort*, const char*, uint);
+extern "C" void qt_toLatin1_mips_dsp_asm(uchar *dst, const ushort *src, int length);
#endif
// internal
@@ -235,6 +236,120 @@ static void qt_from_latin1(ushort *dst, const char *str, size_t size)
#endif
}
+#if defined(__SSE2__)
+static inline __m128i mergeQuestionMarks(__m128i chunk)
+{
+ const __m128i questionMark = _mm_set1_epi16('?');
+
+# ifdef __SSE4_2__
+ // compare the unsigned shorts for the range 0x0100-0xFFFF
+ // note on the use of _mm_cmpestrm:
+ // The MSDN documentation online (http://technet.microsoft.com/en-us/library/bb514080.aspx)
+ // says for range search the following:
+ // For each character c in a, determine whether b0 <= c <= b1 or b2 <= c <= b3
+ //
+ // However, all examples on the Internet, including from Intel
+ // (see http://software.intel.com/en-us/articles/xml-parsing-accelerator-with-intel-streaming-simd-extensions-4-intel-sse4/)
+ // put the range to be searched first
+ //
+ // Disassembly and instruction-level debugging with GCC and ICC show
+ // that they are doing the right thing. Inverting the arguments in the
+ // instruction does cause a bunch of test failures.
+
+ const int mode = _SIDD_UWORD_OPS | _SIDD_CMP_RANGES | _SIDD_UNIT_MASK;
+ const __m128i rangeMatch = _mm_cvtsi32_si128(0xffff0100);
+ const __m128i offLimitMask = _mm_cmpestrm(rangeMatch, 2, chunk, 8, mode);
+
+ // replace the non-Latin 1 characters in the chunk with question marks
+ chunk = _mm_blendv_epi8(chunk, questionMark, offLimitMask);
+# else
+ // SSE has no compare instruction for unsigned comparison.
+ // The variables must be shiffted + 0x8000 to be compared
+ const __m128i signedBitOffset = _mm_set1_epi16(short(0x8000));
+ const __m128i thresholdMask = _mm_set1_epi16(short(0xff + 0x8000));
+
+ const __m128i signedChunk = _mm_add_epi16(chunk, signedBitOffset);
+ const __m128i offLimitMask = _mm_cmpgt_epi16(signedChunk, thresholdMask);
+
+# ifdef __SSE4_1__
+ // replace the non-Latin 1 characters in the chunk with question marks
+ chunk = _mm_blendv_epi8(chunk, questionMark, offLimitMask);
+# else
+ // offLimitQuestionMark contains '?' for each 16 bits that was off-limit
+ // the 16 bits that were correct contains zeros
+ const __m128i offLimitQuestionMark = _mm_and_si128(offLimitMask, questionMark);
+
+ // correctBytes contains the bytes that were in limit
+ // the 16 bits that were off limits contains zeros
+ const __m128i correctBytes = _mm_andnot_si128(offLimitMask, chunk);
+
+ // merge offLimitQuestionMark and correctBytes to have the result
+ chunk = _mm_or_si128(correctBytes, offLimitQuestionMark);
+# endif
+# endif
+ return chunk;
+}
+#endif
+
+static void qt_to_latin1(uchar *dst, const ushort *src, int length)
+{
+ if (length) {
+#if defined(__SSE2__)
+ if (length >= 16) {
+ const int chunkCount = length >> 4; // divided by 16
+
+ for (int i = 0; i < chunkCount; ++i) {
+ __m128i chunk1 = _mm_loadu_si128((__m128i*)src); // load
+ chunk1 = mergeQuestionMarks(chunk1);
+ src += 8;
+
+ __m128i chunk2 = _mm_loadu_si128((__m128i*)src); // load
+ chunk2 = mergeQuestionMarks(chunk2);
+ src += 8;
+
+ // pack the two vector to 16 x 8bits elements
+ const __m128i result = _mm_packus_epi16(chunk1, chunk2);
+
+ _mm_storeu_si128((__m128i*)dst, result); // store
+ dst += 16;
+ }
+ length = length % 16;
+ }
+#elif defined(__ARM_NEON__)
+ // Refer to the documentation of the SSE2 implementation
+ // this use eactly the same method as for SSE except:
+ // 1) neon has unsigned comparison
+ // 2) packing is done to 64 bits (8 x 8bits component).
+ if (length >= 16) {
+ const int chunkCount = length >> 3; // divided by 8
+ const uint16x8_t questionMark = vdupq_n_u16('?'); // set
+ const uint16x8_t thresholdMask = vdupq_n_u16(0xff); // set
+ for (int i = 0; i < chunkCount; ++i) {
+ uint16x8_t chunk = vld1q_u16((uint16_t *)src); // load
+ src += 8;
+
+ const uint16x8_t offLimitMask = vcgtq_u16(chunk, thresholdMask); // chunk > thresholdMask
+ const uint16x8_t offLimitQuestionMark = vandq_u16(offLimitMask, questionMark); // offLimitMask & questionMark
+ const uint16x8_t correctBytes = vbicq_u16(chunk, offLimitMask); // !offLimitMask & chunk
+ chunk = vorrq_u16(correctBytes, offLimitQuestionMark); // correctBytes | offLimitQuestionMark
+ const uint8x8_t result = vmovn_u16(chunk); // narrowing move->packing
+ vst1_u8(dst, result); // store
+ dst += 8;
+ }
+ length = length % 8;
+ }
+#endif
+#if defined(__mips_dsp)
+ qt_toLatin1_mips_dsp_asm(dst, src, length);
+#else
+ while (length--) {
+ *dst++ = (*src>0xff) ? '?' : (uchar) *src;
+ ++src;
+ }
+#endif
+ }
+}
+
// Unicode case-insensitive comparison
static int ucstricmp(const ushort *a, const ushort *ae, const ushort *b, const ushort *be)
{
@@ -4065,125 +4180,6 @@ bool QString::endsWith(QChar c, Qt::CaseSensitivity cs) const
: foldCase(d->data()[d->size - 1]) == foldCase(c.unicode()));
}
-
-#if defined(__SSE2__)
-static inline __m128i mergeQuestionMarks(__m128i chunk)
-{
- const __m128i questionMark = _mm_set1_epi16('?');
-
-# ifdef __SSE4_2__
- // compare the unsigned shorts for the range 0x0100-0xFFFF
- // note on the use of _mm_cmpestrm:
- // The MSDN documentation online (http://technet.microsoft.com/en-us/library/bb514080.aspx)
- // says for range search the following:
- // For each character c in a, determine whether b0 <= c <= b1 or b2 <= c <= b3
- //
- // However, all examples on the Internet, including from Intel
- // (see http://software.intel.com/en-us/articles/xml-parsing-accelerator-with-intel-streaming-simd-extensions-4-intel-sse4/)
- // put the range to be searched first
- //
- // Disassembly and instruction-level debugging with GCC and ICC show
- // that they are doing the right thing. Inverting the arguments in the
- // instruction does cause a bunch of test failures.
-
- const int mode = _SIDD_UWORD_OPS | _SIDD_CMP_RANGES | _SIDD_UNIT_MASK;
- const __m128i rangeMatch = _mm_cvtsi32_si128(0xffff0100);
- const __m128i offLimitMask = _mm_cmpestrm(rangeMatch, 2, chunk, 8, mode);
-
- // replace the non-Latin 1 characters in the chunk with question marks
- chunk = _mm_blendv_epi8(chunk, questionMark, offLimitMask);
-# else
- // SSE has no compare instruction for unsigned comparison.
- // The variables must be shiffted + 0x8000 to be compared
- const __m128i signedBitOffset = _mm_set1_epi16(short(0x8000));
- const __m128i thresholdMask = _mm_set1_epi16(short(0xff + 0x8000));
-
- const __m128i signedChunk = _mm_add_epi16(chunk, signedBitOffset);
- const __m128i offLimitMask = _mm_cmpgt_epi16(signedChunk, thresholdMask);
-
-# ifdef __SSE4_1__
- // replace the non-Latin 1 characters in the chunk with question marks
- chunk = _mm_blendv_epi8(chunk, questionMark, offLimitMask);
-# else
- // offLimitQuestionMark contains '?' for each 16 bits that was off-limit
- // the 16 bits that were correct contains zeros
- const __m128i offLimitQuestionMark = _mm_and_si128(offLimitMask, questionMark);
-
- // correctBytes contains the bytes that were in limit
- // the 16 bits that were off limits contains zeros
- const __m128i correctBytes = _mm_andnot_si128(offLimitMask, chunk);
-
- // merge offLimitQuestionMark and correctBytes to have the result
- chunk = _mm_or_si128(correctBytes, offLimitQuestionMark);
-# endif
-# endif
- return chunk;
-}
-#endif
-
-#if defined(__mips_dsp)
-extern "C" void qt_toLatin1_mips_dsp_asm(uchar *dst, const ushort *src, int length);
-#endif
-
-static void toLatin1_helper(uchar *dst, const ushort *src, int length)
-{
- if (length) {
-#if defined(__SSE2__)
- if (length >= 16) {
- const int chunkCount = length >> 4; // divided by 16
-
- for (int i = 0; i < chunkCount; ++i) {
- __m128i chunk1 = _mm_loadu_si128((__m128i*)src); // load
- chunk1 = mergeQuestionMarks(chunk1);
- src += 8;
-
- __m128i chunk2 = _mm_loadu_si128((__m128i*)src); // load
- chunk2 = mergeQuestionMarks(chunk2);
- src += 8;
-
- // pack the two vector to 16 x 8bits elements
- const __m128i result = _mm_packus_epi16(chunk1, chunk2);
-
- _mm_storeu_si128((__m128i*)dst, result); // store
- dst += 16;
- }
- length = length % 16;
- }
-#elif defined(__ARM_NEON__)
- // Refer to the documentation of the SSE2 implementation
- // this use eactly the same method as for SSE except:
- // 1) neon has unsigned comparison
- // 2) packing is done to 64 bits (8 x 8bits component).
- if (length >= 16) {
- const int chunkCount = length >> 3; // divided by 8
- const uint16x8_t questionMark = vdupq_n_u16('?'); // set
- const uint16x8_t thresholdMask = vdupq_n_u16(0xff); // set
- for (int i = 0; i < chunkCount; ++i) {
- uint16x8_t chunk = vld1q_u16((uint16_t *)src); // load
- src += 8;
-
- const uint16x8_t offLimitMask = vcgtq_u16(chunk, thresholdMask); // chunk > thresholdMask
- const uint16x8_t offLimitQuestionMark = vandq_u16(offLimitMask, questionMark); // offLimitMask & questionMark
- const uint16x8_t correctBytes = vbicq_u16(chunk, offLimitMask); // !offLimitMask & chunk
- chunk = vorrq_u16(correctBytes, offLimitQuestionMark); // correctBytes | offLimitQuestionMark
- const uint8x8_t result = vmovn_u16(chunk); // narrowing move->packing
- vst1_u8(dst, result); // store
- dst += 8;
- }
- length = length % 8;
- }
-#endif
-#if defined(__mips_dsp)
- qt_toLatin1_mips_dsp_asm(dst, src, length);
-#else
- while (length--) {
- *dst++ = (*src>0xff) ? '?' : (uchar) *src;
- ++src;
- }
-#endif
- }
-}
-
QByteArray QString::toLatin1_helper(const QString &string)
{
if (Q_UNLIKELY(string.isNull()))
@@ -4198,8 +4194,8 @@ QByteArray QString::toLatin1_helper(const QChar *data, int length)
// since we own the only copy, we're going to const_cast the constData;
// that avoids an unnecessary call to detach() and expansion code that will never get used
- QT_PREPEND_NAMESPACE(toLatin1_helper)(reinterpret_cast<uchar *>(const_cast<char *>(ba.constData())),
- reinterpret_cast<const ushort *>(data), length);
+ qt_to_latin1(reinterpret_cast<uchar *>(const_cast<char *>(ba.constData())),
+ reinterpret_cast<const ushort *>(data), length);
return ba;
}
@@ -4225,7 +4221,7 @@ QByteArray QString::toLatin1_helper_inplace(QString &s)
// do the in-place conversion
uchar *dst = reinterpret_cast<uchar *>(ba_d->data());
- QT_PREPEND_NAMESPACE(toLatin1_helper)(dst, data, length);
+ qt_to_latin1(dst, data, length);
dst[length] = '\0';
QByteArrayDataPtr badptr = { ba_d };