diff options
author | Gunnar Sletta <gunnar.sletta@nokia.com> | 2011-09-12 07:49:03 +0200 |
---|---|---|
committer | Gunnar Sletta <gunnar.sletta@nokia.com> | 2011-09-12 07:49:03 +0200 |
commit | a9ac6da893ac10ce160c9eb80141508881cd71e2 (patch) | |
tree | 0e74067407612d44ad07ff74cb7be6d444ee28c5 /src/corelib | |
parent | 8eb5ba3b9cb1e8e07f28a3153672a946b2d82fd9 (diff) | |
parent | 687461627310e8b781da15d1a907c35bd8ffea6e (diff) |
Merge branch 'master' into refactor
Conflicts:
src/3rdparty/v8
src/gui/text/qfont_qpa.cpp
src/gui/widgets/qlinecontrol.cpp
src/plugins/platforms/fontdatabases/fontconfig/qfontconfigdatabase.cpp
tests/auto/gui.pro
tests/auto/network.pro
tests/auto/qstring/tst_qstring.cpp
Change-Id: Id118c172645303ccf06a207050d5bf1462ff57fe
Diffstat (limited to 'src/corelib')
-rw-r--r-- | src/corelib/tools/qstring.cpp | 87 |
1 files changed, 58 insertions, 29 deletions
diff --git a/src/corelib/tools/qstring.cpp b/src/corelib/tools/qstring.cpp index c56c050f76..3a2e35c322 100644 --- a/src/corelib/tools/qstring.cpp +++ b/src/corelib/tools/qstring.cpp @@ -3535,6 +3535,61 @@ bool QString::endsWith(const QChar &c, Qt::CaseSensitivity cs) const } +#if defined(QT_ALWAYS_HAVE_SSE2) +static inline __m128i mergeQuestionMarks(__m128i chunk) +{ + const __m128i questionMark = _mm_set1_epi16('?'); + +# ifdef __SSE4_2__ + // compare the unsigned shorts for the range 0x0100-0xFFFF + // note on the use of _mm_cmpestrm: + // The MSDN documentation online (http://technet.microsoft.com/en-us/library/bb514080.aspx) + // says for range search the following: + // For each character c in a, determine whether b0 <= c <= b1 or b2 <= c <= b3 + // + // However, all examples on the Internet, including from Intel + // (see http://software.intel.com/en-us/articles/xml-parsing-accelerator-with-intel-streaming-simd-extensions-4-intel-sse4/) + // put the range to be searched first + // + // Disassembly and instruction-level debugging with GCC and ICC show + // that they are doing the right thing. Inverting the arguments in the + // instruction does cause a bunch of test failures. + + const int mode = _SIDD_UWORD_OPS | _SIDD_CMP_RANGES | _SIDD_UNIT_MASK; + const __m128i rangeMatch = _mm_cvtsi32_si128(0xffff0100); + const __m128i offLimitMask = _mm_cmpestrm(rangeMatch, 2, chunk, 8, mode); + + // replace the non-Latin 1 characters in the chunk with question marks + chunk = _mm_blendv_epi8(chunk, questionMark, offLimitMask); +# else + // SSE has no compare instruction for unsigned comparison. + // The variables must be shiffted + 0x8000 to be compared + const __m128i signedBitOffset = _mm_set1_epi16(0x8000); + const __m128i thresholdMask = _mm_set1_epi16(0xff + 0x8000); + + const __m128i signedChunk = _mm_add_epi16(chunk, signedBitOffset); + const __m128i offLimitMask = _mm_cmpgt_epi16(signedChunk, thresholdMask); + +# ifdef __SSE4_1__ + // replace the non-Latin 1 characters in the chunk with question marks + chunk = _mm_blendv_epi8(chunk, questionMark, offLimitMask); +# else + // offLimitQuestionMark contains '?' for each 16 bits that was off-limit + // the 16 bits that were correct contains zeros + const __m128i offLimitQuestionMark = _mm_and_si128(offLimitMask, questionMark); + + // correctBytes contains the bytes that were in limit + // the 16 bits that were off limits contains zeros + const __m128i correctBytes = _mm_andnot_si128(offLimitMask, chunk); + + // merge offLimitQuestionMark and correctBytes to have the result + chunk = _mm_or_si128(correctBytes, offLimitQuestionMark); +# endif +# endif + return chunk; +} +#endif + static QByteArray toLatin1_helper(const QChar *data, int length) { QByteArray ba; @@ -3545,41 +3600,15 @@ static QByteArray toLatin1_helper(const QChar *data, int length) #if defined(QT_ALWAYS_HAVE_SSE2) if (length >= 16) { const int chunkCount = length >> 4; // divided by 16 - const __m128i questionMark = _mm_set1_epi16('?'); - // SSE has no compare instruction for unsigned comparison. - // The variables must be shiffted + 0x8000 to be compared - const __m128i signedBitOffset = _mm_set1_epi16(short(0x8000)); - const __m128i thresholdMask = _mm_set1_epi16(short(0xff + 0x8000)); + for (int i = 0; i < chunkCount; ++i) { __m128i chunk1 = _mm_loadu_si128((__m128i*)src); // load + chunk1 = mergeQuestionMarks(chunk1); src += 8; - { - // each 16 bit is equal to 0xFF if the source is outside latin 1 (>0xff) - const __m128i signedChunk = _mm_add_epi16(chunk1, signedBitOffset); - const __m128i offLimitMask = _mm_cmpgt_epi16(signedChunk, thresholdMask); - - // offLimitQuestionMark contains '?' for each 16 bits that was off-limit - // the 16 bits that were correct contains zeros - const __m128i offLimitQuestionMark = _mm_and_si128(offLimitMask, questionMark); - - // correctBytes contains the bytes that were in limit - // the 16 bits that were off limits contains zeros - const __m128i correctBytes = _mm_andnot_si128(offLimitMask, chunk1); - - // merge offLimitQuestionMark and correctBytes to have the result - chunk1 = _mm_or_si128(correctBytes, offLimitQuestionMark); - } __m128i chunk2 = _mm_loadu_si128((__m128i*)src); // load + chunk2 = mergeQuestionMarks(chunk2); src += 8; - { - // exactly the same operations as for the previous chunk of data - const __m128i signedChunk = _mm_add_epi16(chunk2, signedBitOffset); - const __m128i offLimitMask = _mm_cmpgt_epi16(signedChunk, thresholdMask); - const __m128i offLimitQuestionMark = _mm_and_si128(offLimitMask, questionMark); - const __m128i correctBytes = _mm_andnot_si128(offLimitMask, chunk2); - chunk2 = _mm_or_si128(correctBytes, offLimitQuestionMark); - } // pack the two vector to 16 x 8bits elements const __m128i result = _mm_packus_epi16(chunk1, chunk2); |