summary refs log tree commit diff stats
path: root/src/corelib
diff options
context:
space:
mode:
author Thiago Macieira <thiago.macieira@intel.com> 2018-05-13 14:11:38 -0700
committer Thiago Macieira <thiago.macieira@intel.com> 2018-05-15 23:07:22 +0000
commit bd0905db95cd1b2f225050390bd50809452ff53f (patch)
tree 70b84658256b9640c86b387c6074d3e73ecaaca2 /src/corelib
parent 1e95a07a5ced774b20adb66b34c31bdfaf566bdc (diff)
QString: add a method to do a conversion to Latin1 without checking
If the input is already known to be Latin 1, we don't need to check and merge in question marks. QJsonObject already needed this code; now we can make it more efficient. I'll need the same code in CBOR.

Change-Id: Ib48364abee9f464c96c6fffd152e508f078404e5
Reviewed-by: Allan Sandfeld Jensen <allan.jensen@qt.io>
Diffstat (limited to 'src/corelib')
-rw-r--r-- src/corelib/serialization/qjson_p.h 32
-rw-r--r-- src/corelib/tools/qstring.cpp 47
2 files changed, 43 insertions, 36 deletions
diff --git a/src/corelib/serialization/qjson_p.h b/src/corelib/serialization/qjson_p.h
index dc56a49084..feba1faac6 100644
--- a/src/corelib/serialization/qjson_p.h
+++ b/src/corelib/serialization/qjson_p.h
@@ -69,6 +69,9 @@
QT_BEGIN_NAMESPACE
+// in qstring.cpp
+void qt_to_latin1_unchecked(uchar *dst, const ushort *uc, qsizetype len);
+
/*
This defines a binary data structure for Json data. The data structure is optimised for fast reading
and minimum allocations. The whole data structure can be mmap'ed and used directly.
@@ -294,31 +297,10 @@ public:
int len = d->length = str.length();
uchar *l = (uchar *)d->latin1;
const ushort *uc = (const ushort *)str.unicode();
- int i = 0;
-#ifdef __SSE2__
- for ( ; i + 16 <= len; i += 16) {
- __m128i chunk1 = _mm_loadu_si128((__m128i*)&uc[i]); // load
- __m128i chunk2 = _mm_loadu_si128((__m128i*)&uc[i + 8]); // load
- // pack the two vector to 16 x 8bits elements
- const __m128i result = _mm_packus_epi16(chunk1, chunk2);
- _mm_storeu_si128((__m128i*)&l[i], result); // store
- }
-# ifdef Q_PROCESSOR_X86_64
- // we can do one more round, of 8 characters
- if (i + 8 <= len) {
- __m128i chunk = _mm_loadu_si128((__m128i*)&uc[i]); // load
- // pack with itself, we'll discard the high part anyway
- chunk = _mm_packus_epi16(chunk, chunk);
- // unaligned 64-bit store
- qToUnaligned(_mm_cvtsi128_si64(chunk), l + i);
- i += 8;
- }
-# endif
-#endif
- for ( ; i < len; ++i)
- l[i] = uc[i];
- for ( ; (quintptr)(l+i) & 0x3; ++i)
- l[i] = 0;
+ qt_to_latin1_unchecked(l, uc, len);
+
+ for ( ; (quintptr)(l+len) & 0x3; ++len)
+ l[len] = 0;
return *this;
}
diff --git a/src/corelib/tools/qstring.cpp b/src/corelib/tools/qstring.cpp
index 81e5e1e884..d045913b87 100644
--- a/src/corelib/tools/qstring.cpp
+++ b/src/corelib/tools/qstring.cpp
@@ -591,7 +591,8 @@ static inline __m128i mergeQuestionMarks(__m128i chunk)
}
#endif
-static void qt_to_latin1(uchar *dst, const ushort *src, int length)
+template <bool Checked>
+static void qt_to_latin1_internal(uchar *dst, const ushort *src, qsizetype length)
{
#if defined(__SSE2__)
uchar *e = dst + length;
@@ -600,10 +601,12 @@ static void qt_to_latin1(uchar *dst, const ushort *src, int length)
// we're going to write to dst[offset..offset+15] (16 bytes)
for ( ; dst + offset + 15 < e; offset += 16) {
__m128i chunk1 = _mm_loadu_si128((const __m128i*)(src + offset)); // load
- chunk1 = mergeQuestionMarks(chunk1);
+ if (Checked)
+ chunk1 = mergeQuestionMarks(chunk1);
__m128i chunk2 = _mm_loadu_si128((const __m128i*)(src + offset + 8)); // load
- chunk2 = mergeQuestionMarks(chunk2);
+ if (Checked)
+ chunk2 = mergeQuestionMarks(chunk2);
// pack the two vector to 16 x 8bits elements
const __m128i result = _mm_packus_epi16(chunk1, chunk2);
@@ -614,7 +617,8 @@ static void qt_to_latin1(uchar *dst, const ushort *src, int length)
// we're going to write to dst[offset..offset+7] (8 bytes)
if (dst + offset + 7 < e) {
__m128i chunk = _mm_loadu_si128(reinterpret_cast<const __m128i *>(src + offset));
- chunk = mergeQuestionMarks(chunk);
+ if (Checked)
+ chunk = mergeQuestionMarks(chunk);
// pack, where the upper half is ignored
const __m128i result = _mm_packus_epi16(chunk, chunk);
@@ -625,7 +629,8 @@ static void qt_to_latin1(uchar *dst, const ushort *src, int length)
// we're going to write to dst[offset..offset+3] (4 bytes)
if (dst + offset + 3 < e) {
__m128i chunk = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(src + offset));
- chunk = mergeQuestionMarks(chunk);
+ if (Checked)
+ chunk = mergeQuestionMarks(chunk);
// pack, we'll the upper three quarters
const __m128i result = _mm_packus_epi16(chunk, chunk);
@@ -637,7 +642,12 @@ static void qt_to_latin1(uchar *dst, const ushort *src, int length)
dst += offset;
src += offset;
- return UnrollTailLoop<3>::exec(length, [=](int i) { dst[i] = (src[i]>0xff) ? '?' : (uchar) src[i]; });
+ return UnrollTailLoop<3>::exec(length, [=](int i) {
+ if (Checked)
+ dst[i] = (src[i]>0xff) ? '?' : (uchar) src[i];
+ else
+ dst[i] = src[i];
+ });
# endif
#elif defined(__ARM_NEON__)
// Refer to the documentation of the SSE2 implementation
@@ -652,10 +662,12 @@ static void qt_to_latin1(uchar *dst, const ushort *src, int length)
uint16x8_t chunk = vld1q_u16((uint16_t *)src); // load
src += 8;
- const uint16x8_t offLimitMask = vcgtq_u16(chunk, thresholdMask); // chunk > thresholdMask
- const uint16x8_t offLimitQuestionMark = vandq_u16(offLimitMask, questionMark); // offLimitMask & questionMark
- const uint16x8_t correctBytes = vbicq_u16(chunk, offLimitMask); // !offLimitMask & chunk
- chunk = vorrq_u16(correctBytes, offLimitQuestionMark); // correctBytes | offLimitQuestionMark
+ if (Checked) {
+ const uint16x8_t offLimitMask = vcgtq_u16(chunk, thresholdMask); // chunk > thresholdMask
+ const uint16x8_t offLimitQuestionMark = vandq_u16(offLimitMask, questionMark); // offLimitMask & questionMark
+ const uint16x8_t correctBytes = vbicq_u16(chunk, offLimitMask); // !offLimitMask & chunk
+ chunk = vorrq_u16(correctBytes, offLimitQuestionMark); // correctBytes | offLimitQuestionMark
+ }
const uint8x8_t result = vmovn_u16(chunk); // narrowing move->packing
vst1_u8(dst, result); // store
dst += 8;
@@ -667,12 +679,25 @@ static void qt_to_latin1(uchar *dst, const ushort *src, int length)
qt_toLatin1_mips_dsp_asm(dst, src, length);
#else
while (length--) {
- *dst++ = (*src>0xff) ? '?' : (uchar) *src;
+ if (Checked)
+ *dst++ = (*src>0xff) ? '?' : (uchar) *src;
+ else
+ *dst++ = *src;
++src;
}
#endif
}
+static void qt_to_latin1(uchar *dst, const ushort *src, qsizetype length)
+{
+ qt_to_latin1_internal<true>(dst, src, length);
+}
+
+void qt_to_latin1_unchecked(uchar *dst, const ushort *src, qsizetype length)
+{
+ qt_to_latin1_internal<false>(dst, src, length);
+}
+
// Unicode case-insensitive comparison
static int ucstricmp(const QChar *a, const QChar *ae, const QChar *b, const QChar *be)
{