summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorThiago Macieira <thiago.macieira@intel.com>2018-01-20 10:56:58 -0800
committerThiago Macieira <thiago.macieira@intel.com>2018-01-27 17:51:24 +0000
commitc375503fa030de51e821db00e7ca6c1378eb34ba (patch)
tree225e16fec15be8dba46e7cc1cb7880c9024f4068 /src
parent39f76b4325d1d5b7dd28e9e80f1a8ba7378c7e54 (diff)
Add a few methods to check if a string is US-ASCII or Latin1
isLatin1(QLatin1String) is provided for completeness' sake, in case some generic code operates on both QLatin1String and QString/QStringView. Change-Id: I5e421e32396d44e4b39efffd150b99a18eedf648 Reviewed-by: Allan Sandfeld Jensen <allan.jensen@qt.io> Reviewed-by: Lars Knoll <lars.knoll@qt.io>
Diffstat (limited to 'src')
-rw-r--r--src/corelib/tools/qstring.cpp147
-rw-r--r--src/corelib/tools/qstring.h6
-rw-r--r--src/corelib/tools/qstringalgorithms.h5
3 files changed, 157 insertions, 1 deletions
diff --git a/src/corelib/tools/qstring.cpp b/src/corelib/tools/qstring.cpp
index 5eeaa2a2a8..69751eb6dc 100644
--- a/src/corelib/tools/qstring.cpp
+++ b/src/corelib/tools/qstring.cpp
@@ -1,7 +1,7 @@
/****************************************************************************
**
** Copyright (C) 2016 The Qt Company Ltd.
-** Copyright (C) 2016 Intel Corporation.
+** Copyright (C) 2018 Intel Corporation.
** Contact: https://www.qt.io/licensing/
**
** This file is part of the QtCore module of the Qt Toolkit.
@@ -251,6 +251,151 @@ inline RetType UnrollTailLoop<0>::exec(Number, RetType returnIfExited, Functor1,
}
#endif
+#ifdef __SSE2__
+// Scans [ptr, end) in SIMD-sized chunks and checks that none of the bits
+// selected by maskval (replicated over every 32-bit lane) is set in the data.
+//
+// Returns false as soon as a chunk with a masked bit set is found; ptr is then
+// left pointing at the start of that chunk. Otherwise returns true with ptr
+// advanced past everything that was checked. A chunk is only consumed while
+// strictly more bytes than the chunk size remain, so a tail of at most 16
+// bytes is always left for the caller to verify.
+static bool simdTestMask(const char *&ptr, const char *end, quint32 maskval)
+{
+#  if defined(__AVX2__)
+    // AVX2 implementation: test 32 bytes at a time
+    const __m256i mask256 = _mm256_broadcastd_epi32(_mm_cvtsi32_si128(maskval));
+    while (end - ptr > 32) {
+        __m256i data = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(ptr));
+        if (!_mm256_testz_si256(mask256, data))
+            return false;
+        ptr += 32;
+    }
+
+    // low half of the broadcast mask, reused by the 16-byte step below
+    const __m128i mask = _mm256_castsi256_si128(mask256);
+#  elif defined(__SSE4_1__)
+    // SSE 4.1 implementation: test 32 bytes at a time (two 16-byte
+    // comparisons, unrolled)
+    const __m128i mask = _mm_set1_epi32(maskval);
+    while (end - ptr > 32) {
+        __m128i data1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(ptr));
+        __m128i data2 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(ptr + 16));
+        if (!_mm_testz_si128(mask, data1))
+            return false;
+        if (!_mm_testz_si128(mask, data2))
+            return false;
+        ptr += 32;
+    }
+#  endif
+#  if defined(__SSE4_1__)
+    // AVX2 and SSE4.1: final 16-byte comparison
+    if (end - ptr > 16) {
+        __m128i data1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(ptr));
+        if (!_mm_testz_si128(mask, data1))
+            return false;
+        ptr += 16;
+    }
+#  else
+    // SSE2 implementation: test 16 bytes at a time.
+    const __m128i mask = _mm_set1_epi32(maskval);
+    while (end - ptr > 16) {
+        __m128i data = _mm_loadu_si128(reinterpret_cast<const __m128i *>(ptr));
+        // Keep only the bits selected by the mask. PANDN would compute
+        // (~mask & data) and test exactly the bits we do not care about.
+        __m128i masked = _mm_and_si128(mask, data);
+        // Each 16-bit lane becomes 0xffff if it had no masked bit set, so the
+        // chunk is clean only when the byte mask is all ones.
+        __m128i comparison = _mm_cmpeq_epi16(masked, _mm_setzero_si128());
+        if (quint16(_mm_movemask_epi8(comparison)) != 0xffff)
+            return false;
+        ptr += 16;
+    }
+#  endif
+
+    return true;
+}
+#endif
+
+// Returns true if every byte of the Latin-1 string s is below 0x80, i.e. the
+// string consists of US-ASCII characters only. An empty string is ASCII.
+bool QtPrivate::isAscii(QLatin1String s) Q_DECL_NOTHROW
+{
+    const char *cursor = s.begin();
+    const char *const last = s.end();
+
+#if defined(__AVX2__)
+    // Bulk of the string: any byte with the top bit set fails the test.
+    const bool vectorPartIsAscii = simdTestMask(cursor, last, 0x80808080);
+    if (!vectorPartIsAscii)
+        return false;
+#elif defined(__SSE2__)
+    // PMOVMSKB collects the top bit of each of the 16 bytes, which is exactly
+    // the "non-ASCII" bit, so a non-zero result means a non-ASCII byte.
+    for ( ; cursor + 16 < last; cursor += 16) {
+        const __m128i chunk = _mm_loadu_si128(reinterpret_cast<const __m128i *>(cursor));
+        if (_mm_movemask_epi8(chunk) != 0)
+            return false;
+    }
+#endif
+
+    // Four bytes per step through an unaligned 32-bit load.
+    for ( ; cursor + 4 < last; cursor += 4) {
+        const quint32 word = qFromUnaligned<quint32>(cursor);
+        if ((word & 0x80808080U) != 0)
+            return false;
+    }
+
+    // Whatever is left, one byte at a time.
+    for ( ; cursor != last; ++cursor) {
+        if (quint8(*cursor) >= 0x80)
+            return false;
+    }
+    return true;
+}
+
+// Returns true if every UTF-16 code unit of s is below 0x80, i.e. the string
+// consists of US-ASCII characters only. An empty string is ASCII.
+bool QtPrivate::isAscii(QStringView s) Q_DECL_NOTHROW
+{
+    const QChar *it = s.begin();
+    const QChar *const stop = s.end();
+
+#ifdef __SSE2__
+    {
+        // Let the SIMD helper consume the bulk of the data as raw bytes; the
+        // mask selects every bit of a code unit that is outside 0x00-0x7f.
+        const char *bytes = reinterpret_cast<const char *>(it);
+        if (!simdTestMask(bytes, reinterpret_cast<const char *>(stop), 0xff80ff80))
+            return false;
+        it = reinterpret_cast<const QChar *>(bytes);
+    }
+#endif
+
+    // Remaining code units, one at a time.
+    for ( ; it != stop; ++it) {
+        if (it->unicode() > 0x7f)
+            return false;
+    }
+    return true;
+}
+
+// Returns true if every UTF-16 code unit of s is at most 0xff, i.e. the
+// string can be represented in Latin-1. An empty string is Latin-1.
+bool QtPrivate::isLatin1(QStringView s) Q_DECL_NOTHROW
+{
+    const QChar *ptr = s.begin();
+    const QChar *end = s.end();
+
+#if defined(__SSE4_1__)
+    // The mask selects the high byte of every code unit; the helper works on
+    // raw bytes and leaves a short tail for the scalar loop below.
+    const char *ptr8 = reinterpret_cast<const char *>(ptr);
+    const char *end8 = reinterpret_cast<const char *>(end);
+    if (!simdTestMask(ptr8, end8, 0xff00ff00))
+        return false;
+    ptr = reinterpret_cast<const QChar *>(ptr8);
+#elif defined(__SSE2__)
+    // One 16-byte load covers 8 code units. Note that ptr is a QChar pointer,
+    // so all arithmetic here is in code units, not in bytes.
+    while (end - ptr >= 8) {
+        __m128i data = _mm_loadu_si128(reinterpret_cast<const __m128i *>(ptr));
+        // Shift the high byte of each code unit into the low byte; the lane
+        // is zero exactly when the code unit is <= 0xff.
+        __m128i high = _mm_srli_epi16(data, 8);
+        __m128i comparison = _mm_cmpeq_epi16(high, _mm_setzero_si128());
+        // All 16 mask bits are set only if every lane compared equal to zero.
+        if (_mm_movemask_epi8(comparison) != 0xffff)
+            return false;
+        ptr += 8;
+    }
+#endif
+
+    // Remaining code units, one at a time.
+    while (ptr != end) {
+        if ((*ptr++).unicode() > 0xff)
+            return false;
+    }
+    return true;
+}
+
// conversion between Latin 1 and UTF-16
void qt_from_latin1(ushort *dst, const char *str, size_t size) Q_DECL_NOTHROW
{
diff --git a/src/corelib/tools/qstring.h b/src/corelib/tools/qstring.h
index 808f388c89..b40a622c7c 100644
--- a/src/corelib/tools/qstring.h
+++ b/src/corelib/tools/qstring.h
@@ -202,6 +202,12 @@ Q_DECLARE_TYPEINFO(QLatin1String, Q_MOVABLE_TYPE);
typedef QLatin1String QLatin1Literal;
//
+// QLatin1String inline implementations
+//
+inline bool QtPrivate::isLatin1(QLatin1String) Q_DECL_NOTHROW
+{
+    // Exists so that generic code can call isLatin1() on QLatin1String as
+    // well as on QString/QStringView; the argument is never inspected.
+    return true;
+}
+
+//
// QStringView members that require QLatin1String:
//
bool QStringView::startsWith(QLatin1String s, Qt::CaseSensitivity cs) const Q_DECL_NOTHROW
diff --git a/src/corelib/tools/qstringalgorithms.h b/src/corelib/tools/qstringalgorithms.h
index 6146e525d9..8446d85239 100644
--- a/src/corelib/tools/qstringalgorithms.h
+++ b/src/corelib/tools/qstringalgorithms.h
@@ -82,6 +82,11 @@ Q_REQUIRED_RESULT Q_CORE_EXPORT QByteArray convertToLocal8Bit(QStringView str);
Q_REQUIRED_RESULT Q_CORE_EXPORT QVector<uint> convertToUcs4(QStringView str);
Q_REQUIRED_RESULT Q_CORE_EXPORT bool isRightToLeft(QStringView string);
+// isAscii(): true if every character of the string is below 0x80 (US-ASCII).
+Q_REQUIRED_RESULT Q_CORE_EXPORT bool isAscii(QLatin1String s) Q_DECL_NOTHROW;
+Q_REQUIRED_RESULT Q_CORE_EXPORT bool isAscii(QStringView s) Q_DECL_NOTHROW;
+// isLatin1(): true if every character of the string is at most 0xff. The
+// QLatin1String overload always returns true and is defined inline.
+Q_REQUIRED_RESULT bool isLatin1(QLatin1String s) Q_DECL_NOTHROW; // in qstring.h
+Q_REQUIRED_RESULT Q_CORE_EXPORT bool isLatin1(QStringView s) Q_DECL_NOTHROW;
+
} // namespace QtPrivate
QT_END_NAMESPACE