From c375503fa030de51e821db00e7ca6c1378eb34ba Mon Sep 17 00:00:00 2001 From: Thiago Macieira Date: Sat, 20 Jan 2018 10:56:58 -0800 Subject: Add a few methods to check if a string is US-ASCII or Latin1 isLatin1(QLatin1String) is provided for completeness sake, in case some generic code operates on both QLatin1String and QString/QStringView. Change-Id: I5e421e32396d44e4b39efffd150b99a18eedf648 Reviewed-by: Allan Sandfeld Jensen Reviewed-by: Lars Knoll --- src/corelib/tools/qstring.cpp | 147 +++++++++++++++++++++++++++++++++- src/corelib/tools/qstring.h | 6 ++ src/corelib/tools/qstringalgorithms.h | 5 ++ 3 files changed, 157 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/corelib/tools/qstring.cpp b/src/corelib/tools/qstring.cpp index 5eeaa2a2a8..69751eb6dc 100644 --- a/src/corelib/tools/qstring.cpp +++ b/src/corelib/tools/qstring.cpp @@ -1,7 +1,7 @@ /**************************************************************************** ** ** Copyright (C) 2016 The Qt Company Ltd. -** Copyright (C) 2016 Intel Corporation. +** Copyright (C) 2018 Intel Corporation. ** Contact: https://www.qt.io/licensing/ ** ** This file is part of the QtCore module of the Qt Toolkit. @@ -251,6 +251,151 @@ inline RetType UnrollTailLoop<0>::exec(Number, RetType returnIfExited, Functor1, } #endif +#ifdef __SSE2__ +static bool simdTestMask(const char *&ptr, const char *end, quint32 maskval) +{ +# if defined(__AVX2__) + // AVX2 implementation: test 32 bytes at a time + const __m256i mask256 = _mm256_broadcastd_epi32(_mm_cvtsi32_si128(maskval)); + while (ptr + 32 < end) { + __m256i data = _mm256_loadu_si256(reinterpret_cast(ptr)); + if (!_mm256_testz_si256(mask256, data)) + return false; + ptr += 32; + } + + const __m128i mask = _mm256_castsi256_si128(mask256); +# elif defined(__SSE4_1__) + // SSE 4.1 implementation: test 32 bytes at a time (two 16-byte + // comparisons, unrolled) + const __m128i mask = _mm_set1_epi32(maskval); + while (ptr + 32 < end) { + __m128i data1 = _mm_loadu_si128(reinterpret_cast(ptr)); + __m128i data2 = _mm_loadu_si128(reinterpret_cast(ptr + 16)); + if (!_mm_testz_si128(mask, data1)) + return false; + if (!_mm_testz_si128(mask, data2)) + return false; + ptr += 32; + } +# endif +# if defined(__SSE4_1__) + // AVX2 and SSE4.1: final 16-byte comparison + if (ptr + 16 < end) { + __m128i data1 = _mm_loadu_si128(reinterpret_cast(ptr)); + if (!_mm_testz_si128(mask, data1)) + return false; + ptr += 16; + } +# else + // SSE2 implementation: test 16 bytes at a time. + const __m128i mask = _mm_set1_epi32(maskval); + while (ptr + 16 < end) { + __m128i data = _mm_loadu_si128(reinterpret_cast(ptr)); + __m128i masked = _mm_andnot_si128(mask, data); + __m128i comparison = _mm_cmpeq_epi16(masked, _mm_setzero_si128()); + if (quint16(_mm_movemask_epi8(comparison)) != 0xffff) + return false; + ptr += 16; + } +# endif + + return true; +} +#endif + +bool QtPrivate::isAscii(QLatin1String s) Q_DECL_NOTHROW +{ + const char *ptr = s.begin(); + const char *end = s.end(); + +#if defined(__AVX2__) + if (!simdTestMask(ptr, end, 0x80808080)) + return false; +#elif defined(__SSE2__) + // Testing for the high bit can be done efficiently with just PMOVMSKB + while (ptr + 16 < end) { + __m128i data = _mm_loadu_si128(reinterpret_cast(ptr)); + quint32 mask = _mm_movemask_epi8(data); + if (mask) + return false; + ptr += 16; + } +#endif + + while (ptr + 4 < end) { + quint32 data = qFromUnaligned(ptr); + if (data & 0x80808080U) + return false; + ptr += 4; + } + + while (ptr != end) { + if (quint8(*ptr++) & 0x80) + return false; + } + return true; +} + +bool QtPrivate::isAscii(QStringView s) Q_DECL_NOTHROW +{ + const QChar *ptr = s.begin(); + const QChar *end = s.end(); + +#ifdef __SSE2__ + const char *ptr8 = reinterpret_cast(ptr); + const char *end8 = reinterpret_cast(end); + if (!simdTestMask(ptr8, end8, 0xff80ff80)) + return false; + ptr = reinterpret_cast(ptr8); +#endif + + while (ptr != end) { + if ((*ptr++).unicode() & 0xff80) + return false; + } + return true; +} + +bool QtPrivate::isLatin1(QStringView s) Q_DECL_NOTHROW +{ + const QChar *ptr = s.begin(); + const QChar *end = s.end(); + +#if defined(__SSE4_1__) + const char *ptr8 = reinterpret_cast(ptr); + const char *end8 = reinterpret_cast(end); + if (!simdTestMask(ptr8, end8, 0xff00ff00)) + return false; + ptr = reinterpret_cast(ptr8); +#elif defined(__SSE2__) + // Testing if every other byte is non-zero can be done efficiently by + // using PUNPCKHBW (unpack high order bytes) and comparing that to zero. + while (ptr + 32 < end) { + __m128i data1 = _mm_loadu_si128(reinterpret_cast(ptr)); + __m128i data2 = _mm_loadu_si128(reinterpret_cast(ptr + 16)); + __m128i high = _mm_unpackhi_epi8(data1, data2); + __m128i comparison = _mm_cmpeq_epi16(high, _mm_setzero_si128()); + if (_mm_movemask_epi8(comparison)) + return false; + ptr += 16; + } + if (ptr + 16 < end) { + __m128i data1 = _mm_loadu_si128(reinterpret_cast(ptr)); + __m128i high = _mm_unpackhi_epi8(data1, data1); + __m128i comparison = _mm_cmpeq_epi16(high, _mm_setzero_si128()); + if (_mm_movemask_epi8(comparison)) + return false; + } +#endif + + while (ptr != end) { + if ((*ptr++).unicode() > 0xff) + return false; + } + return true; +} + // conversion between Latin 1 and UTF-16 void qt_from_latin1(ushort *dst, const char *str, size_t size) Q_DECL_NOTHROW { diff --git a/src/corelib/tools/qstring.h b/src/corelib/tools/qstring.h index 808f388c89..b40a622c7c 100644 --- a/src/corelib/tools/qstring.h +++ b/src/corelib/tools/qstring.h @@ -201,6 +201,12 @@ Q_DECLARE_TYPEINFO(QLatin1String, Q_MOVABLE_TYPE); // Qt 4.x compatibility typedef QLatin1String QLatin1Literal; +// +// QLatin1String inline implementations +// +inline bool QtPrivate::isLatin1(QLatin1String) Q_DECL_NOTHROW +{ return true; } + // // QStringView members that require QLatin1String: // diff --git a/src/corelib/tools/qstringalgorithms.h b/src/corelib/tools/qstringalgorithms.h index 6146e525d9..8446d85239 100644 --- a/src/corelib/tools/qstringalgorithms.h +++ b/src/corelib/tools/qstringalgorithms.h @@ -82,6 +82,11 @@ Q_REQUIRED_RESULT Q_CORE_EXPORT QByteArray convertToLocal8Bit(QStringView str); Q_REQUIRED_RESULT Q_CORE_EXPORT QVector convertToUcs4(QStringView str); Q_REQUIRED_RESULT Q_CORE_EXPORT bool isRightToLeft(QStringView string); +Q_REQUIRED_RESULT Q_CORE_EXPORT bool isAscii(QLatin1String s) Q_DECL_NOTHROW; +Q_REQUIRED_RESULT Q_CORE_EXPORT bool isAscii(QStringView s) Q_DECL_NOTHROW; +Q_REQUIRED_RESULT bool isLatin1(QLatin1String s) Q_DECL_NOTHROW; // in qstring.h +Q_REQUIRED_RESULT Q_CORE_EXPORT bool isLatin1(QStringView s) Q_DECL_NOTHROW; + } // namespace QtPRivate QT_END_NAMESPACE -- cgit v1.2.3