diff options
author | Frederik Gladhorn <frederik.gladhorn@digia.com> | 2014-05-06 16:19:14 +0200 |
---|---|---|
committer | Frederik Gladhorn <frederik.gladhorn@digia.com> | 2014-05-06 16:50:03 +0200 |
commit | 1326cd15f7ba985551f0fddc717e3bfc01ddda85 (patch) | |
tree | 024eb871ed5f4e8c02e21412475e6e9929a2b030 /src/corelib/codecs/qutfcodec.cpp | |
parent | fe70367fe06984d1ac84cc276ca3fd3edc4193c7 (diff) | |
parent | beb7258a56b6ec76531b73cc07ee30132a3f548f (diff) |
Merge remote-tracking branch 'origin/stable' into dev
Conflicts:
mkspecs/qnx-x86-qcc/qplatformdefs.h
src/corelib/global/qglobal.h
src/opengl/gl2paintengineex/qpaintengineex_opengl2.cpp
src/opengl/qgl.cpp
src/opengl/qglpixelbuffer.cpp
src/opengl/qglshaderprogram.cpp
tests/auto/opengl/qglthreads/tst_qglthreads.cpp
Change-Id: Iaba137884d3526a139000ca26fee02bb27b5cdb5
Diffstat (limited to 'src/corelib/codecs/qutfcodec.cpp')
-rw-r--r-- | src/corelib/codecs/qutfcodec.cpp | 44 |
1 file changed, 29 insertions, 15 deletions
diff --git a/src/corelib/codecs/qutfcodec.cpp b/src/corelib/codecs/qutfcodec.cpp
index 54312601e4..072cda63aa 100644
--- a/src/corelib/codecs/qutfcodec.cpp
+++ b/src/corelib/codecs/qutfcodec.cpp
@@ -52,6 +52,8 @@ QT_BEGIN_NAMESPACE
 
 enum { Endian = 0, Data = 1 };
 
+static const uchar utf8bom[] = { 0xef, 0xbb, 0xbf };
+
 #if defined(__SSE2__) && defined(QT_COMPILER_SUPPORTS_SSE2)
 static inline bool simdEncodeAscii(uchar *&dst, const ushort *&nextAscii, const ushort *&src, const ushort *end)
 {
@@ -187,9 +189,9 @@ QByteArray QUtf8::convertFromUnicode(const QChar *uc, int len, QTextCodec::Conve
     int invalid = 0;
     if (state && !(state->flags & QTextCodec::IgnoreHeader)) {
         // append UTF-8 BOM
-        *cursor++ = 0xef;
-        *cursor++ = 0xbb;
-        *cursor++ = 0xbf;
+        *cursor++ = utf8bom[0];
+        *cursor++ = utf8bom[1];
+        *cursor++ = utf8bom[2];
     }
 
     const ushort *nextAscii = src;
@@ -240,19 +242,31 @@ QString QUtf8::convertToUnicode(const char *chars, int len)
     const uchar *src = reinterpret_cast<const uchar *>(chars);
     const uchar *end = src + len;
 
-    while (src < end) {
-        const uchar *nextAscii = end;
-        if (simdDecodeAscii(dst, nextAscii, src, end))
-            break;
+    // attempt to do a full decoding in SIMD
+    const uchar *nextAscii = end;
+    if (!simdDecodeAscii(dst, nextAscii, src, end)) {
+        // at least one non-ASCII entry
+        // check if we failed to decode the UTF-8 BOM; if so, skip it
+        if (Q_UNLIKELY(src == reinterpret_cast<const uchar *>(chars))
+                && end - src >= 3
+                && Q_UNLIKELY(src[0] == utf8bom[0] && src[1] == utf8bom[1] && src[2] == utf8bom[2])) {
+            src += 3;
+        }
 
-        do {
-            uchar b = *src++;
-            int res = QUtf8Functions::fromUtf8<QUtf8BaseTraits>(b, dst, src, end);
-            if (res < 0) {
-                // decoding error
-                *dst++ = QChar::ReplacementCharacter;
-            }
-        } while (src < nextAscii);
+        while (src < end) {
+            nextAscii = end;
+            if (simdDecodeAscii(dst, nextAscii, src, end))
+                break;
+
+            do {
+                uchar b = *src++;
+                int res = QUtf8Functions::fromUtf8<QUtf8BaseTraits>(b, dst, src, end);
+                if (res < 0) {
+                    // decoding error
+                    *dst++ = QChar::ReplacementCharacter;
+                }
+            } while (src < nextAscii);
+        }
     }
 
     result.truncate(dst - reinterpret_cast<const ushort *>(result.constData()));