diff options
-rw-r--r-- | src/corelib/codecs/qutfcodec.cpp | 9 | ||||
-rw-r--r-- | tests/auto/corelib/codecs/qtextcodec/tst_qtextcodec.cpp | 13 |
2 files changed, 16 insertions, 6 deletions
diff --git a/src/corelib/codecs/qutfcodec.cpp b/src/corelib/codecs/qutfcodec.cpp index 98d4b2e4e3..a33c1bc9ce 100644 --- a/src/corelib/codecs/qutfcodec.cpp +++ b/src/corelib/codecs/qutfcodec.cpp @@ -364,6 +364,7 @@ QString QUtf8::convertToUnicode(const char *chars, int len, QTextCodec::Converte // main body, stateless decoding res = 0; const uchar *nextAscii = src; + const uchar *start = src; while (res >= 0 && src < end) { if (src >= nextAscii && simdDecodeAscii(dst, nextAscii, src, end)) break; @@ -372,9 +373,11 @@ QString QUtf8::convertToUnicode(const char *chars, int len, QTextCodec::Converte res = QUtf8Functions::fromUtf8<QUtf8BaseTraits>(ch, dst, src, end); if (!headerdone && res >= 0) { headerdone = true; - // eat the UTF-8 BOM - if (dst[-1] == 0xfeff) - --dst; + if (src == start + 3) { // 3 == sizeof(utf8-bom) + // eat the UTF-8 BOM (it can only appear at the beginning of the string). + if (dst[-1] == 0xfeff) + --dst; + } } if (res == QUtf8BaseTraits::Error) { res = 0; diff --git a/tests/auto/corelib/codecs/qtextcodec/tst_qtextcodec.cpp b/tests/auto/corelib/codecs/qtextcodec/tst_qtextcodec.cpp index 3aa06d237d..8a9ae0cd72 100644 --- a/tests/auto/corelib/codecs/qtextcodec/tst_qtextcodec.cpp +++ b/tests/auto/corelib/codecs/qtextcodec/tst_qtextcodec.cpp @@ -1588,10 +1588,17 @@ void tst_QTextCodec::utf8bom_data() << QString("a"); } - { + { // test the non-SIMD code-path static const ushort data[] = { 0x61, 0xfeff, 0x62 }; - QTest::newRow("middle-bom") - << QByteArray("a\357\273\277b", 5) + QTest::newRow("middle-bom (non SIMD)") + << QByteArray("a\357\273\277b") + << QString::fromUtf16(data, sizeof(data)/sizeof(short)); + } + + { // test the SIMD code-path + static const ushort data[] = { 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0xfeff, 0x6d }; + QTest::newRow("middle-bom (SIMD)") + << QByteArray("abcdefghijkl\357\273\277m") << QString::fromUtf16(data, sizeof(data)/sizeof(short)); } } |