summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r--  src/corelib/codecs/qutfcodec.cpp  9
-rw-r--r--  tests/auto/corelib/codecs/qtextcodec/tst_qtextcodec.cpp  13
2 files changed, 16 insertions, 6 deletions
diff --git a/src/corelib/codecs/qutfcodec.cpp b/src/corelib/codecs/qutfcodec.cpp
index 98d4b2e4e3..a33c1bc9ce 100644
--- a/src/corelib/codecs/qutfcodec.cpp
+++ b/src/corelib/codecs/qutfcodec.cpp
@@ -364,6 +364,7 @@ QString QUtf8::convertToUnicode(const char *chars, int len, QTextCodec::Converte
// main body, stateless decoding
res = 0;
const uchar *nextAscii = src;
+ const uchar *start = src;
while (res >= 0 && src < end) {
if (src >= nextAscii && simdDecodeAscii(dst, nextAscii, src, end))
break;
@@ -372,9 +373,11 @@ QString QUtf8::convertToUnicode(const char *chars, int len, QTextCodec::Converte
res = QUtf8Functions::fromUtf8<QUtf8BaseTraits>(ch, dst, src, end);
if (!headerdone && res >= 0) {
headerdone = true;
- // eat the UTF-8 BOM
- if (dst[-1] == 0xfeff)
- --dst;
+ if (src == start + 3) { // 3 == sizeof(utf8-bom)
+ // eat the UTF-8 BOM (it can only appear at the beginning of the string).
+ if (dst[-1] == 0xfeff)
+ --dst;
+ }
}
if (res == QUtf8BaseTraits::Error) {
res = 0;
diff --git a/tests/auto/corelib/codecs/qtextcodec/tst_qtextcodec.cpp b/tests/auto/corelib/codecs/qtextcodec/tst_qtextcodec.cpp
index 3aa06d237d..8a9ae0cd72 100644
--- a/tests/auto/corelib/codecs/qtextcodec/tst_qtextcodec.cpp
+++ b/tests/auto/corelib/codecs/qtextcodec/tst_qtextcodec.cpp
@@ -1588,10 +1588,17 @@ void tst_QTextCodec::utf8bom_data()
<< QString("a");
}
- {
+ { // test the non-SIMD code-path
static const ushort data[] = { 0x61, 0xfeff, 0x62 };
- QTest::newRow("middle-bom")
- << QByteArray("a\357\273\277b", 5)
+ QTest::newRow("middle-bom (non SIMD)")
+ << QByteArray("a\357\273\277b")
+ << QString::fromUtf16(data, sizeof(data)/sizeof(short));
+ }
+
+ { // test the SIMD code-path
+ static const ushort data[] = { 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0xfeff, 0x6d };
+ QTest::newRow("middle-bom (SIMD)")
+ << QByteArray("abcdefghijkl\357\273\277m")
<< QString::fromUtf16(data, sizeof(data)/sizeof(short));
}
}