summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- src/corelib/codecs/qutfcodec.cpp 175
-rw-r--r-- src/corelib/codecs/qutfcodec_p.h 105
-rw-r--r-- src/corelib/tools/qstring.cpp 2
-rw-r--r-- tests/auto/corelib/codecs/qtextcodec/tst_qtextcodec.cpp 76
-rw-r--r-- tests/auto/xml/sax/qxmlsimplereader/xmldocs/not-wf/sa/170.xml.ref 2
5 files changed, 225 insertions, 135 deletions
diff --git a/src/corelib/codecs/qutfcodec.cpp b/src/corelib/codecs/qutfcodec.cpp
index d1fc5b851a..b0e0b3f010 100644
--- a/src/corelib/codecs/qutfcodec.cpp
+++ b/src/corelib/codecs/qutfcodec.cpp
@@ -128,114 +128,117 @@ QByteArray QUtf8::convertFromUnicode(const QChar *uc, int len, QTextCodec::Conve
return rstr;
}
+QString QUtf8::convertToUnicode(const char *chars, int len) // stateless overload: decodes len bytes of UTF-8 into a QString, one U+FFFD per undecodable lead byte
+{
+ QString result(len + 1, Qt::Uninitialized); // worst case: every input byte yields one UTF-16 unit
+ ushort *dst = reinterpret_cast<ushort *>(const_cast<QChar *>(result.constData())); // write cursor directly into result's buffer
+ const uchar *src = reinterpret_cast<const uchar *>(chars); // read cursor
+ const uchar *end = src + len; // one past the last input byte
+
+ while (src < end) {
+ uchar b = *src++; // fromUtf8() takes the lead byte separately; src now points at the byte after it
+ int res = QUtf8Functions::fromUtf8<QUtf8BaseTraits>(b, dst, src, end); // on success appends to dst and moves src past the sequence
+ if (res < 0) {
+ // decoding error (Error or EndOfString): only the lead byte was consumed, so the bytes after it are examined again by the next iterations
+ *dst++ = QChar::ReplacementCharacter;
+ }
+ }
+
+ result.truncate(dst - reinterpret_cast<const ushort *>(result.constData())); // shrink to the number of UTF-16 units actually written
+ return result;
+}
+
QString QUtf8::convertToUnicode(const char *chars, int len, QTextCodec::ConverterState *state)
{
bool headerdone = false;
ushort replacement = QChar::ReplacementCharacter;
int need = 0;
- int error = -1;
- uint uc = 0;
- uint min_uc = 0;
+ int invalid = 0; // malformed sequences seen in this call; added to state->invalidChars at the end
+ int res; // last fromUtf8() result: bytes consumed, or a negative QUtf8BaseTraits code
+ uchar ch = 0;
+
+ QString result(need + len + 1, Qt::Uninitialized); // worst case
+ ushort *dst = reinterpret_cast<ushort *>(const_cast<QChar *>(result.constData())); // write cursor into result's buffer
+ const uchar *src = reinterpret_cast<const uchar *>(chars); // read cursor
+ const uchar *end = src + len; // one past the last input byte
+
if (state) {
if (state->flags & QTextCodec::IgnoreHeader)
headerdone = true;
if (state->flags & QTextCodec::ConvertInvalidToNull)
replacement = QChar::Null;
- need = state->remainingChars;
- if (need) {
- uc = state->state_data[0];
- min_uc = state->state_data[1];
- }
- }
- if (!headerdone && len > 3
- && (uchar)chars[0] == 0xef && (uchar)chars[1] == 0xbb && (uchar)chars[2] == 0xbf) {
- // starts with a byte order mark
- chars += 3;
- len -= 3;
- headerdone = true;
- }
-
- QString result(need + len + 1, Qt::Uninitialized); // worst case
- ushort *qch = (ushort *)result.unicode();
- uchar ch;
- int invalid = 0;
-
- for (int i = 0; i < len; ++i) {
- ch = chars[i];
- if (need) {
- if ((ch&0xc0) == 0x80) {
- uc = (uc << 6) | (ch & 0x3f);
- --need;
- if (!need) {
- // utf-8 bom composes into 0xfeff code point
- if (!headerdone && uc == 0xfeff) {
- // don't do anything, just skip the BOM
- } else if (QChar::requiresSurrogates(uc) && uc <= QChar::LastValidCodePoint) {
- // surrogate pair
- Q_ASSERT((qch - (ushort*)result.unicode()) + 2 < result.length());
- *qch++ = QChar::highSurrogate(uc);
- *qch++ = QChar::lowSurrogate(uc);
- } else if ((uc < min_uc) || QChar::isSurrogate(uc) || uc > QChar::LastValidCodePoint) {
- // error: overlong sequence, UTF16 surrogate or non-character
- *qch++ = replacement;
- ++invalid;
- } else {
- *qch++ = uc;
- }
- headerdone = true;
- }
- } else {
- // error
- i = error;
- *qch++ = replacement;
- ++invalid;
- need = 0;
- headerdone = true;
- }
- } else {
- if (ch < 128) {
- *qch++ = ushort(ch);
- headerdone = true;
- } else if ((ch & 0xe0) == 0xc0) {
- uc = ch & 0x1f;
- need = 1;
- error = i;
- min_uc = 0x80;
- headerdone = true;
- } else if ((ch & 0xf0) == 0xe0) {
- uc = ch & 0x0f;
- need = 2;
- error = i;
- min_uc = 0x800;
- } else if ((ch&0xf8) == 0xf0) {
- uc = ch & 0x07;
- need = 3;
- error = i;
- min_uc = 0x10000;
- headerdone = true;
- } else {
- // error
- *qch++ = replacement;
+ if (state->remainingChars) {
+ // handle incoming state first: rebuild the unfinished sequence from the saved bytes plus the start of the new input
+ uchar remainingCharsData[4]; // longest UTF-8 sequence possible
+ int remainingCharsCount = state->remainingChars;
+ int newCharsToCopy = qMin<int>(sizeof(remainingCharsData) - remainingCharsCount, end - src);
+
+ memset(remainingCharsData, 0, sizeof(remainingCharsData));
+ memcpy(remainingCharsData, &state->state_data[0], remainingCharsCount);
+ memcpy(remainingCharsData + remainingCharsCount, src, newCharsToCopy);
+
+ const uchar *begin = &remainingCharsData[1];
+ res = QUtf8Functions::fromUtf8<QUtf8BaseTraits>(remainingCharsData[0], dst, begin,
+ static_cast<const uchar *>(remainingCharsData) + remainingCharsCount + newCharsToCopy);
+ if (res == QUtf8BaseTraits::EndOfString) {
+ // if we got EndOfString again, then there were too few bytes in src;
+ // copy to our state and return
+ state->remainingChars = remainingCharsCount + newCharsToCopy;
+ memcpy(&state->state_data[0], remainingCharsData, state->remainingChars);
+ return QString();
+ } else if (res == QUtf8BaseTraits::Error) {
++invalid;
+ *dst++ = replacement; // the saved bytes are dropped; the new input is decoded from its first byte below
+ } else if (!headerdone && res >= 0) {
+ // eat the UTF-8 BOM
headerdone = true;
+ if (dst[-1] == 0xfeff)
+ --dst;
}
+
+ // adjust src only after a successful decode: on Error res is negative and would move src to before chars
+ if (res >= 0)
+ src += res - remainingCharsCount;
}
}
- if (!state && need > 0) {
- // unterminated UTF sequence
- for (int i = error; i < len; ++i) {
- *qch++ = replacement;
+
+ // main body, stateless decoding
+ res = 0;
+ while (res >= 0 && src < end) {
+ ch = *src++;
+ res = QUtf8Functions::fromUtf8<QUtf8BaseTraits>(ch, dst, src, end);
+ if (!headerdone && res >= 0) {
+ headerdone = true;
+ // eat the UTF-8 BOM
+ if (dst[-1] == 0xfeff)
+ --dst;
+ }
+ if (res == QUtf8BaseTraits::Error) {
+ res = 0; // keep looping: only EndOfString may terminate the loop early
++invalid;
+ *dst++ = replacement;
}
}
- result.truncate(qch - (ushort *)result.unicode());
+
+ if (!state && res == QUtf8BaseTraits::EndOfString) {
+ // unterminated UTF sequence
+ *dst++ = QChar::ReplacementCharacter;
+ for (; src < end; ++src) // one replacement per leftover byte, without stepping src beyond end
+ *dst++ = QChar::ReplacementCharacter;
+ }
+
+ result.truncate(dst - (ushort *)result.unicode());
if (state) {
state->invalidChars += invalid;
- state->remainingChars = need;
if (headerdone)
state->flags |= QTextCodec::IgnoreHeader;
- state->state_data[0] = need ? uc : 0;
- state->state_data[1] = need ? min_uc : 0;
+ if (res == QUtf8BaseTraits::EndOfString) {
+ --src; // unread the byte in ch
+ state->remainingChars = end - src;
+ memcpy(&state->state_data[0], src, end - src); // save the unfinished sequence for the next call
+ } else {
+ state->remainingChars = 0;
+ }
}
return result;
}
diff --git a/src/corelib/codecs/qutfcodec_p.h b/src/corelib/codecs/qutfcodec_p.h
index 4f0e2394fe..c94a7a12e4 100644
--- a/src/corelib/codecs/qutfcodec_p.h
+++ b/src/corelib/codecs/qutfcodec_p.h
@@ -169,6 +169,110 @@ namespace QUtf8Functions
Traits::appendByte(dst, 0x80 | (u & 0x3f));
return 0;
}
+
+ inline bool isContinuationByte(uchar b)
+ {
+ return (b & 0xc0) == 0x80; // true when the top two bits are 10, i.e. b is in 0x80..0xBF
+ }
+
+ /// decodes the UTF-8 sequence whose first byte is \a b; returns the number of input bytes consumed (including \a b) in case of success;
+ /// returns negative in case of error: Traits::Error or Traits::EndOfString (every error return happens before any append to \a dst or advance of \a src)
+ template <typename Traits, typename OutputPtr, typename InputPtr> inline
+ int fromUtf8(uchar b, OutputPtr &dst, InputPtr &src, InputPtr end)
+ {
+ int charsNeeded; // total length of the sequence in bytes, lead byte included
+ uint min_uc; // smallest code point that legitimately needs charsNeeded bytes; anything lower is overlong
+ uint uc; // code point being assembled
+
+ if (!Traits::skipAsciiHandling && b < 0x80) {
+ // US-ASCII
+ Traits::appendUtf16(dst, b);
+ return 1;
+ }
+
+ if (!Traits::isTrusted && Q_UNLIKELY(b <= 0xC1)) {
+ // a UTF-8 lead byte must be at least 0xC0 (0x80..0xBF are continuation bytes)
+ // however, all 0xC0 and 0xC1 first bytes can only produce overlong sequences
+ return Traits::Error;
+ } else if (b < 0xe0) {
+ charsNeeded = 2;
+ min_uc = 0x80;
+ uc = b & 0x1f;
+ } else if (b < 0xf0) {
+ charsNeeded = 3;
+ min_uc = 0x800;
+ uc = b & 0x0f;
+ } else if (b < 0xf5) {
+ charsNeeded = 4;
+ min_uc = 0x10000;
+ uc = b & 0x07;
+ } else {
+ // the last Unicode character is U+10FFFF
+ // it's encoded in UTF-8 as "\xF4\x8F\xBF\xBF"
+ // therefore, a byte higher than 0xF4 is not the UTF-8 first byte
+ return Traits::Error;
+ }
+
+ int bytesAvailable = Traits::availableBytes(src, end);
+ if (Q_UNLIKELY(bytesAvailable < charsNeeded - 1)) {
+ // it's possible that we have an error instead of just unfinished bytes
+ if (bytesAvailable > 0 && !isContinuationByte(Traits::peekByte(src, 0)))
+ return Traits::Error;
+ if (bytesAvailable > 1 && !isContinuationByte(Traits::peekByte(src, 1)))
+ return Traits::Error;
+ if (bytesAvailable > 2 && !isContinuationByte(Traits::peekByte(src, 2))) // NOTE(review): bytesAvailable < charsNeeded - 1 <= 3 here, so this check looks unreachable
+ return Traits::Error;
+ return Traits::EndOfString;
+ }
+
+ // first continuation character
+ b = Traits::peekByte(src, 0);
+ if (!isContinuationByte(b))
+ return Traits::Error;
+ uc <<= 6;
+ uc |= b & 0x3f;
+
+ if (charsNeeded > 2) {
+ // second continuation character
+ b = Traits::peekByte(src, 1);
+ if (!isContinuationByte(b))
+ return Traits::Error;
+ uc <<= 6;
+ uc |= b & 0x3f;
+
+ if (charsNeeded > 3) {
+ // third continuation character
+ b = Traits::peekByte(src, 2);
+ if (!isContinuationByte(b))
+ return Traits::Error;
+ uc <<= 6;
+ uc |= b & 0x3f;
+ }
+ }
+
+ // we've decoded something; safety-check it
+ if (!Traits::isTrusted) {
+ if (uc < min_uc) // overlong encoding
+ return Traits::Error;
+ if (QChar::isSurrogate(uc) || uc > QChar::LastValidCodePoint)
+ return Traits::Error;
+ if (!Traits::allowNonCharacters && QChar::isNonCharacter(uc))
+ return Traits::Error;
+ }
+
+ // write the UTF-16 sequence
+ if (!QChar::requiresSurrogates(uc)) {
+ // UTF-8 decoded and no surrogates are required
+ // detach if necessary
+ Traits::appendUtf16(dst, ushort(uc));
+ } else {
+ // UTF-8 decoded to something that requires a surrogate pair
+ Traits::appendUcs4(dst, uc);
+ }
+
+ Traits::advanceByte(src, charsNeeded - 1); // src is only moved once the whole sequence has been accepted
+ return charsNeeded;
+ }
}
enum DataEndianness
@@ -180,6 +284,7 @@ enum DataEndianness
struct QUtf8
{
+ static QString convertToUnicode(const char *, int);
static QString convertToUnicode(const char *, int, QTextCodec::ConverterState *);
static QByteArray convertFromUnicode(const QChar *, int);
static QByteArray convertFromUnicode(const QChar *, int, QTextCodec::ConverterState *);
diff --git a/src/corelib/tools/qstring.cpp b/src/corelib/tools/qstring.cpp
index bd2aa3450a..9544062dd9 100644
--- a/src/corelib/tools/qstring.cpp
+++ b/src/corelib/tools/qstring.cpp
@@ -4317,7 +4317,7 @@ QString QString::fromUtf8_helper(const char *str, int size)
return QString();
Q_ASSERT(size != -1);
- return QUtf8::convertToUnicode(str, size, 0);
+ return QUtf8::convertToUnicode(str, size);
}
/*!
diff --git a/tests/auto/corelib/codecs/qtextcodec/tst_qtextcodec.cpp b/tests/auto/corelib/codecs/qtextcodec/tst_qtextcodec.cpp
index 8e1b3cf3b2..12b81ee7d4 100644
--- a/tests/auto/corelib/codecs/qtextcodec/tst_qtextcodec.cpp
+++ b/tests/auto/corelib/codecs/qtextcodec/tst_qtextcodec.cpp
@@ -456,7 +456,7 @@ void tst_QTextCodec::flagF7808080() const
//QVERIFY(!codec->canEncode(QChar(0x1C0000)));
QTextCodec::ConverterState state(QTextCodec::ConvertInvalidToNull);
- QVERIFY(codec->toUnicode(input.constData(), input.length(), &state) == QChar(0));
+ QCOMPARE(codec->toUnicode(input.constData(), input.length(), &state), QString(input.size(), QChar(0)));
}
void tst_QTextCodec::nonFlaggedEFBFBF() const
@@ -689,8 +689,7 @@ void tst_QTextCodec::utf8Codec_data()
utf8 += char(0xbf);
utf8 += char(0xbf);
utf8 += char(0xbf);
- str.clear();
- str += QChar(QChar::ReplacementCharacter);
+ str = fromInvalidUtf8Sequence(utf8);
QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 2.2.4") << utf8 << str << -1;
// 2.2.5 U+03FFFFFF (not a valid Unicode character)
@@ -755,8 +754,7 @@ void tst_QTextCodec::utf8Codec_data()
utf8 += char(0x90);
utf8 += char(0x80);
utf8 += char(0x80);
- str.clear();
- str += QChar(QChar::ReplacementCharacter);
+ str = fromInvalidUtf8Sequence(utf8);
QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 2.3.5") << utf8 << str << -1;
// 3.1.1
@@ -1244,7 +1242,7 @@ void tst_QTextCodec::utf8Codec_data()
utf8.clear();
utf8 += char(0xc0);
utf8 += char(0xaf);
- str = QChar(QChar::ReplacementCharacter);
+ str = fromInvalidUtf8Sequence(utf8);
QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 4.1.1") << utf8 << str << -1;
// 4.1.2
@@ -1252,7 +1250,7 @@ void tst_QTextCodec::utf8Codec_data()
utf8 += char(0xe0);
utf8 += char(0x80);
utf8 += char(0xaf);
- str = QChar(QChar::ReplacementCharacter);
+ str = fromInvalidUtf8Sequence(utf8);
QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 4.1.2") << utf8 << str << -1;
// 4.1.3
@@ -1261,7 +1259,7 @@ void tst_QTextCodec::utf8Codec_data()
utf8 += char(0x80);
utf8 += char(0x80);
utf8 += char(0xaf);
- str = QChar(QChar::ReplacementCharacter);
+ str = fromInvalidUtf8Sequence(utf8);
QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 4.1.3") << utf8 << str << -1;
// 4.1.4
@@ -1289,7 +1287,7 @@ void tst_QTextCodec::utf8Codec_data()
utf8.clear();
utf8 += char(0xc1);
utf8 += char(0xbf);
- str = QChar(QChar::ReplacementCharacter);
+ str = fromInvalidUtf8Sequence(utf8);
QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 4.2.1") << utf8 << str << -1;
// 4.2.2
@@ -1297,7 +1295,7 @@ void tst_QTextCodec::utf8Codec_data()
utf8 += char(0xe0);
utf8 += char(0x9f);
utf8 += char(0xbf);
- str = QChar(QChar::ReplacementCharacter);
+ str = fromInvalidUtf8Sequence(utf8);
QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 4.2.2") << utf8 << str << -1;
// 4.2.3
@@ -1306,7 +1304,7 @@ void tst_QTextCodec::utf8Codec_data()
utf8 += char(0x8f);
utf8 += char(0xbf);
utf8 += char(0xbf);
- str = QChar(QChar::ReplacementCharacter);
+ str = fromInvalidUtf8Sequence(utf8);
QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 4.2.3") << utf8 << str << -1;
// 4.2.4
@@ -1334,7 +1332,7 @@ void tst_QTextCodec::utf8Codec_data()
utf8.clear();
utf8 += char(0xc0);
utf8 += char(0x80);
- str = QChar(QChar::ReplacementCharacter);
+ str = fromInvalidUtf8Sequence(utf8);
QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 4.3.1") << utf8 << str << -1;
// 4.3.2
@@ -1342,7 +1340,7 @@ void tst_QTextCodec::utf8Codec_data()
utf8 += char(0xe0);
utf8 += char(0x80);
utf8 += char(0x80);
- str = QChar(QChar::ReplacementCharacter);
+ str = fromInvalidUtf8Sequence(utf8);
QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 4.3.2") << utf8 << str << -1;
// 4.3.3
@@ -1351,7 +1349,7 @@ void tst_QTextCodec::utf8Codec_data()
utf8 += char(0x80);
utf8 += char(0x80);
utf8 += char(0x80);
- str = QChar(QChar::ReplacementCharacter);
+ str = fromInvalidUtf8Sequence(utf8);
QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 4.3.3") << utf8 << str << -1;
// 4.3.4
@@ -1380,7 +1378,7 @@ void tst_QTextCodec::utf8Codec_data()
utf8 += char(0xed);
utf8 += char(0xa0);
utf8 += char(0x80);
- str = QChar(QChar::ReplacementCharacter);
+ str = fromInvalidUtf8Sequence(utf8);
QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.1.1") << utf8 << str << -1;
// 5.1.2
@@ -1388,7 +1386,7 @@ void tst_QTextCodec::utf8Codec_data()
utf8 += char(0xed);
utf8 += char(0xad);
utf8 += char(0xbf);
- str = QChar(QChar::ReplacementCharacter);
+ str = fromInvalidUtf8Sequence(utf8);
QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.1.2") << utf8 << str << -1;
// 5.1.3
@@ -1396,7 +1394,7 @@ void tst_QTextCodec::utf8Codec_data()
utf8 += char(0xed);
utf8 += char(0xae);
utf8 += char(0x80);
- str = QChar(QChar::ReplacementCharacter);
+ str = fromInvalidUtf8Sequence(utf8);
QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.1.3") << utf8 << str << -1;
// 5.1.4
@@ -1404,7 +1402,7 @@ void tst_QTextCodec::utf8Codec_data()
utf8 += char(0xed);
utf8 += char(0xaf);
utf8 += char(0xbf);
- str = QChar(QChar::ReplacementCharacter);
+ str = fromInvalidUtf8Sequence(utf8);
QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.1.4") << utf8 << str << -1;
// 5.1.5
@@ -1412,7 +1410,7 @@ void tst_QTextCodec::utf8Codec_data()
utf8 += char(0xed);
utf8 += char(0xb0);
utf8 += char(0x80);
- str = QChar(QChar::ReplacementCharacter);
+ str = fromInvalidUtf8Sequence(utf8);
QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.1.5") << utf8 << str << -1;
// 5.1.6
@@ -1420,7 +1418,7 @@ void tst_QTextCodec::utf8Codec_data()
utf8 += char(0xed);
utf8 += char(0xbe);
utf8 += char(0x80);
- str = QChar(QChar::ReplacementCharacter);
+ str = fromInvalidUtf8Sequence(utf8);
QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.1.6") << utf8 << str << -1;
// 5.1.7
@@ -1428,7 +1426,7 @@ void tst_QTextCodec::utf8Codec_data()
utf8 += char(0xed);
utf8 += char(0xbf);
utf8 += char(0xbf);
- str = QChar(QChar::ReplacementCharacter);
+ str = fromInvalidUtf8Sequence(utf8);
QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.1.7") << utf8 << str << -1;
// 5.2.1
@@ -1439,9 +1437,7 @@ void tst_QTextCodec::utf8Codec_data()
utf8 += char(0xed);
utf8 += char(0xb0);
utf8 += char(0x80);
- str.clear();
- str += QChar(QChar::ReplacementCharacter);
- str += QChar(QChar::ReplacementCharacter);
+ str = fromInvalidUtf8Sequence(utf8);
QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.2.1") << utf8 << str << -1;
// 5.2.2
@@ -1452,9 +1448,7 @@ void tst_QTextCodec::utf8Codec_data()
utf8 += char(0xed);
utf8 += char(0xbf);
utf8 += char(0xbf);
- str.clear();
- str += QChar(QChar::ReplacementCharacter);
- str += QChar(QChar::ReplacementCharacter);
+ str = fromInvalidUtf8Sequence(utf8);
QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.2.2") << utf8 << str << -1;
// 5.2.3
@@ -1465,9 +1459,7 @@ void tst_QTextCodec::utf8Codec_data()
utf8 += char(0xed);
utf8 += char(0xb0);
utf8 += char(0x80);
- str.clear();
- str += QChar(QChar::ReplacementCharacter);
- str += QChar(QChar::ReplacementCharacter);
+ str = fromInvalidUtf8Sequence(utf8);
QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.2.3") << utf8 << str << -1;
// 5.2.4
@@ -1478,9 +1470,7 @@ void tst_QTextCodec::utf8Codec_data()
utf8 += char(0xed);
utf8 += char(0xbf);
utf8 += char(0xbf);
- str.clear();
- str += QChar(QChar::ReplacementCharacter);
- str += QChar(QChar::ReplacementCharacter);
+ str = fromInvalidUtf8Sequence(utf8);
QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.2.4") << utf8 << str << -1;
// 5.2.5
@@ -1491,9 +1481,7 @@ void tst_QTextCodec::utf8Codec_data()
utf8 += char(0xed);
utf8 += char(0xb0);
utf8 += char(0x80);
- str.clear();
- str += QChar(QChar::ReplacementCharacter);
- str += QChar(QChar::ReplacementCharacter);
+ str = fromInvalidUtf8Sequence(utf8);
QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.2.5") << utf8 << str << -1;
// 5.2.6
@@ -1504,9 +1492,7 @@ void tst_QTextCodec::utf8Codec_data()
utf8 += char(0xed);
utf8 += char(0xbf);
utf8 += char(0xbf);
- str.clear();
- str += QChar(QChar::ReplacementCharacter);
- str += QChar(QChar::ReplacementCharacter);
+ str = fromInvalidUtf8Sequence(utf8);
QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.2.6") << utf8 << str << -1;
// 5.2.7
@@ -1517,9 +1503,7 @@ void tst_QTextCodec::utf8Codec_data()
utf8 += char(0xed);
utf8 += char(0xb0);
utf8 += char(0x80);
- str.clear();
- str += QChar(QChar::ReplacementCharacter);
- str += QChar(QChar::ReplacementCharacter);
+ str = fromInvalidUtf8Sequence(utf8);
QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.2.7") << utf8 << str << -1;
// 5.2.8
@@ -1530,9 +1514,7 @@ void tst_QTextCodec::utf8Codec_data()
utf8 += char(0xed);
utf8 += char(0xbf);
utf8 += char(0xbf);
- str.clear();
- str += QChar(QChar::ReplacementCharacter);
- str += QChar(QChar::ReplacementCharacter);
+ str = fromInvalidUtf8Sequence(utf8);
QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.2.8") << utf8 << str << -1;
// 5.3.1 - non-character code
@@ -1541,7 +1523,7 @@ void tst_QTextCodec::utf8Codec_data()
utf8 += char(0xbf);
utf8 += char(0xbe);
//str = QChar(QChar::ReplacementCharacter);
- str = QString::fromUtf8(utf8);
+ str = QChar(0xfffe);
QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.3.1") << utf8 << str << -1;
// 5.3.2 - non-character code
@@ -1550,7 +1532,7 @@ void tst_QTextCodec::utf8Codec_data()
utf8 += char(0xbf);
utf8 += char(0xbf);
//str = QChar(QChar::ReplacementCharacter);
- str = QString::fromUtf8(utf8);
+ str = QChar(0xffff);
QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.3.2") << utf8 << str << -1;
}
diff --git a/tests/auto/xml/sax/qxmlsimplereader/xmldocs/not-wf/sa/170.xml.ref b/tests/auto/xml/sax/qxmlsimplereader/xmldocs/not-wf/sa/170.xml.ref
index eca786f688..87336aa00f 100644
--- a/tests/auto/xml/sax/qxmlsimplereader/xmldocs/not-wf/sa/170.xml.ref
+++ b/tests/auto/xml/sax/qxmlsimplereader/xmldocs/not-wf/sa/170.xml.ref
@@ -1,6 +1,6 @@
setDocumentLocator(locator={columnNumber=1, lineNumber=1})
startDocument()
startElement(namespaceURI="", localName="doc", qName="doc", atts=[])
- characters(ch="�")
+ characters(ch="����")
endElement(namespaceURI="", localName="doc", qName="doc")
endDocument()