From e96a311334a5c70d5ffcc2ca5c10919952b99636 Mon Sep 17 00:00:00 2001 From: Lars Knoll Date: Tue, 8 Sep 2020 14:59:05 +0200 Subject: Use UTF-8 when converting 8 bit data in QTextStream This was overlooked when doing the conversion to use UTF-8 as the standard 8 bit encoding for text. Fixes: QTBUG-54942 Change-Id: Ib7b1b75b4d694648ab7143f6930b6bb1dcad19c9 Reviewed-by: Thiago Macieira --- src/corelib/serialization/qtextstream.cpp | 30 ++++++++++++---------- src/corelib/serialization/qtextstream_p.h | 1 + .../serialization/qtextstream/tst_qtextstream.cpp | 4 +-- tests/auto/corelib/text/qstring/tst_qstring.cpp | 10 +++++++- 4 files changed, 29 insertions(+), 16 deletions(-) diff --git a/src/corelib/serialization/qtextstream.cpp b/src/corelib/serialization/qtextstream.cpp index 68ead1068b..3c69f3398f 100644 --- a/src/corelib/serialization/qtextstream.cpp +++ b/src/corelib/serialization/qtextstream.cpp @@ -945,6 +945,11 @@ void QTextStreamPrivate::putString(QLatin1String data, bool number) } } +void QTextStreamPrivate::putString(QUtf8StringView data, bool number) +{ + putString(data.toString(), number); +} + /*! Constructs a QTextStream. Before you can use it for reading or writing, you must assign a device or a string. @@ -2186,7 +2191,7 @@ QTextStream &QTextStream::operator>>(QString &str) /*! \overload - Converts the word to ISO-8859-1, then stores it in \a array. + Converts the word to UTF-8, then stores it in \a array. \sa QString::toLatin1() */ @@ -2195,7 +2200,6 @@ QTextStream &QTextStream::operator>>(QByteArray &array) Q_D(QTextStream); CHECK_VALID_STREAM(*this); - array.clear(); d->scan(nullptr, nullptr, 0, QTextStreamPrivate::NotSpace); d->consumeLastToken(); @@ -2203,11 +2207,11 @@ QTextStream &QTextStream::operator>>(QByteArray &array) int length; if (!d->scan(&ptr, &length, 0, QTextStreamPrivate::Space)) { setStatus(ReadPastEnd); + array.clear(); return *this; } - for (int i = 0; i < length; ++i) - array += ptr[i].toLatin1(); + array = QStringView(ptr, length).toUtf8(); d->consumeLastToken(); return *this; @@ -2216,13 +2220,14 @@ QTextStream &QTextStream::operator>>(QByteArray &array) /*! \overload - Stores the word in \a c, terminated by a '\\0' character. If no word is - available, only the '\\0' character is stored. + Converts the word to UTF-8 and stores it in \a c, terminated by a '\\0' + character. If no word is available, only the '\\0' character is stored. Warning: Although convenient, this operator is dangerous and must be used with care. QTextStream assumes that \a c points to a buffer with enough space to hold the word. If the buffer is too - small, your application may crash. + small, your application may crash. For a word consisting of \c{n} QChars, + the buffer needs to be at least \c{3*n+1} characters long. If possible, use the QByteArray operator instead. */ @@ -2241,9 +2246,9 @@ QTextStream &QTextStream::operator>>(char *c) return *this; } - for (int i = 0; i < length; ++i) - *c++ = ptr[i].toLatin1(); - *c = '\0'; + QStringEncoder encoder(QStringConverter::Utf8); + char *e = encoder.appendToBuffer(c, ptr, length); + *e = '\0'; d->consumeLastToken(); return *this; } @@ -2559,7 +2564,7 @@ QTextStream &QTextStream::operator<<(const QByteArray &array) \overload Writes the constant string pointed to by \a string to the stream. \a - string is assumed to be in ISO-8859-1 encoding. This operator + string is assumed to be in UTF-8 encoding. This operator is convenient when working with constant string data. Example: \snippet code/src_corelib_io_qtextstream.cpp 8 @@ -2572,8 +2577,7 @@ QTextStream &QTextStream::operator<<(const char *string) { Q_D(QTextStream); CHECK_VALID_STREAM(*this); - // ### Qt6: consider changing to UTF-8 - d->putString(QLatin1String(string)); + d->putString(QUtf8StringView(string)); return *this; } diff --git a/src/corelib/serialization/qtextstream_p.h b/src/corelib/serialization/qtextstream_p.h index 067a5ec94a..7d6889aa70 100644 --- a/src/corelib/serialization/qtextstream_p.h +++ b/src/corelib/serialization/qtextstream_p.h @@ -177,6 +177,7 @@ public: inline void putString(const QString &ch, bool number = false) { putString(ch.constData(), ch.length(), number); } void putString(const QChar *data, int len, bool number = false); void putString(QLatin1String data, bool number = false); + void putString(QUtf8StringView data, bool number = false); inline void putChar(QChar ch); void putNumber(qulonglong number, bool negative); diff --git a/tests/auto/corelib/serialization/qtextstream/tst_qtextstream.cpp b/tests/auto/corelib/serialization/qtextstream/tst_qtextstream.cpp index e1fd7a7147..f9de9eeebf 100644 --- a/tests/auto/corelib/serialization/qtextstream/tst_qtextstream.cpp +++ b/tests/auto/corelib/serialization/qtextstream/tst_qtextstream.cpp @@ -2125,9 +2125,9 @@ void tst_QTextStream::generateStringData(bool for_QString) if (!for_QString) { QTest::newRow("utf16-BE (empty)") << QByteArray("\xff\xfe", 2) << QByteArray() << QString(); - QTest::newRow("utf16-BE (corrupt)") << QByteArray("\xff", 1) << QByteArray("\xff") << QString::fromLatin1("\xff"); + QTest::newRow("utf16-BE (corrupt)") << QByteArray("\xff", 1) << QByteArray("\xc3\xbf") << QString::fromUtf8("\xc3\xbf"); QTest::newRow("utf16-LE (empty)") << QByteArray("\xfe\xff", 2) << QByteArray() << QString(); - QTest::newRow("utf16-LE (corrupt)") << QByteArray("\xfe", 1) << QByteArray("\xfe") << QString::fromLatin1("\xfe"); + QTest::newRow("utf16-LE (corrupt)") << QByteArray("\xfe", 1) << QByteArray("\xc3\xbe") << QString::fromUtf8("\xc3\xbe"); } } diff --git a/tests/auto/corelib/text/qstring/tst_qstring.cpp b/tests/auto/corelib/text/qstring/tst_qstring.cpp index 8f77d03226..6e1b47a839 100644 --- a/tests/auto/corelib/text/qstring/tst_qstring.cpp +++ b/tests/auto/corelib/text/qstring/tst_qstring.cpp @@ -3963,8 +3963,16 @@ void tst_QString::check_QTextIOStream() { a=""; QTextStream ts(&a); + // invalid Utf8 ts << "pi \261= " << 3.125; - QCOMPARE(a, QString::fromLatin1("pi \261= 3.125")); + QCOMPARE(a, QString::fromUtf16(u"pi \xfffd= 3.125")); + } + { + a=""; + QTextStream ts(&a); + // valid Utf8 + ts << "pi ø= " << 3.125; + QCOMPARE(a, QString::fromUtf16(u"pi ø= 3.125")); } { a="123 456"; -- cgit v1.2.3