diff options
author | Thiago Macieira <thiago.macieira@intel.com> | 2024-02-15 15:04:18 -0800 |
---|---|---|
committer | Thiago Macieira <thiago.macieira@intel.com> | 2024-04-18 14:35:09 -0700 |
commit | 94c62e322264e2e7d61193ae74ba8556a330385c (patch) | |
tree | 294690436c07cb22159545f38daed4538c51e8fe /tests/auto | |
parent | 17c964c4e874ab59a2af7859ae23f5cb4ad01d36 (diff) |
QXmlStreamWriter: decode UTF-8 into code points
We were iterating over code *units* and that yielded wrong results. The
one from the bug report was simply caused by the fact that
QUtf8StringView::value_type is char, which is signed on x86, so the
expression:
*it <= u'\x1F'
was true for all non-US-ASCII content (every UTF-8 byte >= 0x80 is negative as a signed char).
But in attempting to fix this, I needed to do the proper UTF-8 decoding,
as otherwise we wouldn't catch non-Latin1 sequences and such.
[ChangeLog][QtCore][QXmlStreamWriter] Fixed a bug that caused the class
to fail to write UTF-8 strings with non-US-ASCII content when passed as
a QUtf8StringView.
Fixes: QTBUG-122241
Pick-to: 6.5 6.6 6.7
Change-Id: I83dda2d36c904517b3c0fffd17b42bbf09a493d0
Reviewed-by: Mate Barany <mate.barany@qt.io>
Diffstat (limited to 'tests/auto')
-rw-r--r-- | tests/auto/corelib/serialization/qxmlstream/tst_qxmlstream.cpp | 125 |
1 files changed, 125 insertions, 0 deletions
diff --git a/tests/auto/corelib/serialization/qxmlstream/tst_qxmlstream.cpp b/tests/auto/corelib/serialization/qxmlstream/tst_qxmlstream.cpp index 436ff676f6..89b616b56e 100644 --- a/tests/auto/corelib/serialization/qxmlstream/tst_qxmlstream.cpp +++ b/tests/auto/corelib/serialization/qxmlstream/tst_qxmlstream.cpp @@ -572,6 +572,12 @@ private slots: void hasAttribute() const; void writeWithUtf8Codec() const; void writeWithStandalone() const; + void writeCharacters_data() const; + void writeCharacters() const; + void writeAttribute_data() const; + void writeAttribute() const; + void writeBadCharactersUtf8_data() const; + void writeBadCharactersUtf8() const; void entitiesAndWhitespace_1() const; void entitiesAndWhitespace_2() const; void testFalsePrematureError() const; @@ -1407,6 +1413,125 @@ void tst_QXmlStream::writeWithStandalone() const } } +static void writeCharacters_data_common() +{ + QTest::addColumn<QString>("input"); + QTest::addColumn<QString>("output"); + + QTest::newRow("empty") << QString() << QString(); + + // invalid content + QTest::newRow("null-character") << u"\0"_s << QString(); + QTest::newRow("vertical-tab") << "\v" << QString(); + QTest::newRow("form-feed") << "\f" << QString(); + QTest::newRow("esc") << "\x1f" << QString(); + QTest::newRow("U+FFFE") << u"\xfffe"_s << QString(); + QTest::newRow("U+FFFF") << u"\xffff"_s << QString(); + + // simple strings + QTest::newRow("us-ascii") << "Hello, world" << "Hello, world"; + QTest::newRow("latin1") << "Bokmål" << "Bokmål"; + QTest::newRow("nonlatin1") << "Ελληνικά" << "Ελληνικά"; + QTest::newRow("nonbmp") << u"\U00010000"_s << u"\U00010000"_s; + + // escaped content + QTest::newRow("less-than") << "<" << "&lt;"; + QTest::newRow("greater-than") << ">" << "&gt;"; + QTest::newRow("ampersand") << "&" << "&amp;"; + QTest::newRow("quote") << "\"" << "&quot;"; +} + +template <typename Execute, typename Transform> +static void writeCharacters_common(Execute &&exec, Transform &&transform) +{ + QFETCH(QString, input); + 
QFETCH(QString, output); + QStringView utf16 = input; + QByteArray utf8ba = input.toUtf8(); + QUtf8StringView utf8(utf8ba); + + // may be invalid if input is not Latin1 + QByteArray l1ba = input.toLatin1(); + QLatin1StringView l1(l1ba); + if (l1 != input) + l1 = {}; + + auto write = [&](auto input) -> std::optional<QString> { + QString result; + QXmlStreamWriter writer(&result); + writer.writeStartElement("a"); + exec(writer, input); + writer.writeEndElement(); + if (writer.hasError()) + return std::nullopt; + return result; + }; + + if (input.isNull() != output.isNull()) { + // error + QCOMPARE(write(utf16), std::nullopt); + QCOMPARE(write(utf8), std::nullopt); + if (!l1.isEmpty()) + QCOMPARE(write(l1), std::nullopt); + } else { + output = transform(output); + QCOMPARE(write(utf16), output); + QCOMPARE(write(utf8), output); + if (!l1.isEmpty()) + QCOMPARE(write(l1), output); + } +} + +void tst_QXmlStream::writeCharacters_data() const +{ + writeCharacters_data_common(); + QTest::newRow("tab") << "\t" << "\t"; + QTest::newRow("newline") << "\n" << "\n"; + QTest::newRow("carriage-return") << "\r" << "\r"; +} + +void tst_QXmlStream::writeCharacters() const +{ + auto exec = [](QXmlStreamWriter &writer, auto input) { + writer.writeCharacters(input); + }; + auto transform = [](auto output) { return "<a>" + output + "</a>"; }; + writeCharacters_common(exec, transform); +} + +void tst_QXmlStream::writeAttribute_data() const +{ + writeCharacters_data_common(); + QTest::newRow("tab") << "\t" << "&#9;"; + QTest::newRow("newline") << "\n" << "&#10;"; + QTest::newRow("carriage-return") << "\r" << "&#13;"; +} + +void tst_QXmlStream::writeAttribute() const +{ + auto exec = [](QXmlStreamWriter &writer, auto input) { + writer.writeAttribute("b", input); + }; + auto transform = [](auto output) { return "<a b=\"" + output + "\"/>"; }; + writeCharacters_common(exec, transform); +} + +#include "../../io/qurlinternal/utf8data.cpp" +void tst_QXmlStream::writeBadCharactersUtf8_data() const +{ + 
QTest::addColumn<QByteArray>("input"); + loadInvalidUtf8Rows(); +} + +void tst_QXmlStream::writeBadCharactersUtf8() const +{ + QFETCH(QByteArray, input); + QString target; + QXmlStreamWriter writer(&target); + writer.writeTextElement("a", QUtf8StringView(input)); + QVERIFY(writer.hasError()); +} + void tst_QXmlStream::entitiesAndWhitespace_1() const { QXmlStreamReader reader(QLatin1String("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.1//EN\" \"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\"><test>&extEnt;</test>")); |