summaryrefslogtreecommitdiffstats
path: root/tests/auto
diff options
context:
space:
mode:
authorThiago Macieira <thiago.macieira@intel.com>2024-02-15 15:04:18 -0800
committerThiago Macieira <thiago.macieira@intel.com>2024-04-18 14:35:09 -0700
commit94c62e322264e2e7d61193ae74ba8556a330385c (patch)
tree294690436c07cb22159545f38daed4538c51e8fe /tests/auto
parent17c964c4e874ab59a2af7859ae23f5cb4ad01d36 (diff)
QXmlStreamWriter: decode UTF-8 into code points
We were iterating over code *units* and that yielded wrong results. The one from the bug report was simply caused by the fact that QUtf8StringView::value_type is char, which is signed on x86, so the expression: *it <= u'\x1F' was true for all non-Latin1 content. But in attempting to fix this, I needed to do the proper UTF-8 decoding, as otherwise we wouldn't catch non-Latin1 sequences and such. [ChangeLog][QtCore][QXmlStreamWriter] Fixed a bug that caused the class to fail to write UTF-8 strings with non-US-ASCII content when passed as a QUtf8StringView. Fixes: QTBUG-122241 Pick-to: 6.5 6.6 6.7 Change-Id: I83dda2d36c904517b3c0fffd17b42bbf09a493d0 Reviewed-by: Mate Barany <mate.barany@qt.io>
Diffstat (limited to 'tests/auto')
-rw-r--r--tests/auto/corelib/serialization/qxmlstream/tst_qxmlstream.cpp125
1 files changed, 125 insertions, 0 deletions
diff --git a/tests/auto/corelib/serialization/qxmlstream/tst_qxmlstream.cpp b/tests/auto/corelib/serialization/qxmlstream/tst_qxmlstream.cpp
index 436ff676f6..89b616b56e 100644
--- a/tests/auto/corelib/serialization/qxmlstream/tst_qxmlstream.cpp
+++ b/tests/auto/corelib/serialization/qxmlstream/tst_qxmlstream.cpp
@@ -572,6 +572,12 @@ private slots:
void hasAttribute() const;
void writeWithUtf8Codec() const;
void writeWithStandalone() const;
+ void writeCharacters_data() const;
+ void writeCharacters() const;
+ void writeAttribute_data() const;
+ void writeAttribute() const;
+ void writeBadCharactersUtf8_data() const;
+ void writeBadCharactersUtf8() const;
void entitiesAndWhitespace_1() const;
void entitiesAndWhitespace_2() const;
void testFalsePrematureError() const;
@@ -1407,6 +1413,125 @@ void tst_QXmlStream::writeWithStandalone() const
}
}
+static void writeCharacters_data_common()
+{
+ QTest::addColumn<QString>("input");
+ QTest::addColumn<QString>("output");
+
+ QTest::newRow("empty") << QString() << QString();
+
+ // invalid content
+ QTest::newRow("null-character") << u"\0"_s << QString();
+ QTest::newRow("vertical-tab") << "\v" << QString();
+ QTest::newRow("form-feed") << "\f" << QString();
+ QTest::newRow("esc") << "\x1f" << QString();
+ QTest::newRow("U+FFFE") << u"\xfffe"_s << QString();
+ QTest::newRow("U+FFFF") << u"\xffff"_s << QString();
+
+ // simple strings
+ QTest::newRow("us-ascii") << "Hello, world" << "Hello, world";
+ QTest::newRow("latin1") << "Bokmål" << "Bokmål";
+ QTest::newRow("nonlatin1") << "Ελληνικά" << "Ελληνικά";
+ QTest::newRow("nonbmp") << u"\U00010000"_s << u"\U00010000"_s;
+
+ // escaped content
+ QTest::newRow("less-than") << "<" << "&lt;";
+ QTest::newRow("greater-than") << ">" << "&gt;";
+ QTest::newRow("ampersand") << "&" << "&amp;";
+ QTest::newRow("quote") << "\"" << "&quot;";
+}
+
+template <typename Execute, typename Transform>
+static void writeCharacters_common(Execute &&exec, Transform &&transform)
+{
+ QFETCH(QString, input);
+ QFETCH(QString, output);
+ QStringView utf16 = input;
+ QByteArray utf8ba = input.toUtf8();
+ QUtf8StringView utf8(utf8ba);
+
+ // may be invalid if input is not Latin1
+ QByteArray l1ba = input.toLatin1();
+ QLatin1StringView l1(l1ba);
+ if (l1 != input)
+ l1 = {};
+
+ auto write = [&](auto input) -> std::optional<QString> {
+ QString result;
+ QXmlStreamWriter writer(&result);
+ writer.writeStartElement("a");
+ exec(writer, input);
+ writer.writeEndElement();
+ if (writer.hasError())
+ return std::nullopt;
+ return result;
+ };
+
+ if (input.isNull() != output.isNull()) {
+ // error
+ QCOMPARE(write(utf16), std::nullopt);
+ QCOMPARE(write(utf8), std::nullopt);
+ if (!l1.isEmpty())
+ QCOMPARE(write(l1), std::nullopt);
+ } else {
+ output = transform(output);
+ QCOMPARE(write(utf16), output);
+ QCOMPARE(write(utf8), output);
+ if (!l1.isEmpty())
+ QCOMPARE(write(l1), output);
+ }
+}
+
+void tst_QXmlStream::writeCharacters_data() const
+{
+ writeCharacters_data_common();
+ QTest::newRow("tab") << "\t" << "\t";
+ QTest::newRow("newline") << "\n" << "\n";
+ QTest::newRow("carriage-return") << "\r" << "\r";
+}
+
+void tst_QXmlStream::writeCharacters() const
+{
+ auto exec = [](QXmlStreamWriter &writer, auto input) {
+ writer.writeCharacters(input);
+ };
+ auto transform = [](auto output) { return "<a>" + output + "</a>"; };
+ writeCharacters_common(exec, transform);
+}
+
+void tst_QXmlStream::writeAttribute_data() const
+{
+ writeCharacters_data_common();
+ QTest::newRow("tab") << "\t" << "&#9;";
+ QTest::newRow("newline") << "\n" << "&#10;";
+ QTest::newRow("carriage-return") << "\r" << "&#13;";
+}
+
+void tst_QXmlStream::writeAttribute() const
+{
+ auto exec = [](QXmlStreamWriter &writer, auto input) {
+ writer.writeAttribute("b", input);
+ };
+ auto transform = [](auto output) { return "<a b=\"" + output + "\"/>"; };
+ writeCharacters_common(exec, transform);
+}
+
+#include "../../io/qurlinternal/utf8data.cpp"
+void tst_QXmlStream::writeBadCharactersUtf8_data() const
+{
+ QTest::addColumn<QByteArray>("input");
+ loadInvalidUtf8Rows();
+}
+
+void tst_QXmlStream::writeBadCharactersUtf8() const
+{
+ QFETCH(QByteArray, input);
+ QString target;
+ QXmlStreamWriter writer(&target);
+ writer.writeTextElement("a", QUtf8StringView(input));
+ QVERIFY(writer.hasError());
+}
+
void tst_QXmlStream::entitiesAndWhitespace_1() const
{
QXmlStreamReader reader(QLatin1String("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.1//EN\" \"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\"><test>&extEnt;</test>"));