summaryrefslogtreecommitdiffstats
path: root/tests/auto/gui/text/qtextmarkdownwriter/tst_qtextmarkdownwriter.cpp
diff options
context:
space:
mode:
author: Shawn Rutledge <shawn.rutledge@qt.io> 2024-03-01 00:39:50 -0700
committer: Shawn Rutledge <shawn.rutledge@qt.io> 2024-03-26 00:47:37 -0700
commit: 0281005a711c3635114ba92f778d0e9c8a89027d (patch)
tree: 5705f86f23c9ea20105c1d447b0cf73ba74b73fc /tests/auto/gui/text/qtextmarkdownwriter/tst_qtextmarkdownwriter.cpp
parent: ae8031b5e72032f9e2884c18cd72639acfd0d1a4 (diff)
QTextMarkdownWriter: escape all backslashes
A literal backslash needs to be doubled so that the parser doesn't treat it as escaping the following character when the markdown is read back. In ca4774131b9b8ee40b4d7f5c1ba296af4700207f we tried to limit it to backslashes that were not already escaped. In case someone really needs a longer series of backslashes, it's more correct to escape them all; but this comes with the risk that if they do not get un-escaped by the markdown parser in some scenario, repeated round-trip saving and loading could multiply them excessively. So we also add a lot of tests to try to verify that this is safe. Task-number: QTBUG-96051 Fixes: QTBUG-122083 Pick-to: 6.7 Change-Id: I64f610d24e99f67ebdc30d5ab5c6cf3985aec5ec Reviewed-by: Axel Spoerl <axel.spoerl@qt.io>
Diffstat (limited to 'tests/auto/gui/text/qtextmarkdownwriter/tst_qtextmarkdownwriter.cpp')
-rw-r--r-- tests/auto/gui/text/qtextmarkdownwriter/tst_qtextmarkdownwriter.cpp | 103
1 files changed, 102 insertions, 1 deletions
diff --git a/tests/auto/gui/text/qtextmarkdownwriter/tst_qtextmarkdownwriter.cpp b/tests/auto/gui/text/qtextmarkdownwriter/tst_qtextmarkdownwriter.cpp
index d0ae34d67d..0d261bc27e 100644
--- a/tests/auto/gui/text/qtextmarkdownwriter/tst_qtextmarkdownwriter.cpp
+++ b/tests/auto/gui/text/qtextmarkdownwriter/tst_qtextmarkdownwriter.cpp
@@ -45,6 +45,8 @@ private slots:
void rewriteDocument();
void fromHtml_data();
void fromHtml();
+ void fromPlainTextAndBack_data();
+ void fromPlainTextAndBack();
void escapeSpecialCharacters_data();
void escapeSpecialCharacters();
@@ -839,6 +841,27 @@ void tst_QTextMarkdownWriter::fromHtml_data()
QTest::newRow("table with backslash in cell") << // QTBUG-96051
"<table><tr><td>1011011 [</td><td>1011100 backslash \\</td></tr></table>" <<
"|1011011 [|1011100 backslash \\\\|";
+ // https://spec.commonmark.org/0.31.2/#example-12
+ // escaping punctuation is ok, but QTextMarkdownWriter currently doesn't do that (which is also ok)
+ QTest::newRow("punctuation") <<
+ R"(<p>!&quot;#$%&amp;'()*+,-./:;&lt;=&gt;?@[\]^_`{|}~</p>)" <<
+ R"(!"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~)";
+ // https://spec.commonmark.org/0.31.2/#example-14
+ QTest::newRow("backslash asterisk no emphasis") << // QTBUG-122083
+ R"(\*no emphasis*)" <<
+ R"(\\\*no emphasis*)";
+ // https://spec.commonmark.org/0.31.2/#example-15
+ QTest::newRow("backslash before emphasis") <<
+ R"(\<em>emphasis</em>)" <<
+ R"(\\*emphasis*)";
+ // https://spec.commonmark.org/0.31.2/#example-20
+ QTest::newRow("backslash-asterisk in autolink") <<
+ R"(<p><a href="https://example.com?find=\\*">https://example.com?find=\*</a></p>)" <<
+ R"(<https://example.com?find=\\*>)";
+ // https://spec.commonmark.org/0.31.2/#example-24
+ QTest::newRow("plus in fenced code lang") <<
+ "<pre class=\"language-foo+bar\">foo</pre>" <<
+ "```foo+bar\nfoo\n```";
}
void tst_QTextMarkdownWriter::fromHtml()
@@ -865,12 +888,90 @@ void tst_QTextMarkdownWriter::fromHtml()
QCOMPARE(output, expectedOutput);
}
+void tst_QTextMarkdownWriter::fromPlainTextAndBack_data() // data rows fetched by fromPlainTextAndBack()
+{
+ QTest::addColumn<QString>("input"); // plain text that the test loads with setPlainText()
+ QTest::addColumn<QString>("expectedMarkdown"); // markdown the writer should produce (the test compares after trimming)
+
+ // tests to verify that fixing QTBUG-122083 is safe: in these rows every literal
+ // backslash of the input appears doubled in the expected markdown
+ QTest::newRow("single backslashes") <<
+ R"(\ again: \ not esc: \* \-\-\ \*abc*)" <<
+ R"(\\ again: \\ not esc: \\* \\-\\-\\ \\\*abc*)";
+ // https://spec.commonmark.org/0.31.2/#example-12 (ASCII punctuation: only the backslash gains an escape here)
+ QTest::newRow("punctuation") <<
+ R"(!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~)" <<
+ R"(!"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~)";
+ // https://spec.commonmark.org/0.31.2/#example-13 (backslashes in front of other characters; each one is expected doubled)
+ QTest::newRow("literal backslashes") <<
+ QString(uR"(\→\A\a\ \3\φ\«)") <<
+ "\\\\\u2192\\\\A\\\\a\\\\ \\\\3\\\\\u03C6\\\\\u00AB";
+ // https://spec.commonmark.org/0.31.2/#example-14 (from here on: plain text that looks like
+ // markdown syntax is expected to get exactly one escaping backslash)
+ QTest::newRow("escape to avoid em") <<
+ R"(*not emphasized*)" <<
+ R"(\*not emphasized*)";
+ QTest::newRow("escape to avoid html") <<
+ R"(<br/> not a tag)" <<
+ R"(\<br/> not a tag)";
+ QTest::newRow("escape to avoid link") <<
+ R"([not a link](/foo))" <<
+ R"(\[not a link](/foo))";
+ QTest::newRow("escape to avoid mono") <<
+ R"(`not code`)" <<
+ R"(\`not code`)";
+ QTest::newRow("escape to avoid num list") <<
+ R"(1. not a list)" <<
+ R"(1\. not a list)";
+ QTest::newRow("escape to avoid list") <<
+ R"(* not a list)" <<
+ R"(\* not a list)";
+ QTest::newRow("escape to avoid heading") <<
+ R"(# not a heading)" <<
+ R"(\# not a heading)";
+ QTest::newRow("escape to avoid reflink") <<
+ R"([foo]: /url "not a reference")" <<
+ R"(\[foo]: /url "not a reference")";
+ QTest::newRow("escape to avoid entity") <<
+ R"(&ouml; not a character entity)" <<
+ R"(\&ouml; not a character entity)";
+ // end of tests to verify that fixing QTBUG-122083 is safe
+ // (it's ok to add unrelated plain-to-markdown-to-plaintext cases later)
+}
+
+// Round-trip check for one data row: loads the plain-text input into the
+// document, writes it out as markdown and compares that with expectedMarkdown,
+// then reads the markdown back and verifies that the plain text is unchanged.
+void tst_QTextMarkdownWriter::fromPlainTextAndBack()
+{
+    QFETCH(QString, input);
+    QFETCH(QString, expectedMarkdown);
+
+    document->setPlainText(input);
+    QString output = documentToUnixMarkdown();
+
+#ifdef DEBUG_WRITE_OUTPUT
+    {
+        // Debug aid: dump the raw (untrimmed) markdown to /tmp/<data tag>.md.
+        QFile out("/tmp/" + QLatin1String(QTest::currentDataTag()) + ".md");
+        if (out.open(QFile::WriteOnly)) {
+            out.write(output.toUtf8());
+            out.close();
+        } else {
+            // Do not write to a file that failed to open; report it instead.
+            qWarning("cannot open %s for writing: %s",
+                     qPrintable(out.fileName()), qPrintable(out.errorString()));
+        }
+    }
+#endif
+
+    output = output.trimmed();
+    expectedMarkdown = expectedMarkdown.trimmed();
+    // QSKIP() reports only its first argument, so the reason has to be passed
+    // there; otherwise the skip shows up in the test log without any message.
+    if (output != expectedMarkdown && (isMainFontFixed() || isFixedFontProportional()))
+        QSKIP("fixed main font or proportional fixed font (QTBUG-103484)");
+    QCOMPARE(output, expectedMarkdown);
+    // Writing markdown must not have modified the document itself.
+    QCOMPARE(document->toPlainText(), input);
+    // Read the generated markdown back: the escapes must be undone again.
+    document->setMarkdown(output);
+    QCOMPARE(document->toPlainText(), input);
+    // With a single block, also compare that block's own text with the input.
+    if (document->blockCount() == 1)
+        QCOMPARE(document->firstBlock().text(), input);
+}
+
void tst_QTextMarkdownWriter::escapeSpecialCharacters_data()
{
QTest::addColumn<QString>("input");
QTest::addColumn<QString>("expectedOutput");
- QTest::newRow("backslash") << "foo \\ bar \\\\ baz \\" << "foo \\\\ bar \\\\ baz \\\\";
+ QTest::newRow("backslash") << "foo \\ bar \\\\ baz \\" << "foo \\\\ bar \\\\\\\\ baz \\\\";
QTest::newRow("not emphasized") << "*normal* **normal too**" << "\\*normal* \\**normal too**";
QTest::newRow("not code") << "`normal` `normal too`" << "\\`normal` \\`normal too`";
QTest::newRow("code fence") << "```not a fence; ``` no risk here; ```not a fence" // TODO slightly inconsistent