diff options
author | Shawn Rutledge <shawn.rutledge@qt.io> | 2024-03-01 00:39:50 -0700 |
---|---|---|
committer | Shawn Rutledge <shawn.rutledge@qt.io> | 2024-03-26 00:47:37 -0700 |
commit | 0281005a711c3635114ba92f778d0e9c8a89027d (patch) | |
tree | 5705f86f23c9ea20105c1d447b0cf73ba74b73fc /tests/auto/gui/text/qtextmarkdownwriter/tst_qtextmarkdownwriter.cpp | |
parent | ae8031b5e72032f9e2884c18cd72639acfd0d1a4 (diff) |
QTextMarkdownWriter: escape all backslashes
A literal backslash needs to be doubled so that the parser doesn't treat
it as escaping the following character when the markdown is read back.
In ca4774131b9b8ee40b4d7f5c1ba296af4700207f we tried to limit it to
backslashes that were not already escaped. In case someone really needs
a longer series of backslashes, it's more correct to escape them all;
but this comes with the risk that if they do not get un-escaped by the
markdown parser in some scenario, repeated round-trip saving and loading
could multiply them excessively. So we also add a lot of tests to try
to verify that this is safe.
Task-number: QTBUG-96051
Fixes: QTBUG-122083
Pick-to: 6.7
Change-Id: I64f610d24e99f67ebdc30d5ab5c6cf3985aec5ec
Reviewed-by: Axel Spoerl <axel.spoerl@qt.io>
Diffstat (limited to 'tests/auto/gui/text/qtextmarkdownwriter/tst_qtextmarkdownwriter.cpp')
-rw-r--r-- | tests/auto/gui/text/qtextmarkdownwriter/tst_qtextmarkdownwriter.cpp | 103 |
1 files changed, 102 insertions, 1 deletions
diff --git a/tests/auto/gui/text/qtextmarkdownwriter/tst_qtextmarkdownwriter.cpp b/tests/auto/gui/text/qtextmarkdownwriter/tst_qtextmarkdownwriter.cpp index d0ae34d67d..0d261bc27e 100644 --- a/tests/auto/gui/text/qtextmarkdownwriter/tst_qtextmarkdownwriter.cpp +++ b/tests/auto/gui/text/qtextmarkdownwriter/tst_qtextmarkdownwriter.cpp @@ -45,6 +45,8 @@ private slots: void rewriteDocument(); void fromHtml_data(); void fromHtml(); + void fromPlainTextAndBack_data(); + void fromPlainTextAndBack(); void escapeSpecialCharacters_data(); void escapeSpecialCharacters(); @@ -839,6 +841,27 @@ void tst_QTextMarkdownWriter::fromHtml_data() QTest::newRow("table with backslash in cell") << // QTBUG-96051 "<table><tr><td>1011011 [</td><td>1011100 backslash \\</td></tr></table>" << "|1011011 [|1011100 backslash \\\\|"; + // https://spec.commonmark.org/0.31.2/#example-12 + // escaping punctuation is ok, but QTextMarkdownWriter currently doesn't do that (which is also ok) + QTest::newRow("punctuation") << + R"(<p>!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~</p>)" << + R"(!"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~)"; + // https://spec.commonmark.org/0.31.2/#example-14 + QTest::newRow("backslash asterisk no emphasis") << // QTBUG-122083 + R"(\*no emphasis*)" << + R"(\\\*no emphasis*)"; + // https://spec.commonmark.org/0.31.2/#example-15 + QTest::newRow("backslash before emphasis") << + R"(\<em>emphasis</em>)" << + R"(\\*emphasis*)"; + // https://spec.commonmark.org/0.31.2/#example-20 + QTest::newRow("backslash-asterisk in autolink") << + R"(<p><a href="https://example.com?find=\\*">https://example.com?find=\*</a></p>)" << + R"(<https://example.com?find=\\*>)"; + // https://spec.commonmark.org/0.31.2/#example-24 + QTest::newRow("plus in fenced code lang") << + "<pre class=\"language-foo+bar\">foo</pre>" << + "```foo+bar\nfoo\n```"; } void tst_QTextMarkdownWriter::fromHtml() @@ -865,12 +888,90 @@ void tst_QTextMarkdownWriter::fromHtml() QCOMPARE(output, expectedOutput); } +void 
tst_QTextMarkdownWriter::fromPlainTextAndBack_data() +{ + QTest::addColumn<QString>("input"); + QTest::addColumn<QString>("expectedMarkdown"); + + // tests to verify that fixing QTBUG-122083 is safe + QTest::newRow("single backslashes") << + R"(\ again: \ not esc: \* \-\-\ \*abc*)" << + R"(\\ again: \\ not esc: \\* \\-\\-\\ \\\*abc*)"; + // https://spec.commonmark.org/0.31.2/#example-12 + QTest::newRow("punctuation") << + R"(!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~)" << + R"(!"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~)"; + // https://spec.commonmark.org/0.31.2/#example-13 + QTest::newRow("literal backslashes") << + QString(uR"(\→\A\a\ \3\φ\«)") << + "\\\\\u2192\\\\A\\\\a\\\\ \\\\3\\\\\u03C6\\\\\u00AB"; + // https://spec.commonmark.org/0.31.2/#example-14 + QTest::newRow("escape to avoid em") << + R"(*not emphasized*)" << + R"(\*not emphasized*)"; + QTest::newRow("escape to avoid html") << + R"(<br/> not a tag)" << + R"(\<br/> not a tag)"; + QTest::newRow("escape to avoid link") << + R"([not a link](/foo))" << + R"(\[not a link](/foo))"; + QTest::newRow("escape to avoid mono") << + R"(`not code`)" << + R"(\`not code`)"; + QTest::newRow("escape to avoid num list") << + R"(1. not a list)" << + R"(1\. 
not a list)"; + QTest::newRow("escape to avoid list") << + R"(* not a list)" << + R"(\* not a list)"; + QTest::newRow("escape to avoid heading") << + R"(# not a heading)" << + R"(\# not a heading)"; + QTest::newRow("escape to avoid reflink") << + R"([foo]: /url "not a reference")" << + R"(\[foo]: /url "not a reference")"; + QTest::newRow("escape to avoid entity") << + R"(&ouml; not a character entity)" << + R"(\&ouml; not a character entity)"; + // end of tests to verify that fixing QTBUG-122083 is safe + // (it's ok to add unrelated plain-to-markdown-to-plaintext cases later) +} + +void tst_QTextMarkdownWriter::fromPlainTextAndBack() +{ + QFETCH(QString, input); + QFETCH(QString, expectedMarkdown); + + document->setPlainText(input); + QString output = documentToUnixMarkdown(); + +#ifdef DEBUG_WRITE_OUTPUT + { + QFile out("/tmp/" + QLatin1String(QTest::currentDataTag()) + ".md"); + out.open(QFile::WriteOnly); + out.write(output.toUtf8()); + out.close(); + } +#endif + + output = output.trimmed(); + expectedMarkdown = expectedMarkdown.trimmed(); + if (output != expectedMarkdown && (isMainFontFixed() || isFixedFontProportional())) + QSKIP("", "fixed main font or proportional fixed font (QTBUG-103484)"); + QCOMPARE(output, expectedMarkdown); + QCOMPARE(document->toPlainText(), input); + document->setMarkdown(output); + QCOMPARE(document->toPlainText(), input); + if (document->blockCount() == 1) + QCOMPARE(document->firstBlock().text(), input); +} + void tst_QTextMarkdownWriter::escapeSpecialCharacters_data() { QTest::addColumn<QString>("input"); QTest::addColumn<QString>("expectedOutput"); - QTest::newRow("backslash") << "foo \\ bar \\\\ baz \\" << "foo \\\\ bar \\\\ baz \\\\"; + QTest::newRow("backslash") << "foo \\ bar \\\\ baz \\" << "foo \\\\ bar \\\\\\\\ baz \\\\"; QTest::newRow("not emphasized") << "*normal* **normal too**" << "\\*normal* \\**normal too**"; QTest::newRow("not code") << "`normal` `normal too`" << "\\`normal` \\`normal too`"; QTest::newRow("code fence") << 
"```not a fence; ``` no risk here; ```not a fence" // TODO slightly inconsistent |