summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorShawn Rutledge <shawn.rutledge@qt.io>2024-03-01 00:39:50 -0700
committerQt Cherry-pick Bot <cherrypick_bot@qt-project.org>2024-03-05 04:04:01 +0000
commit106442586e6ad5d8b2ebf0151cc074da8819e4a5 (patch)
treec25a2d77bae663a880287b30c53dfd240ebbf672
parent2d6034a9c8d1e08d93c856efb99ca9f6a4d548c8 (diff)
QTextMarkdownWriter: escape special characters (line or word prefix)
Try to avoid writing anything that the parser would misinterpret. Escape pre-existing backslashes, but not those that are already escaped. Optimize maybeEscapeFirstChar() slightly and apply it to every line of output (except in code blocks), not only to new lines created by word-wrapping. Since it would be hard to do this without using regular expressions, the markdown writer feature now depends on the regex feature. Fixes: QTBUG-96051 Fixes: QTBUG-122083 Change-Id: I8d95366501fd31441829081c668f11a3a3a23fe2 Reviewed-by: Axel Spoerl <axel.spoerl@qt.io> Reviewed-by: Qt CI Bot <qt_ci_bot@qt-project.org> (cherry picked from commit ca4774131b9b8ee40b4d7f5c1ba296af4700207f) Reviewed-by: Qt Cherry-pick Bot <cherrypick_bot@qt-project.org>
-rw-r--r--src/gui/configure.cmake1
-rw-r--r--src/gui/text/qtextmarkdownwriter.cpp54
-rw-r--r--tests/auto/gui/text/qtextmarkdownimporter/tst_qtextmarkdownimporter.cpp2
-rw-r--r--tests/auto/gui/text/qtextmarkdownwriter/tst_qtextmarkdownwriter.cpp66
4 files changed, 115 insertions, 8 deletions
diff --git a/src/gui/configure.cmake b/src/gui/configure.cmake
index 51b4bc515a..4affd7b30a 100644
--- a/src/gui/configure.cmake
+++ b/src/gui/configure.cmake
@@ -1012,6 +1012,7 @@ qt_feature("system-textmarkdownreader" PUBLIC
qt_feature("textmarkdownwriter" PUBLIC
SECTION "Kernel"
LABEL "MarkdownWriter"
+ CONDITION QT_FEATURE_regularexpression
PURPOSE "Provides a Markdown (CommonMark and GitHub) writer"
)
qt_feature("textodfwriter" PUBLIC
diff --git a/src/gui/text/qtextmarkdownwriter.cpp b/src/gui/text/qtextmarkdownwriter.cpp
index ad4859f398..3c05fed6a6 100644
--- a/src/gui/text/qtextmarkdownwriter.cpp
+++ b/src/gui/text/qtextmarkdownwriter.cpp
@@ -11,6 +11,7 @@
#include "qtextcursor.h"
#include "qtextimagehandler_p.h"
#include "qloggingcategory.h"
+#include <QtCore/QRegularExpression>
#if QT_CONFIG(itemmodel)
#include "qabstractitemmodel.h"
#endif
@@ -273,15 +274,58 @@ static int adjacentBackticksCount(const QString &s)
return ret;
}
+/*! \internal
+ Escape anything at the beginning of a line of markdown that would be
+ misinterpreted by a markdown parser, including any period that follows a
+ number (to avoid misinterpretation as a numbered list item).
+ https://spec.commonmark.org/0.31.2/#backslash-escapes
+*/
static void maybeEscapeFirstChar(QString &s)
{
+ static const QRegularExpression numericListRe(uR"(\d+([\.)])\s)"_s);
+ static const QLatin1StringView specialFirstCharacters("#*+-");
+
QString sTrimmed = s.trimmed();
if (sTrimmed.isEmpty())
return;
- char firstChar = sTrimmed.at(0).toLatin1();
- if (firstChar == '*' || firstChar == '+' || firstChar == '-') {
- int i = s.indexOf(QLatin1Char(firstChar));
+ QChar firstChar = sTrimmed.at(0);
+ if (specialFirstCharacters.contains(firstChar)) {
+ int i = s.indexOf(firstChar); // == 0 unless s got trimmed
s.insert(i, u'\\');
+ } else {
+ auto match = numericListRe.match(s, 0, QRegularExpression::NormalMatch,
+ QRegularExpression::AnchorAtOffsetMatchOption);
+ if (match.hasMatch())
+ s.insert(match.capturedStart(1), qtmw_Backslash);
+ }
+}
+
+/*! \internal
+ Escape unescaped backslashes. Then escape any special character that stands
+ alone or prefixes a "word", including the \c < that starts an HTML tag.
+ https://spec.commonmark.org/0.31.2/#backslash-escapes
+*/
+static void escapeSpecialCharacters(QString &s)
+{
+ static const QRegularExpression backslashRe(uR"([^\\]\\)"_s);
+ static const QRegularExpression spaceRe(uR"(\s+)"_s);
+ static const QRegularExpression specialRe(uR"([<!*[`&]+[/\w])"_s);
+
+ int i = 0;
+ while (i >= 0) {
+ if (int j = s.indexOf(backslashRe, i); j >= 0) {
+ ++j; // we found some char before the backslash that needs escaping
+ if (s.size() == j + 1 || s.at(j + 1) != qtmw_Backslash)
+ s.insert(j, qtmw_Backslash);
+ i = j + 3;
+ }
+ if (int j = s.indexOf(specialRe, i); j >= 0 && (j == 0 || s.at(j - 1) != u'\\')) {
+ s.insert(j, qtmw_Backslash);
+ i = j + 3;
+ }
+ i = s.indexOf(spaceRe, i);
+ if (i >= 0)
+ ++i; // past the whitespace, if found
}
}
@@ -491,6 +535,10 @@ int QTextMarkdownWriter::writeBlock(const QTextBlock &block, bool wrap, bool ign
QString fragmentText = frag.fragment().text();
while (fragmentText.endsWith(qtmw_Newline))
fragmentText.chop(1);
+ if (!(m_fencedCodeBlock || m_indentedCodeBlock)) {
+ escapeSpecialCharacters(fragmentText);
+ maybeEscapeFirstChar(fragmentText);
+ }
if (block.textList()) { // <li>first line</br>continuation</li>
QString newlineIndent =
QString(qtmw_Newline) + QString(m_wrappedLineIndent, qtmw_Space);
diff --git a/tests/auto/gui/text/qtextmarkdownimporter/tst_qtextmarkdownimporter.cpp b/tests/auto/gui/text/qtextmarkdownimporter/tst_qtextmarkdownimporter.cpp
index 7b26a27110..0c889947a8 100644
--- a/tests/auto/gui/text/qtextmarkdownimporter/tst_qtextmarkdownimporter.cpp
+++ b/tests/auto/gui/text/qtextmarkdownimporter/tst_qtextmarkdownimporter.cpp
@@ -214,7 +214,7 @@ void tst_QTextMarkdownImporter::lists_data()
QTest::newRow("hyphen space newline") << "- \n" << 0 << 1 << 1 << true << "- \n";
QTest::newRow("hyphen space letter newline") << "- a\n" << 0 << 1 << 1 << false << "- a\n";
QTest::newRow("hyphen nbsp newline") <<
- QString::fromUtf8("-\u00A0\n") << 0 << 1 << 0 << true << "-\u00A0\n\n";
+ QString::fromUtf8("-\u00A0\n") << 0 << 1 << 0 << true << "\\-\u00A0\n\n";
QTest::newRow("nested empty lists") << "*\n *\n *\n" << 0 << 1 << 1 << true << " * \n";
QTest::newRow("list nested in empty list") << "-\n * a\n" << 0 << 1 << 2 << false << "- \n * a\n";
QTest::newRow("lists nested in empty lists")
diff --git a/tests/auto/gui/text/qtextmarkdownwriter/tst_qtextmarkdownwriter.cpp b/tests/auto/gui/text/qtextmarkdownwriter/tst_qtextmarkdownwriter.cpp
index ea99c87840..182aa920b9 100644
--- a/tests/auto/gui/text/qtextmarkdownwriter/tst_qtextmarkdownwriter.cpp
+++ b/tests/auto/gui/text/qtextmarkdownwriter/tst_qtextmarkdownwriter.cpp
@@ -44,6 +44,8 @@ private slots:
void rewriteDocument();
void fromHtml_data();
void fromHtml();
+ void escapeSpecialCharacters_data();
+ void escapeSpecialCharacters();
private:
bool isMainFontFixed();
@@ -807,16 +809,21 @@ void tst_QTextMarkdownWriter::fromHtml_data()
QTest::newRow("code") <<
"<pre class=\"language-pseudocode\">\n#include \"foo.h\"\n\nblock {\n statement();\n}\n\n</pre>" <<
"```pseudocode\n#include \"foo.h\"\n\nblock {\n statement();\n}\n\n```\n\n";
- // TODO
-// QTest::newRow("escaped number and paren after double newline") <<
-// "<p>(The first sentence of this paragraph is a line, the next paragraph has a number</p>13) but that's not part of an ordered list" <<
-// "(The first sentence of this paragraph is a line, the next paragraph has a number\n\n13\\) but that's not part of an ordered list\n\n";
+ QTest::newRow("escaped number and paren after single newline") <<
+ "<p>(The first sentence of this paragraph is a line, next paragraph has a number 13) but that's not part of an ordered list</p>" <<
+ "(The first sentence of this paragraph is a line, next paragraph has a number\n13\\) but that's not part of an ordered list\n\n";
+ QTest::newRow("escaped number and paren after double newline") <<
+ "<p>(The first sentence of this paragraph is a line, the next paragraph has a number</p>13) but that's not part of an ordered list" <<
+ "(The first sentence of this paragraph is a line, the next paragraph has a number\n\n13\\) but that's not part of an ordered list\n\n";
QTest::newRow("preformats with embedded backticks") <<
"<pre>none `one` ``two``</pre>plain<pre>```three``` ````four````</pre>plain" <<
"```\nnone `one` ``two``\n\n```\nplain\n\n```\n```three``` ````four````\n\n```\nplain\n\n";
QTest::newRow("list items with and without checkboxes") <<
"<ul><li>bullet</li><li class=\"unchecked\">unchecked item</li><li class=\"checked\">checked item</li></ul>" <<
"- bullet\n- [ ] unchecked item\n- [x] checked item\n";
+ QTest::newRow("table with backslash in cell") << // QTBUG-96051
+ "<table><tr><td>1011011 [</td><td>1011100 backslash \\</td></tr></table>" <<
+ "|1011011 [|1011100 backslash \\\\|";
}
void tst_QTextMarkdownWriter::fromHtml()
@@ -843,5 +850,56 @@ void tst_QTextMarkdownWriter::fromHtml()
QCOMPARE(output, expectedOutput);
}
+void tst_QTextMarkdownWriter::escapeSpecialCharacters_data()
+{
+ QTest::addColumn<QString>("input");
+ QTest::addColumn<QString>("expectedOutput");
+
+ QTest::newRow("backslash") << "foo \\ bar \\\\ baz \\" << "foo \\\\ bar \\\\ baz \\\\";
+ QTest::newRow("not emphasized") << "*normal* **normal too**" << "\\*normal* \\**normal too**";
+ QTest::newRow("not code") << "`normal` `normal too`" << "\\`normal` \\`normal too`";
+ QTest::newRow("code fence") << "```not a fence; ``` no risk here; ```not a fence" // TODO slightly inconsistent
+ << "\\```not a fence; ``` no risk here; \\```not a fence";
+ QTest::newRow("not html") << "<p>not a tag: <br/> nope</p>" << "\\<p>not a tag: \\<br/> nope\\</p>";
+ QTest::newRow("not a link") << "text [not a link](/foo)" << "text \\[not a link](/foo)";
+ QTest::newRow("not a circle") << "* polaris" << "\\* polaris";
+ QTest::newRow("not a square") << "+ groovy" << "\\+ groovy";
+ QTest::newRow("not a bullet") << "- stayin alive" << "\\- stayin alive";
+ QTest::newRow("arithmetic") << "1 + 2 - 3 * 4" << "1 + 2 - 3 * 4";
+ QTest::newRow("not a list") << "1. not a list" << "1\\. not a list";
+ QTest::newRow("not a list either") << "Jupiter and 10." << "Jupiter and 10.";
+ QTest::newRow("not a heading") << "# not a heading" << "\\# not a heading";
+ QTest::newRow("a non-entity") << "&ouml; not a character entity" << "\\&ouml; not a character entity";
+}
+
+/*! \internal
+ If the user types into a Qt-based editor plain text that the
+ markdown parser would misinterpret, escape it when we save to markdown
+ to clarify that it's plain text.
+ https://spec.commonmark.org/0.31.2/#backslash-escapes
+*/
+void tst_QTextMarkdownWriter::escapeSpecialCharacters() // QTBUG-96051, QTBUG-122083
+{
+ QFETCH(QString, input);
+ QFETCH(QString, expectedOutput);
+
+ document->setPlainText(input);
+ QString output = documentToUnixMarkdown();
+
+#ifdef DEBUG_WRITE_OUTPUT
+ {
+ QFile out("/tmp/" + QLatin1String(QTest::currentDataTag()) + ".md");
+ out.open(QFile::WriteOnly);
+ out.write(output.toUtf8());
+ out.close();
+ }
+#endif
+
+ output = output.trimmed();
+ if (output != expectedOutput && (isMainFontFixed() || isFixedFontProportional()))
+ QEXPECT_FAIL("", "fixed main font or proportional fixed font (QTBUG-103484)", Continue);
+ QCOMPARE(output, expectedOutput);
+}
+
QTEST_MAIN(tst_QTextMarkdownWriter)
#include "tst_qtextmarkdownwriter.moc"