diff options
4 files changed, 115 insertions, 8 deletions
diff --git a/src/gui/configure.cmake b/src/gui/configure.cmake index 51b4bc515a..4affd7b30a 100644 --- a/src/gui/configure.cmake +++ b/src/gui/configure.cmake @@ -1012,6 +1012,7 @@ qt_feature("system-textmarkdownreader" PUBLIC qt_feature("textmarkdownwriter" PUBLIC SECTION "Kernel" LABEL "MarkdownWriter" + CONDITION QT_FEATURE_regularexpression PURPOSE "Provides a Markdown (CommonMark and GitHub) writer" ) qt_feature("textodfwriter" PUBLIC diff --git a/src/gui/text/qtextmarkdownwriter.cpp b/src/gui/text/qtextmarkdownwriter.cpp index ad4859f398..3c05fed6a6 100644 --- a/src/gui/text/qtextmarkdownwriter.cpp +++ b/src/gui/text/qtextmarkdownwriter.cpp @@ -11,6 +11,7 @@ #include "qtextcursor.h" #include "qtextimagehandler_p.h" #include "qloggingcategory.h" +#include <QtCore/QRegularExpression> #if QT_CONFIG(itemmodel) #include "qabstractitemmodel.h" #endif @@ -273,15 +274,58 @@ static int adjacentBackticksCount(const QString &s) return ret; } +/*! \internal + Escape anything at the beginning of a line of markdown that would be + misinterpreted by a markdown parser, including any period that follows a + number (to avoid misinterpretation as a numbered list item). + https://spec.commonmark.org/0.31.2/#backslash-escapes +*/ static void maybeEscapeFirstChar(QString &s) { + static const QRegularExpression numericListRe(uR"(\d+([\.)])\s)"_s); + static const QLatin1StringView specialFirstCharacters("#*+-"); + QString sTrimmed = s.trimmed(); if (sTrimmed.isEmpty()) return; - char firstChar = sTrimmed.at(0).toLatin1(); - if (firstChar == '*' || firstChar == '+' || firstChar == '-') { - int i = s.indexOf(QLatin1Char(firstChar)); + QChar firstChar = sTrimmed.at(0); + if (specialFirstCharacters.contains(firstChar)) { + int i = s.indexOf(firstChar); // == 0 unless s got trimmed s.insert(i, u'\\'); + } else { + auto match = numericListRe.match(s, 0, QRegularExpression::NormalMatch, + QRegularExpression::AnchorAtOffsetMatchOption); + if (match.hasMatch()) + s.insert(match.capturedStart(1), qtmw_Backslash); + } +} + +/*! \internal + Escape unescaped backslashes. Then escape any special character that stands + alone or prefixes a "word", including the \c < that starts an HTML tag. + https://spec.commonmark.org/0.31.2/#backslash-escapes +*/ +static void escapeSpecialCharacters(QString &s) +{ + static const QRegularExpression backslashRe(uR"([^\\]\\)"_s); + static const QRegularExpression spaceRe(uR"(\s+)"_s); + static const QRegularExpression specialRe(uR"([<!*[`&]+[/\w])"_s); + + int i = 0; + while (i >= 0) { + if (int j = s.indexOf(backslashRe, i); j >= 0) { + ++j; // we found some char before the backslash that needs escaping + if (s.size() == j + 1 || s.at(j + 1) != qtmw_Backslash) + s.insert(j, qtmw_Backslash); + i = j + 3; + } + if (int j = s.indexOf(specialRe, i); j >= 0 && (j == 0 || s.at(j - 1) != u'\\')) { + s.insert(j, qtmw_Backslash); + i = j + 3; + } + i = s.indexOf(spaceRe, i); + if (i >= 0) + ++i; // past the whitespace, if found } } @@ -491,6 +535,10 @@ int QTextMarkdownWriter::writeBlock(const QTextBlock &block, bool wrap, bool ign QString fragmentText = frag.fragment().text(); while (fragmentText.endsWith(qtmw_Newline)) fragmentText.chop(1); + if (!(m_fencedCodeBlock || m_indentedCodeBlock)) { + escapeSpecialCharacters(fragmentText); + maybeEscapeFirstChar(fragmentText); + } if (block.textList()) { // <li>first line</br>continuation</li> QString newlineIndent = QString(qtmw_Newline) + QString(m_wrappedLineIndent, qtmw_Space); diff --git a/tests/auto/gui/text/qtextmarkdownimporter/tst_qtextmarkdownimporter.cpp b/tests/auto/gui/text/qtextmarkdownimporter/tst_qtextmarkdownimporter.cpp index 7b26a27110..0c889947a8 100644 --- a/tests/auto/gui/text/qtextmarkdownimporter/tst_qtextmarkdownimporter.cpp +++ b/tests/auto/gui/text/qtextmarkdownimporter/tst_qtextmarkdownimporter.cpp @@ -214,7 +214,7 @@ void tst_QTextMarkdownImporter::lists_data() QTest::newRow("hyphen space newline") << "- \n" << 0 << 1 << 1 << true << "- \n"; QTest::newRow("hyphen space letter newline") << "- a\n" << 0 << 1 << 1 << false << "- a\n"; QTest::newRow("hyphen nbsp newline") << - QString::fromUtf8("-\u00A0\n") << 0 << 1 << 0 << true << "-\u00A0\n\n"; + QString::fromUtf8("-\u00A0\n") << 0 << 1 << 0 << true << "\\-\u00A0\n\n"; QTest::newRow("nested empty lists") << "*\n *\n *\n" << 0 << 1 << 1 << true << " * \n"; QTest::newRow("list nested in empty list") << "-\n * a\n" << 0 << 1 << 2 << false << "- \n * a\n"; QTest::newRow("lists nested in empty lists") diff --git a/tests/auto/gui/text/qtextmarkdownwriter/tst_qtextmarkdownwriter.cpp b/tests/auto/gui/text/qtextmarkdownwriter/tst_qtextmarkdownwriter.cpp index ea99c87840..182aa920b9 100644 --- a/tests/auto/gui/text/qtextmarkdownwriter/tst_qtextmarkdownwriter.cpp +++ b/tests/auto/gui/text/qtextmarkdownwriter/tst_qtextmarkdownwriter.cpp @@ -44,6 +44,8 @@ private slots: void rewriteDocument(); void fromHtml_data(); void fromHtml(); + void escapeSpecialCharacters_data(); + void escapeSpecialCharacters(); private: bool isMainFontFixed(); @@ -807,16 +809,21 @@ void tst_QTextMarkdownWriter::fromHtml_data() QTest::newRow("code") << "<pre class=\"language-pseudocode\">\n#include \"foo.h\"\n\nblock {\n statement();\n}\n\n</pre>" << "```pseudocode\n#include \"foo.h\"\n\nblock {\n statement();\n}\n\n```\n\n"; - // TODO -// QTest::newRow("escaped number and paren after double newline") << -// "<p>(The first sentence of this paragraph is a line, the next paragraph has a number</p>13) but that's not part of an ordered list" << -// "(The first sentence of this paragraph is a line, the next paragraph has a number\n\n13\\) but that's not part of an ordered list\n\n"; + QTest::newRow("escaped number and paren after single newline") << + "<p>(The first sentence of this paragraph is a line, next paragraph has a number 13) but that's not part of an ordered list</p>" << + "(The first sentence of this paragraph is a line, next paragraph has a number\n13\\) but that's not part of an ordered list\n\n"; + QTest::newRow("escaped number and paren after double newline") << + "<p>(The first sentence of this paragraph is a line, the next paragraph has a number</p>13) but that's not part of an ordered list" << + "(The first sentence of this paragraph is a line, the next paragraph has a number\n\n13\\) but that's not part of an ordered list\n\n"; QTest::newRow("preformats with embedded backticks") << "<pre>none `one` ``two``</pre>plain<pre>```three``` ````four````</pre>plain" << "```\nnone `one` ``two``\n\n```\nplain\n\n```\n```three``` ````four````\n\n```\nplain\n\n"; QTest::newRow("list items with and without checkboxes") << "<ul><li>bullet</li><li class=\"unchecked\">unchecked item</li><li class=\"checked\">checked item</li></ul>" << "- bullet\n- [ ] unchecked item\n- [x] checked item\n"; + QTest::newRow("table with backslash in cell") << // QTBUG-96051 + "<table><tr><td>1011011 [</td><td>1011100 backslash \\</td></tr></table>" << + "|1011011 [|1011100 backslash \\\\|"; } void tst_QTextMarkdownWriter::fromHtml() @@ -843,5 +850,56 @@ void tst_QTextMarkdownWriter::fromHtml() QCOMPARE(output, expectedOutput); } +void tst_QTextMarkdownWriter::escapeSpecialCharacters_data() +{ + QTest::addColumn<QString>("input"); + QTest::addColumn<QString>("expectedOutput"); + + QTest::newRow("backslash") << "foo \\ bar \\\\ baz \\" << "foo \\\\ bar \\\\ baz \\\\"; + QTest::newRow("not emphasized") << "*normal* **normal too**" << "\\*normal* \\**normal too**"; + QTest::newRow("not code") << "`normal` `normal too`" << "\\`normal` \\`normal too`"; + QTest::newRow("code fence") << "```not a fence; ``` no risk here; ```not a fence" // TODO slightly inconsistent + << "\\```not a fence; ``` no risk here; \\```not a fence"; + QTest::newRow("not html") << "<p>not a tag: <br/> nope</p>" << "\\<p>not a tag: \\<br/> nope\\</p>"; + QTest::newRow("not a link") << "text [not a link](/foo)" << "text \\[not a link](/foo)"; + QTest::newRow("not a circle") << "* polaris" << "\\* polaris"; + QTest::newRow("not a square") << "+ groovy" << "\\+ groovy"; + QTest::newRow("not a bullet") << "- stayin alive" << "\\- stayin alive"; + QTest::newRow("arithmetic") << "1 + 2 - 3 * 4" << "1 + 2 - 3 * 4"; + QTest::newRow("not a list") << "1. not a list" << "1\\. not a list"; + QTest::newRow("not a list either") << "Jupiter and 10." << "Jupiter and 10."; + QTest::newRow("not a heading") << "# not a heading" << "\\# not a heading"; + QTest::newRow("a non-entity") << "ö not a character entity" << "\\ö not a character entity"; +} + +/*! \internal + If the user types into a Qt-based editor plain text that the + markdown parser would misinterpret, escape it when we save to markdown + to clarify that it's plain text. + https://spec.commonmark.org/0.31.2/#backslash-escapes +*/ +void tst_QTextMarkdownWriter::escapeSpecialCharacters() // QTBUG-96051, QTBUG-122083 +{ + QFETCH(QString, input); + QFETCH(QString, expectedOutput); + + document->setPlainText(input); + QString output = documentToUnixMarkdown(); + +#ifdef DEBUG_WRITE_OUTPUT + { + QFile out("/tmp/" + QLatin1String(QTest::currentDataTag()) + ".md"); + out.open(QFile::WriteOnly); + out.write(output.toUtf8()); + out.close(); + } +#endif + + output = output.trimmed(); + if (output != expectedOutput && (isMainFontFixed() || isFixedFontProportional())) + QEXPECT_FAIL("", "fixed main font or proportional fixed font (QTBUG-103484)", Continue); + QCOMPARE(output, expectedOutput); +} + QTEST_MAIN(tst_QTextMarkdownWriter) #include "tst_qtextmarkdownwriter.moc" |