summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/gui/configure.cmake1
-rw-r--r--src/gui/text/qtextmarkdownwriter.cpp54
-rw-r--r--tests/auto/gui/text/qtextmarkdownimporter/tst_qtextmarkdownimporter.cpp2
-rw-r--r--tests/auto/gui/text/qtextmarkdownwriter/tst_qtextmarkdownwriter.cpp66
4 files changed, 115 insertions, 8 deletions
diff --git a/src/gui/configure.cmake b/src/gui/configure.cmake
index 51b4bc515a..4affd7b30a 100644
--- a/src/gui/configure.cmake
+++ b/src/gui/configure.cmake
@@ -1012,6 +1012,7 @@ qt_feature("system-textmarkdownreader" PUBLIC
qt_feature("textmarkdownwriter" PUBLIC
SECTION "Kernel"
LABEL "MarkdownWriter"
+ CONDITION QT_FEATURE_regularexpression
PURPOSE "Provides a Markdown (CommonMark and GitHub) writer"
)
qt_feature("textodfwriter" PUBLIC
diff --git a/src/gui/text/qtextmarkdownwriter.cpp b/src/gui/text/qtextmarkdownwriter.cpp
index ad4859f398..3c05fed6a6 100644
--- a/src/gui/text/qtextmarkdownwriter.cpp
+++ b/src/gui/text/qtextmarkdownwriter.cpp
@@ -11,6 +11,7 @@
#include "qtextcursor.h"
#include "qtextimagehandler_p.h"
#include "qloggingcategory.h"
+#include <QtCore/QRegularExpression>
#if QT_CONFIG(itemmodel)
#include "qabstractitemmodel.h"
#endif
@@ -273,15 +274,58 @@ static int adjacentBackticksCount(const QString &s)
return ret;
}
+/*! \internal
+ Escape anything at the beginning of a line of markdown that would be
+ misinterpreted by a markdown parser, including any period that follows a
+ number (to avoid misinterpretation as a numbered list item).
+ https://spec.commonmark.org/0.31.2/#backslash-escapes
+*/
static void maybeEscapeFirstChar(QString &s)
{
+ static const QRegularExpression numericListRe(uR"(\d+([\.)])\s)"_s);
+ static const QLatin1StringView specialFirstCharacters("#*+-");
+
QString sTrimmed = s.trimmed();
if (sTrimmed.isEmpty())
return;
- char firstChar = sTrimmed.at(0).toLatin1();
- if (firstChar == '*' || firstChar == '+' || firstChar == '-') {
- int i = s.indexOf(QLatin1Char(firstChar));
+ QChar firstChar = sTrimmed.at(0);
+ if (specialFirstCharacters.contains(firstChar)) {
+ int i = s.indexOf(firstChar); // == 0 unless s got trimmed
s.insert(i, u'\\');
+ } else {
+ auto match = numericListRe.match(s, 0, QRegularExpression::NormalMatch,
+ QRegularExpression::AnchorAtOffsetMatchOption);
+ if (match.hasMatch())
+ s.insert(match.capturedStart(1), qtmw_Backslash);
+ }
+}
+
+/*! \internal
+ Escape unescaped backslashes. Then escape any special character that stands
+ alone or prefixes a "word", including the \c < that starts an HTML tag.
+ https://spec.commonmark.org/0.31.2/#backslash-escapes
+*/
+static void escapeSpecialCharacters(QString &s)
+{
+ static const QRegularExpression backslashRe(uR"([^\\]\\)"_s);
+ static const QRegularExpression spaceRe(uR"(\s+)"_s);
+ static const QRegularExpression specialRe(uR"([<!*[`&]+[/\w])"_s);
+
+ int i = 0;
+ while (i >= 0) {
+ if (int j = s.indexOf(backslashRe, i); j >= 0) {
+ ++j; // we found some char before the backslash that needs escaping
+ if (s.size() == j + 1 || s.at(j + 1) != qtmw_Backslash)
+ s.insert(j, qtmw_Backslash);
+ i = j + 3;
+ }
+ if (int j = s.indexOf(specialRe, i); j >= 0 && (j == 0 || s.at(j - 1) != u'\\')) {
+ s.insert(j, qtmw_Backslash);
+ i = j + 3;
+ }
+ i = s.indexOf(spaceRe, i);
+ if (i >= 0)
+ ++i; // past the whitespace, if found
}
}
@@ -491,6 +535,10 @@ int QTextMarkdownWriter::writeBlock(const QTextBlock &block, bool wrap, bool ign
QString fragmentText = frag.fragment().text();
while (fragmentText.endsWith(qtmw_Newline))
fragmentText.chop(1);
+ if (!(m_fencedCodeBlock || m_indentedCodeBlock)) {
+ escapeSpecialCharacters(fragmentText);
+ maybeEscapeFirstChar(fragmentText);
+ }
if (block.textList()) { // <li>first line</br>continuation</li>
QString newlineIndent =
QString(qtmw_Newline) + QString(m_wrappedLineIndent, qtmw_Space);
diff --git a/tests/auto/gui/text/qtextmarkdownimporter/tst_qtextmarkdownimporter.cpp b/tests/auto/gui/text/qtextmarkdownimporter/tst_qtextmarkdownimporter.cpp
index 7b26a27110..0c889947a8 100644
--- a/tests/auto/gui/text/qtextmarkdownimporter/tst_qtextmarkdownimporter.cpp
+++ b/tests/auto/gui/text/qtextmarkdownimporter/tst_qtextmarkdownimporter.cpp
@@ -214,7 +214,7 @@ void tst_QTextMarkdownImporter::lists_data()
QTest::newRow("hyphen space newline") << "- \n" << 0 << 1 << 1 << true << "- \n";
QTest::newRow("hyphen space letter newline") << "- a\n" << 0 << 1 << 1 << false << "- a\n";
QTest::newRow("hyphen nbsp newline") <<
- QString::fromUtf8("-\u00A0\n") << 0 << 1 << 0 << true << "-\u00A0\n\n";
+ QString::fromUtf8("-\u00A0\n") << 0 << 1 << 0 << true << "\\-\u00A0\n\n";
QTest::newRow("nested empty lists") << "*\n *\n *\n" << 0 << 1 << 1 << true << " * \n";
QTest::newRow("list nested in empty list") << "-\n * a\n" << 0 << 1 << 2 << false << "- \n * a\n";
QTest::newRow("lists nested in empty lists")
diff --git a/tests/auto/gui/text/qtextmarkdownwriter/tst_qtextmarkdownwriter.cpp b/tests/auto/gui/text/qtextmarkdownwriter/tst_qtextmarkdownwriter.cpp
index ea99c87840..182aa920b9 100644
--- a/tests/auto/gui/text/qtextmarkdownwriter/tst_qtextmarkdownwriter.cpp
+++ b/tests/auto/gui/text/qtextmarkdownwriter/tst_qtextmarkdownwriter.cpp
@@ -44,6 +44,8 @@ private slots:
void rewriteDocument();
void fromHtml_data();
void fromHtml();
+ void escapeSpecialCharacters_data();
+ void escapeSpecialCharacters();
private:
bool isMainFontFixed();
@@ -807,16 +809,21 @@ void tst_QTextMarkdownWriter::fromHtml_data()
QTest::newRow("code") <<
"<pre class=\"language-pseudocode\">\n#include \"foo.h\"\n\nblock {\n statement();\n}\n\n</pre>" <<
"```pseudocode\n#include \"foo.h\"\n\nblock {\n statement();\n}\n\n```\n\n";
- // TODO
-// QTest::newRow("escaped number and paren after double newline") <<
-// "<p>(The first sentence of this paragraph is a line, the next paragraph has a number</p>13) but that's not part of an ordered list" <<
-// "(The first sentence of this paragraph is a line, the next paragraph has a number\n\n13\\) but that's not part of an ordered list\n\n";
+ QTest::newRow("escaped number and paren after single newline") <<
+ "<p>(The first sentence of this paragraph is a line, next paragraph has a number 13) but that's not part of an ordered list</p>" <<
+ "(The first sentence of this paragraph is a line, next paragraph has a number\n13\\) but that's not part of an ordered list\n\n";
+ QTest::newRow("escaped number and paren after double newline") <<
+ "<p>(The first sentence of this paragraph is a line, the next paragraph has a number</p>13) but that's not part of an ordered list" <<
+ "(The first sentence of this paragraph is a line, the next paragraph has a number\n\n13\\) but that's not part of an ordered list\n\n";
QTest::newRow("preformats with embedded backticks") <<
"<pre>none `one` ``two``</pre>plain<pre>```three``` ````four````</pre>plain" <<
"```\nnone `one` ``two``\n\n```\nplain\n\n```\n```three``` ````four````\n\n```\nplain\n\n";
QTest::newRow("list items with and without checkboxes") <<
"<ul><li>bullet</li><li class=\"unchecked\">unchecked item</li><li class=\"checked\">checked item</li></ul>" <<
"- bullet\n- [ ] unchecked item\n- [x] checked item\n";
+ QTest::newRow("table with backslash in cell") << // QTBUG-96051
+ "<table><tr><td>1011011 [</td><td>1011100 backslash \\</td></tr></table>" <<
+ "|1011011 [|1011100 backslash \\\\|";
}
void tst_QTextMarkdownWriter::fromHtml()
@@ -843,5 +850,56 @@ void tst_QTextMarkdownWriter::fromHtml()
QCOMPARE(output, expectedOutput);
}
+void tst_QTextMarkdownWriter::escapeSpecialCharacters_data()
+{
+ QTest::addColumn<QString>("input");
+ QTest::addColumn<QString>("expectedOutput");
+
+ QTest::newRow("backslash") << "foo \\ bar \\\\ baz \\" << "foo \\\\ bar \\\\ baz \\\\";
+ QTest::newRow("not emphasized") << "*normal* **normal too**" << "\\*normal* \\**normal too**";
+ QTest::newRow("not code") << "`normal` `normal too`" << "\\`normal` \\`normal too`";
+ QTest::newRow("code fence") << "```not a fence; ``` no risk here; ```not a fence" // TODO slightly inconsistent
+ << "\\```not a fence; ``` no risk here; \\```not a fence";
+ QTest::newRow("not html") << "<p>not a tag: <br/> nope</p>" << "\\<p>not a tag: \\<br/> nope\\</p>";
+ QTest::newRow("not a link") << "text [not a link](/foo)" << "text \\[not a link](/foo)";
+ QTest::newRow("not a circle") << "* polaris" << "\\* polaris";
+ QTest::newRow("not a square") << "+ groovy" << "\\+ groovy";
+ QTest::newRow("not a bullet") << "- stayin alive" << "\\- stayin alive";
+ QTest::newRow("arithmetic") << "1 + 2 - 3 * 4" << "1 + 2 - 3 * 4";
+ QTest::newRow("not a list") << "1. not a list" << "1\\. not a list";
+ QTest::newRow("not a list either") << "Jupiter and 10." << "Jupiter and 10.";
+ QTest::newRow("not a heading") << "# not a heading" << "\\# not a heading";
+ QTest::newRow("a non-entity") << "&ouml; not a character entity" << "\\&ouml; not a character entity";
+}
+
+/*! \internal
+ If the user types into a Qt-based editor plain text that the
+ markdown parser would misinterpret, escape it when we save to markdown
+ to clarify that it's plain text.
+ https://spec.commonmark.org/0.31.2/#backslash-escapes
+*/
+void tst_QTextMarkdownWriter::escapeSpecialCharacters() // QTBUG-96051, QTBUG-122083
+{
+ QFETCH(QString, input);
+ QFETCH(QString, expectedOutput);
+
+ document->setPlainText(input);
+ QString output = documentToUnixMarkdown();
+
+#ifdef DEBUG_WRITE_OUTPUT
+ {
+ QFile out("/tmp/" + QLatin1String(QTest::currentDataTag()) + ".md");
+ out.open(QFile::WriteOnly);
+ out.write(output.toUtf8());
+ out.close();
+ }
+#endif
+
+ output = output.trimmed();
+ if (output != expectedOutput && (isMainFontFixed() || isFixedFontProportional()))
+ QEXPECT_FAIL("", "fixed main font or proportional fixed font (QTBUG-103484)", Continue);
+ QCOMPARE(output, expectedOutput);
+}
+
QTEST_MAIN(tst_QTextMarkdownWriter)
#include "tst_qtextmarkdownwriter.moc"