summary | refs | log | tree | commit | diff | stats
path: root/src/gui/text/qtextmarkdownwriter.cpp
diff options
context:
space:
mode:
author: Shawn Rutledge <shawn.rutledge@qt.io> 2024-03-01 00:39:50 -0700
committer: Shawn Rutledge <shawn.rutledge@qt.io> 2024-03-04 21:03:51 -0700
commit: ca4774131b9b8ee40b4d7f5c1ba296af4700207f (patch)
tree: 4478f2fcd8fea0a240bbb26b2c6ac7ec88e039a5 /src/gui/text/qtextmarkdownwriter.cpp
parent: 5670d5f7e1ed65b6d6158b73edc43111d7b8ca53 (diff)
QTextMarkdownWriter: escape special characters (line or word prefix)
Try to avoid writing anything that the parser would misinterpret. Escape pre-existing backslashes, but not those that are already escaped. Optimize maybeEscapeFirstChar() slightly and apply it to every line of output (except in code blocks), not only to new lines created by word-wrapping. Since it would be hard to do this without using regular expressions, the markdown writer feature now depends on the regex feature.

Fixes: QTBUG-96051
Fixes: QTBUG-122083
Pick-to: 6.7
Change-Id: I8d95366501fd31441829081c668f11a3a3a23fe2
Reviewed-by: Axel Spoerl <axel.spoerl@qt.io>
Reviewed-by: Qt CI Bot <qt_ci_bot@qt-project.org>
Diffstat (limited to 'src/gui/text/qtextmarkdownwriter.cpp')
-rw-r--r-- src/gui/text/qtextmarkdownwriter.cpp 54
1 files changed, 51 insertions, 3 deletions
diff --git a/src/gui/text/qtextmarkdownwriter.cpp b/src/gui/text/qtextmarkdownwriter.cpp
index 64dd88d82c..5ab733df00 100644
--- a/src/gui/text/qtextmarkdownwriter.cpp
+++ b/src/gui/text/qtextmarkdownwriter.cpp
@@ -12,6 +12,7 @@
#include "qtextimagehandler_p.h"
#include "qtextmarkdownimporter_p.h"
#include "qloggingcategory.h"
+#include <QtCore/QRegularExpression>
#if QT_CONFIG(itemmodel)
#include "qabstractitemmodel.h"
#endif
@@ -286,15 +287,58 @@ static int adjacentBackticksCount(const QString &s)
return ret;
}
+/*! \internal
+ Escape anything at the beginning of a line of markdown that would be
+ misinterpreted by a markdown parser, including any period or closing
+ parenthesis that follows a number (to avoid misinterpretation as a
+ numbered list item).
+
+ \a s is modified in place; at most one backslash is inserted per call.
+ If \a s is empty or whitespace-only, it is left untouched. Otherwise:
+ \list
+ \li if the first non-whitespace character is one of \c{# * + -}, a
+     backslash is inserted at the first occurrence of that character in \a s;
+ \li else, if \a s begins with digits followed by \c . or \c ) and then
+     whitespace, the escape is inserted directly before the \c . or \c ).
+ \endlist
+
+ NOTE(review): the numeric-list pattern is anchored at offset 0 of the
+ untrimmed \a s, whereas the special-character test uses the trimmed copy;
+ a numbered-list prefix preceded by whitespace is therefore not matched
+ here. Confirm whether that is intended.
+
+ https://spec.commonmark.org/0.31.2/#backslash-escapes
+*/
static void maybeEscapeFirstChar(QString &s)
{
+ static const QRegularExpression numericListRe(uR"(\d+([\.)])\s)"_s); // digits, then '.' or ')' (capture 1), then whitespace
+ static const QLatin1StringView specialFirstCharacters("#*+-"); // characters escaped when they come first on the line
+
QString sTrimmed = s.trimmed();
if (sTrimmed.isEmpty())
return;
- char firstChar = sTrimmed.at(0).toLatin1();
- if (firstChar == '*' || firstChar == '+' || firstChar == '-') {
- int i = s.indexOf(QLatin1Char(firstChar));
+ QChar firstChar = sTrimmed.at(0);
+ if (specialFirstCharacters.contains(firstChar)) {
+ int i = s.indexOf(firstChar); // == 0 unless s got trimmed
s.insert(i, u'\\');
+ } else {
+ auto match = numericListRe.match(s, 0, QRegularExpression::NormalMatch,
+ QRegularExpression::AnchorAtOffsetMatchOption); // the match must begin at offset 0 of s
+ if (match.hasMatch())
+ s.insert(match.capturedStart(1), qtmw_Backslash); // lands before the '.' or ')' captured by group 1
+ }
+}
+
+/*! \internal
+ Escape unescaped backslashes. Then escape any special character that stands
+ alone or prefixes a "word", including the \c < that starts an HTML tag.
+
+ \a s is modified in place. The loop starts at index 0 and, on each pass:
+ \list
+ \li searches from the current index for a backslash preceded by a
+     non-backslash character; if that backslash is the last character or is
+     not followed by another backslash, inserts an escape before it;
+ \li searches from the current index for a run of \c{< ! * [ ` &} that is
+     immediately followed by \c / or a word character; if the run is not
+     already preceded by a backslash, inserts an escape before it;
+ \li advances to one past the start of the next whitespace run, or stops
+     when there is none.
+ \endlist
+
+ NOTE(review): the backslash pattern requires a preceding character, so a
+ backslash at index 0 of \a s is not matched by it. Both searches also take
+ the first match anywhere at or after the current index, not only within
+ the current word. Confirm that both are acceptable.
+
+ https://spec.commonmark.org/0.31.2/#backslash-escapes
+*/
+static void escapeSpecialCharacters(QString &s)
+{
+ static const QRegularExpression backslashRe(uR"([^\\]\\)"_s); // a non-backslash character followed by a backslash
+ static const QRegularExpression spaceRe(uR"(\s+)"_s); // a run of whitespace
+ static const QRegularExpression specialRe(uR"([<!*[`&]+[/\w])"_s); // special character(s) followed by '/' or a word character
+
+ int i = 0;
+ while (i >= 0) {
+ if (int j = s.indexOf(backslashRe, i); j >= 0) {
+ ++j; // j indexed the character before the backslash; step onto the backslash itself
+ if (s.size() == j + 1 || s.at(j + 1) != qtmw_Backslash)
+ s.insert(j, qtmw_Backslash);
+ i = j + 3; // resume the search beyond this position; may point past the end of s
+ }
+ if (int j = s.indexOf(specialRe, i); j >= 0 && (j == 0 || s.at(j - 1) != u'\\')) {
+ s.insert(j, qtmw_Backslash);
+ i = j + 3; // resume the search beyond this position; may point past the end of s
+ }
+ i = s.indexOf(spaceRe, i); // negative when no whitespace remains, which ends the loop
+ if (i >= 0)
+ ++i; // past the whitespace, if found
}
}
@@ -504,6 +548,10 @@ int QTextMarkdownWriter::writeBlock(const QTextBlock &block, bool wrap, bool ign
QString fragmentText = frag.fragment().text();
while (fragmentText.endsWith(qtmw_Newline))
fragmentText.chop(1);
+ if (!(m_fencedCodeBlock || m_indentedCodeBlock)) {
+ escapeSpecialCharacters(fragmentText);
+ maybeEscapeFirstChar(fragmentText);
+ }
if (block.textList()) { // <li>first line</br>continuation</li>
QString newlineIndent =
QString(qtmw_Newline) + QString(m_wrappedLineIndent, qtmw_Space);