diff options
author | Shawn Rutledge <shawn.rutledge@qt.io> | 2024-03-01 00:39:50 -0700 |
---|---|---|
committer | Shawn Rutledge <shawn.rutledge@qt.io> | 2024-03-04 21:03:51 -0700 |
commit | ca4774131b9b8ee40b4d7f5c1ba296af4700207f (patch) | |
tree | 4478f2fcd8fea0a240bbb26b2c6ac7ec88e039a5 /src/gui/text/qtextmarkdownwriter.cpp | |
parent | 5670d5f7e1ed65b6d6158b73edc43111d7b8ca53 (diff) |
QTextMarkdownWriter: escape special characters (line or word prefix)
Try to avoid writing anything that the parser would misinterpret.
Escape pre-existing backslashes, but not those that are already escaped.
Optimize maybeEscapeFirstChar() slightly and apply it to every line
of output (except in code blocks), not only to new lines created by
word-wrapping.
Since it would be hard to do this without using regular expressions,
the markdown writer feature now depends on the regex feature.
Fixes: QTBUG-96051
Fixes: QTBUG-122083
Pick-to: 6.7
Change-Id: I8d95366501fd31441829081c668f11a3a3a23fe2
Reviewed-by: Axel Spoerl <axel.spoerl@qt.io>
Reviewed-by: Qt CI Bot <qt_ci_bot@qt-project.org>
Diffstat (limited to 'src/gui/text/qtextmarkdownwriter.cpp')
-rw-r--r-- | src/gui/text/qtextmarkdownwriter.cpp | 54 |
1 file changed, 51 insertions, 3 deletions
diff --git a/src/gui/text/qtextmarkdownwriter.cpp b/src/gui/text/qtextmarkdownwriter.cpp index 64dd88d82c..5ab733df00 100644 --- a/src/gui/text/qtextmarkdownwriter.cpp +++ b/src/gui/text/qtextmarkdownwriter.cpp @@ -12,6 +12,7 @@ #include "qtextimagehandler_p.h" #include "qtextmarkdownimporter_p.h" #include "qloggingcategory.h" +#include <QtCore/QRegularExpression> #if QT_CONFIG(itemmodel) #include "qabstractitemmodel.h" #endif @@ -286,15 +287,58 @@ static int adjacentBackticksCount(const QString &s) return ret; } +/*! \internal + Escape anything at the beginning of a line of markdown that would be + misinterpreted by a markdown parser, including any period that follows a + number (to avoid misinterpretation as a numbered list item). + https://spec.commonmark.org/0.31.2/#backslash-escapes +*/ static void maybeEscapeFirstChar(QString &s) { + static const QRegularExpression numericListRe(uR"(\d+([\.)])\s)"_s); + static const QLatin1StringView specialFirstCharacters("#*+-"); + QString sTrimmed = s.trimmed(); if (sTrimmed.isEmpty()) return; - char firstChar = sTrimmed.at(0).toLatin1(); - if (firstChar == '*' || firstChar == '+' || firstChar == '-') { - int i = s.indexOf(QLatin1Char(firstChar)); + QChar firstChar = sTrimmed.at(0); + if (specialFirstCharacters.contains(firstChar)) { + int i = s.indexOf(firstChar); // == 0 unless s got trimmed s.insert(i, u'\\'); + } else { + auto match = numericListRe.match(s, 0, QRegularExpression::NormalMatch, + QRegularExpression::AnchorAtOffsetMatchOption); + if (match.hasMatch()) + s.insert(match.capturedStart(1), qtmw_Backslash); + } +} + +/*! \internal + Escape unescaped backslashes. Then escape any special character that stands + alone or prefixes a "word", including the \c < that starts an HTML tag. 
+ https://spec.commonmark.org/0.31.2/#backslash-escapes +*/ +static void escapeSpecialCharacters(QString &s) +{ + static const QRegularExpression backslashRe(uR"([^\\]\\)"_s); + static const QRegularExpression spaceRe(uR"(\s+)"_s); + static const QRegularExpression specialRe(uR"([<!*[`&]+[/\w])"_s); + + int i = 0; + while (i >= 0) { + if (int j = s.indexOf(backslashRe, i); j >= 0) { + ++j; // we found some char before the backslash that needs escaping + if (s.size() == j + 1 || s.at(j + 1) != qtmw_Backslash) + s.insert(j, qtmw_Backslash); + i = j + 3; + } + if (int j = s.indexOf(specialRe, i); j >= 0 && (j == 0 || s.at(j - 1) != u'\\')) { + s.insert(j, qtmw_Backslash); + i = j + 3; + } + i = s.indexOf(spaceRe, i); + if (i >= 0) + ++i; // past the whitespace, if found } } @@ -504,6 +548,10 @@ int QTextMarkdownWriter::writeBlock(const QTextBlock &block, bool wrap, bool ign QString fragmentText = frag.fragment().text(); while (fragmentText.endsWith(qtmw_Newline)) fragmentText.chop(1); + if (!(m_fencedCodeBlock || m_indentedCodeBlock)) { + escapeSpecialCharacters(fragmentText); + maybeEscapeFirstChar(fragmentText); + } if (block.textList()) { // <li>first line</br>continuation</li> QString newlineIndent = QString(qtmw_Newline) + QString(m_wrappedLineIndent, qtmw_Space); |