diff options
Diffstat (limited to 'src/gui/text/qtextmarkdownwriter.cpp')
-rw-r--r-- | src/gui/text/qtextmarkdownwriter.cpp | 171 |
1 files changed, 143 insertions, 28 deletions
diff --git a/src/gui/text/qtextmarkdownwriter.cpp b/src/gui/text/qtextmarkdownwriter.cpp index f535ff8aa8..2e23dfcd94 100644 --- a/src/gui/text/qtextmarkdownwriter.cpp +++ b/src/gui/text/qtextmarkdownwriter.cpp @@ -10,7 +10,9 @@ #include "qtexttable.h" #include "qtextcursor.h" #include "qtextimagehandler_p.h" +#include "qtextmarkdownimporter_p.h" #include "qloggingcategory.h" +#include <QtCore/QRegularExpression> #if QT_CONFIG(itemmodel) #include "qabstractitemmodel.h" #endif @@ -19,7 +21,7 @@ QT_BEGIN_NAMESPACE using namespace Qt::StringLiterals; -Q_LOGGING_CATEGORY(lcMDW, "qt.text.markdown.writer") +Q_STATIC_LOGGING_CATEGORY(lcMDW, "qt.text.markdown.writer") static const QChar qtmw_Space = u' '; static const QChar qtmw_Tab = u'\t'; @@ -38,6 +40,7 @@ QTextMarkdownWriter::QTextMarkdownWriter(QTextStream &stream, QTextDocument::Mar bool QTextMarkdownWriter::writeAll(const QTextDocument *document) { + writeFrontMatter(document->metaInformation(QTextDocument::FrontMatter)); writeFrame(document->rootFrame()); return true; } @@ -76,6 +79,19 @@ void QTextMarkdownWriter::writeTable(const QAbstractItemModel *table) } #endif +void QTextMarkdownWriter::writeFrontMatter(const QString &fm) +{ + const bool featureEnabled = m_features.testFlag( + static_cast<QTextDocument::MarkdownFeature>(QTextMarkdownImporter::FeatureFrontMatter)); + qCDebug(lcMDW) << "writing FrontMatter?" << featureEnabled << "size" << fm.size(); + if (fm.isEmpty() || !featureEnabled) + return; + m_stream << "---\n"_L1 << fm; + if (!fm.endsWith(qtmw_Newline)) + m_stream << qtmw_Newline; + m_stream << "---\n"_L1; +} + void QTextMarkdownWriter::writeFrame(const QTextFrame *frame) { Q_ASSERT(frame); @@ -112,17 +128,22 @@ void QTextMarkdownWriter::writeFrame(const QTextFrame *frame) // suppress needless blank lines, when there will be a big change in block format bool nextIsDifferent = false; bool ending = false; + int blockQuoteIndent = 0; + int nextBlockQuoteIndent = 0; { QTextFrame::iterator next = iterator; ++next; + QTextBlockFormat format = iterator.currentBlock().blockFormat(); + QTextBlockFormat nextFormat = next.currentBlock().blockFormat(); + blockQuoteIndent = format.intProperty(QTextFormat::BlockQuoteLevel); + nextBlockQuoteIndent = nextFormat.intProperty(QTextFormat::BlockQuoteLevel); if (next.atEnd()) { nextIsDifferent = true; ending = true; } else { - QTextBlockFormat format = iterator.currentBlock().blockFormat(); - QTextBlockFormat nextFormat = next.currentBlock().blockFormat(); if (nextFormat.indent() != format.indent() || - nextFormat.property(QTextFormat::BlockCodeLanguage) != format.property(QTextFormat::BlockCodeLanguage)) + nextFormat.property(QTextFormat::BlockCodeLanguage) != + format.property(QTextFormat::BlockCodeLanguage)) nextIsDifferent = true; } } @@ -139,8 +160,10 @@ void QTextMarkdownWriter::writeFrame(const QTextFrame *frame) tableRow = cell.row(); } } else if (!block.textList()) { - if (lastWasList) + if (lastWasList) { m_stream << qtmw_Newline; + m_linePrefixWritten = false; + } } int endingCol = writeBlock(block, !table, table && tableRow == 0, nextIsDifferent && !block.textList()); @@ -164,8 +187,16 @@ void QTextMarkdownWriter::writeFrame(const QTextFrame *frame) } else if (endingCol > 0) { if (block.textList() || block.blockFormat().hasProperty(QTextFormat::BlockCodeLanguage)) { m_stream << qtmw_Newline; + if (block.textList()) { + m_stream << m_linePrefix; + m_linePrefixWritten = true; + } } else { - m_stream << qtmw_Newline << qtmw_Newline; + m_stream << qtmw_Newline; + if (nextBlockQuoteIndent < blockQuoteIndent) + setLinePrefixForBlockQuote(nextBlockQuoteIndent); + m_stream << m_linePrefix; + m_stream << qtmw_Newline; m_doubleNewlineWritten = true; } } @@ -211,6 +242,16 @@ QTextMarkdownWriter::ListInfo QTextMarkdownWriter::listInfo(QTextList *list) return m_listInfo.value(list); } +void QTextMarkdownWriter::setLinePrefixForBlockQuote(int level) +{ + m_linePrefix.clear(); + if (level > 0) { + m_linePrefix.reserve(level * 2); + for (int i = 0; i < level; ++i) + m_linePrefix += u"> "; + } +} + static int nearestWordWrapIndex(const QString &s, int before) { before = qMin(before, s.size()); @@ -248,15 +289,53 @@ static int adjacentBackticksCount(const QString &s) return ret; } +/*! \internal + Escape anything at the beginning of a line of markdown that would be + misinterpreted by a markdown parser, including any period that follows a + number (to avoid misinterpretation as a numbered list item). + https://spec.commonmark.org/0.31.2/#backslash-escapes +*/ static void maybeEscapeFirstChar(QString &s) { + static const QRegularExpression numericListRe(uR"(\d+([\.)])\s)"_s); + static const QLatin1StringView specialFirstCharacters("#*+-"); + QString sTrimmed = s.trimmed(); if (sTrimmed.isEmpty()) return; - char firstChar = sTrimmed.at(0).toLatin1(); - if (firstChar == '*' || firstChar == '+' || firstChar == '-') { - int i = s.indexOf(QLatin1Char(firstChar)); + QChar firstChar = sTrimmed.at(0); + if (specialFirstCharacters.contains(firstChar)) { + int i = s.indexOf(firstChar); // == 0 unless s got trimmed s.insert(i, u'\\'); + } else { + auto match = numericListRe.match(s, 0, QRegularExpression::NormalMatch, + QRegularExpression::AnchorAtOffsetMatchOption); + if (match.hasMatch()) + s.insert(match.capturedStart(1), qtmw_Backslash); + } +} + +/*! \internal + Escape all backslashes. Then escape any special character that stands + alone or prefixes a "word", including the \c < that starts an HTML tag. + https://spec.commonmark.org/0.31.2/#backslash-escapes +*/ +static void escapeSpecialCharacters(QString &s) +{ + static const QRegularExpression spaceRe(uR"(\s+)"_s); + static const QRegularExpression specialRe(uR"([<!*[`&]+[/\w])"_s); + + s.replace("\\"_L1, "\\\\"_L1); + + int i = 0; + while (i >= 0) { + if (int j = s.indexOf(specialRe, i); j >= 0) { + s.insert(j, qtmw_Backslash); + i = j + 3; + } + i = s.indexOf(spaceRe, i); + if (i >= 0) + ++i; // past the whitespace, if found } } @@ -336,15 +415,27 @@ int QTextMarkdownWriter::writeBlock(const QTextBlock &block, bool wrap, bool ign const bool codeBlock = blockFmt.hasProperty(QTextFormat::BlockCodeFence) || blockFmt.stringProperty(QTextFormat::BlockCodeLanguage).size() > 0 || blockFmt.nonBreakableLines(); + const int blockQuoteLevel = blockFmt.intProperty(QTextFormat::BlockQuoteLevel); if (m_fencedCodeBlock && !codeBlock) { m_stream << m_linePrefix << m_codeBlockFence << qtmw_Newline; m_fencedCodeBlock = false; m_codeBlockFence.clear(); + m_linePrefixWritten = m_linePrefix.size() > 0; + } + m_linePrefix.clear(); + if (!blockFmt.headingLevel() && blockQuoteLevel > 0) { + setLinePrefixForBlockQuote(blockQuoteLevel); + if (!m_linePrefixWritten) { + m_stream << m_linePrefix; + m_linePrefixWritten = true; + } } if (block.textList()) { // it's a list-item auto fmt = block.textList()->format(); const int listLevel = fmt.indent(); - const int number = block.textList()->itemNumber(block) + 1; + // Negative numbers don't start a list in Markdown, so ignore them. + const int start = fmt.start() >= 0 ? fmt.start() : 1; + const int number = block.textList()->itemNumber(block) + start; QByteArray bullet = " "; bool numeric = false; switch (fmt.style()) { @@ -414,31 +505,27 @@ int QTextMarkdownWriter::writeBlock(const QTextBlock &block, bool wrap, bool ign if (blockFmt.hasProperty(QTextFormat::BlockIndent)) m_codeBlockFence = QString(m_wrappedLineIndent, qtmw_Space) + m_codeBlockFence; // A block quote can contain an indented code block, but not vice-versa. - m_stream << m_linePrefix << m_codeBlockFence - << blockFmt.stringProperty(QTextFormat::BlockCodeLanguage) << qtmw_Newline; + m_stream << m_codeBlockFence << blockFmt.stringProperty(QTextFormat::BlockCodeLanguage) + << qtmw_Newline << m_linePrefix; m_fencedCodeBlock = true; } wrap = false; } else if (!blockFmt.indent()) { m_wrappedLineIndent = 0; - m_linePrefix.clear(); - if (blockFmt.hasProperty(QTextFormat::BlockQuoteLevel)) { - int level = blockFmt.intProperty(QTextFormat::BlockQuoteLevel); - QString quoteMarker = QStringLiteral("> "); - m_linePrefix.reserve(level * 2); - for (int i = 0; i < level; ++i) - m_linePrefix += quoteMarker; - } if (blockFmt.hasProperty(QTextFormat::BlockCodeLanguage)) { // A block quote can contain an indented code block, but not vice-versa. m_linePrefix += QString(4, qtmw_Space); m_indentedCodeBlock = true; } + if (!m_linePrefixWritten) { + m_stream << m_linePrefix; + m_linePrefixWritten = true; + } } - if (blockFmt.headingLevel()) + if (blockFmt.headingLevel()) { m_stream << QByteArray(blockFmt.headingLevel(), '#') << ' '; - else - m_stream << m_linePrefix; + wrap = false; + } QString wrapIndentString = m_linePrefix + QString(m_wrappedLineIndent, qtmw_Space); // It would be convenient if QTextStream had a lineCharPos() accessor, @@ -451,12 +538,17 @@ int QTextMarkdownWriter::writeBlock(const QTextBlock &block, bool wrap, bool ign bool italic = false; bool underline = false; bool strikeOut = false; + bool endingMarkers = false; QString backticks(qtmw_Backtick); for (QTextBlock::Iterator frag = block.begin(); !frag.atEnd(); ++frag) { missedBlankCodeBlockLine = false; QString fragmentText = frag.fragment().text(); while (fragmentText.endsWith(qtmw_Newline)) fragmentText.chop(1); + if (!(m_fencedCodeBlock || m_indentedCodeBlock)) { + escapeSpecialCharacters(fragmentText); + maybeEscapeFirstChar(fragmentText); + } if (block.textList()) { // <li>first line</br>continuation</li> QString newlineIndent = QString(qtmw_Newline) + QString(m_wrappedLineIndent, qtmw_Space); @@ -518,26 +610,36 @@ int QTextMarkdownWriter::writeBlock(const QTextBlock &block, bool wrap, bool ign if (startsOrEndsWithBacktick) markers += qtmw_Space; mono = monoFrag; + if (!mono) + endingMarkers = true; } if (!blockFmt.headingLevel() && !mono) { if (fontInfo.bold() != bold) { markers += "**"_L1; bold = fontInfo.bold(); + if (!bold) + endingMarkers = true; } if (fontInfo.italic() != italic) { markers += u'*'; italic = fontInfo.italic(); + if (!italic) + endingMarkers = true; } if (fontInfo.strikeOut() != strikeOut) { markers += "~~"_L1; strikeOut = fontInfo.strikeOut(); + if (!strikeOut) + endingMarkers = true; } if (fontInfo.underline() != underline) { - // Markdown doesn't support underline, but the parser will treat a single underline - // the same as a single asterisk, and the marked fragment will be rendered in italics. - // That will have to do. + // CommonMark specifies underline as another way to get emphasis (italics): + // https://spec.commonmark.org/0.31.2/#example-148 + // but md4c allows us to distinguish them; so we support underlining (in GitHub dialect). markers += u'_'; underline = fontInfo.underline(); + if (!underline) + endingMarkers = true; } } } @@ -547,7 +649,8 @@ int QTextMarkdownWriter::writeBlock(const QTextBlock &block, bool wrap, bool ign bool breakingLine = false; while (i < fragLen) { if (col >= ColumnLimit) { - m_stream << qtmw_Newline << wrapIndentString; + m_stream << markers << qtmw_Newline << wrapIndentString; + markers.clear(); col = m_wrappedLineIndent; while (i < fragLen && fragmentText[i].isSpace()) ++i; @@ -557,6 +660,13 @@ int QTextMarkdownWriter::writeBlock(const QTextBlock &block, bool wrap, bool ign int wi = nearestWordWrapIndex(fragmentText, j); if (wi < 0) { j = fragLen; + // can't break within the fragment: we need to break already _before_ it + if (endingMarkers) { + m_stream << markers; + markers.clear(); + } + m_stream << qtmw_Newline << wrapIndentString; + col = m_wrappedLineIndent; } else if (wi >= i) { j = wi; breakingLine = true; @@ -580,8 +690,12 @@ int QTextMarkdownWriter::writeBlock(const QTextBlock &block, bool wrap, bool ign col += subfrag.size(); } i = j + 1; - } + } // loop over fragment characters (we know we need to break somewhere) } else { + if (!m_linePrefixWritten && col == wrapIndentString.size()) { + m_stream << m_linePrefix; + col += m_linePrefix.size(); + } m_stream << markers << fragmentText; col += markers.size() + fragmentText.size(); } @@ -613,6 +727,7 @@ int QTextMarkdownWriter::writeBlock(const QTextBlock &block, bool wrap, bool ign } if (missedBlankCodeBlockLine) m_stream << qtmw_Newline; + m_linePrefixWritten = false; return col; } |