From 7dd71e812542c561a00dd792d314843a81c5687c Mon Sep 17 00:00:00 2001 From: Shawn Rutledge Date: Fri, 26 Apr 2019 08:12:18 +0200 Subject: Markdown: blockquotes, code blocks, and generalized nesting Can now detect nested quotes and code blocks inside quotes, and can rewrite the markdown too. QTextHtmlParser sets hard-coded left and right margins, so we need to do the same to be able to read HTML and write markdown, or vice-versa, and to ensure that all views (QTextEdit, QTextBrowser, QML Text etc.) will render it with margins. But now we add a semantic memory too: BlockQuoteLevel is similar to HeadingLevel, which was added in 310daae53926628f80c08e4415b94b90ad525c8f to preserve H1..H6 heading levels, because detecting it via font size didn't make sense in QTextMarkdownWriter. Likewise detecting quote level by its margins didn't make sense; markdown supports nesting quotes; and indenting nested quotes via 40 pixels may be a bit too much, so we should consider it subject to change (and perhaps be able to change it via CSS later on). Since we're adding BlockQuoteLevel and depending on it in QTextMarkdownWriter, it's necessary to set it in QTextHtmlParser to enable HTML->markdown conversion. (But so far, nested blockquotes in HTML are not supported.) Quotes (and nested quotes) can contain indented code blocks, but it seems the reverse is not true (according to https://spec.commonmark.org/0.29/#example-201 ) Quotes can contain fenced code blocks. Quotes can contain lists. Nested lists can be interrupted with nested code blocks and nested quotes. So far the writer assumes all code blocks are the indented type. It will be necessary to add another attribute to remember whether the code block is indented or fenced (assuming that's necessary). Fenced code blocks would work better for writing inside block quotes and list items because the fence is less ambiguous than the indent. Postponing cursor->insertBlock() as long as possible helps with nesting. cursor->insertBlock() needs to be done "just in time" before inserting text that will go in the block. The block and char formats aren't necessarily known until that time. When a nested block (such as a nested quote) ends, the context reverts to the previous block format, which then needs to be re-determined and set before we insert text into the outer block; but if no text will be inserted, no new block is necessary. But we can't use QTextBlockFormat itself as storage, because for some reason bullets become very "sticky" and it becomes impossible to have plain continuation paragraphs inside list items: they all get bullets. Somehow QTextBlockFormat remembers, if we copy it. But we can create a new one each time and it's OK. Change-Id: Icd0529eb90d2b6a3cb57f0104bf78a7be81ede52 Reviewed-by: Gatis Paeglis --- src/gui/text/qtextformat.h | 4 +- src/gui/text/qtexthtmlparser.cpp | 1 + src/gui/text/qtextmarkdownimporter.cpp | 103 ++++++++++++++++++++++++++------- src/gui/text/qtextmarkdownimporter_p.h | 8 +++ src/gui/text/qtextmarkdownwriter.cpp | 58 ++++++++++++------- src/gui/text/qtextmarkdownwriter_p.h | 1 + 6 files changed, 133 insertions(+), 42 deletions(-) (limited to 'src') diff --git a/src/gui/text/qtextformat.h b/src/gui/text/qtextformat.h index 1eb52a379c..a631309ae0 100644 --- a/src/gui/text/qtextformat.h +++ b/src/gui/text/qtextformat.h @@ -176,7 +176,9 @@ public: BlockNonBreakableLines = 0x1050, BlockTrailingHorizontalRulerWidth = 0x1060, HeadingLevel = 0x1070, - BlockMarker = 0x1080, + BlockQuoteLevel = 0x1080, + BlockCodeLanguage = 0x1090, + BlockMarker = 0x10A0, // character properties FirstFontProperty = 0x1FE0, diff --git a/src/gui/text/qtexthtmlparser.cpp b/src/gui/text/qtexthtmlparser.cpp index 895232e4c7..37051502fa 100644 --- a/src/gui/text/qtexthtmlparser.cpp +++ b/src/gui/text/qtexthtmlparser.cpp @@ -1125,6 +1125,7 @@ void QTextHtmlParserNode::initializeProperties(const QTextHtmlParserNode *parent margin[QTextHtmlParser::MarginBottom] = 12; margin[QTextHtmlParser::MarginLeft] = 40; margin[QTextHtmlParser::MarginRight] = 40; + blockFormat.setProperty(QTextFormat::BlockQuoteLevel, 1); break; case Html_dl: margin[QTextHtmlParser::MarginTop] = 8; diff --git a/src/gui/text/qtextmarkdownimporter.cpp b/src/gui/text/qtextmarkdownimporter.cpp index 5cee01d932..d8ffec2496 100644 --- a/src/gui/text/qtextmarkdownimporter.cpp +++ b/src/gui/text/qtextmarkdownimporter.cpp @@ -55,6 +55,9 @@ Q_LOGGING_CATEGORY(lcMD, "qt.text.markdown") static const QChar Newline = QLatin1Char('\n'); static const QChar Space = QLatin1Char(' '); +// TODO maybe eliminate the margins after all views recognize BlockQuoteLevel, CSS can format it, etc. +static const int BlockQuoteIndent = 40; // pixels, same as in QTextHtmlParserNode::initializeProperties + // -------------------------------------------------------- // MD4C callback function wrappers @@ -131,6 +134,7 @@ void QTextMarkdownImporter::import(QTextDocument *doc, const QString &markdown) nullptr // syntax }; m_doc = doc; + m_paragraphMargin = m_doc->defaultFont().pointSize() * 2 / 3; m_cursor = new QTextCursor(doc); doc->clear(); qCDebug(lcMD) << "default font" << doc->defaultFont() << "mono font" << m_monoFont; @@ -146,11 +150,7 @@ int QTextMarkdownImporter::cbEnterBlock(int blockType, void *det) switch (blockType) { case MD_BLOCK_P: if (m_listStack.isEmpty()) { - QTextBlockFormat blockFmt; - int margin = m_doc->defaultFont().pointSize() / 2; - blockFmt.setTopMargin(margin); - blockFmt.setBottomMargin(margin); - m_cursor->insertBlock(blockFmt, QTextCharFormat()); + m_needsInsertBlock = true; qCDebug(lcMD, "P"); } else { if (m_emptyListItem) { @@ -159,18 +159,25 @@ int QTextMarkdownImporter::cbEnterBlock(int blockType, void *det) m_emptyListItem = false; } else { qCDebug(lcMD, "P inside LI at level %d", m_listStack.count()); - QTextBlockFormat blockFmt; - blockFmt.setIndent(m_listStack.count()); - m_cursor->insertBlock(blockFmt, QTextCharFormat()); + m_needsInsertBlock = true; } } break; + case MD_BLOCK_QUOTE: { + ++m_blockQuoteDepth; + qCDebug(lcMD, "QUOTE level %d", m_blockQuoteDepth); + break; + } case MD_BLOCK_CODE: { - QTextBlockFormat blockFmt; - QTextCharFormat charFmt; - charFmt.setFont(m_monoFont); - m_cursor->insertBlock(blockFmt, charFmt); - qCDebug(lcMD, "CODE"); + MD_BLOCK_CODE_DETAIL *detail = static_cast(det); + m_codeBlock = true; + m_blockCodeLanguage = QLatin1String(detail->lang.text, int(detail->lang.size)); + QString info = QLatin1String(detail->info.text, int(detail->info.size)); + m_needsInsertBlock = true; + if (m_blockQuoteDepth) + qCDebug(lcMD, "CODE lang '%s' info '%s' inside QUOTE %d", qPrintable(m_blockCodeLanguage), qPrintable(info), m_blockQuoteDepth); + else + qCDebug(lcMD, "CODE lang '%s' info '%s'", qPrintable(m_blockCodeLanguage), qPrintable(info)); } break; case MD_BLOCK_H: { MD_BLOCK_H_DETAIL *detail = static_cast(det); @@ -180,10 +187,12 @@ int QTextMarkdownImporter::cbEnterBlock(int blockType, void *det) charFmt.setProperty(QTextFormat::FontSizeAdjustment, sizeAdjustment); charFmt.setFontWeight(QFont::Bold); blockFmt.setHeadingLevel(int(detail->level)); + m_needsInsertBlock = false; m_cursor->insertBlock(blockFmt, charFmt); qCDebug(lcMD, "H%d", detail->level); } break; case MD_BLOCK_LI: { + m_needsInsertBlock = false; MD_BLOCK_LI_DETAIL *detail = static_cast(det); QTextList *list = m_listStack.top(); QTextBlockFormat bfmt = list->item(list->count() - 1).blockFormat(); @@ -316,9 +325,9 @@ int QTextMarkdownImporter::cbLeaveBlock(int blockType, void *detail) } } break; case MD_BLOCK_QUOTE: { - QTextBlockFormat blockFmt = m_cursor->blockFormat(); - blockFmt.setIndent(1); - m_cursor->setBlockFormat(blockFmt); + qCDebug(lcMD, "QUOTE level %d ended", m_blockQuoteDepth); + --m_blockQuoteDepth; + m_needsInsertBlock = true; } break; case MD_BLOCK_TABLE: qCDebug(lcMD) << "table ended with" << m_currentTable->columns() << "cols and" << m_currentTable->rows() << "rows"; @@ -329,7 +338,15 @@ int QTextMarkdownImporter::cbLeaveBlock(int blockType, void *detail) qCDebug(lcMD, "LI at level %d ended", m_listStack.count()); m_listItem = false; break; - case MD_BLOCK_CODE: + case MD_BLOCK_CODE: { + m_codeBlock = false; + m_blockCodeLanguage.clear(); + if (m_blockQuoteDepth) + qCDebug(lcMD, "CODE ended inside QUOTE %d", m_blockQuoteDepth); + else + qCDebug(lcMD, "CODE ended"); + m_needsInsertBlock = true; + } break; case MD_BLOCK_H: m_cursor->setCharFormat(QTextCharFormat()); break; @@ -365,6 +382,8 @@ int QTextMarkdownImporter::cbEnterSpan(int spanType, void *det) QString title = QString::fromUtf8(detail->title.text, int(detail->title.size)); QTextImageFormat img; img.setName(src); + if (m_needsInsertBlock) + insertBlock(); qCDebug(lcMD) << "image" << src << "title" << title << "relative to" << m_doc->baseUrl(); m_cursor->insertImage(img); break; @@ -377,6 +396,8 @@ int QTextMarkdownImporter::cbEnterSpan(int spanType, void *det) break; } m_spanFormatStack.push(charFmt); + qCDebug(lcMD) << spanType << "setCharFormat" << charFmt.font().family() << charFmt.fontWeight() + << (charFmt.fontItalic() ? "italic" : "") << charFmt.foreground().color().name(); m_cursor->setCharFormat(charFmt); return 0; // no error } @@ -391,6 +412,8 @@ int QTextMarkdownImporter::cbLeaveSpan(int spanType, void *detail) charFmt = m_spanFormatStack.top(); } m_cursor->setCharFormat(charFmt); + qCDebug(lcMD) << spanType << "setCharFormat" << charFmt.font().family() << charFmt.fontWeight() + << (charFmt.fontItalic() ? "italic" : "") << charFmt.foreground().color().name(); if (spanType == int(MD_SPAN_IMG)) m_imageSpan = false; return 0; // no error @@ -400,6 +423,8 @@ int QTextMarkdownImporter::cbText(int textType, const char *text, unsigned size) { if (m_imageSpan) return 0; // it's the alt-text + if (m_needsInsertBlock) + insertBlock(); static const QRegularExpression openingBracket(QStringLiteral("<[a-zA-Z]")); static const QRegularExpression closingBracket(QStringLiteral("(/>|insertText(s); if (m_cursor->currentList()) { // The list item will indent the list item's text, so we don't need indentation on the block. - QTextBlockFormat blockFmt = m_cursor->blockFormat(); - blockFmt.setIndent(0); - m_cursor->setBlockFormat(blockFmt); + QTextBlockFormat bfmt = m_cursor->blockFormat(); + bfmt.setIndent(0); + m_cursor->setBlockFormat(bfmt); + } + if (lcMD().isEnabled(QtDebugMsg)) { + QTextBlockFormat bfmt = m_cursor->blockFormat(); + QString debugInfo; + if (m_cursor->currentList()) + debugInfo = QLatin1String("in list at depth ") + QString::number(m_cursor->currentList()->format().indent()); + if (bfmt.hasProperty(QTextFormat::BlockQuoteLevel)) + debugInfo += QLatin1String("in blockquote at depth ") + + QString::number(bfmt.intProperty(QTextFormat::BlockQuoteLevel)); + if (bfmt.hasProperty(QTextFormat::BlockCodeLanguage)) + debugInfo += QLatin1String("in a code block"); + qCDebug(lcMD) << textType << "in block" << m_blockType << s << qPrintable(debugInfo) + << "bindent" << bfmt.indent() << "tindent" << bfmt.textIndent() + << "margins" << bfmt.leftMargin() << bfmt.topMargin() << bfmt.bottomMargin() << bfmt.rightMargin(); } qCDebug(lcMD) << textType << "in block" << m_blockType << s << "in list?" << m_cursor->currentList() << "indent" << m_cursor->blockFormat().indent(); return 0; // no error } +void QTextMarkdownImporter::insertBlock() +{ + QTextCharFormat charFormat; + if (!m_spanFormatStack.isEmpty()) + charFormat = m_spanFormatStack.top(); + QTextBlockFormat blockFormat; + if (m_blockQuoteDepth) { + blockFormat.setProperty(QTextFormat::BlockQuoteLevel, m_blockQuoteDepth); + blockFormat.setLeftMargin(BlockQuoteIndent * m_blockQuoteDepth); + blockFormat.setRightMargin(BlockQuoteIndent); + } + if (m_listStack.count()) + blockFormat.setIndent(m_listStack.count()); + if (m_codeBlock) { + blockFormat.setProperty(QTextFormat::BlockCodeLanguage, m_blockCodeLanguage); + charFormat.setFont(m_monoFont); + } else { + blockFormat.setTopMargin(m_paragraphMargin); + blockFormat.setBottomMargin(m_paragraphMargin); + } + m_cursor->insertBlock(blockFormat, charFormat); + m_needsInsertBlock = false; +} + QT_END_NAMESPACE diff --git a/src/gui/text/qtextmarkdownimporter_p.h b/src/gui/text/qtextmarkdownimporter_p.h index 8ab119d051..1716530b1d 100644 --- a/src/gui/text/qtextmarkdownimporter_p.h +++ b/src/gui/text/qtextmarkdownimporter_p.h @@ -99,26 +99,34 @@ public: int cbLeaveSpan(int spanType, void* detail); int cbText(int textType, const char* text, unsigned size); +private: + void insertBlock(); + private: QTextDocument *m_doc = nullptr; QTextCursor *m_cursor = nullptr; QTextTable *m_currentTable = nullptr; // because m_cursor->currentTable() doesn't work QString m_htmlAccumulator; + QString m_blockCodeLanguage; QVector m_nonEmptyTableCells; // in the current row QStack m_listStack; QStack m_spanFormatStack; QFont m_monoFont; QPalette m_palette; int m_htmlTagDepth = 0; + int m_blockQuoteDepth = 0; int m_tableColumnCount = 0; int m_tableRowCount = 0; int m_tableCol = -1; // because relative cell movements (e.g. m_cursor->movePosition(QTextCursor::NextCell)) don't work + int m_paragraphMargin = 0; Features m_features; int m_blockType = 0; bool m_emptyList = false; // true when the last thing we did was insertList bool m_listItem = false; bool m_emptyListItem = false; + bool m_codeBlock = false; bool m_imageSpan = false; + bool m_needsInsertBlock = false; }; Q_DECLARE_OPERATORS_FOR_FLAGS(QTextMarkdownImporter::Features) diff --git a/src/gui/text/qtextmarkdownwriter.cpp b/src/gui/text/qtextmarkdownwriter.cpp index a445ee7e83..f180098db2 100644 --- a/src/gui/text/qtextmarkdownwriter.cpp +++ b/src/gui/text/qtextmarkdownwriter.cpp @@ -106,7 +106,7 @@ void QTextMarkdownWriter::writeFrame(const QTextFrame *frame) Q_ASSERT(frame); const QTextTable *table = qobject_cast (frame); QTextFrame::iterator iterator = frame->begin(); - QTextFrame *child = 0; + QTextFrame *child = nullptr; int tableRow = -1; bool lastWasList = false; QVector tableColumnWidths; @@ -161,7 +161,7 @@ void QTextMarkdownWriter::writeFrame(const QTextFrame *frame) m_stream << QString(paddingLen, Space); for (int col = cell.column(); col < spanEndCol; ++col) m_stream << "|"; - } else if (block.textList()) { + } else if (block.textList() || block.blockFormat().hasProperty(QTextFormat::BlockCodeLanguage)) { m_stream << Newline; } else if (endingCol > 0) { m_stream << Newline << Newline; @@ -252,6 +252,8 @@ static void maybeEscapeFirstChar(QString &s) int QTextMarkdownWriter::writeBlock(const QTextBlock &block, bool wrap, bool ignoreFormat) { int ColumnLimit = 80; + QTextBlockFormat blockFmt = block.blockFormat(); + bool indentedCodeBlock = false; if (block.textList()) { // it's a list-item auto fmt = block.textList()->format(); const int listLevel = fmt.indent(); @@ -281,7 +283,7 @@ int QTextMarkdownWriter::writeBlock(const QTextBlock &block, bool wrap, bool ign m_wrappedLineIndent = 4; break; } - switch (block.blockFormat().marker()) { + switch (blockFmt.marker()) { case QTextBlockFormat::Checked: bullet += " [x]"; break; @@ -309,21 +311,35 @@ int QTextMarkdownWriter::writeBlock(const QTextBlock &block, bool wrap, bool ign prefix += QLatin1String(bullet) + Space; } m_stream << prefix; - } else if (block.blockFormat().hasProperty(QTextFormat::BlockTrailingHorizontalRulerWidth)) { + } else if (blockFmt.hasProperty(QTextFormat::BlockTrailingHorizontalRulerWidth)) { m_stream << "- - -\n"; // unambiguous horizontal rule, not an underline under a heading return 0; - } else if (!block.blockFormat().indent()) { + } else if (!blockFmt.indent()) { m_wrappedLineIndent = 0; + m_linePrefix.clear(); + if (blockFmt.hasProperty(QTextFormat::BlockQuoteLevel)) { + int level = blockFmt.intProperty(QTextFormat::BlockQuoteLevel); + QString quoteMarker = QStringLiteral("> "); + m_linePrefix.reserve(level * 2); + for (int i = 0; i < level; ++i) + m_linePrefix += quoteMarker; + } + if (blockFmt.hasProperty(QTextFormat::BlockCodeLanguage)) { + // A block quote can contain an indented code block, but not vice-versa. + m_linePrefix += QString(4, Space); + indentedCodeBlock = true; + } } + if (blockFmt.headingLevel()) + m_stream << QByteArray(blockFmt.headingLevel(), '#') << ' '; + else + m_stream << m_linePrefix; - if (block.blockFormat().headingLevel()) - m_stream << QByteArray(block.blockFormat().headingLevel(), '#') << ' '; - - QString wrapIndentString(m_wrappedLineIndent, Space); + QString wrapIndentString = m_linePrefix + QString(m_wrappedLineIndent, Space); // It would be convenient if QTextStream had a lineCharPos() accessor, // to keep track of how many characters (not bytes) have been written on the current line, // but it doesn't. So we have to keep track with this col variable. - int col = m_wrappedLineIndent; + int col = wrapIndentString.length(); bool mono = false; bool startsOrEndsWithBacktick = false; bool bold = false; @@ -338,7 +354,7 @@ int QTextMarkdownWriter::writeBlock(const QTextBlock &block, bool wrap, bool ign if (block.textList()) { //
  • first line
    continuation
  • QString newlineIndent = QString(Newline) + QString(m_wrappedLineIndent, Space); fragmentText.replace(QString(LineBreak), newlineIndent); - } else if (block.blockFormat().indent() > 0) { //
  • first line

    continuation

  • + } else if (blockFmt.indent() > 0) { //
  • first line

    continuation

  • m_stream << QString(m_wrappedLineIndent, Space); } else { fragmentText.replace(LineBreak, Newline); @@ -368,7 +384,7 @@ int QTextMarkdownWriter::writeBlock(const QTextBlock &block, bool wrap, bool ign bool monoFrag = fontInfo.fixedPitch(); QString markers; if (!ignoreFormat) { - if (monoFrag != mono) { + if (monoFrag != mono && !indentedCodeBlock) { if (monoFrag) backticks = QString(adjacentBackticksCount(fragmentText) + 1, Backtick); markers += backticks; @@ -376,25 +392,25 @@ int QTextMarkdownWriter::writeBlock(const QTextBlock &block, bool wrap, bool ign markers += Space; mono = monoFrag; } - if (!block.blockFormat().headingLevel() && !mono) { - if (fmt.font().bold() != bold) { + if (!blockFmt.headingLevel() && !mono) { + if (fontInfo.bold() != bold) { markers += QLatin1String("**"); - bold = fmt.font().bold(); + bold = fontInfo.bold(); } - if (fmt.font().italic() != italic) { + if (fontInfo.italic() != italic) { markers += QLatin1Char('*'); - italic = fmt.font().italic(); + italic = fontInfo.italic(); } - if (fmt.font().strikeOut() != strikeOut) { + if (fontInfo.strikeOut() != strikeOut) { markers += QLatin1String("~~"); - strikeOut = fmt.font().strikeOut(); + strikeOut = fontInfo.strikeOut(); } - if (fmt.font().underline() != underline) { + if (fontInfo.underline() != underline) { // Markdown doesn't support underline, but the parser will treat a single underline // the same as a single asterisk, and the marked fragment will be rendered in italics. // That will have to do. markers += QLatin1Char('_'); - underline = fmt.font().underline(); + underline = fontInfo.underline(); } } } diff --git a/src/gui/text/qtextmarkdownwriter_p.h b/src/gui/text/qtextmarkdownwriter_p.h index 4c07bad2e7..96ceb445cd 100644 --- a/src/gui/text/qtextmarkdownwriter_p.h +++ b/src/gui/text/qtextmarkdownwriter_p.h @@ -81,6 +81,7 @@ private: QTextStream &m_stream; QTextDocument::MarkdownFeatures m_features; QMap m_listInfo; + QString m_linePrefix; int m_wrappedLineIndent = 0; int m_lastListIndent = 1; bool m_doubleNewlineWritten = false; -- cgit v1.2.3