diff options
author | Shawn Rutledge <shawn.rutledge@qt.io> | 2019-04-26 08:12:18 +0200 |
---|---|---|
committer | Shawn Rutledge <shawn.rutledge@qt.io> | 2019-05-08 20:28:53 +0000 |
commit | 7dd71e812542c561a00dd792d314843a81c5687c (patch) | |
tree | f3b1c60d4d323f47786a206279e1468a16501aef /src | |
parent | 82b26444a456d4d5ddf5f483b7766977659bee35 (diff) |
Markdown: blockquotes, code blocks, and generalized nesting
Can now detect nested quotes and code blocks inside quotes, and can
rewrite the markdown too.
QTextHtmlParser sets hard-coded left and right margins, so we need to do
the same to be able to read HTML and write markdown, or vice-versa,
and to ensure that all views (QTextEdit, QTextBrowser, QML Text etc.)
will render it with margins. But now we add a semantic memory too:
BlockQuoteLevel is similar to HeadingLevel, which was added in
310daae53926628f80c08e4415b94b90ad525c8f to preserve H1..H6 heading
levels, because detecting it via font size didn't make sense in
QTextMarkdownWriter. Likewise, detecting the quote level from its margins
didn't make sense: markdown supports nested quotes, and indenting each
nesting level by 40 pixels may be a bit too much, so we should consider the
indent subject to change (and perhaps be able to change it via CSS later on).
Since we're adding BlockQuoteLevel and depending on it in QTextMarkdownWriter,
it's necessary to set it in QTextHtmlParser to enable HTML->markdown
conversion. (But so far, nested blockquotes in HTML are not supported.)
Quotes (and nested quotes) can contain indented code blocks, but it seems
the reverse is not true (according to https://spec.commonmark.org/0.29/#example-201).
Quotes can contain fenced code blocks.
Quotes can contain lists. Nested lists can be interrupted with
nested code blocks and nested quotes.
So far the writer assumes all code blocks are the indented type.
If the distinction needs to be preserved, another attribute will have to be
added to remember whether the code block is indented or fenced.
Fenced code blocks would work better for writing inside block quotes
and list items because the fence is less ambiguous than the indent.
Postponing cursor->insertBlock() as long as possible helps with nesting.
cursor->insertBlock() needs to be done "just in time" before inserting
text that will go in the block. The block and char formats aren't
necessarily known until that time. When a nested block (such as a
nested quote) ends, the context reverts to the previous block format,
which then needs to be re-determined and set before we insert text
into the outer block; but if no text will be inserted, no new block
is necessary. But we can't use QTextBlockFormat itself as storage,
because for some reason bullets become very "sticky" and it becomes
impossible to have plain continuation paragraphs inside list items:
they all get bullets. Somehow QTextBlockFormat remembers the bullet if we copy it.
But creating a new QTextBlockFormat each time avoids the problem.
Change-Id: Icd0529eb90d2b6a3cb57f0104bf78a7be81ede52
Reviewed-by: Gatis Paeglis <gatis.paeglis@qt.io>
Diffstat (limited to 'src')
-rw-r--r-- | src/gui/text/qtextformat.h | 4 | ||||
-rw-r--r-- | src/gui/text/qtexthtmlparser.cpp | 1 | ||||
-rw-r--r-- | src/gui/text/qtextmarkdownimporter.cpp | 103 | ||||
-rw-r--r-- | src/gui/text/qtextmarkdownimporter_p.h | 8 | ||||
-rw-r--r-- | src/gui/text/qtextmarkdownwriter.cpp | 58 | ||||
-rw-r--r-- | src/gui/text/qtextmarkdownwriter_p.h | 1 |
6 files changed, 133 insertions, 42 deletions
diff --git a/src/gui/text/qtextformat.h b/src/gui/text/qtextformat.h index 1eb52a379c..a631309ae0 100644 --- a/src/gui/text/qtextformat.h +++ b/src/gui/text/qtextformat.h @@ -176,7 +176,9 @@ public: BlockNonBreakableLines = 0x1050, BlockTrailingHorizontalRulerWidth = 0x1060, HeadingLevel = 0x1070, - BlockMarker = 0x1080, + BlockQuoteLevel = 0x1080, + BlockCodeLanguage = 0x1090, + BlockMarker = 0x10A0, // character properties FirstFontProperty = 0x1FE0, diff --git a/src/gui/text/qtexthtmlparser.cpp b/src/gui/text/qtexthtmlparser.cpp index 895232e4c7..37051502fa 100644 --- a/src/gui/text/qtexthtmlparser.cpp +++ b/src/gui/text/qtexthtmlparser.cpp @@ -1125,6 +1125,7 @@ void QTextHtmlParserNode::initializeProperties(const QTextHtmlParserNode *parent margin[QTextHtmlParser::MarginBottom] = 12; margin[QTextHtmlParser::MarginLeft] = 40; margin[QTextHtmlParser::MarginRight] = 40; + blockFormat.setProperty(QTextFormat::BlockQuoteLevel, 1); break; case Html_dl: margin[QTextHtmlParser::MarginTop] = 8; diff --git a/src/gui/text/qtextmarkdownimporter.cpp b/src/gui/text/qtextmarkdownimporter.cpp index 5cee01d932..d8ffec2496 100644 --- a/src/gui/text/qtextmarkdownimporter.cpp +++ b/src/gui/text/qtextmarkdownimporter.cpp @@ -55,6 +55,9 @@ Q_LOGGING_CATEGORY(lcMD, "qt.text.markdown") static const QChar Newline = QLatin1Char('\n'); static const QChar Space = QLatin1Char(' '); +// TODO maybe eliminate the margins after all views recognize BlockQuoteLevel, CSS can format it, etc. 
+static const int BlockQuoteIndent = 40; // pixels, same as in QTextHtmlParserNode::initializeProperties + // -------------------------------------------------------- // MD4C callback function wrappers @@ -131,6 +134,7 @@ void QTextMarkdownImporter::import(QTextDocument *doc, const QString &markdown) nullptr // syntax }; m_doc = doc; + m_paragraphMargin = m_doc->defaultFont().pointSize() * 2 / 3; m_cursor = new QTextCursor(doc); doc->clear(); qCDebug(lcMD) << "default font" << doc->defaultFont() << "mono font" << m_monoFont; @@ -146,11 +150,7 @@ int QTextMarkdownImporter::cbEnterBlock(int blockType, void *det) switch (blockType) { case MD_BLOCK_P: if (m_listStack.isEmpty()) { - QTextBlockFormat blockFmt; - int margin = m_doc->defaultFont().pointSize() / 2; - blockFmt.setTopMargin(margin); - blockFmt.setBottomMargin(margin); - m_cursor->insertBlock(blockFmt, QTextCharFormat()); + m_needsInsertBlock = true; qCDebug(lcMD, "P"); } else { if (m_emptyListItem) { @@ -159,18 +159,25 @@ int QTextMarkdownImporter::cbEnterBlock(int blockType, void *det) m_emptyListItem = false; } else { qCDebug(lcMD, "P inside LI at level %d", m_listStack.count()); - QTextBlockFormat blockFmt; - blockFmt.setIndent(m_listStack.count()); - m_cursor->insertBlock(blockFmt, QTextCharFormat()); + m_needsInsertBlock = true; } } break; + case MD_BLOCK_QUOTE: { + ++m_blockQuoteDepth; + qCDebug(lcMD, "QUOTE level %d", m_blockQuoteDepth); + break; + } case MD_BLOCK_CODE: { - QTextBlockFormat blockFmt; - QTextCharFormat charFmt; - charFmt.setFont(m_monoFont); - m_cursor->insertBlock(blockFmt, charFmt); - qCDebug(lcMD, "CODE"); + MD_BLOCK_CODE_DETAIL *detail = static_cast<MD_BLOCK_CODE_DETAIL *>(det); + m_codeBlock = true; + m_blockCodeLanguage = QLatin1String(detail->lang.text, int(detail->lang.size)); + QString info = QLatin1String(detail->info.text, int(detail->info.size)); + m_needsInsertBlock = true; + if (m_blockQuoteDepth) + qCDebug(lcMD, "CODE lang '%s' info '%s' inside QUOTE %d", 
qPrintable(m_blockCodeLanguage), qPrintable(info), m_blockQuoteDepth); + else + qCDebug(lcMD, "CODE lang '%s' info '%s'", qPrintable(m_blockCodeLanguage), qPrintable(info)); } break; case MD_BLOCK_H: { MD_BLOCK_H_DETAIL *detail = static_cast<MD_BLOCK_H_DETAIL *>(det); @@ -180,10 +187,12 @@ int QTextMarkdownImporter::cbEnterBlock(int blockType, void *det) charFmt.setProperty(QTextFormat::FontSizeAdjustment, sizeAdjustment); charFmt.setFontWeight(QFont::Bold); blockFmt.setHeadingLevel(int(detail->level)); + m_needsInsertBlock = false; m_cursor->insertBlock(blockFmt, charFmt); qCDebug(lcMD, "H%d", detail->level); } break; case MD_BLOCK_LI: { + m_needsInsertBlock = false; MD_BLOCK_LI_DETAIL *detail = static_cast<MD_BLOCK_LI_DETAIL *>(det); QTextList *list = m_listStack.top(); QTextBlockFormat bfmt = list->item(list->count() - 1).blockFormat(); @@ -316,9 +325,9 @@ int QTextMarkdownImporter::cbLeaveBlock(int blockType, void *detail) } } break; case MD_BLOCK_QUOTE: { - QTextBlockFormat blockFmt = m_cursor->blockFormat(); - blockFmt.setIndent(1); - m_cursor->setBlockFormat(blockFmt); + qCDebug(lcMD, "QUOTE level %d ended", m_blockQuoteDepth); + --m_blockQuoteDepth; + m_needsInsertBlock = true; } break; case MD_BLOCK_TABLE: qCDebug(lcMD) << "table ended with" << m_currentTable->columns() << "cols and" << m_currentTable->rows() << "rows"; @@ -329,7 +338,15 @@ int QTextMarkdownImporter::cbLeaveBlock(int blockType, void *detail) qCDebug(lcMD, "LI at level %d ended", m_listStack.count()); m_listItem = false; break; - case MD_BLOCK_CODE: + case MD_BLOCK_CODE: { + m_codeBlock = false; + m_blockCodeLanguage.clear(); + if (m_blockQuoteDepth) + qCDebug(lcMD, "CODE ended inside QUOTE %d", m_blockQuoteDepth); + else + qCDebug(lcMD, "CODE ended"); + m_needsInsertBlock = true; + } break; case MD_BLOCK_H: m_cursor->setCharFormat(QTextCharFormat()); break; @@ -365,6 +382,8 @@ int QTextMarkdownImporter::cbEnterSpan(int spanType, void *det) QString title = 
QString::fromUtf8(detail->title.text, int(detail->title.size)); QTextImageFormat img; img.setName(src); + if (m_needsInsertBlock) + insertBlock(); qCDebug(lcMD) << "image" << src << "title" << title << "relative to" << m_doc->baseUrl(); m_cursor->insertImage(img); break; @@ -377,6 +396,8 @@ int QTextMarkdownImporter::cbEnterSpan(int spanType, void *det) break; } m_spanFormatStack.push(charFmt); + qCDebug(lcMD) << spanType << "setCharFormat" << charFmt.font().family() << charFmt.fontWeight() + << (charFmt.fontItalic() ? "italic" : "") << charFmt.foreground().color().name(); m_cursor->setCharFormat(charFmt); return 0; // no error } @@ -391,6 +412,8 @@ int QTextMarkdownImporter::cbLeaveSpan(int spanType, void *detail) charFmt = m_spanFormatStack.top(); } m_cursor->setCharFormat(charFmt); + qCDebug(lcMD) << spanType << "setCharFormat" << charFmt.font().family() << charFmt.fontWeight() + << (charFmt.fontItalic() ? "italic" : "") << charFmt.foreground().color().name(); if (spanType == int(MD_SPAN_IMG)) m_imageSpan = false; return 0; // no error @@ -400,6 +423,8 @@ int QTextMarkdownImporter::cbText(int textType, const char *text, unsigned size) { if (m_imageSpan) return 0; // it's the alt-text + if (m_needsInsertBlock) + insertBlock(); static const QRegularExpression openingBracket(QStringLiteral("<[a-zA-Z]")); static const QRegularExpression closingBracket(QStringLiteral("(/>|</)")); QString s = QString::fromUtf8(text, int(size)); @@ -467,13 +492,51 @@ int QTextMarkdownImporter::cbText(int textType, const char *text, unsigned size) m_cursor->insertText(s); if (m_cursor->currentList()) { // The list item will indent the list item's text, so we don't need indentation on the block. 
- QTextBlockFormat blockFmt = m_cursor->blockFormat(); - blockFmt.setIndent(0); - m_cursor->setBlockFormat(blockFmt); + QTextBlockFormat bfmt = m_cursor->blockFormat(); + bfmt.setIndent(0); + m_cursor->setBlockFormat(bfmt); + } + if (lcMD().isEnabled(QtDebugMsg)) { + QTextBlockFormat bfmt = m_cursor->blockFormat(); + QString debugInfo; + if (m_cursor->currentList()) + debugInfo = QLatin1String("in list at depth ") + QString::number(m_cursor->currentList()->format().indent()); + if (bfmt.hasProperty(QTextFormat::BlockQuoteLevel)) + debugInfo += QLatin1String("in blockquote at depth ") + + QString::number(bfmt.intProperty(QTextFormat::BlockQuoteLevel)); + if (bfmt.hasProperty(QTextFormat::BlockCodeLanguage)) + debugInfo += QLatin1String("in a code block"); + qCDebug(lcMD) << textType << "in block" << m_blockType << s << qPrintable(debugInfo) + << "bindent" << bfmt.indent() << "tindent" << bfmt.textIndent() + << "margins" << bfmt.leftMargin() << bfmt.topMargin() << bfmt.bottomMargin() << bfmt.rightMargin(); } qCDebug(lcMD) << textType << "in block" << m_blockType << s << "in list?" 
<< m_cursor->currentList() << "indent" << m_cursor->blockFormat().indent(); return 0; // no error } +void QTextMarkdownImporter::insertBlock() +{ + QTextCharFormat charFormat; + if (!m_spanFormatStack.isEmpty()) + charFormat = m_spanFormatStack.top(); + QTextBlockFormat blockFormat; + if (m_blockQuoteDepth) { + blockFormat.setProperty(QTextFormat::BlockQuoteLevel, m_blockQuoteDepth); + blockFormat.setLeftMargin(BlockQuoteIndent * m_blockQuoteDepth); + blockFormat.setRightMargin(BlockQuoteIndent); + } + if (m_listStack.count()) + blockFormat.setIndent(m_listStack.count()); + if (m_codeBlock) { + blockFormat.setProperty(QTextFormat::BlockCodeLanguage, m_blockCodeLanguage); + charFormat.setFont(m_monoFont); + } else { + blockFormat.setTopMargin(m_paragraphMargin); + blockFormat.setBottomMargin(m_paragraphMargin); + } + m_cursor->insertBlock(blockFormat, charFormat); + m_needsInsertBlock = false; +} + QT_END_NAMESPACE diff --git a/src/gui/text/qtextmarkdownimporter_p.h b/src/gui/text/qtextmarkdownimporter_p.h index 8ab119d051..1716530b1d 100644 --- a/src/gui/text/qtextmarkdownimporter_p.h +++ b/src/gui/text/qtextmarkdownimporter_p.h @@ -100,25 +100,33 @@ public: int cbText(int textType, const char* text, unsigned size); private: + void insertBlock(); + +private: QTextDocument *m_doc = nullptr; QTextCursor *m_cursor = nullptr; QTextTable *m_currentTable = nullptr; // because m_cursor->currentTable() doesn't work QString m_htmlAccumulator; + QString m_blockCodeLanguage; QVector<int> m_nonEmptyTableCells; // in the current row QStack<QTextList *> m_listStack; QStack<QTextCharFormat> m_spanFormatStack; QFont m_monoFont; QPalette m_palette; int m_htmlTagDepth = 0; + int m_blockQuoteDepth = 0; int m_tableColumnCount = 0; int m_tableRowCount = 0; int m_tableCol = -1; // because relative cell movements (e.g. 
m_cursor->movePosition(QTextCursor::NextCell)) don't work + int m_paragraphMargin = 0; Features m_features; int m_blockType = 0; bool m_emptyList = false; // true when the last thing we did was insertList bool m_listItem = false; bool m_emptyListItem = false; + bool m_codeBlock = false; bool m_imageSpan = false; + bool m_needsInsertBlock = false; }; Q_DECLARE_OPERATORS_FOR_FLAGS(QTextMarkdownImporter::Features) diff --git a/src/gui/text/qtextmarkdownwriter.cpp b/src/gui/text/qtextmarkdownwriter.cpp index a445ee7e83..f180098db2 100644 --- a/src/gui/text/qtextmarkdownwriter.cpp +++ b/src/gui/text/qtextmarkdownwriter.cpp @@ -106,7 +106,7 @@ void QTextMarkdownWriter::writeFrame(const QTextFrame *frame) Q_ASSERT(frame); const QTextTable *table = qobject_cast<const QTextTable*> (frame); QTextFrame::iterator iterator = frame->begin(); - QTextFrame *child = 0; + QTextFrame *child = nullptr; int tableRow = -1; bool lastWasList = false; QVector<int> tableColumnWidths; @@ -161,7 +161,7 @@ void QTextMarkdownWriter::writeFrame(const QTextFrame *frame) m_stream << QString(paddingLen, Space); for (int col = cell.column(); col < spanEndCol; ++col) m_stream << "|"; - } else if (block.textList()) { + } else if (block.textList() || block.blockFormat().hasProperty(QTextFormat::BlockCodeLanguage)) { m_stream << Newline; } else if (endingCol > 0) { m_stream << Newline << Newline; @@ -252,6 +252,8 @@ static void maybeEscapeFirstChar(QString &s) int QTextMarkdownWriter::writeBlock(const QTextBlock &block, bool wrap, bool ignoreFormat) { int ColumnLimit = 80; + QTextBlockFormat blockFmt = block.blockFormat(); + bool indentedCodeBlock = false; if (block.textList()) { // it's a list-item auto fmt = block.textList()->format(); const int listLevel = fmt.indent(); @@ -281,7 +283,7 @@ int QTextMarkdownWriter::writeBlock(const QTextBlock &block, bool wrap, bool ign m_wrappedLineIndent = 4; break; } - switch (block.blockFormat().marker()) { + switch (blockFmt.marker()) { case 
QTextBlockFormat::Checked: bullet += " [x]"; break; @@ -309,21 +311,35 @@ int QTextMarkdownWriter::writeBlock(const QTextBlock &block, bool wrap, bool ign prefix += QLatin1String(bullet) + Space; } m_stream << prefix; - } else if (block.blockFormat().hasProperty(QTextFormat::BlockTrailingHorizontalRulerWidth)) { + } else if (blockFmt.hasProperty(QTextFormat::BlockTrailingHorizontalRulerWidth)) { m_stream << "- - -\n"; // unambiguous horizontal rule, not an underline under a heading return 0; - } else if (!block.blockFormat().indent()) { + } else if (!blockFmt.indent()) { m_wrappedLineIndent = 0; + m_linePrefix.clear(); + if (blockFmt.hasProperty(QTextFormat::BlockQuoteLevel)) { + int level = blockFmt.intProperty(QTextFormat::BlockQuoteLevel); + QString quoteMarker = QStringLiteral("> "); + m_linePrefix.reserve(level * 2); + for (int i = 0; i < level; ++i) + m_linePrefix += quoteMarker; + } + if (blockFmt.hasProperty(QTextFormat::BlockCodeLanguage)) { + // A block quote can contain an indented code block, but not vice-versa. + m_linePrefix += QString(4, Space); + indentedCodeBlock = true; + } } + if (blockFmt.headingLevel()) + m_stream << QByteArray(blockFmt.headingLevel(), '#') << ' '; + else + m_stream << m_linePrefix; - if (block.blockFormat().headingLevel()) - m_stream << QByteArray(block.blockFormat().headingLevel(), '#') << ' '; - - QString wrapIndentString(m_wrappedLineIndent, Space); + QString wrapIndentString = m_linePrefix + QString(m_wrappedLineIndent, Space); // It would be convenient if QTextStream had a lineCharPos() accessor, // to keep track of how many characters (not bytes) have been written on the current line, // but it doesn't. So we have to keep track with this col variable. 
- int col = m_wrappedLineIndent; + int col = wrapIndentString.length(); bool mono = false; bool startsOrEndsWithBacktick = false; bool bold = false; @@ -338,7 +354,7 @@ int QTextMarkdownWriter::writeBlock(const QTextBlock &block, bool wrap, bool ign if (block.textList()) { // <li>first line</br>continuation</li> QString newlineIndent = QString(Newline) + QString(m_wrappedLineIndent, Space); fragmentText.replace(QString(LineBreak), newlineIndent); - } else if (block.blockFormat().indent() > 0) { // <li>first line<p>continuation</p></li> + } else if (blockFmt.indent() > 0) { // <li>first line<p>continuation</p></li> m_stream << QString(m_wrappedLineIndent, Space); } else { fragmentText.replace(LineBreak, Newline); @@ -368,7 +384,7 @@ int QTextMarkdownWriter::writeBlock(const QTextBlock &block, bool wrap, bool ign bool monoFrag = fontInfo.fixedPitch(); QString markers; if (!ignoreFormat) { - if (monoFrag != mono) { + if (monoFrag != mono && !indentedCodeBlock) { if (monoFrag) backticks = QString(adjacentBackticksCount(fragmentText) + 1, Backtick); markers += backticks; @@ -376,25 +392,25 @@ int QTextMarkdownWriter::writeBlock(const QTextBlock &block, bool wrap, bool ign markers += Space; mono = monoFrag; } - if (!block.blockFormat().headingLevel() && !mono) { - if (fmt.font().bold() != bold) { + if (!blockFmt.headingLevel() && !mono) { + if (fontInfo.bold() != bold) { markers += QLatin1String("**"); - bold = fmt.font().bold(); + bold = fontInfo.bold(); } - if (fmt.font().italic() != italic) { + if (fontInfo.italic() != italic) { markers += QLatin1Char('*'); - italic = fmt.font().italic(); + italic = fontInfo.italic(); } - if (fmt.font().strikeOut() != strikeOut) { + if (fontInfo.strikeOut() != strikeOut) { markers += QLatin1String("~~"); - strikeOut = fmt.font().strikeOut(); + strikeOut = fontInfo.strikeOut(); } - if (fmt.font().underline() != underline) { + if (fontInfo.underline() != underline) { // Markdown doesn't support underline, but the parser will treat a 
single underline // the same as a single asterisk, and the marked fragment will be rendered in italics. // That will have to do. markers += QLatin1Char('_'); - underline = fmt.font().underline(); + underline = fontInfo.underline(); } } } diff --git a/src/gui/text/qtextmarkdownwriter_p.h b/src/gui/text/qtextmarkdownwriter_p.h index 4c07bad2e7..96ceb445cd 100644 --- a/src/gui/text/qtextmarkdownwriter_p.h +++ b/src/gui/text/qtextmarkdownwriter_p.h @@ -81,6 +81,7 @@ private: QTextStream &m_stream; QTextDocument::MarkdownFeatures m_features; QMap<QTextList *, ListInfo> m_listInfo; + QString m_linePrefix; int m_wrappedLineIndent = 0; int m_lastListIndent = 1; bool m_doubleNewlineWritten = false; |