From b3cc9403c4f7814d3e14915fdacee42b3a805073 Mon Sep 17 00:00:00 2001 From: Shawn Rutledge Date: Mon, 20 May 2019 17:40:12 +0200 Subject: QTextMarkdownWriter: write fenced code blocks with language declaration MD4C now makes it possible to detect indented and fenced code blocks: https://github.com/mity/md4c/issues/81 Fenced code blocks have the advantages of being easier to write by hand, and having an "info string" following the opening fence, which is commonly used to declare the language. Also, the HTML parser now recognizes tags of the form
<pre class="language-pseudocode">, which is one convention for declaring the programming language
(as opposed to human language, for which the lang attribute would be used):
https://stackoverflow.com/questions/5134242/semantics-standards-and-using-the-lang-attribute-for-source-code-in-markup
So it's possible to read HTML and write markdown without losing this information.

It's also possible to read markdown with any type of code block:
fenced with ``` or ~~~, or indented, and rewrite it the same way.

Change-Id: I33c2bf7d7b66c8f3ba5bdd41ab32572f09349c47
Reviewed-by: Gatis Paeglis 
---
 src/gui/text/qtextformat.cpp                       |  3 +
 src/gui/text/qtextformat.h                         |  1 +
 src/gui/text/qtexthtmlparser.cpp                   |  4 ++
 src/gui/text/qtextmarkdownimporter.cpp             |  8 ++-
 src/gui/text/qtextmarkdownimporter_p.h             |  1 +
 src/gui/text/qtextmarkdownwriter.cpp               | 68 +++++++++++++++++++---
 src/gui/text/qtextmarkdownwriter_p.h               |  5 +-
 .../text/qtextmarkdownwriter/data/blockquotes.md   | 52 ++++++++++++-----
 .../tst_qtextmarkdownwriter.cpp                    |  3 +
 9 files changed, 119 insertions(+), 26 deletions(-)

diff --git a/src/gui/text/qtextformat.cpp b/src/gui/text/qtextformat.cpp
index 644dd5558d..090c6cc4ce 100644
--- a/src/gui/text/qtextformat.cpp
+++ b/src/gui/text/qtextformat.cpp
@@ -564,6 +564,9 @@ Q_GUI_EXPORT QDataStream &operator>>(QDataStream &stream, QTextFormat &fmt)
     \value BlockTrailingHorizontalRulerWidth The width of a horizontal ruler element.
     \value HeadingLevel     The level of a heading, for example 1 corresponds to an HTML H1 tag; otherwise 0.
                             This enum value has been added in Qt 5.12.
+    \value BlockCodeFence   The character that was used in the "fences" around a Markdown code block.
+                            If the code block was indented rather than fenced, the block should not have this property.
+                            This enum value has been added in Qt 5.14.
 
     \value BlockQuoteLevel  The depth of nested quoting on this block: 1 means the block is a top-level block quote.
                             Blocks that are not block quotes should not have this property.
diff --git a/src/gui/text/qtextformat.h b/src/gui/text/qtextformat.h
index 4f534fb65d..a91461dcae 100644
--- a/src/gui/text/qtextformat.h
+++ b/src/gui/text/qtextformat.h
@@ -178,6 +178,7 @@ public:
         HeadingLevel = 0x1070,
         BlockQuoteLevel = 0x1080,
         BlockCodeLanguage = 0x1090,
+        BlockCodeFence = 0x1091,
         BlockMarker = 0x10A0,
 
         // character properties
diff --git a/src/gui/text/qtexthtmlparser.cpp b/src/gui/text/qtexthtmlparser.cpp
index 49ee6394ee..642f0893b4 100644
--- a/src/gui/text/qtexthtmlparser.cpp
+++ b/src/gui/text/qtexthtmlparser.cpp
@@ -1635,6 +1635,10 @@ void QTextHtmlParser::applyAttributes(const QStringList &attributes)
                 else if (key == QLatin1String("type"))
                     linkType = value;
                 break;
+            case Html_pre:
+                if (key == QLatin1String("class") && value.startsWith(QLatin1String("language-")))
+                    node->blockFormat.setProperty(QTextFormat::BlockCodeLanguage, value.mid(9));
+                break;
             default:
                 break;
         }
diff --git a/src/gui/text/qtextmarkdownimporter.cpp b/src/gui/text/qtextmarkdownimporter.cpp
index 223eb01e55..8a9bb3953c 100644
--- a/src/gui/text/qtextmarkdownimporter.cpp
+++ b/src/gui/text/qtextmarkdownimporter.cpp
@@ -165,12 +165,13 @@ int QTextMarkdownImporter::cbEnterBlock(int blockType, void *det)
         MD_BLOCK_CODE_DETAIL *detail = static_cast<MD_BLOCK_CODE_DETAIL *>(det);
         m_codeBlock = true;
         m_blockCodeLanguage = QLatin1String(detail->lang.text, int(detail->lang.size));
+        m_blockCodeFence = detail->fence_char;
         QString info = QLatin1String(detail->info.text, int(detail->info.size));
         m_needsInsertBlock = true;
         if (m_blockQuoteDepth)
-            qCDebug(lcMD, "CODE lang '%s' info '%s' inside QUOTE %d", qPrintable(m_blockCodeLanguage), qPrintable(info), m_blockQuoteDepth);
+            qCDebug(lcMD, "CODE lang '%s' info '%s' fenced with '%c' inside QUOTE %d", qPrintable(m_blockCodeLanguage), qPrintable(info), m_blockCodeFence, m_blockQuoteDepth);
         else
-            qCDebug(lcMD, "CODE lang '%s' info '%s'", qPrintable(m_blockCodeLanguage), qPrintable(info));
+            qCDebug(lcMD, "CODE lang '%s' info '%s' fenced with '%c'", qPrintable(m_blockCodeLanguage), qPrintable(info), m_blockCodeFence);
     } break;
     case MD_BLOCK_H: {
         MD_BLOCK_H_DETAIL *detail = static_cast<MD_BLOCK_H_DETAIL *>(det);
@@ -326,6 +327,7 @@ int QTextMarkdownImporter::cbLeaveBlock(int blockType, void *detail)
     case MD_BLOCK_CODE: {
         m_codeBlock = false;
         m_blockCodeLanguage.clear();
+        m_blockCodeFence = 0;
         if (m_blockQuoteDepth)
             qCDebug(lcMD, "CODE ended inside QUOTE %d", m_blockQuoteDepth);
         else
@@ -540,6 +542,8 @@ void QTextMarkdownImporter::insertBlock()
     }
     if (m_codeBlock) {
         blockFormat.setProperty(QTextFormat::BlockCodeLanguage, m_blockCodeLanguage);
+        if (m_blockCodeFence)
+            blockFormat.setProperty(QTextFormat::BlockCodeFence, QString(QLatin1Char(m_blockCodeFence)));
         charFormat.setFont(m_monoFont);
     } else {
         blockFormat.setTopMargin(m_paragraphMargin);
diff --git a/src/gui/text/qtextmarkdownimporter_p.h b/src/gui/text/qtextmarkdownimporter_p.h
index fdce74483b..1b8c2ca354 100644
--- a/src/gui/text/qtextmarkdownimporter_p.h
+++ b/src/gui/text/qtextmarkdownimporter_p.h
@@ -124,6 +124,7 @@ private:
     int m_tableCol = -1; // because relative cell movements (e.g. m_cursor->movePosition(QTextCursor::NextCell)) don't work
     int m_paragraphMargin = 0;
     int m_blockType = 0;
+    char m_blockCodeFence = 0;
     Features m_features;
     QTextImageFormat m_imageFormat;
     QTextListFormat m_listFormat;
diff --git a/src/gui/text/qtextmarkdownwriter.cpp b/src/gui/text/qtextmarkdownwriter.cpp
index 58e0c86b95..f351c8d20b 100644
--- a/src/gui/text/qtextmarkdownwriter.cpp
+++ b/src/gui/text/qtextmarkdownwriter.cpp
@@ -134,6 +134,24 @@ void QTextMarkdownWriter::writeFrame(const QTextFrame *frame)
             writeFrame(iterator.currentFrame());
         else { // no frame, it's a block
             QTextBlock block = iterator.currentBlock();
+            // Look ahead and detect some cases when we should
+            // suppress needless blank lines, when there will be a big change in block format
+            bool nextIsDifferent = false;
+            bool ending = false;
+            {
+                QTextFrame::iterator next = iterator;
+                ++next;
+                if (next.atEnd()) {
+                    nextIsDifferent = true;
+                    ending = true;
+                } else {
+                    QTextBlockFormat format = iterator.currentBlock().blockFormat();
+                    QTextBlockFormat nextFormat = next.currentBlock().blockFormat();
+                    if (nextFormat.indent() != format.indent() ||
+                            nextFormat.property(QTextFormat::BlockCodeLanguage) != format.property(QTextFormat::BlockCodeLanguage))
+                        nextIsDifferent = true;
+                }
+            }
             if (table) {
                 QTextTableCell cell = table->cellAt(block.position());
                 if (tableRow < cell.row()) {
@@ -150,7 +168,7 @@ void QTextMarkdownWriter::writeFrame(const QTextFrame *frame)
                 if (lastWasList)
                     m_stream << Newline;
             }
-            int endingCol = writeBlock(block, !table, table && tableRow == 0);
+            int endingCol = writeBlock(block, !table, table && tableRow == 0, nextIsDifferent);
             m_doubleNewlineWritten = false;
             if (table) {
                 QTextTableCell cell = table->cellAt(block.position());
@@ -162,11 +180,19 @@ void QTextMarkdownWriter::writeFrame(const QTextFrame *frame)
                     m_stream << QString(paddingLen, Space);
                 for (int col = cell.column(); col < spanEndCol; ++col)
                     m_stream << "|";
-            } else if (block.textList() || block.blockFormat().hasProperty(QTextFormat::BlockCodeLanguage)) {
+            } else if (m_fencedCodeBlock && ending) {
+                m_stream << m_linePrefix << QString(m_wrappedLineIndent, Space)
+                         << m_codeBlockFence << Newline << Newline;
+                m_codeBlockFence.clear();
+            } else if (m_indentedCodeBlock && nextIsDifferent) {
                 m_stream << Newline;
             } else if (endingCol > 0) {
-                m_stream << Newline << Newline;
-                m_doubleNewlineWritten = true;
+                if (block.textList() || block.blockFormat().hasProperty(QTextFormat::BlockCodeLanguage)) {
+                    m_stream << Newline;
+                } else {
+                    m_stream << Newline << Newline;
+                    m_doubleNewlineWritten = true;
+                }
             }
             lastWasList = block.textList();
         }
@@ -259,11 +285,13 @@ static void maybeEscapeFirstChar(QString &s)
     }
 }
 
-int QTextMarkdownWriter::writeBlock(const QTextBlock &block, bool wrap, bool ignoreFormat)
+int QTextMarkdownWriter::writeBlock(const QTextBlock &block, bool wrap, bool ignoreFormat, bool ignoreEmpty)
 {
+    if (block.text().isEmpty() && ignoreEmpty)
+        return 0;
     const int ColumnLimit = 80;
     QTextBlockFormat blockFmt = block.blockFormat();
-    bool indentedCodeBlock = false;
+    bool missedBlankCodeBlockLine = false;
     if (block.textList()) { // it's a list-item
         auto fmt = block.textList()->format();
         const int listLevel = fmt.indent();
@@ -324,7 +352,28 @@ int QTextMarkdownWriter::writeBlock(const QTextBlock &block, bool wrap, bool ign
     } else if (blockFmt.hasProperty(QTextFormat::BlockTrailingHorizontalRulerWidth)) {
         m_stream << "- - -\n"; // unambiguous horizontal rule, not an underline under a heading
         return 0;
+    } else if (blockFmt.hasProperty(QTextFormat::BlockCodeFence) || blockFmt.stringProperty(QTextFormat::BlockCodeLanguage).length() > 0) {
+        // It's important to preserve blank lines in code blocks.  But blank lines in code blocks
+        // inside block quotes are getting preserved anyway (along with the "> " prefix).
+        if (!blockFmt.hasProperty(QTextFormat::BlockQuoteLevel))
+            missedBlankCodeBlockLine = true; // only if we don't get any fragments below
+        if (!m_fencedCodeBlock) {
+            QString fenceChar = blockFmt.stringProperty(QTextFormat::BlockCodeFence);
+            if (fenceChar.isEmpty())
+                fenceChar = QLatin1String("`");
+            m_codeBlockFence = QString(3, fenceChar.at(0));
+            // A block quote can contain an indented code block, but not vice-versa.
+            m_stream << m_linePrefix << QString(m_wrappedLineIndent, Space) << m_codeBlockFence
+                     << Space << blockFmt.stringProperty(QTextFormat::BlockCodeLanguage) << Newline;
+            m_fencedCodeBlock = true;
+        }
     } else if (!blockFmt.indent()) {
+        if (m_fencedCodeBlock) {
+            m_stream << m_linePrefix << QString(m_wrappedLineIndent, Space)
+                     << m_codeBlockFence << Newline;
+            m_fencedCodeBlock = false;
+            m_codeBlockFence.clear();
+        }
         m_wrappedLineIndent = 0;
         m_linePrefix.clear();
         if (blockFmt.hasProperty(QTextFormat::BlockQuoteLevel)) {
@@ -337,7 +386,7 @@ int QTextMarkdownWriter::writeBlock(const QTextBlock &block, bool wrap, bool ign
         if (blockFmt.hasProperty(QTextFormat::BlockCodeLanguage)) {
             // A block quote can contain an indented code block, but not vice-versa.
             m_linePrefix += QString(4, Space);
-            indentedCodeBlock = true;
+            m_indentedCodeBlock = true;
         }
     }
     if (blockFmt.headingLevel())
@@ -358,6 +407,7 @@ int QTextMarkdownWriter::writeBlock(const QTextBlock &block, bool wrap, bool ign
     bool strikeOut = false;
     QString backticks(Backtick);
     for (QTextBlock::Iterator frag = block.begin(); !frag.atEnd(); ++frag) {
+        missedBlankCodeBlockLine = false;
         QString fragmentText = frag.fragment().text();
         while (fragmentText.endsWith(Newline))
             fragmentText.chop(1);
@@ -401,7 +451,7 @@ int QTextMarkdownWriter::writeBlock(const QTextBlock &block, bool wrap, bool ign
             bool monoFrag = fontInfo.fixedPitch();
             QString markers;
             if (!ignoreFormat) {
-                if (monoFrag != mono && !indentedCodeBlock) {
+                if (monoFrag != mono && !m_indentedCodeBlock && !m_fencedCodeBlock) {
                     if (monoFrag)
                         backticks = QString(adjacentBackticksCount(fragmentText) + 1, Backtick);
                     markers += backticks;
@@ -501,6 +551,8 @@ int QTextMarkdownWriter::writeBlock(const QTextBlock &block, bool wrap, bool ign
         m_stream << "~~";
         col += 2;
     }
+    if (missedBlankCodeBlockLine)
+        m_stream << Newline;
     return col;
 }
 
diff --git a/src/gui/text/qtextmarkdownwriter_p.h b/src/gui/text/qtextmarkdownwriter_p.h
index 96ceb445cd..90310250ac 100644
--- a/src/gui/text/qtextmarkdownwriter_p.h
+++ b/src/gui/text/qtextmarkdownwriter_p.h
@@ -67,7 +67,7 @@ public:
     bool writeAll(const QTextDocument *document);
     void writeTable(const QAbstractItemModel *table);
 
-    int writeBlock(const QTextBlock &block, bool table, bool ignoreFormat);
+    int writeBlock(const QTextBlock &block, bool table, bool ignoreFormat, bool ignoreEmpty);
     void writeFrame(const QTextFrame *frame);
 
 private:
@@ -82,9 +82,12 @@ private:
     QTextDocument::MarkdownFeatures m_features;
     QMap m_listInfo;
     QString m_linePrefix;
+    QString m_codeBlockFence;
     int m_wrappedLineIndent = 0;
     int m_lastListIndent = 1;
     bool m_doubleNewlineWritten = false;
+    bool m_indentedCodeBlock = false;
+    bool m_fencedCodeBlock = false;
 };
 
 QT_END_NAMESPACE
diff --git a/tests/auto/gui/text/qtextmarkdownwriter/data/blockquotes.md b/tests/auto/gui/text/qtextmarkdownwriter/data/blockquotes.md
index 44c198fdc5..6336d0219f 100644
--- a/tests/auto/gui/text/qtextmarkdownwriter/data/blockquotes.md
+++ b/tests/auto/gui/text/qtextmarkdownwriter/data/blockquotes.md
@@ -20,21 +20,43 @@ MacFarlane writes:
 > equivalent sample of Markdown. Here is a sample of AsciiDoc from the AsciiDoc
 > manual:
 
->     1. List item one.
->     +
->     List item one continued with a second paragraph followed by an
->     Indented block.
->     +
->     .................
->     $ ls *.sh
->     $ mv *.sh ~/tmp
->     .................
->     +
->     List item continued with a third paragraph.
->     
->     2. List item two continued with an open block.
->     ...
->     
+> ``` AsciiDoc
+> 1. List item one.
+> +
+> List item one continued with a second paragraph followed by an
+> Indented block.
+> +
+> .................
+> $ ls *.sh
+> $ mv *.sh ~/tmp
+> .................
+> +
+> List item continued with a third paragraph.
+> 
+> 2. List item two continued with an open block.
+> ...
+> ```
 The quotation includes an embedded quotation and a code quotation and ends with
 an ellipsis due to being incomplete.
 
+Now let's have an indented code block:
+
+    #include <stdio.h>
+    
+    int main(void)
+    {
+        printf("# hello markdown\n");
+        return 0;
+    }
+
+and end with a fenced code block:
+~~~ pseudocode
+#include 
+#include 
+
+a block {
+    a statement;
+    another statement;
+}
+~~~
+
diff --git a/tests/auto/gui/text/qtextmarkdownwriter/tst_qtextmarkdownwriter.cpp b/tests/auto/gui/text/qtextmarkdownwriter/tst_qtextmarkdownwriter.cpp
index 1935e58dec..8d38cbb18a 100644
--- a/tests/auto/gui/text/qtextmarkdownwriter/tst_qtextmarkdownwriter.cpp
+++ b/tests/auto/gui/text/qtextmarkdownwriter/tst_qtextmarkdownwriter.cpp
@@ -419,6 +419,9 @@ void tst_QTextMarkdownWriter::fromHtml_data()
     QTest::newRow("image") <<
         "\"foo\"" <<
         "![foo](/url \"title\")\n\n";
+    QTest::newRow("code") <<
+        "
\n#include \"foo.h\"\n\nblock {\n    statement();\n}\n\n
" << + "``` pseudocode\n#include \"foo.h\"\n\nblock {\n statement();\n}\n```\n\n"; // TODO // QTest::newRow("escaped number and paren after double newline") << // "

(The first sentence of this paragraph is a line, the next paragraph has a number

13) but that's not part of an ordered list" << -- cgit v1.2.3