summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorShawn Rutledge <shawn.rutledge@qt.io>2019-05-20 17:40:12 +0200
committerShawn Rutledge <shawn.rutledge@qt.io>2019-06-04 08:01:34 +0200
commitb3cc9403c4f7814d3e14915fdacee42b3a805073 (patch)
tree903a727b9b78301b9ed7dd5031ac063f3483dc19
parent57f38bc49d43140f3b04e625a47eb1e6de8d2ae3 (diff)
QTextMarkdownWriter: write fenced code blocks with language declaration
MD4C now makes it possible to detect indented and fenced code blocks: https://github.com/mity/md4c/issues/81 Fenced code blocks have the advantages of being easier to write by hand, and having an "info string" following the opening fence, which is commonly used to declare the language. Also, the HTML parser now recognizes tags of the form <pre class="language-foo"> which is one convention for declaring the programming language (as opposed to human language, for which the lang attribute would be used): https://stackoverflow.com/questions/5134242/semantics-standards-and-using-the-lang-attribute-for-source-code-in-markup So it's possible to read HTML and write markdown without losing this information. It's also possible to read markdown with any type of code block: fenced with ``` or ~~~, or indented, and rewrite it the same way. Change-Id: I33c2bf7d7b66c8f3ba5bdd41ab32572f09349c47 Reviewed-by: Gatis Paeglis <gatis.paeglis@qt.io>
-rw-r--r--src/gui/text/qtextformat.cpp3
-rw-r--r--src/gui/text/qtextformat.h1
-rw-r--r--src/gui/text/qtexthtmlparser.cpp4
-rw-r--r--src/gui/text/qtextmarkdownimporter.cpp8
-rw-r--r--src/gui/text/qtextmarkdownimporter_p.h1
-rw-r--r--src/gui/text/qtextmarkdownwriter.cpp68
-rw-r--r--src/gui/text/qtextmarkdownwriter_p.h5
-rw-r--r--tests/auto/gui/text/qtextmarkdownwriter/data/blockquotes.md52
-rw-r--r--tests/auto/gui/text/qtextmarkdownwriter/tst_qtextmarkdownwriter.cpp3
9 files changed, 119 insertions, 26 deletions
diff --git a/src/gui/text/qtextformat.cpp b/src/gui/text/qtextformat.cpp
index 644dd5558d..090c6cc4ce 100644
--- a/src/gui/text/qtextformat.cpp
+++ b/src/gui/text/qtextformat.cpp
@@ -564,6 +564,9 @@ Q_GUI_EXPORT QDataStream &operator>>(QDataStream &stream, QTextFormat &fmt)
\value BlockTrailingHorizontalRulerWidth The width of a horizontal ruler element.
\value HeadingLevel The level of a heading, for example 1 corresponds to an HTML H1 tag; otherwise 0.
This enum value has been added in Qt 5.12.
+ \value BlockCodeFence The character that was used in the "fences" around a Markdown code block.
+ If the code block was indented rather than fenced, the block should not have this property.
+ This enum value has been added in Qt 5.14.
\value BlockQuoteLevel The depth of nested quoting on this block: 1 means the block is a top-level block quote.
Blocks that are not block quotes should not have this property.
diff --git a/src/gui/text/qtextformat.h b/src/gui/text/qtextformat.h
index 4f534fb65d..a91461dcae 100644
--- a/src/gui/text/qtextformat.h
+++ b/src/gui/text/qtextformat.h
@@ -178,6 +178,7 @@ public:
HeadingLevel = 0x1070,
BlockQuoteLevel = 0x1080,
BlockCodeLanguage = 0x1090,
+ BlockCodeFence = 0x1091,
BlockMarker = 0x10A0,
// character properties
diff --git a/src/gui/text/qtexthtmlparser.cpp b/src/gui/text/qtexthtmlparser.cpp
index 49ee6394ee..642f0893b4 100644
--- a/src/gui/text/qtexthtmlparser.cpp
+++ b/src/gui/text/qtexthtmlparser.cpp
@@ -1635,6 +1635,10 @@ void QTextHtmlParser::applyAttributes(const QStringList &attributes)
else if (key == QLatin1String("type"))
linkType = value;
break;
+ case Html_pre:
+ if (key == QLatin1String("class") && value.startsWith(QLatin1String("language-")))
+ node->blockFormat.setProperty(QTextFormat::BlockCodeLanguage, value.mid(9));
+ break;
default:
break;
}
diff --git a/src/gui/text/qtextmarkdownimporter.cpp b/src/gui/text/qtextmarkdownimporter.cpp
index 223eb01e55..8a9bb3953c 100644
--- a/src/gui/text/qtextmarkdownimporter.cpp
+++ b/src/gui/text/qtextmarkdownimporter.cpp
@@ -165,12 +165,13 @@ int QTextMarkdownImporter::cbEnterBlock(int blockType, void *det)
MD_BLOCK_CODE_DETAIL *detail = static_cast<MD_BLOCK_CODE_DETAIL *>(det);
m_codeBlock = true;
m_blockCodeLanguage = QLatin1String(detail->lang.text, int(detail->lang.size));
+ m_blockCodeFence = detail->fence_char;
QString info = QLatin1String(detail->info.text, int(detail->info.size));
m_needsInsertBlock = true;
if (m_blockQuoteDepth)
- qCDebug(lcMD, "CODE lang '%s' info '%s' inside QUOTE %d", qPrintable(m_blockCodeLanguage), qPrintable(info), m_blockQuoteDepth);
+ qCDebug(lcMD, "CODE lang '%s' info '%s' fenced with '%c' inside QUOTE %d", qPrintable(m_blockCodeLanguage), qPrintable(info), m_blockCodeFence, m_blockQuoteDepth);
else
- qCDebug(lcMD, "CODE lang '%s' info '%s'", qPrintable(m_blockCodeLanguage), qPrintable(info));
+ qCDebug(lcMD, "CODE lang '%s' info '%s' fenced with '%c'", qPrintable(m_blockCodeLanguage), qPrintable(info), m_blockCodeFence);
} break;
case MD_BLOCK_H: {
MD_BLOCK_H_DETAIL *detail = static_cast<MD_BLOCK_H_DETAIL *>(det);
@@ -326,6 +327,7 @@ int QTextMarkdownImporter::cbLeaveBlock(int blockType, void *detail)
case MD_BLOCK_CODE: {
m_codeBlock = false;
m_blockCodeLanguage.clear();
+ m_blockCodeFence = 0;
if (m_blockQuoteDepth)
qCDebug(lcMD, "CODE ended inside QUOTE %d", m_blockQuoteDepth);
else
@@ -540,6 +542,8 @@ void QTextMarkdownImporter::insertBlock()
}
if (m_codeBlock) {
blockFormat.setProperty(QTextFormat::BlockCodeLanguage, m_blockCodeLanguage);
+ if (m_blockCodeFence)
+ blockFormat.setProperty(QTextFormat::BlockCodeFence, QString(QLatin1Char(m_blockCodeFence)));
charFormat.setFont(m_monoFont);
} else {
blockFormat.setTopMargin(m_paragraphMargin);
diff --git a/src/gui/text/qtextmarkdownimporter_p.h b/src/gui/text/qtextmarkdownimporter_p.h
index fdce74483b..1b8c2ca354 100644
--- a/src/gui/text/qtextmarkdownimporter_p.h
+++ b/src/gui/text/qtextmarkdownimporter_p.h
@@ -124,6 +124,7 @@ private:
int m_tableCol = -1; // because relative cell movements (e.g. m_cursor->movePosition(QTextCursor::NextCell)) don't work
int m_paragraphMargin = 0;
int m_blockType = 0;
+ char m_blockCodeFence = 0;
Features m_features;
QTextImageFormat m_imageFormat;
QTextListFormat m_listFormat;
diff --git a/src/gui/text/qtextmarkdownwriter.cpp b/src/gui/text/qtextmarkdownwriter.cpp
index 58e0c86b95..f351c8d20b 100644
--- a/src/gui/text/qtextmarkdownwriter.cpp
+++ b/src/gui/text/qtextmarkdownwriter.cpp
@@ -134,6 +134,24 @@ void QTextMarkdownWriter::writeFrame(const QTextFrame *frame)
writeFrame(iterator.currentFrame());
else { // no frame, it's a block
QTextBlock block = iterator.currentBlock();
+ // Look ahead and detect some cases when we should
+ // suppress needless blank lines, when there will be a big change in block format
+ bool nextIsDifferent = false;
+ bool ending = false;
+ {
+ QTextFrame::iterator next = iterator;
+ ++next;
+ if (next.atEnd()) {
+ nextIsDifferent = true;
+ ending = true;
+ } else {
+ QTextBlockFormat format = iterator.currentBlock().blockFormat();
+ QTextBlockFormat nextFormat = next.currentBlock().blockFormat();
+ if (nextFormat.indent() != format.indent() ||
+ nextFormat.property(QTextFormat::BlockCodeLanguage) != format.property(QTextFormat::BlockCodeLanguage))
+ nextIsDifferent = true;
+ }
+ }
if (table) {
QTextTableCell cell = table->cellAt(block.position());
if (tableRow < cell.row()) {
@@ -150,7 +168,7 @@ void QTextMarkdownWriter::writeFrame(const QTextFrame *frame)
if (lastWasList)
m_stream << Newline;
}
- int endingCol = writeBlock(block, !table, table && tableRow == 0);
+ int endingCol = writeBlock(block, !table, table && tableRow == 0, nextIsDifferent);
m_doubleNewlineWritten = false;
if (table) {
QTextTableCell cell = table->cellAt(block.position());
@@ -162,11 +180,19 @@ void QTextMarkdownWriter::writeFrame(const QTextFrame *frame)
m_stream << QString(paddingLen, Space);
for (int col = cell.column(); col < spanEndCol; ++col)
m_stream << "|";
- } else if (block.textList() || block.blockFormat().hasProperty(QTextFormat::BlockCodeLanguage)) {
+ } else if (m_fencedCodeBlock && ending) {
+ m_stream << m_linePrefix << QString(m_wrappedLineIndent, Space)
+ << m_codeBlockFence << Newline << Newline;
+ m_codeBlockFence.clear();
+ } else if (m_indentedCodeBlock && nextIsDifferent) {
m_stream << Newline;
} else if (endingCol > 0) {
- m_stream << Newline << Newline;
- m_doubleNewlineWritten = true;
+ if (block.textList() || block.blockFormat().hasProperty(QTextFormat::BlockCodeLanguage)) {
+ m_stream << Newline;
+ } else {
+ m_stream << Newline << Newline;
+ m_doubleNewlineWritten = true;
+ }
}
lastWasList = block.textList();
}
@@ -259,11 +285,13 @@ static void maybeEscapeFirstChar(QString &s)
}
}
-int QTextMarkdownWriter::writeBlock(const QTextBlock &block, bool wrap, bool ignoreFormat)
+int QTextMarkdownWriter::writeBlock(const QTextBlock &block, bool wrap, bool ignoreFormat, bool ignoreEmpty)
{
+ if (block.text().isEmpty() && ignoreEmpty)
+ return 0;
const int ColumnLimit = 80;
QTextBlockFormat blockFmt = block.blockFormat();
- bool indentedCodeBlock = false;
+ bool missedBlankCodeBlockLine = false;
if (block.textList()) { // it's a list-item
auto fmt = block.textList()->format();
const int listLevel = fmt.indent();
@@ -324,7 +352,28 @@ int QTextMarkdownWriter::writeBlock(const QTextBlock &block, bool wrap, bool ign
} else if (blockFmt.hasProperty(QTextFormat::BlockTrailingHorizontalRulerWidth)) {
m_stream << "- - -\n"; // unambiguous horizontal rule, not an underline under a heading
return 0;
+ } else if (blockFmt.hasProperty(QTextFormat::BlockCodeFence) || blockFmt.stringProperty(QTextFormat::BlockCodeLanguage).length() > 0) {
+ // It's important to preserve blank lines in code blocks. But blank lines in code blocks
+ // inside block quotes are getting preserved anyway (along with the "> " prefix).
+ if (!blockFmt.hasProperty(QTextFormat::BlockQuoteLevel))
+ missedBlankCodeBlockLine = true; // only if we don't get any fragments below
+ if (!m_fencedCodeBlock) {
+ QString fenceChar = blockFmt.stringProperty(QTextFormat::BlockCodeFence);
+ if (fenceChar.isEmpty())
+ fenceChar = QLatin1String("`");
+ m_codeBlockFence = QString(3, fenceChar.at(0));
+ // A block quote can contain an indented code block, but not vice-versa.
+ m_stream << m_linePrefix << QString(m_wrappedLineIndent, Space) << m_codeBlockFence
+ << Space << blockFmt.stringProperty(QTextFormat::BlockCodeLanguage) << Newline;
+ m_fencedCodeBlock = true;
+ }
} else if (!blockFmt.indent()) {
+ if (m_fencedCodeBlock) {
+ m_stream << m_linePrefix << QString(m_wrappedLineIndent, Space)
+ << m_codeBlockFence << Newline;
+ m_fencedCodeBlock = false;
+ m_codeBlockFence.clear();
+ }
m_wrappedLineIndent = 0;
m_linePrefix.clear();
if (blockFmt.hasProperty(QTextFormat::BlockQuoteLevel)) {
@@ -337,7 +386,7 @@ int QTextMarkdownWriter::writeBlock(const QTextBlock &block, bool wrap, bool ign
if (blockFmt.hasProperty(QTextFormat::BlockCodeLanguage)) {
// A block quote can contain an indented code block, but not vice-versa.
m_linePrefix += QString(4, Space);
- indentedCodeBlock = true;
+ m_indentedCodeBlock = true;
}
}
if (blockFmt.headingLevel())
@@ -358,6 +407,7 @@ int QTextMarkdownWriter::writeBlock(const QTextBlock &block, bool wrap, bool ign
bool strikeOut = false;
QString backticks(Backtick);
for (QTextBlock::Iterator frag = block.begin(); !frag.atEnd(); ++frag) {
+ missedBlankCodeBlockLine = false;
QString fragmentText = frag.fragment().text();
while (fragmentText.endsWith(Newline))
fragmentText.chop(1);
@@ -401,7 +451,7 @@ int QTextMarkdownWriter::writeBlock(const QTextBlock &block, bool wrap, bool ign
bool monoFrag = fontInfo.fixedPitch();
QString markers;
if (!ignoreFormat) {
- if (monoFrag != mono && !indentedCodeBlock) {
+ if (monoFrag != mono && !m_indentedCodeBlock && !m_fencedCodeBlock) {
if (monoFrag)
backticks = QString(adjacentBackticksCount(fragmentText) + 1, Backtick);
markers += backticks;
@@ -501,6 +551,8 @@ int QTextMarkdownWriter::writeBlock(const QTextBlock &block, bool wrap, bool ign
m_stream << "~~";
col += 2;
}
+ if (missedBlankCodeBlockLine)
+ m_stream << Newline;
return col;
}
diff --git a/src/gui/text/qtextmarkdownwriter_p.h b/src/gui/text/qtextmarkdownwriter_p.h
index 96ceb445cd..90310250ac 100644
--- a/src/gui/text/qtextmarkdownwriter_p.h
+++ b/src/gui/text/qtextmarkdownwriter_p.h
@@ -67,7 +67,7 @@ public:
bool writeAll(const QTextDocument *document);
void writeTable(const QAbstractItemModel *table);
- int writeBlock(const QTextBlock &block, bool table, bool ignoreFormat);
+ int writeBlock(const QTextBlock &block, bool table, bool ignoreFormat, bool ignoreEmpty);
void writeFrame(const QTextFrame *frame);
private:
@@ -82,9 +82,12 @@ private:
QTextDocument::MarkdownFeatures m_features;
QMap<QTextList *, ListInfo> m_listInfo;
QString m_linePrefix;
+ QString m_codeBlockFence;
int m_wrappedLineIndent = 0;
int m_lastListIndent = 1;
bool m_doubleNewlineWritten = false;
+ bool m_indentedCodeBlock = false;
+ bool m_fencedCodeBlock = false;
};
QT_END_NAMESPACE
diff --git a/tests/auto/gui/text/qtextmarkdownwriter/data/blockquotes.md b/tests/auto/gui/text/qtextmarkdownwriter/data/blockquotes.md
index 44c198fdc5..6336d0219f 100644
--- a/tests/auto/gui/text/qtextmarkdownwriter/data/blockquotes.md
+++ b/tests/auto/gui/text/qtextmarkdownwriter/data/blockquotes.md
@@ -20,21 +20,43 @@ MacFarlane writes:
> equivalent sample of Markdown. Here is a sample of AsciiDoc from the AsciiDoc
> manual:
-> 1. List item one.
-> +
-> List item one continued with a second paragraph followed by an
-> Indented block.
-> +
-> .................
-> $ ls *.sh
-> $ mv *.sh ~/tmp
-> .................
-> +
-> List item continued with a third paragraph.
->
-> 2. List item two continued with an open block.
-> ...
->
+> ``` AsciiDoc
+> 1. List item one.
+> +
+> List item one continued with a second paragraph followed by an
+> Indented block.
+> +
+> .................
+> $ ls *.sh
+> $ mv *.sh ~/tmp
+> .................
+> +
+> List item continued with a third paragraph.
+>
+> 2. List item two continued with an open block.
+> ...
+> ```
The quotation includes an embedded quotation and a code quotation and ends with
an ellipsis due to being incomplete.
+Now let's have an indented code block:
+
+ #include <stdio.h>
+
+ int main(void)
+ {
+ printf("# hello markdown\n");
+ return 0;
+ }
+
+and end with a fenced code block:
+~~~ pseudocode
+#include <something.h>
+#include <else.h>
+
+a block {
+ a statement;
+ another statement;
+}
+~~~
+
diff --git a/tests/auto/gui/text/qtextmarkdownwriter/tst_qtextmarkdownwriter.cpp b/tests/auto/gui/text/qtextmarkdownwriter/tst_qtextmarkdownwriter.cpp
index 1935e58dec..8d38cbb18a 100644
--- a/tests/auto/gui/text/qtextmarkdownwriter/tst_qtextmarkdownwriter.cpp
+++ b/tests/auto/gui/text/qtextmarkdownwriter/tst_qtextmarkdownwriter.cpp
@@ -419,6 +419,9 @@ void tst_QTextMarkdownWriter::fromHtml_data()
QTest::newRow("image") <<
"<img src=\"/url\" alt=\"foo\" title=\"title\"/>" <<
"![foo](/url \"title\")\n\n";
+ QTest::newRow("code") <<
+ "<pre class=\"language-pseudocode\">\n#include \"foo.h\"\n\nblock {\n statement();\n}\n\n</pre>" <<
+ "``` pseudocode\n#include \"foo.h\"\n\nblock {\n statement();\n}\n```\n\n";
// TODO
// QTest::newRow("escaped number and paren after double newline") <<
// "<p>(The first sentence of this paragraph is a line, the next paragraph has a number</p>13) but that's not part of an ordered list" <<