aboutsummaryrefslogtreecommitdiffstats
path: root/src/qmlls/qdochtmlparser.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/qmlls/qdochtmlparser.cpp')
-rw-r--r--src/qmlls/qdochtmlparser.cpp214
1 files changed, 214 insertions, 0 deletions
diff --git a/src/qmlls/qdochtmlparser.cpp b/src/qmlls/qdochtmlparser.cpp
new file mode 100644
index 0000000000..bba18facb5
--- /dev/null
+++ b/src/qmlls/qdochtmlparser.cpp
@@ -0,0 +1,214 @@
+// Copyright (C) 2024 The Qt Company Ltd.
+// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
+
+#include <qdochtmlparser_p.h>
+#include <QtCore/qregularexpression.h>
+
+QT_BEGIN_NAMESPACE
+
+using namespace Qt::StringLiterals;
+
+namespace { //anonymous
+
+// An emprical value to avoid too much content
+static constexpr qsizetype firstIndexOfParagraphTag = 400;
+
+// A paragraph can start with <p><i>, or <p><tt>
+// We need smallest value to use QString::indexOf
+static constexpr auto lengthOfSmallestOpeningTag = qsizetype(std::char_traits<char>::length("<p><i>"));
+static constexpr auto lengthOfStartParagraphTag = qsizetype(std::char_traits<char>::length("<p>"));
+static constexpr auto lengthOfEndParagraphTag = qsizetype(std::char_traits<char>::length("</p>"));
+static constexpr auto lengthOfPeriod = qsizetype(std::char_traits<char>::length("."));
+
+QString getContentsByMarks(const QString &html, QString startMark, QString endMark)
+{
+ startMark.prepend("$$$"_L1);
+ endMark.prepend("<!-- @@@"_L1);
+
+ QString contents;
+ qsizetype start = html.indexOf(startMark);
+ if (start != -1) {
+ start = html.indexOf("-->"_L1, start);
+ if (start != -1) {
+ qsizetype end = html.indexOf(endMark, start);
+ if (end != -1) {
+ start += qsizetype(std::char_traits<char>::length("-->"));
+ contents = html.mid(start, end - start);
+ }
+ }
+ }
+ return contents;
+}
+
+
+void stripAllHtml(QString *html)
+{
+ Q_ASSERT(html);
+ html->remove(QRegularExpression("<.*?>"_L1));
+}
+
+/*! \internal
+ \brief Process the string obtained from start mark to end mark.
+ This is duplicated from QtC's Utils::HtmlExtractor, modified on top of it.
+*/
+void processOutput(QString *html)
+{
+ Q_ASSERT(html);
+ if (html->isEmpty())
+ return;
+
+ // Do not write the first paragraph in case it has extra tags below.
+ // <p><i>This is only used on the Maemo platform.</i></p>
+ // or: <p><tt>This is used on Windows only.</tt></p>
+ // or: <p>[Conditional]</p>
+ const auto skipFirstParagraphIfNeeded = [html](qsizetype &index){
+ const bool shouldSkipFirstParagraph = html->indexOf(QLatin1String("<p><i>")) == index ||
+ html->indexOf(QLatin1String("<p><tt>")) == index ||
+ html->indexOf(QLatin1String("<p>[Conditional]</p>")) == index;
+
+ if (shouldSkipFirstParagraph)
+ index = html->indexOf(QLatin1String("<p>"), index + lengthOfSmallestOpeningTag);
+ };
+
+ // Try to get the entire first paragraph, but if one is not found or if its opening
+ // tag is not in the very beginning (using an empirical value as the limit)
+ // the html is cleared out to avoid too much content.
+ qsizetype index = html->indexOf(QLatin1String("<p>"));
+ if (index != -1 && index < firstIndexOfParagraphTag) {
+ skipFirstParagraphIfNeeded(index);
+ index = html->indexOf(QLatin1String("</p>"), index + lengthOfStartParagraphTag);
+ if (index != -1) {
+ // Most paragraphs end with a period, but there are cases without punctuation
+ // and cases like this: <p>This is a description. Example:</p>
+ const auto period = html->lastIndexOf(QLatin1Char('.'), index);
+ if (period != -1) {
+ html->truncate(period + lengthOfPeriod);
+ html->append(QLatin1String("</p>"));
+ } else {
+ html->truncate(index + lengthOfEndParagraphTag);
+ }
+ } else {
+ html->clear();
+ }
+ } else {
+ html->clear();
+ }
+}
+
+}
+
+QDocHtmlExtractor::QDocHtmlExtractor(const QString &code) : m_code{ code }
+{
+}
+
+QString QDocHtmlExtractor::extract(const QDocHtmlExtractor::Element &element, ExtractionMode mode)
+{
+ QString result;
+ switch (element.type) {
+ case ElementType::QmlType:
+ result = parseForQmlType(element.name, mode);
+ break;
+
+ case ElementType::QmlProperty:
+ result = parseForQmlProperty(element.name, mode);
+ break;
+ case ElementType::QmlMethod:
+ case ElementType::QmlSignal:
+ result = parseForQmlMethodOrSignal(element.name, mode);
+ break;
+ default:
+ return {};
+ }
+
+ stripAllHtml(&result);
+
+ // Also remove leading and trailing whitespaces
+ return result.trimmed();
+}
+
+QString QDocHtmlExtractor::parseForQmlType(const QString &element, ExtractionMode mode)
+{
+ QString result;
+ // Get brief description
+ if (mode == QDocHtmlExtractor::ExtractionMode::Simplified) {
+ result = getContentsByMarks(m_code, element + "-brief"_L1 , element);
+ // Remove More...
+ if (!result.isEmpty()) {
+ const auto tailToRemove = "More..."_L1;
+ const auto lastIndex = result.lastIndexOf(tailToRemove);
+ if (lastIndex != -1)
+ result.remove(lastIndex, tailToRemove.length());
+ }
+ } else {
+ result = getContentsByMarks(m_code, element + "-description"_L1, element);
+ // Remove header
+ if (!result.isEmpty()) {
+ const auto headerToRemove = "Detailed Description"_L1;
+ const auto firstIndex = result.indexOf(headerToRemove);
+ if (firstIndex != -1)
+ result.remove(firstIndex, headerToRemove.length());
+ }
+ }
+
+ return result;
+}
+
+QString QDocHtmlExtractor::parseForQmlProperty(const QString &element, ExtractionMode mode)
+{
+ // Qt 5.15 way of finding properties in doc
+ QString startMark = QString::fromLatin1("<a name=\"%1-prop\">").arg(element);
+ qsizetype startIndex = m_code.indexOf(startMark);
+ if (startIndex == -1) {
+ // if not found, try Qt6
+ startMark = QString::fromLatin1(
+ "<td class=\"tblQmlPropNode\"><p>\n<span class=\"name\">%1</span>")
+ .arg(element);
+ startIndex = m_code.indexOf(startMark);
+ if (startIndex == -1)
+ return {};
+ }
+
+ QString contents = m_code.mid(startIndex + startMark.size());
+ startIndex = contents.indexOf(QLatin1String("<div class=\"qmldoc\"><p>"));
+ if (startIndex == -1)
+ return {};
+
+ contents = contents.mid(startIndex);
+ if (mode == ExtractionMode::Simplified)
+ processOutput(&contents);
+ return contents;
+}
+
+QString QDocHtmlExtractor::parseForQmlMethodOrSignal(const QString &functionName, ExtractionMode mode)
+{
+ // the case with <!-- $$$childAt[overload1]$$$childAtrealreal -->
+ QString mark = QString::fromLatin1("$$$%1[overload1]$$$%1").arg(functionName);
+ qsizetype startIndex = m_code.indexOf(mark);
+ if (startIndex != -1) {
+ startIndex = m_code.indexOf("-->"_L1, startIndex + mark.length());
+ if (startIndex == -1)
+ return {};
+ } else {
+ // it could be part of the method list
+ mark = QString::fromLatin1("<span class=\"name\">%1</span>")
+ .arg(functionName);
+ startIndex = m_code.indexOf(mark);
+ if (startIndex != -1)
+ startIndex += mark.length();
+ else
+ return {};
+ }
+
+ startIndex = m_code.indexOf(QLatin1String("<div class=\"qmldoc\"><p>"), startIndex);
+ if (startIndex == -1)
+ return {};
+
+ QString endMark = QString::fromLatin1("<!-- @@@");
+ qsizetype endIndex = m_code.indexOf(endMark, startIndex);
+ QString contents = m_code.mid(startIndex, endIndex);
+ if (mode == ExtractionMode::Simplified)
+ processOutput(&contents);
+ return contents;
+}
+
+QT_END_NAMESPACE