aboutsummaryrefslogtreecommitdiffstats
path: root/plugins/haskell
diff options
context:
space:
mode:
authorEike Ziller <git@eikeziller.de>2017-04-29 16:17:11 +0200
committerEike Ziller <git@eikeziller.de>2017-10-01 20:11:08 +0200
commit5798e33d742c0f413d2d865fdb75739b4374ce98 (patch)
treee7d36edf5de22ab74ed4b56e2e2b22be24f50ef6 /plugins/haskell
parent2f69373309cfe88084c5777baeff6bb46eecd071 (diff)
Add highlighter
See lexical structure of Haskell https://www.haskell.org/onlinereport/haskell2010/haskellch2.html
Diffstat (limited to 'plugins/haskell')
-rw-r--r--plugins/haskell/haskell.pro8
-rw-r--r--plugins/haskell/haskelleditorfactory.cpp2
-rw-r--r--plugins/haskell/haskellhighlighter.cpp152
-rw-r--r--plugins/haskell/haskellhighlighter.h58
-rw-r--r--plugins/haskell/haskelltokenizer.cpp631
-rw-r--r--plugins/haskell/haskelltokenizer.h91
6 files changed, 940 insertions, 2 deletions
diff --git a/plugins/haskell/haskell.pro b/plugins/haskell/haskell.pro
index 9c557b1..aa92d3c 100644
--- a/plugins/haskell/haskell.pro
+++ b/plugins/haskell/haskell.pro
@@ -5,14 +5,18 @@ DEFINES += HASKELL_LIBRARY
SOURCES += \
haskellcompletionassist.cpp \
haskelleditorfactory.cpp \
- haskellplugin.cpp
+ haskellplugin.cpp \
+ haskellhighlighter.cpp \
+ haskelltokenizer.cpp
HEADERS += \
haskell_global.h \
haskellcompletionassist.h \
haskellconstants.h \
haskelleditorfactory.h \
- haskellplugin.h
+ haskellplugin.h \
+ haskellhighlighter.h \
+ haskelltokenizer.h
## uncomment to build plugin into user config directory
## <localappdata>/plugins/<ideversion>
diff --git a/plugins/haskell/haskelleditorfactory.cpp b/plugins/haskell/haskelleditorfactory.cpp
index 220e52e..8119105 100644
--- a/plugins/haskell/haskelleditorfactory.cpp
+++ b/plugins/haskell/haskelleditorfactory.cpp
@@ -27,6 +27,7 @@
#include "haskellcompletionassist.h"
#include "haskellconstants.h"
+#include "haskellhighlighter.h"
#include <texteditor/textdocument.h>
#include <texteditor/texteditoractionhandler.h>
@@ -47,6 +48,7 @@ HaskellEditorFactory::HaskellEditorFactory()
setParenthesesMatchingEnabled(true);
setMarksVisible(true);
setCompletionAssistProvider(new HaskellCompletionAssistProvider);
+ setSyntaxHighlighterCreator([] { return new HaskellHighlighter(); });
}
} // Internal
diff --git a/plugins/haskell/haskellhighlighter.cpp b/plugins/haskell/haskellhighlighter.cpp
new file mode 100644
index 0000000..9899cc4
--- /dev/null
+++ b/plugins/haskell/haskellhighlighter.cpp
@@ -0,0 +1,152 @@
+/****************************************************************************
+**
+** Copyright (C) 2017 The Qt Company Ltd.
+** Contact: https://www.qt.io/licensing/
+**
+** This file is part of Qt Creator.
+**
+** Commercial License Usage
+** Licensees holding valid commercial Qt licenses may use this file in
+** accordance with the commercial license agreement provided with the
+** Software or, alternatively, in accordance with the terms contained in
+** a written agreement between you and The Qt Company. For licensing terms
+** and conditions see https://www.qt.io/terms-conditions. For further
+** information use the contact form at https://www.qt.io/contact-us.
+**
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU
+** General Public License version 3 as published by the Free Software
+** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
+** included in the packaging of this file. Please review the following
+** information to ensure the GNU General Public License requirements will
+** be met: https://www.gnu.org/licenses/gpl-3.0.html.
+**
+****************************************************************************/
+
+#include "haskellhighlighter.h"
+
+#include "haskelltokenizer.h"
+
+#include <texteditor/fontsettings.h>
+#include <texteditor/texteditorconstants.h>
+#include <texteditor/texteditorsettings.h>
+
+#include <QDebug>
+#include <QVector>
+
+// Words that are formatted as keywords only on a line where the "import" keyword
+// was seen before them (see HaskellHighlighter::highlightBlock).
+Q_GLOBAL_STATIC_WITH_ARGS(QSet<QString>, IMPORT_HIGHLIGHTS, ({
+ "qualified",
+ "as",
+ "hiding"
+}));
+
+using namespace TextEditor;
+
+namespace Haskell {
+namespace Internal {
+
+// Sets up the default text format categories and initializes the additional
+// format for toplevel declarations from the current editor font settings.
+HaskellHighlighter::HaskellHighlighter()
+{
+ setDefaultTextFormatCategories();
+ updateFormats(TextEditorSettings::fontSettings());
+}
+
+// Tokenizes one line of text and applies a format to each token.
+// The tokenizer is started with the state of the previous block, and the state it
+// returns is stored as the state of the current block.
+void HaskellHighlighter::highlightBlock(const QString &text)
+{
+    const Tokens tokens = HaskellTokenizer::tokenize(text, previousBlockState());
+    setCurrentBlockState(tokens.state);
+
+    // The first two non-whitespace tokens of the line are tracked to recognize
+    // lines of the form "<token> :: ...".
+    const Token *leadingToken = nullptr;
+    const Token *followingToken = nullptr;
+    bool afterTypeSeparator = false; // "::" was seen as the second non-whitespace token
+    bool afterImport = false;        // "import" keyword was seen on this line
+
+    for (const Token &token : tokens) {
+        switch (token.type) {
+        case TokenType::Variable:
+            // other variables keep the default text format
+            if (afterTypeSeparator) {
+                setTokenFormat(token, C_LOCAL);
+            } else if (afterImport && IMPORT_HIGHLIGHTS->contains(token.text.toString())) {
+                setTokenFormat(token, C_KEYWORD);
+            }
+            break;
+        case TokenType::Constructor:
+        case TokenType::OperatorConstructor:
+            setTokenFormat(token, C_TYPE);
+            break;
+        case TokenType::Operator:
+            setTokenFormat(token, C_OPERATOR);
+            break;
+        case TokenType::Whitespace:
+            setTokenFormat(token, C_VISUAL_WHITESPACE);
+            break;
+        case TokenType::Keyword:
+            if (token.text == "::" && leadingToken && !followingToken) {
+                // toplevel declaration: give the declared name its own format
+                setFormat(leadingToken->startCol, leadingToken->length, m_toplevelDeclFormat);
+                afterTypeSeparator = true;
+            } else if (token.text == "import") {
+                afterImport = true;
+            }
+            setTokenFormat(token, C_KEYWORD);
+            break;
+        case TokenType::Integer:
+        case TokenType::Float:
+            setTokenFormat(token, C_NUMBER);
+            break;
+        case TokenType::String:
+        case TokenType::Char:
+            setTokenFormatWithSpaces(text, token, C_STRING);
+            break;
+        case TokenType::EscapeSequence:
+            setTokenFormat(token, C_PRIMITIVE_TYPE);
+            break;
+        case TokenType::SingleLineComment:
+        case TokenType::MultiLineComment:
+            setTokenFormatWithSpaces(text, token, C_COMMENT);
+            break;
+        case TokenType::Special:
+            // keeps the default text format
+            break;
+        case TokenType::StringError:
+        case TokenType::CharError:
+        case TokenType::Unknown:
+            setTokenFormat(token, C_PARENTHESES_MISMATCH);
+            break;
+        }
+
+        if (token.type == TokenType::Whitespace)
+            continue;
+        if (!leadingToken)
+            leadingToken = &token;
+        else if (!followingToken)
+            followingToken = &token;
+    }
+}
+
+// Forwards the new font settings to the base class and then refreshes the
+// additional format that this highlighter keeps itself.
+void HaskellHighlighter::setFontSettings(const FontSettings &fontSettings)
+{
+ SyntaxHighlighter::setFontSettings(fontSettings);
+ updateFormats(fontSettings);
+}
+
+// Recomputes the format used for the name in a toplevel declaration, which is the
+// C_FUNCTION style with the C_DECLARATION style mixed in.
+void HaskellHighlighter::updateFormats(const FontSettings &fontSettings)
+{
+    const auto declarationStyles = TextStyles::mixinStyle(C_FUNCTION, C_DECLARATION);
+    m_toplevelDeclFormat = fontSettings.toTextCharFormat(declarationStyles);
+}
+
+// Applies the format of the text style category \a style to the range of \a token.
+void HaskellHighlighter::setTokenFormat(const Token &token, TextStyle style)
+{
+    const auto categoryFormat = formatForCategory(style);
+    setFormat(token.startCol, token.length, categoryFormat);
+}
+
+// Like setTokenFormat(), but goes through setFormatWithSpaces(), which additionally
+// receives the \a text of the line.
+void HaskellHighlighter::setTokenFormatWithSpaces(const QString &text, const Token &token,
+                                                  TextStyle style)
+{
+    const auto categoryFormat = formatForCategory(style);
+    setFormatWithSpaces(text, token.startCol, token.length, categoryFormat);
+}
+
+} // Internal
+} // Haskell
diff --git a/plugins/haskell/haskellhighlighter.h b/plugins/haskell/haskellhighlighter.h
new file mode 100644
index 0000000..6213333
--- /dev/null
+++ b/plugins/haskell/haskellhighlighter.h
@@ -0,0 +1,58 @@
+/****************************************************************************
+**
+** Copyright (C) 2017 The Qt Company Ltd.
+** Contact: https://www.qt.io/licensing/
+**
+** This file is part of Qt Creator.
+**
+** Commercial License Usage
+** Licensees holding valid commercial Qt licenses may use this file in
+** accordance with the commercial license agreement provided with the
+** Software or, alternatively, in accordance with the terms contained in
+** a written agreement between you and The Qt Company. For licensing terms
+** and conditions see https://www.qt.io/terms-conditions. For further
+** information use the contact form at https://www.qt.io/contact-us.
+**
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU
+** General Public License version 3 as published by the Free Software
+** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
+** included in the packaging of this file. Please review the following
+** information to ensure the GNU General Public License requirements will
+** be met: https://www.gnu.org/licenses/gpl-3.0.html.
+**
+****************************************************************************/
+
+#pragma once
+
+#include <texteditor/syntaxhighlighter.h>
+
+#include <QHash>
+#include <QTextFormat>
+
+namespace Haskell {
+namespace Internal {
+
+class Token;
+
+// Syntax highlighter for Haskell source, based on the line-wise HaskellTokenizer.
+class HaskellHighlighter : public TextEditor::SyntaxHighlighter
+{
+ Q_OBJECT
+
+public:
+ HaskellHighlighter();
+
+protected:
+ // Formats the tokens of one line of text.
+ void highlightBlock(const QString &text) override;
+
+private:
+ void setFontSettings(const TextEditor::FontSettings &fontSettings) override;
+ // Recomputes m_toplevelDeclFormat from the given font settings.
+ void updateFormats(const TextEditor::FontSettings &fontSettings);
+ // Helpers that apply the format of a text style category to a token's range.
+ void setTokenFormat(const Token &token, TextEditor::TextStyle style);
+ void setTokenFormatWithSpaces(const QString &text, const Token &token,
+ TextEditor::TextStyle style);
+ // Format for the declared name in a line of the form "name :: ...".
+ QTextCharFormat m_toplevelDeclFormat;
+};
+
+} // Internal
+} // Haskell
diff --git a/plugins/haskell/haskelltokenizer.cpp b/plugins/haskell/haskelltokenizer.cpp
new file mode 100644
index 0000000..527e505
--- /dev/null
+++ b/plugins/haskell/haskelltokenizer.cpp
@@ -0,0 +1,631 @@
+/****************************************************************************
+**
+** Copyright (C) 2017 The Qt Company Ltd.
+** Contact: https://www.qt.io/licensing/
+**
+** This file is part of Qt Creator.
+**
+** Commercial License Usage
+** Licensees holding valid commercial Qt licenses may use this file in
+** accordance with the commercial license agreement provided with the
+** Software or, alternatively, in accordance with the terms contained in
+** a written agreement between you and The Qt Company. For licensing terms
+** and conditions see https://www.qt.io/terms-conditions. For further
+** information use the contact form at https://www.qt.io/contact-us.
+**
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU
+** General Public License version 3 as published by the Free Software
+** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
+** included in the packaging of this file. Please review the following
+** information to ensure the GNU General Public License requirements will
+** be met: https://www.gnu.org/licenses/gpl-3.0.html.
+**
+****************************************************************************/
+
+#include "haskelltokenizer.h"
+
+#include <QSet>
+
+#include <algorithm>
+#include <functional>
+
+// Reserved operators. A run of symbol characters that is exactly equal to one of
+// these is tokenized as a Keyword instead of an Operator.
+// NOTE(review): '(' and ')' are in SPECIAL and therefore never part of a symbol run
+// (see isSymbol), so "(|" and "|)" look like they can never match - confirm.
+Q_GLOBAL_STATIC_WITH_ARGS(QSet<QString>, RESERVED_OP, ({
+ "..",
+ ":",
+ "::",
+ "=",
+ "\\",
+ "|",
+ "<-",
+ "->",
+ "@",
+ "~",
+ "=>",
+
+ // Arrows GHC extension
+ "-<",
+ "-<<",
+ ">-",
+ ">>-",
+ "(|",
+ "|)"
+}));
+
+// Reserved identifiers. A variable identifier that is exactly equal to one of these
+// is tokenized as a Keyword instead of a Variable.
+Q_GLOBAL_STATIC_WITH_ARGS(QSet<QString>, RESERVED_ID, ({
+ "case",
+ "class",
+ "data",
+ "default",
+ "deriving",
+ "do",
+ "else",
+ "foreign",
+ "if",
+ "import",
+ "in",
+ "infix",
+ "infixl",
+ "infixr",
+ "instance",
+ "let",
+ "module",
+ "newtype",
+ "of",
+ "then",
+ "type",
+ "where",
+ "_",
+
+ // from GHC extensions
+ "family",
+ "forall",
+ "mdo",
+ "proc",
+ "rec"
+}));
+
+// Characters that form a single-character Special token. They are excluded from
+// the symbol characters that operators are made of.
+Q_GLOBAL_STATIC_WITH_ARGS(QSet<QChar>, SPECIAL, ({
+ '(',
+ ')',
+ ',',
+ ';',
+ '[',
+ ']',
+ '`',
+ '{',
+ '}',
+}));
+
+// Single characters that form a valid escape sequence directly after a backslash.
+// '&' is accepted here, but rejected by getChar for character literals.
+Q_GLOBAL_STATIC_WITH_ARGS(QSet<QChar>, CHAR_ESCAPES,
+ ({'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', '"', '\'', '&'}));
+
+// Names of ASCII control characters that are valid escape sequences after a backslash.
+// getEscape returns the first entry that the text starts with, so the order of
+// entries matters when one name is a prefix of another.
+Q_GLOBAL_STATIC_WITH_ARGS(QVector<QString>, ASCII_ESCAPES, ({
+ "NUL",
+ "SOH", // must be before "SO" to match
+ "STX",
+ "ETX",
+ "EOT",
+ "ENQ",
+ "ACK",
+ "BEL",
+ "BS",
+ "HT",
+ "LF",
+ "VT",
+ "FF",
+ "CR",
+ "SO",
+ "SI",
+ "DLE",
+ "DC1",
+ "DC2",
+ "DC3",
+ "DC4",
+ "NAK",
+ "SYN",
+ "ETB",
+ "CAN",
+ "EM",
+ "SUB",
+ "ESC",
+ "FS",
+ "GS",
+ "RS",
+ "US",
+ "SP",
+ "DEL"
+}));
+
+namespace Haskell {
+namespace Internal {
+
+// Creates a token of the given \a type for the characters [start, end) of \a line.
+// The token stores a reference into \a line and shares ownership of it.
+Token token(TokenType type, std::shared_ptr<QString> line, int start, int end)
+{
+    const int tokenLength = end - start;
+    return {type, start, tokenLength, line->midRef(start, tokenLength), line};
+}
+
+// Creates an empty token list that shares ownership of the tokenized \a source line.
+Tokens::Tokens(std::shared_ptr<QString> source)
+ : source(source)
+{
+}
+
+// Returns the number of consecutive characters of \a line, beginning at index
+// \a begin, for which \a test returns true. The result is 0 if \a begin is at or
+// after the end of the line, or if the character at \a begin fails the test.
+static int grab(const QString &line, int begin,
+                const std::function<bool(const QChar&)> &test)
+{
+    const int length = line.length();
+    int current = begin;
+    while (current < length && test(line.at(current)))
+        ++current;
+    return current - begin;
+}
+
+
+// True for characters that may occur within an identifier:
+// letters, numbers, the single quote and the underscore.
+static bool isIdentifierChar(const QChar &c)
+{
+    if (c == '_' || c == '\'')
+        return true;
+    return c.isLetterOrNumber();
+}
+
+// True for characters that may start a variable identifier:
+// lower case letters and the underscore.
+static bool isVariableIdentifierStart(const QChar &c)
+{
+    if (c.isLower())
+        return true;
+    return c == '_';
+}
+
+// True for the ASCII characters that operators can be made of.
+static bool isAscSymbol(const QChar &c)
+{
+    switch (c.unicode()) {
+    case '!':
+    case '#':
+    case '$':
+    case '%':
+    case '&':
+    case '*':
+    case '+':
+    case '.':
+    case '/':
+    case '<':
+    case '=':
+    case '>':
+    case '?':
+    case '@':
+    case '\\':
+    case '^':
+    case '|':
+    case '-':
+    case '~':
+    case ':':
+        return true;
+    default:
+        return false;
+    }
+}
+
+// True for characters that operators can be made of: the ASCII symbols, and any
+// other symbol or punctuation character except underscore, double quote, single
+// quote and the SPECIAL characters.
+static bool isSymbol(const QChar &c)
+{
+    if (isAscSymbol(c))
+        return true;
+    if (!c.isSymbol() && !c.isPunct())
+        return false;
+    if (c == '_' || c == '"' || c == '\'')
+        return false;
+    return !SPECIAL->contains(c);
+}
+
+// Predicate for decimal digits, in a form that can be passed to grab().
+static bool isDigit(const QChar &c)
+{
+ return c.isDigit();
+}
+
+// True for the octal digits '0' to '7'.
+static bool isOctit(const QChar &c)
+{
+    if (c < '0')
+        return false;
+    return c <= '7';
+}
+
+// True for hexadecimal digits: decimal digits and the letters a-f in either case.
+static bool isHexit(const QChar &c)
+{
+    if (c.isDigit())
+        return true;
+    const bool upperHex = c >= 'A' && c <= 'F';
+    const bool lowerHex = c >= 'a' && c <= 'f';
+    return upperHex || lowerHex;
+}
+
+// True for the characters that may follow '^' in a control character escape:
+// upper case ASCII letters and the characters @ [ \ ] ^ _
+static bool isCntrl(const QChar &c)
+{
+    if (c >= 'A' && c <= 'Z')
+        return true;
+    switch (c.unicode()) {
+    case '@':
+    case '[':
+    case '\\':
+    case ']':
+    case '^':
+    case '_':
+        return true;
+    default:
+        return false;
+    }
+}
+
+// Returns a single Whitespace token for the run of whitespace characters that
+// begins at \a start, or an empty vector if there is no whitespace at \a start.
+static QVector<Token> getSpace(std::shared_ptr<QString> line, int start)
+{
+    const int lineLength = line->length();
+    int end = start;
+    while (end < lineLength && line->at(end).isSpace())
+        ++end;
+    if (end == start)
+        return {};
+    const int spaceCount = end - start;
+    return {{TokenType::Whitespace, start, spaceCount, line->midRef(start, spaceCount), line}};
+}
+
+// Returns a single Integer or Float token for the number literal that begins at
+// \a start, or an empty vector if the character at \a start is not a digit.
+// Recognized are octal (0o, 0O) and hexadecimal (0x, 0X) integers, decimal integers,
+// and decimal numbers with a fractional part and/or an exponent.
+static QVector<Token> getNumber(std::shared_ptr<QString> line, int start)
+{
+    const QChar &firstChar = line->at(start);
+    if (!firstChar.isDigit())
+        return {};
+    const int lineLength = line->length();
+    int end = start + 1;
+    TokenType numberType = TokenType::Integer;
+    // a single digit at the end of the line
+    if (end >= lineLength)
+        return {token(numberType, line, start, end)};
+
+    if (firstChar == '0') {
+        // octal or hexadecimal, if the prefix is followed by at least one valid digit
+        const QChar &prefix = line->at(end);
+        const bool octal = prefix == 'o' || prefix == 'O';
+        const bool hexadecimal = !octal && (prefix == 'x' || prefix == 'X');
+        if (octal || hexadecimal) {
+            const int digitCount = octal ? grab(*line, end + 1, isOctit)
+                                         : grab(*line, end + 1, isHexit);
+            if (digitCount > 0)
+                return {token(TokenType::Integer, line, start, end + digitCount + 1)};
+        }
+    }
+
+    // decimal integer part
+    end = start + grab(*line, start, isDigit);
+
+    // fractional part: '.' followed by at least one digit
+    if (end < lineLength && line->at(end) == '.') {
+        const int fractionDigits = grab(*line, end + 1, isDigit);
+        if (fractionDigits > 0) {
+            end += fractionDigits + 1;
+            numberType = TokenType::Float;
+        }
+    }
+
+    // exponent: 'e' or 'E', an optional sign, and at least one digit
+    if (end + 1 < lineLength && (line->at(end) == 'e' || line->at(end) == 'E')) {
+        int exponentDigitsStart = end + 1;
+        if (line->at(exponentDigitsStart) == '+' || line->at(exponentDigitsStart) == '-')
+            ++exponentDigitsStart;
+        const int exponentDigits = grab(*line, exponentDigitsStart, isDigit);
+        if (exponentDigits > 0) {
+            end = exponentDigitsStart + exponentDigits;
+            numberType = TokenType::Float;
+        }
+    }
+    return {token(numberType, line, start, end)};
+}
+
+// Tokenizes what begins at \a start as one of: constructor, (qualified) variable,
+// reserved identifier, (qualified) operator, reserved operator, or single line comment.
+// A leading sequence of "Conid." parts is treated as a module qualification.
+// Returns an empty vector if none of these begins at \a start. More than one token
+// is returned when a qualification precedes a reserved identifier or a comment.
+static QVector<Token> getIdOrOpOrSingleLineComment(std::shared_ptr<QString> line, int start)
+{
+ const int length = line->length();
+ if (start >= length)
+ return {};
+ int current = start;
+ // check for {conid.}conid
+ int conidEnd = start;
+ bool canOnlyBeConstructor = false;
+ while (current < length && line->at(current).isUpper()) {
+ current += grab(*line, current, isIdentifierChar);
+ conidEnd = current;
+ // it is definitely a constructor id if it is not followed by a '.'
+ canOnlyBeConstructor = current >= length || line->at(current) != '.';
+ // otherwise it might be a module id, and we skip the dot to check for qualified thing
+ if (!canOnlyBeConstructor)
+ ++current;
+ }
+ if (canOnlyBeConstructor)
+ return {token(TokenType::Constructor, line, start, conidEnd)};
+
+ // check for variable or reserved id
+ if (current < length && isVariableIdentifierStart(line->at(current))) {
+ const int varLen = grab(*line, current, isIdentifierChar);
+ // check for reserved id
+ if (RESERVED_ID->contains(line->mid(current, varLen))) {
+ QVector<Token> result;
+ // possibly add constructor + op '.'
+ if (conidEnd > start) {
+ result.append(token(TokenType::Constructor, line, start, conidEnd));
+ result.append(token(TokenType::Operator, line, conidEnd, current));
+ }
+ result.append(token(TokenType::Keyword, line, current, current + varLen));
+ return result;
+ }
+ // the qualification, if any, is part of the variable token
+ return {token(TokenType::Variable, line, start, current + varLen)};
+ }
+ // check for operator
+ if (current < length && isSymbol(line->at(current))) {
+ const int opLen = grab(*line, current, isSymbol);
+ // check for reserved op
+ if (RESERVED_OP->contains(line->mid(current, opLen))) {
+ // because of the case of F... (constructor + op '...') etc
+ // we only add conid if we have one, handling the rest in next iteration
+ if (conidEnd > start)
+ return {token(TokenType::Constructor, line, start, conidEnd)};
+ return {token(TokenType::Keyword, line, start, current + opLen)};
+ }
+ // check for single line comment
+ // (a symbol run that consists of two or more dashes and nothing else)
+ if (opLen >= 2 && std::all_of(line->begin() + current, line->begin() + current + opLen,
+ [](const QChar c) { return c == '-'; })) {
+ QVector<Token> result;
+ // possibly add constructor + op '.'
+ if (conidEnd > start) {
+ result.append(token(TokenType::Constructor, line, start, conidEnd));
+ result.append(token(TokenType::Operator, line, conidEnd, current));
+ }
+ // rest is comment
+ result.append(token(TokenType::SingleLineComment, line, current, length));
+ return result;
+ }
+ // check for (qualified?) operator constructor
+ if (line->at(current) == ':')
+ return {token(TokenType::OperatorConstructor, line, start, current + opLen)};
+ return {token(TokenType::Operator, line, start, current + opLen)};
+ }
+ // Foo.Blah.
+ // (only the constructor is returned, the trailing dot is left for the next round)
+ if (conidEnd > start)
+ return {token(TokenType::Constructor, line, start, conidEnd)};
+ return {};
+}
+
+// Returns the length of the escape sequence in \a line whose first character after
+// the introducing backslash is at index \a start. The backslash itself is not
+// counted. Returns 0 if no valid escape sequence begins there.
+// The caller must make sure that \a start is a valid index into \a line.
+static int getEscape(const QString &line, int start)
+{
+    const QChar first = line.at(start);
+    // single character escapes
+    if (CHAR_ESCAPES->contains(first))
+        return 1;
+
+    // decimal
+    if (first.isDigit())
+        return grab(line, start + 1, isDigit) + 1;
+    // octal
+    if (first == 'o') {
+        const int count = grab(line, start + 1, isOctit);
+        if (count < 1) // no octal number after 'o'
+            return 0;
+        return count + 1;
+    }
+    // hexadecimal
+    if (first == 'x') {
+        const int count = grab(line, start + 1, isHexit);
+        if (count < 1) // no hexadecimal number after 'x'
+            return 0;
+        return count + 1;
+    }
+    // ascii cntrl
+    if (first == '^') {
+        // '^' is followed by exactly one control character, anything after that
+        // is no longer part of the escape sequence
+        if (start + 1 < line.length() && isCntrl(line.at(start + 1)))
+            return 2;
+        return 0; // no control character after '^'
+    }
+    // ascii control character names
+    const QStringRef s = line.midRef(start);
+    for (const QString &esc : *ASCII_ESCAPES) {
+        if (s.startsWith(esc))
+            return esc.length();
+    }
+    return 0;
+}
+
+// Tokenizes a string literal, or the continuation of one after a string gap.
+// \a inStringGap is in-out: on entry it tells whether the previous line ended within
+// a string gap, in which case tokenizing starts inside the string. On exit it tells
+// whether a string gap is open at the point where tokenizing stopped.
+// Returns String, EscapeSequence and StringError tokens, or an empty vector if no
+// string begins at \a start and no string gap is open.
+static QVector<Token> getString(std::shared_ptr<QString> line, int start, bool *inStringGap/*in-out*/)
+{
+ // Haskell has the specialty of using \<whitespace>\ within strings for multiline strings
+ const int length = line->length();
+ if (start >= length)
+ return {};
+ QVector<Token> result;
+ int tokenStart = start;
+ int current = tokenStart;
+ bool inString = *inStringGap;
+ do {
+ const QChar c = line->at(current);
+ if (*inStringGap && !c.isSpace() && c != '\\') {
+ // invalid non-whitespace in string gap
+ // add previous string as token, this is at least a whitespace
+ result.append(token(TokenType::String, line, tokenStart, current));
+ // then add wrong non-whitespace
+ tokenStart = current;
+ do { ++current; } while (current < length && !line->at(current).isSpace());
+ result.append(token(TokenType::StringError, line, tokenStart, current));
+ tokenStart = current;
+ } else if (c == '"') {
+ // opening or closing quote
+ inString = !inString;
+ ++current;
+ } else if (inString) {
+ if (c == '\\') {
+ ++current;
+ if (*inStringGap) {
+ // ending string gap
+ *inStringGap = false;
+ } else if (current >= length || line->at(current).isSpace()) {
+ // starting string gap
+ *inStringGap = true;
+ current = std::min(current + 1, length);
+ } else { // there is at least one character after current
+ const int escapeLength = getEscape(*line, current);
+ if (escapeLength > 0) {
+ // valid escape
+ // add previous string as token without backslash, if necessary
+ if (tokenStart < current - 1/*backslash*/)
+ result.append(token(TokenType::String, line, tokenStart, current - 1));
+ tokenStart = current - 1; // backslash
+ current += escapeLength;
+ result.append(token(TokenType::EscapeSequence, line, tokenStart, current));
+ tokenStart = current;
+ } else { // invalid escape sequence
+ // add previous string as token, this is at least backslash
+ result.append(token(TokenType::String, line, tokenStart, current));
+ result.append(token(TokenType::StringError, line, current, current + 1));
+ ++current;
+ tokenStart = current;
+ }
+ }
+ } else {
+ ++current;
+ }
+ }
+ // if none of the branches applied, no string begins here and the loop ends
+ } while (current < length && inString);
+ if (current > tokenStart)
+ result.append(token(TokenType::String, line, tokenStart, current));
+ if (inString && !*inStringGap) { // unterminated string
+ // mark last character of last token as StringError as an error hint
+ if (!result.isEmpty()) { // should actually never be different
+ Token &lastRef = result.last();
+ if (lastRef.length == 1) {
+ lastRef.type = TokenType::StringError;
+ } else {
+ --lastRef.length;
+ lastRef.text = line->midRef(lastRef.startCol, lastRef.length);
+ result.append(token(TokenType::StringError, line, current - 1, current));
+ }
+ }
+ }
+ return result;
+}
+
+// Tokenizes a (possibly nested) multi line comment {- foo {- bar -} blah -}.
+// \a commentLevel is in-out: on entry it is the nesting level that is already open
+// at \a start, on exit the nesting level that is still open where tokenizing stopped.
+// Returns a single MultiLineComment token, or an empty vector if no comment is open
+// and none begins at \a start.
+static QVector<Token> getMultiLineComment(std::shared_ptr<QString> line, int start,
+                                          int *commentLevel/*in_out*/)
+{
+    const int lineLength = line->length();
+    int position = start;
+    while (true) {
+        const QStringRef pair = line->midRef(position, 2);
+        if (pair == "{-") {
+            // comment begin, possibly nested
+            ++(*commentLevel);
+            position += 2;
+        } else if (*commentLevel > 0) {
+            if (pair == "-}") {
+                // end of the innermost open comment
+                --(*commentLevel);
+                position += 2;
+            } else {
+                // comment content
+                ++position;
+            }
+        }
+        if (position >= lineLength || *commentLevel <= 0)
+            break;
+    }
+    if (position <= start)
+        return {};
+    return {token(TokenType::MultiLineComment, line, start, position)};
+}
+
+// Tokenizes a character literal that begins with a single quote at \a start.
+// Returns Char, EscapeSequence and CharError tokens, or an empty vector if the
+// character at \a start is not a single quote. Literals that are empty, unterminated,
+// contain more than one character, or contain an invalid escape sequence are
+// (partially) marked with CharError tokens.
+static QVector<Token> getChar(std::shared_ptr<QString> line, int start)
+{
+ if (line->at(start) != '\'')
+ return {};
+ QVector<Token> result;
+ const int length = line->length();
+ int tokenStart = start;
+ int current = tokenStart + 1;
+ bool inChar = true;
+ int count = 0; // number of characters (or escape sequences) seen inside the quotes
+ while (current < length && inChar) {
+ if (line->at(current) == '\'') {
+ inChar = false;
+ ++current;
+ } else if (count == 1) {
+ // we already have one character, so start CharError token
+ if (current > tokenStart)
+ result.append(token(TokenType::Char, line, tokenStart, current));
+ tokenStart = current;
+ ++count;
+ ++current;
+ } else if (count > 1) {
+ ++count;
+ ++current;
+ } else if (line->at(current) == '\\') {
+ if (current + 1 < length) {
+ ++current;
+ ++count;
+ const int escapeLength = getEscape(*line, current);
+ if (line->at(current) != '&' && escapeLength > 0) { // no & escape for chars
+ // valid escape
+ // add previous part as token without backslash, if necessary
+ if (tokenStart < current - 1/*backslash*/)
+ result.append(token(TokenType::Char, line, tokenStart, current - 1));
+ tokenStart = current - 1; // backslash
+ current += escapeLength;
+ result.append(token(TokenType::EscapeSequence, line, tokenStart, current));
+ tokenStart = current;
+ } else { // invalid escape sequence
+ // add previous part as token, this is at least backslash
+ result.append(token(TokenType::Char, line, tokenStart, current));
+ result.append(token(TokenType::CharError, line, current, current + 1));
+ ++current;
+ tokenStart = current;
+ }
+ } else {
+ // backslash is the last character of the line
+ ++current;
+ }
+ } else {
+ ++count;
+ ++current;
+ }
+ }
+ if (count > 1 && inChar) {
+ // too long and unterminated, just add CharError token till end
+ result.append(token(TokenType::CharError, line, tokenStart, current));
+ } else if (count > 1) {
+ // too long but terminated, add CharError up to ending quote, then quote
+ result.append(token(TokenType::CharError, line, tokenStart, current - 1));
+ result.append(token(TokenType::Char, line, current - 1, current));
+ } else if (inChar || count < 1) {
+ // unterminated, or no character inside, mark last character as error
+ if (current > tokenStart + 1)
+ result.append(token(TokenType::Char, line, tokenStart, current - 1));
+ result.append(token(TokenType::CharError, line, current - 1, current));
+ } else {
+ result.append(token(TokenType::Char, line, tokenStart, current));
+ }
+ return result;
+}
+
+// Returns a single-character Special token if the character at \a start is one of
+// the SPECIAL characters, and an empty vector otherwise.
+static QVector<Token> getSpecial(std::shared_ptr<QString> line, int start)
+{
+    const bool special = SPECIAL->contains(line->at(start));
+    if (!special)
+        return {};
+    return {{TokenType::Special, start, 1, line->midRef(start, 1), line}};
+}
+
+// Splits a single \a line of source text into tokens.
+// \a startState is the state that tokenizing the previous line ended with (see
+// Tokens::State): an open string gap, or the guard value plus the nesting level of
+// open multi line comments. Any other value starts without open string or comment.
+// The state at the end of the line is returned in the result's \c state member.
+Tokens HaskellTokenizer::tokenize(const QString &line, int startState)
+{
+    Tokens result(std::make_shared<QString>(line));
+    const int length = result.source->length();
+    bool inStringGap = startState == int(Tokens::State::StringGap);
+    int multiLineCommentLevel = std::max(startState - int(Tokens::State::MultiLineCommentGuard), 0);
+    int currentStart = 0;
+    QVector<Token> tokens;
+    while (currentStart < length) {
+        // The order matters: strings are only looked for outside of comments, and
+        // the first tokenizer that recognizes something at currentStart wins.
+        if (multiLineCommentLevel <= 0 &&
+                !(tokens = getString(result.source, currentStart, &inStringGap)).isEmpty()) {
+            result.append(tokens);
+        } else if (!(tokens = getMultiLineComment(result.source, currentStart,
+                                                  &multiLineCommentLevel)).isEmpty()) {
+            result.append(tokens);
+        } else if (!(tokens = getChar(result.source, currentStart)).isEmpty()) {
+            result.append(tokens);
+        } else if (!(tokens = getSpace(result.source, currentStart)).isEmpty()) {
+            result.append(tokens);
+        } else if (!(tokens = getNumber(result.source, currentStart)).isEmpty()) {
+            result.append(tokens);
+        } else if (!(tokens = getIdOrOpOrSingleLineComment(result.source, currentStart)).isEmpty()) {
+            result.append(tokens);
+        } else if (!(tokens = getSpecial(result.source, currentStart)).isEmpty()) {
+            result.append(tokens);
+        } else {
+            // nothing recognized, consume a single character as Unknown
+            tokens = {{TokenType::Unknown,
+                       currentStart,
+                       1,
+                       result.source->midRef(currentStart, 1),
+                       result.source}};
+            result.append(tokens);
+        }
+        // Continue after the tokens found in this round. Summed up with a plain loop,
+        // because <numeric> (for std::accumulate) is not included by this file.
+        int consumed = 0;
+        for (auto it = tokens.cbegin(), end = tokens.cend(); it != end; ++it)
+            consumed += it->length;
+        currentStart += consumed;
+    }
+    if (inStringGap)
+        result.state = int(Tokens::State::StringGap);
+    else if (multiLineCommentLevel > 0)
+        result.state = int(Tokens::State::MultiLineCommentGuard) + multiLineCommentLevel;
+    return result;
+}
+
+// Returns false for tokens of type Unknown, which is also the type of a default
+// constructed token, and true for all other types (including the error types).
+bool Token::isValid() const
+{
+ return type != TokenType::Unknown;
+}
+
+} // Internal
+} // Haskell
diff --git a/plugins/haskell/haskelltokenizer.h b/plugins/haskell/haskelltokenizer.h
new file mode 100644
index 0000000..46b4b00
--- /dev/null
+++ b/plugins/haskell/haskelltokenizer.h
@@ -0,0 +1,91 @@
+/****************************************************************************
+**
+** Copyright (C) 2017 The Qt Company Ltd.
+** Contact: https://www.qt.io/licensing/
+**
+** This file is part of Qt Creator.
+**
+** Commercial License Usage
+** Licensees holding valid commercial Qt licenses may use this file in
+** accordance with the commercial license agreement provided with the
+** Software or, alternatively, in accordance with the terms contained in
+** a written agreement between you and The Qt Company. For licensing terms
+** and conditions see https://www.qt.io/terms-conditions. For further
+** information use the contact form at https://www.qt.io/contact-us.
+**
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU
+** General Public License version 3 as published by the Free Software
+** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
+** included in the packaging of this file. Please review the following
+** information to ensure the GNU General Public License requirements will
+** be met: https://www.gnu.org/licenses/gpl-3.0.html.
+**
+****************************************************************************/
+
+#pragma once
+
+#include <QChar>
+#include <QString>
+#include <QVector>
+
+#include <memory>
+
+namespace Haskell {
+namespace Internal {
+
+// Kinds of tokens that HaskellTokenizer produces.
+// StringError and CharError mark invalid parts of string and character literals,
+// Unknown marks characters that no tokenizing rule recognized.
+enum class TokenType {
+ Variable,
+ Constructor,
+ Operator,
+ OperatorConstructor,
+ Whitespace,
+ String,
+ StringError,
+ Char,
+ CharError,
+ EscapeSequence,
+ Integer,
+ Float,
+ Keyword,
+ Special,
+ SingleLineComment,
+ MultiLineComment,
+ Unknown
+};
+
+// A single token within a line of source text.
+class Token {
+public:
+ // Returns whether the token has a type other than Unknown.
+ bool isValid() const;
+
+ TokenType type = TokenType::Unknown;
+ int startCol = -1; // index of the token's first character within the line
+ int length = -1; // number of characters of the token
+ QStringRef text; // the token's text, a reference into *source
+
+ std::shared_ptr<QString> source; // keep the string ref alive
+};
+
+// The tokens of a single line, together with the tokenizer state at the end of it.
+class Tokens : public QVector<Token>
+{
+public:
+ // Values for the state that is carried over from one line to the next.
+ // Open multi line comments are encoded as MultiLineCommentGuard plus the
+ // nesting level, so the state can be larger than any of the enumerators.
+ enum class State {
+ None = -1,
+ StringGap = 0, // gap == two backslashes enclosing only whitespace
+ MultiLineCommentGuard // nothing may follow that
+ };
+
+ Tokens(std::shared_ptr<QString> source);
+
+ std::shared_ptr<QString> source; // the tokenized line, shared with the tokens
+ int state = int(State::None); // state at the end of the line
+};
+
+// Line based tokenizer for Haskell source text.
+class HaskellTokenizer
+{
+public:
+ // Tokenizes a single line. startState is the state that the previous line
+ // ended with, the state at the end of this line is returned in Tokens::state.
+ static Tokens tokenize(const QString &line, int startState);
+};
+
+} // Internal
+} // Haskell