diff options
Diffstat (limited to 'src/qml/parser/qqmljslexer.cpp')
-rw-r--r-- | src/qml/parser/qqmljslexer.cpp | 1110 |
1 files changed, 681 insertions, 429 deletions
diff --git a/src/qml/parser/qqmljslexer.cpp b/src/qml/parser/qqmljslexer.cpp index 5f0276da8f..cdb3dde5c6 100644 --- a/src/qml/parser/qqmljslexer.cpp +++ b/src/qml/parser/qqmljslexer.cpp @@ -1,41 +1,5 @@ -/**************************************************************************** -** -** Copyright (C) 2016 The Qt Company Ltd. -** Contact: https://www.qt.io/licensing/ -** -** This file is part of the QtQml module of the Qt Toolkit. -** -** $QT_BEGIN_LICENSE:LGPL$ -** Commercial License Usage -** Licensees holding valid commercial Qt licenses may use this file in -** accordance with the commercial license agreement provided with the -** Software or, alternatively, in accordance with the terms contained in -** a written agreement between you and The Qt Company. For licensing terms -** and conditions see https://www.qt.io/terms-conditions. For further -** information use the contact form at https://www.qt.io/contact-us. -** -** GNU Lesser General Public License Usage -** Alternatively, this file may be used under the terms of the GNU Lesser -** General Public License version 3 as published by the Free Software -** Foundation and appearing in the file LICENSE.LGPL3 included in the -** packaging of this file. Please review the following information to -** ensure the GNU Lesser General Public License version 3 requirements -** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. -** -** GNU General Public License Usage -** Alternatively, this file may be used under the terms of the GNU -** General Public License version 2.0 or (at your option) the GNU General -** Public license version 3 or any later version approved by the KDE Free -** Qt Foundation. The licenses are as published by the Free Software -** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 -** included in the packaging of this file. Please review the following -** information to ensure the GNU General Public License requirements will -** be met: https://www.gnu.org/licenses/gpl-2.0.html and -** https://www.gnu.org/licenses/gpl-3.0.html. -** -** $QT_END_LICENSE$ -** -****************************************************************************/ +// Copyright (C) 2016 The Qt Company Ltd. +// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only #include "qqmljslexer_p.h" #include "qqmljsengine_p.h" @@ -51,6 +15,9 @@ #include <QtCore/qdebug.h> #include <QtCore/QScopedValueRollback> +#include <optional> + +QT_BEGIN_NAMESPACE using namespace QQmlJS; static inline int regExpFlagFromChar(const QChar &ch) @@ -80,31 +47,8 @@ static inline QChar convertHex(QChar c1, QChar c2) return QChar((convertHex(c1.unicode()) << 4) + convertHex(c2.unicode())); } -Lexer::Lexer(Engine *engine) - : _engine(engine) - , _codePtr(nullptr) - , _endPtr(nullptr) - , _tokenStartPtr(nullptr) - , _char(u'\n') - , _errorCode(NoError) - , _currentLineNumber(0) - , _currentColumnNumber(0) - , _tokenValue(0) - , _parenthesesState(IgnoreParentheses) - , _parenthesesCount(0) - , _stackToken(-1) - , _patternFlags(0) - , _tokenKind(0) - , _tokenLength(0) - , _tokenLine(0) - , _tokenColumn(0) - , _validTokenText(false) - , _prohibitAutomaticSemicolon(false) - , _restrictedKeyword(false) - , _terminator(false) - , _followsClosingBrace(false) - , _delimited(true) - , _qmlMode(true) +Lexer::Lexer(Engine *engine, LexMode lexMode) + : _engine(engine), _lexMode(lexMode), _endPtr(nullptr), _qmlMode(true) { if (engine) engine->setLexer(this); @@ -120,13 +64,20 @@ QString Lexer::code() const return _code; } -void Lexer::setCode(const QString &code, int lineno, bool qmlMode) +void Lexer::setCode(const QString &code, int lineno, bool qmlMode, + Lexer::CodeContinuation codeContinuation) { + if (codeContinuation == Lexer::CodeContinuation::Continue) + _currentOffset += _code.size(); + else + _currentOffset = 0; if (_engine) _engine->setCode(code); _qmlMode = qmlMode; _code = code; + _skipLinefeed = false; + _tokenText.clear(); _tokenText.reserve(1024); _errorMessage.clear(); @@ -134,33 +85,18 @@ void Lexer::setCode(const QString &code, int lineno, bool qmlMode) _rawString = QStringView(); _codePtr = code.unicode(); - _endPtr = _codePtr + code.length(); + _endPtr = _codePtr + code.size(); _tokenStartPtr = _codePtr; - _char = u'\n'; - _errorCode = NoError; - - _currentLineNumber = lineno; + if (lineno >= 0) + _currentLineNumber = lineno; _currentColumnNumber = 0; - _tokenValue = 0; - - // parentheses state - _parenthesesState = IgnoreParentheses; - _parenthesesCount = 0; - - _stackToken = -1; - - _patternFlags = 0; - _tokenLength = 0; - _tokenLine = lineno; + _tokenLine = _currentLineNumber; _tokenColumn = 0; + _tokenLength = 0; - _validTokenText = false; - _prohibitAutomaticSemicolon = false; - _restrictedKeyword = false; - _terminator = false; - _followsClosingBrace = false; - _delimited = true; + if (codeContinuation == Lexer::CodeContinuation::Reset) + _state = State {}; } void Lexer::scanChar() @@ -170,20 +106,28 @@ void Lexer::scanChar() ++_codePtr; _skipLinefeed = false; } - _char = *_codePtr++; + _state.currentChar = *_codePtr++; ++_currentColumnNumber; if (isLineTerminator()) { - if (_char == u'\r') { + if (_state.currentChar == u'\r') { if (_codePtr < _endPtr && *_codePtr == u'\n') _skipLinefeed = true; - _char = u'\n'; + _state.currentChar = u'\n'; } ++_currentLineNumber; _currentColumnNumber = 0; } } +QChar Lexer::peekChar() +{ + auto peekPtr = _codePtr; + if (peekPtr < _endPtr) + return *peekPtr; + return QChar(); +} + namespace { inline bool isBinop(int tok) { @@ -251,107 +195,184 @@ int octalDigit(QChar c) int Lexer::lex() { - const int previousTokenKind = _tokenKind; + const int previousTokenKind = _state.tokenKind; + int tokenKind; + bool firstPass = true; again: - _tokenSpell = QStringView(); - _rawString = QStringView(); - _tokenKind = scanToken(); - _tokenLength = _codePtr - _tokenStartPtr - 1; - - _delimited = false; - _restrictedKeyword = false; - _followsClosingBrace = (previousTokenKind == T_RBRACE); - - // update the flags - switch (_tokenKind) { - case T_LBRACE: - if (_bracesCount > 0) - ++_bracesCount; - Q_FALLTHROUGH(); - case T_SEMICOLON: - _importState = ImportState::NoQmlImport; - Q_FALLTHROUGH(); - case T_QUESTION: - case T_COLON: - case T_TILDE: - _delimited = true; - break; - case T_AUTOMATIC_SEMICOLON: - case T_AS: - _importState = ImportState::NoQmlImport; - Q_FALLTHROUGH(); - default: - if (isBinop(_tokenKind)) - _delimited = true; - break; - - case T_IMPORT: - if (qmlMode() || (_handlingDirectives && previousTokenKind == T_DOT)) - _importState = ImportState::SawImport; - if (isBinop(_tokenKind)) - _delimited = true; - break; - - case T_IF: - case T_FOR: - case T_WHILE: - case T_WITH: - _parenthesesState = CountParentheses; - _parenthesesCount = 0; - break; - - case T_ELSE: - case T_DO: - _parenthesesState = BalancedParentheses; - break; - - case T_CONTINUE: - case T_BREAK: - case T_RETURN: - case T_YIELD: - case T_THROW: - _restrictedKeyword = true; - break; - case T_RBRACE: - if (_bracesCount > 0) - --_bracesCount; - if (_bracesCount == 0) - goto again; - } // switch + tokenKind = T_ERROR; + _tokenSpell = QStringView(); + _rawString = QStringView(); + if (firstPass && _state.stackToken == -1) { + firstPass = false; + if (_codePtr > _endPtr && _lexMode == LexMode::LineByLine && !_code.isEmpty()) + return T_EOL; + + if (_state.comments == CommentState::InMultilineComment) { + scanChar(); + _tokenStartPtr = _codePtr - 1; + _tokenLine = _currentLineNumber; + _tokenColumn = _currentColumnNumber; + while (_codePtr <= _endPtr) { + if (_state.currentChar == u'*') { + scanChar(); + if (_state.currentChar == u'/') { + scanChar(); + if (_engine) { + _engine->addComment(tokenOffset() + 2, + _codePtr - _tokenStartPtr - 1 - 4, + tokenStartLine(), tokenStartColumn() + 2); + } + tokenKind = T_COMMENT; + break; + } + } else { + scanChar(); + } + } + if (tokenKind == T_ERROR) + tokenKind = T_PARTIAL_COMMENT; + } else { + // handle multiline continuation + std::optional<ScanStringMode> scanMode; + switch (previousTokenKind) { + case T_PARTIAL_SINGLE_QUOTE_STRING_LITERAL: + scanMode = ScanStringMode::SingleQuote; + break; + case T_PARTIAL_DOUBLE_QUOTE_STRING_LITERAL: + scanMode = ScanStringMode::DoubleQuote; + break; + case T_PARTIAL_TEMPLATE_HEAD: + scanMode = ScanStringMode::TemplateHead; + break; + case T_PARTIAL_TEMPLATE_MIDDLE: + scanMode = ScanStringMode::TemplateContinuation; + break; + default: + break; + } + if (scanMode) { + scanChar(); + _tokenStartPtr = _codePtr - 1; + _tokenLine = _currentLineNumber; + _tokenColumn = _currentColumnNumber; + tokenKind = scanString(*scanMode); + } + } + } + if (tokenKind == T_ERROR) + tokenKind = scanToken(); + _tokenLength = _codePtr - _tokenStartPtr - 1; + switch (tokenKind) { + // end of line and comments should not "overwrite" the old token type... + case T_EOL: + return tokenKind; + case T_COMMENT: + _state.comments = CommentState::HadComment; + return tokenKind; + case T_PARTIAL_COMMENT: + _state.comments = CommentState::InMultilineComment; + return tokenKind; + default: + _state.comments = CommentState::NoComment; + break; + } + _state.tokenKind = tokenKind; + + _state.delimited = false; + _state.restrictedKeyword = false; + _state.followsClosingBrace = (previousTokenKind == T_RBRACE); + + // update the flags + switch (_state.tokenKind) { + case T_LBRACE: + if (_state.bracesCount > 0) + ++_state.bracesCount; + Q_FALLTHROUGH(); + case T_SEMICOLON: + _state.importState = ImportState::NoQmlImport; + Q_FALLTHROUGH(); + case T_QUESTION: + case T_COLON: + case T_TILDE: + _state.delimited = true; + break; + case T_AUTOMATIC_SEMICOLON: + case T_AS: + _state.importState = ImportState::NoQmlImport; + Q_FALLTHROUGH(); + default: + if (isBinop(_state.tokenKind)) + _state.delimited = true; + break; + + case T_IMPORT: + if (qmlMode() || (_state.handlingDirectives && previousTokenKind == T_DOT)) + _state.importState = ImportState::SawImport; + if (isBinop(_state.tokenKind)) + _state.delimited = true; + break; + + case T_IF: + case T_FOR: + case T_WHILE: + case T_WITH: + _state.parenthesesState = CountParentheses; + _state.parenthesesCount = 0; + break; + + case T_ELSE: + case T_DO: + _state.parenthesesState = BalancedParentheses; + break; + + case T_CONTINUE: + case T_BREAK: + case T_RETURN: + case T_YIELD: + case T_THROW: + _state.restrictedKeyword = true; + break; + case T_RBRACE: + if (_state.bracesCount > 0) + --_state.bracesCount; + if (_state.bracesCount == 0) + goto again; + } // switch // update the parentheses state - switch (_parenthesesState) { - case IgnoreParentheses: - break; - - case CountParentheses: - if (_tokenKind == T_RPAREN) { - --_parenthesesCount; - if (_parenthesesCount == 0) - _parenthesesState = BalancedParentheses; - } else if (_tokenKind == T_LPAREN) { - ++_parenthesesCount; - } - break; - - case BalancedParentheses: - if (_tokenKind != T_DO && _tokenKind != T_ELSE) - _parenthesesState = IgnoreParentheses; - break; - } // switch - - return _tokenKind; + switch (_state.parenthesesState) { + case IgnoreParentheses: + break; + + case CountParentheses: + if (_state.tokenKind == T_RPAREN) { + --_state.parenthesesCount; + if (_state.parenthesesCount == 0) + _state.parenthesesState = BalancedParentheses; + } else if (_state.tokenKind == T_LPAREN) { + ++_state.parenthesesCount; + } + break; + + case BalancedParentheses: + if (_state.tokenKind != T_DO && _state.tokenKind != T_ELSE) + _state.parenthesesState = IgnoreParentheses; + break; + } // switch + + return _state.tokenKind; } uint Lexer::decodeUnicodeEscapeCharacter(bool *ok) { - Q_ASSERT(_char == u'u'); + Q_ASSERT(_state.currentChar == u'u'); scanChar(); // skip u - if (_codePtr + 4 <= _endPtr && isHexDigit(_char)) { + constexpr int distanceFromFirstHexToLastHex = 3; + if (_codePtr + distanceFromFirstHexToLastHex <= _endPtr && isHexDigit(_state.currentChar)) { uint codePoint = 0; for (int i = 0; i < 4; ++i) { - int digit = hexDigit(_char); + int digit = hexDigit(_state.currentChar); if (digit < 0) goto error; codePoint *= 16; @@ -361,15 +382,15 @@ uint Lexer::decodeUnicodeEscapeCharacter(bool *ok) *ok = true; return codePoint; - } else if (_codePtr < _endPtr && _char == u'{') { + } else if (_codePtr < _endPtr && _state.currentChar == u'{') { scanChar(); // skip '{' uint codePoint = 0; - if (!isHexDigit(_char)) + if (!isHexDigit(_state.currentChar)) // need at least one hex digit goto error; while (_codePtr <= _endPtr) { - int digit = hexDigit(_char); + int digit = hexDigit(_state.currentChar); if (digit < 0) break; codePoint *= 16; @@ -379,7 +400,7 @@ uint Lexer::decodeUnicodeEscapeCharacter(bool *ok) scanChar(); } - if (_char != u'}') + if (_state.currentChar != u'}') goto error; scanChar(); // skip '}' @@ -389,8 +410,8 @@ uint Lexer::decodeUnicodeEscapeCharacter(bool *ok) return codePoint; } - error: - _errorCode = IllegalUnicodeEscapeSequence; +error: + _state.errorCode = IllegalUnicodeEscapeSequence; _errorMessage = QCoreApplication::translate("QQmlParser", "Illegal unicode escape sequence"); *ok = false; @@ -402,10 +423,10 @@ QChar Lexer::decodeHexEscapeCharacter(bool *ok) if (isHexDigit(_codePtr[0]) && isHexDigit(_codePtr[1])) { scanChar(); - const QChar c1 = _char; + const QChar c1 = _state.currentChar; scanChar(); - const QChar c2 = _char; + const QChar c2 = _state.currentChar; scanChar(); if (ok) @@ -418,6 +439,40 @@ QChar Lexer::decodeHexEscapeCharacter(bool *ok) return QChar(); } +namespace QQmlJS { +QDebug operator<<(QDebug dbg, const Lexer &l) +{ + dbg << "{\n" + << " engine:" << qsizetype(l._engine) << ",\n" + << " lexMode:" << int(l._lexMode) << ",\n" + << " code.size:" << qsizetype(l._code.unicode()) << "+" << l._code.size() << ",\n" + << " endPtr: codePtr + " << (l._endPtr - l._codePtr) << ",\n" + << " qmlMode:" << l._qmlMode << ",\n" + << " staticIsKeyword:" << l._staticIsKeyword << ",\n" + << " currentLineNumber:" << l._currentLineNumber << ",\n" + << " currentColumnNumber:" << l._currentColumnNumber << ",\n" + << " currentOffset:" << l._currentOffset << ",\n" + << " tokenLength:" << l._tokenLength << ",\n" + << " tokenLine:" << l._tokenLine << ",\n" + << " tokenColumn:" << l._tokenColumn << ",\n" + << " tokenText:" << l._tokenText << ",\n" + << " skipLinefeed:" << l._skipLinefeed << ",\n" + << " errorMessage:" << l._errorMessage << ",\n" + << " tokenSpell:" << l._tokenSpell << ",\n" + << " rawString:" << l._rawString << ",\n"; + if (l._codePtr) + dbg << " codePtr: code.unicode()+" << (l._codePtr - l._code.unicode()) << ",\n"; + else + dbg << " codePtr: *null*,\n"; + if (l._tokenStartPtr) + dbg << " tokenStartPtr: codePtr " << (l._tokenStartPtr - l._codePtr) << ",\n"; + else + dbg << " tokenStartPtr: *null*,\n"; + dbg << " state:" << l._state << "\n}"; + return dbg; +} +} + static inline bool isIdentifierStart(uint ch) { // fast path for ascii @@ -473,71 +528,36 @@ static bool isIdentifierPart(uint ch) int Lexer::scanToken() { - if (_stackToken != -1) { - int tk = _stackToken; - _stackToken = -1; + if (_state.stackToken != -1) { + int tk = _state.stackToken; + _state.stackToken = -1; return tk; } - if (_bracesCount == 0) { + if (_state.bracesCount == 0) { // we're inside a Template string return scanString(TemplateContinuation); } - - _terminator = false; + if (_state.comments == CommentState::NoComment) + _state.terminator = false; again: - _validTokenText = false; - - // handle comment can be called after a '/' has been read - // and returns true if it actually encountered a comment - auto handleComment = [this](){ - if (_char == u'*') { - scanChar(); - while (_codePtr <= _endPtr) { - if (_char == u'*') { - scanChar(); - if (_char == u'/') { - scanChar(); + _state.validTokenText = false; - if (_engine) { - _engine->addComment(tokenOffset() + 2, _codePtr - _tokenStartPtr - 1 - 4, - tokenStartLine(), tokenStartColumn() + 2); - } - - return true; - } - } else { - scanChar(); - } - } - } else if (_char == u'/') { - while (_codePtr <= _endPtr && !isLineTerminator()) { - scanChar(); - } - if (_engine) { - _engine->addComment(tokenOffset() + 2, _codePtr - _tokenStartPtr - 1 - 2, - tokenStartLine(), tokenStartColumn() + 2); - } - return true; - } - return false; - }; - - - while (_char.isSpace()) { + while (_state.currentChar.isSpace()) { if (isLineTerminator()) { - if (_restrictedKeyword) { + bool isAtEnd = (_codePtr + (_skipLinefeed ? 1 : 0)) == _endPtr; + if (_state.restrictedKeyword) { // automatic semicolon insertion _tokenLine = _currentLineNumber; _tokenColumn = _currentColumnNumber; _tokenStartPtr = _codePtr - 1; return T_SEMICOLON; - } else { - _terminator = true; + } else if (_lexMode == LexMode::WholeCode || !isAtEnd) { + _state.terminator = true; syncProhibitAutomaticSemicolon(); - } + } // else we will do the previous things at the start of next line... } scanChar(); @@ -547,10 +567,20 @@ again: _tokenLine = _currentLineNumber; _tokenColumn = _currentColumnNumber; - if (_codePtr > _endPtr) - return EOF_SYMBOL; + if (_codePtr >= _endPtr) { + if (_lexMode == LexMode::LineByLine) { + if (!_code.isEmpty()) { + _state.currentChar = *(_codePtr - 2); + return T_EOL; + } else { + return EOF_SYMBOL; + } + } else if (_codePtr > _endPtr) { + return EOF_SYMBOL; + } + } - const QChar ch = _char; + const QChar ch = _state.currentChar; scanChar(); switch (ch.unicode()) { @@ -558,10 +588,10 @@ again: case u'}': return T_RBRACE; case u'|': - if (_char == u'|') { + if (_state.currentChar == u'|') { scanChar(); return T_OR_OR; - } else if (_char == u'=') { + } else if (_state.currentChar == u'=') { scanChar(); return T_OR_EQ; } @@ -570,7 +600,7 @@ again: case u'{': return T_LBRACE; case u'^': - if (_char == u'=') { + if (_state.currentChar == u'=') { scanChar(); return T_XOR_EQ; } @@ -579,56 +609,60 @@ again: case u']': return T_RBRACKET; case u'[': return T_LBRACKET; case u'?': { - if (_char == u'?') { + if (_state.currentChar == u'?') { scanChar(); return T_QUESTION_QUESTION; } + if (_state.currentChar == u'.' && !peekChar().isDigit()) { + scanChar(); + return T_QUESTION_DOT; + } return T_QUESTION; } case u'>': - if (_char == u'>') { + if (_state.currentChar == u'>') { scanChar(); - if (_char == u'>') { + if (_state.currentChar == u'>') { scanChar(); - if (_char == u'=') { + if (_state.currentChar == u'=') { scanChar(); return T_GT_GT_GT_EQ; } return T_GT_GT_GT; - } else if (_char == u'=') { + } else if (_state.currentChar == u'=') { scanChar(); return T_GT_GT_EQ; } return T_GT_GT; - } else if (_char == u'=') { + } else if (_state.currentChar == u'=') { scanChar(); return T_GE; } return T_GT; case u'=': - if (_char == u'=') { + if (_state.currentChar == u'=') { scanChar(); - if (_char == u'=') { + if (_state.currentChar == u'=') { scanChar(); return T_EQ_EQ_EQ; } return T_EQ_EQ; - } else if (_char == u'>') { + } else if (_state.currentChar == u'>') { scanChar(); return T_ARROW; } return T_EQ; case u'<': - if (_char == u'=') { + if (_state.currentChar == u'=') { scanChar(); return T_LE; - } else if (_char == u'<') { + } else if (_state.currentChar == u'<') { scanChar(); - if (_char == u'=') { + if (_state.currentChar == u'=') { scanChar(); return T_LT_LT_EQ; } @@ -640,26 +674,62 @@ again: case u':': return T_COLON; case u'/': - if (handleComment()) - goto again; - else if (_char == u'=') { + switch (_state.currentChar.unicode()) { + case u'*': + scanChar(); + while (_codePtr <= _endPtr) { + if (_state.currentChar == u'*') { + scanChar(); + if (_state.currentChar == u'/') { + scanChar(); + if (_engine) { + _engine->addComment(tokenOffset() + 2, + _codePtr - _tokenStartPtr - 1 - 4, tokenStartLine(), + tokenStartColumn() + 2); + } + if (_lexMode == LexMode::LineByLine) + return T_COMMENT; + else + goto again; + } + } else { + scanChar(); + } + } + if (_lexMode == LexMode::LineByLine) + return T_PARTIAL_COMMENT; + else + goto again; + case u'/': + while (_codePtr <= _endPtr && !isLineTerminator()) { + scanChar(); + } + if (_engine) { + _engine->addComment(tokenOffset() + 2, _codePtr - _tokenStartPtr - 1 - 2, + tokenStartLine(), tokenStartColumn() + 2); + } + if (_lexMode == LexMode::LineByLine) + return T_COMMENT; + else + goto again; + case u'=': scanChar(); return T_DIVIDE_EQ; + default: + return T_DIVIDE_; } - return T_DIVIDE_; - case u'.': - if (_importState == ImportState::SawImport) + if (_state.importState == ImportState::SawImport) return T_DOT; - if (isDecimalDigit(_char.unicode())) + if (isDecimalDigit(_state.currentChar.unicode())) return scanNumber(ch); - if (_char == u'.') { + if (_state.currentChar == u'.') { scanChar(); - if (_char == u'.') { + if (_state.currentChar == u'.') { scanChar(); return T_ELLIPSIS; } else { - _errorCode = IllegalCharacter; + _state.errorCode = IllegalCharacter; _errorMessage = QCoreApplication::translate("QQmlParser", "Unexpected token '.'"); return T_ERROR; } @@ -667,14 +737,15 @@ again: return T_DOT; case u'-': - if (_char == u'=') { + if (_state.currentChar == u'=') { scanChar(); return T_MINUS_EQ; - } else if (_char == u'-') { + } else if (_state.currentChar == u'-') { scanChar(); - if (_terminator && !_delimited && !_prohibitAutomaticSemicolon && _tokenKind != T_LPAREN) { - _stackToken = T_MINUS_MINUS; + if (_state.terminator && !_state.delimited && !_state.prohibitAutomaticSemicolon + && _state.tokenKind != T_LPAREN) { + _state.stackToken = T_MINUS_MINUS; return T_SEMICOLON; } @@ -685,14 +756,15 @@ again: case u',': return T_COMMA; case u'+': - if (_char == u'=') { + if (_state.currentChar == u'=') { scanChar(); return T_PLUS_EQ; - } else if (_char == u'+') { + } else if (_state.currentChar == u'+') { scanChar(); - if (_terminator && !_delimited && !_prohibitAutomaticSemicolon && _tokenKind != T_LPAREN) { - _stackToken = T_PLUS_PLUS; + if (_state.terminator && !_state.delimited && !_state.prohibitAutomaticSemicolon + && _state.tokenKind != T_LPAREN) { + _state.stackToken = T_PLUS_PLUS; return T_SEMICOLON; } @@ -701,12 +773,12 @@ again: return T_PLUS; case u'*': - if (_char == u'=') { + if (_state.currentChar == u'=') { scanChar(); return T_STAR_EQ; - } else if (_char == u'*') { + } else if (_state.currentChar == u'*') { scanChar(); - if (_char == u'=') { + if (_state.currentChar == u'=') { scanChar(); return T_STAR_STAR_EQ; } @@ -720,26 +792,26 @@ again: case u'@': return T_AT; case u'&': - if (_char == u'=') { + if (_state.currentChar == u'=') { scanChar(); return T_AND_EQ; - } else if (_char == u'&') { + } else if (_state.currentChar == u'&') { scanChar(); return T_AND_AND; } return T_AND; case u'%': - if (_char == u'=') { + if (_state.currentChar == u'=') { scanChar(); return T_REMAINDER_EQ; } return T_REMAINDER; case u'!': - if (_char == u'=') { + if (_state.currentChar == u'=') { scanChar(); - if (_char == u'=') { + if (_state.currentChar == u'=') { scanChar(); return T_NOT_EQ_EQ; } @@ -748,7 +820,7 @@ again: return T_NOT; case u'`': - _outerTemplateBraceCount.push(_bracesCount); + _state.outerTemplateBraceCount.push(_state.bracesCount); Q_FALLTHROUGH(); case u'\'': case u'"': @@ -763,7 +835,7 @@ again: case u'7': case u'8': case u'9': - if (_importState == ImportState::SawImport) + if (_state.importState == ImportState::SawImport) return scanVersionNumber(ch); else return scanNumber(ch); @@ -775,20 +847,23 @@ again: scanChar(); } if (_engine) { - _engine->addComment(tokenOffset(), _codePtr - _tokenStartPtr - 1, - tokenStartLine(), tokenStartColumn()); + _engine->addComment(tokenOffset(), _codePtr - _tokenStartPtr - 1, tokenStartLine(), + tokenStartColumn()); } - goto again; + if (_lexMode == LexMode::LineByLine) + return T_COMMENT; + else + goto again; } Q_FALLTHROUGH(); default: { uint c = ch.unicode(); bool identifierWithEscapeChars = false; - if (QChar::isHighSurrogate(c) && QChar::isLowSurrogate(_char.unicode())) { - c = QChar::surrogateToUcs4(ushort(c), _char.unicode()); + if (QChar::isHighSurrogate(c) && QChar::isLowSurrogate(_state.currentChar.unicode())) { + c = QChar::surrogateToUcs4(ushort(c), _state.currentChar.unicode()); scanChar(); - } else if (c == '\\' && _char == u'u') { + } else if (c == '\\' && _state.currentChar == u'u') { identifierWithEscapeChars = true; bool ok = false; c = decodeUnicodeEscapeCharacter(&ok); @@ -804,19 +879,19 @@ again: } else { _tokenText += QChar(c); } - _validTokenText = true; + _state.validTokenText = true; } while (_codePtr <= _endPtr) { - c = _char.unicode(); + c = _state.currentChar.unicode(); if (QChar::isHighSurrogate(c) && QChar::isLowSurrogate(_codePtr->unicode())) { scanChar(); - c = QChar::surrogateToUcs4(ushort(c), _char.unicode()); - } else if (_char == u'\\' && _codePtr[0] == u'u') { + c = QChar::surrogateToUcs4(ushort(c), _state.currentChar.unicode()); + } else if (_state.currentChar == u'\\' && _codePtr[0] == u'u') { if (!identifierWithEscapeChars) { identifierWithEscapeChars = true; _tokenText.resize(0); _tokenText.insert(0, _tokenStartPtr, _codePtr - _tokenStartPtr - 1); - _validTokenText = true; + _state.validTokenText = true; } scanChar(); // skip '\\' @@ -860,17 +935,57 @@ again: if (kind == T_FUNCTION) { continue_skipping: - while (_codePtr < _endPtr && _char.isSpace()) - scanChar(); - if (_char == u'*') { - _tokenLength = _codePtr - _tokenStartPtr - 1; - kind = T_FUNCTION_STAR; - scanChar(); - } else if (_char == u'/') { - scanChar(); - if (handleComment()) - goto continue_skipping; - } + while (_codePtr < _endPtr && _state.currentChar.isSpace()) + scanChar(); + if (_state.currentChar == u'*') { + _tokenLength = _codePtr - _tokenStartPtr - 1; + kind = T_FUNCTION_STAR; + scanChar(); + } else if (_state.currentChar == u'/') { + scanChar(); + switch (_state.currentChar.unicode()) { + case u'*': + scanChar(); + while (_codePtr <= _endPtr) { + if (_state.currentChar == u'*') { + scanChar(); + if (_state.currentChar == u'/') { + scanChar(); + if (_engine) { + _engine->addComment(tokenOffset() + 2, + _codePtr - _tokenStartPtr - 1 - 4, + tokenStartLine(), + tokenStartColumn() + 2); + } + if (_lexMode == LexMode::LineByLine) + return T_COMMENT; + goto continue_skipping; + } + } else { + scanChar(); + } + } + if (_lexMode == LexMode::LineByLine) + return T_PARTIAL_COMMENT; + else + goto continue_skipping; + case u'/': + while (_codePtr <= _endPtr && !isLineTerminator()) { + scanChar(); + } + if (_engine) { + _engine->addComment(tokenOffset() + 2, + _codePtr - _tokenStartPtr - 1 - 2, + tokenStartLine(), tokenStartColumn() + 2); + } + if (_lexMode == LexMode::LineByLine) + return T_COMMENT; + else + goto continue_skipping; + default: + break; + } + } } if (_engine) { @@ -893,6 +1008,11 @@ again: int Lexer::scanString(ScanStringMode mode) { QChar quote = (mode == TemplateContinuation) ? QChar(TemplateHead) : QChar(mode); + // we actually use T_STRING_LITERAL also for multiline strings, should we want to + // change that we should set it to: + // _state.tokenKind == T_PARTIAL_SINGLE_QUOTE_STRING_LITERAL || + // _state.tokenKind == T_PARTIAL_DOUBLE_QUOTE_STRING_LITERAL + // here and uncomment the multilineStringLiteral = true below. bool multilineStringLiteral = false; const QChar *startCode = _codePtr - 1; @@ -909,30 +1029,33 @@ int Lexer::scanString(ScanStringMode mode) --_currentLineNumber; // will be read again in scanChar() break; } - _errorCode = IllegalCharacter; - _errorMessage = QCoreApplication::translate("QQmlParser", "Stray newline in string literal"); + _state.errorCode = IllegalCharacter; + _errorMessage = QCoreApplication::translate("QQmlParser", + "Stray newline in string literal"); return T_ERROR; - } else if (_char == u'\\') { + } else if (_state.currentChar == u'\\') { break; - } else if (_char == u'$' && quote == u'`') { + } else if (_state.currentChar == u'$' && quote == u'`') { break; - } else if (_char == quote) { - _tokenSpell = _engine->midRef(startCode - _code.unicode(), _codePtr - startCode - 1); + } else if (_state.currentChar == quote) { + _tokenSpell = + _engine->midRef(startCode - _code.unicode(), _codePtr - startCode - 1); _rawString = _tokenSpell; scanChar(); if (quote == u'`') - _bracesCount = _outerTemplateBraceCount.pop(); - + _state.bracesCount = _state.outerTemplateBraceCount.pop(); if (mode == TemplateHead) return T_NO_SUBSTITUTION_TEMPLATE; else if (mode == TemplateContinuation) return T_TEMPLATE_TAIL; + else if (multilineStringLiteral) + return T_MULTILINE_STRING_LITERAL; else return T_STRING_LITERAL; } // don't use scanChar() here, that would transform \r sequences and the midRef() call would create the wrong result - _char = *_codePtr++; + _state.currentChar = *_codePtr++; ++_currentColumnNumber; first = false; } @@ -942,7 +1065,7 @@ int Lexer::scanString(ScanStringMode mode) --_codePtr; --_currentColumnNumber; - _validTokenText = true; + _state.validTokenText = true; _tokenText = QString(startCode, _codePtr - startCode); auto setRawString = [&](const QChar *end) { @@ -955,7 +1078,7 @@ int Lexer::scanString(ScanStringMode mode) scanChar(); while (_codePtr <= _endPtr) { - if (_char == quote) { + if (_state.currentChar == quote) { scanChar(); if (_engine) { @@ -965,7 +1088,7 @@ int Lexer::scanString(ScanStringMode mode) } if (quote == u'`') - _bracesCount = _outerTemplateBraceCount.pop(); + _state.bracesCount = _state.outerTemplateBraceCount.pop(); if (mode == TemplateContinuation) return T_TEMPLATE_TAIL; @@ -973,27 +1096,28 @@ int Lexer::scanString(ScanStringMode mode) return T_NO_SUBSTITUTION_TEMPLATE; return multilineStringLiteral ? T_MULTILINE_STRING_LITERAL : T_STRING_LITERAL; - } else if (quote == u'`' && _char == u'$' && *_codePtr == u'{') { + } else if (quote == u'`' && _state.currentChar == u'$' && *_codePtr == u'{') { scanChar(); scanChar(); - _bracesCount = 1; + _state.bracesCount = 1; if (_engine) { _tokenSpell = _engine->newStringRef(_tokenText); setRawString(_codePtr - 2); } return (mode == TemplateHead ? T_TEMPLATE_HEAD : T_TEMPLATE_MIDDLE); - } else if (_char == u'\\') { + } else if (_state.currentChar == u'\\') { scanChar(); if (_codePtr > _endPtr) { - _errorCode = IllegalEscapeSequence; - _errorMessage = QCoreApplication::translate("QQmlParser", "End of file reached at escape sequence"); + _state.errorCode = IllegalEscapeSequence; + _errorMessage = QCoreApplication::translate( + "QQmlParser", "End of file reached at escape sequence"); return T_ERROR; } QChar u; - switch (_char.unicode()) { + switch (_state.currentChar.unicode()) { // unicode escape sequence case u'u': { bool ok = false; @@ -1014,8 +1138,9 @@ int Lexer::scanString(ScanStringMode mode) bool ok = false; u = decodeHexEscapeCharacter(&ok); if (!ok) { - _errorCode = IllegalHexadecimalEscapeSequence; - _errorMessage = QCoreApplication::translate("QQmlParser", "Illegal hexadecimal escape sequence"); + _state.errorCode = IllegalHexadecimalEscapeSequence; + _errorMessage = QCoreApplication::translate( + "QQmlParser", "Illegal hexadecimal escape sequence"); return T_ERROR; } } break; @@ -1032,7 +1157,7 @@ int Lexer::scanString(ScanStringMode mode) case u'v': u = u'\v'; scanChar(); break; case u'0': - if (! _codePtr->isDigit()) { + if (!_codePtr->isDigit()) { scanChar(); u = u'\0'; break; @@ -1047,115 +1172,177 @@ int Lexer::scanString(ScanStringMode mode) case u'7': case u'8': case u'9': - _errorCode = IllegalEscapeSequence; - _errorMessage = QCoreApplication::translate("QQmlParser", "Octal escape sequences are not allowed"); + _state.errorCode = IllegalEscapeSequence; + _errorMessage = QCoreApplication::translate( + "QQmlParser", "Octal escape sequences are not allowed"); return T_ERROR; case u'\r': case u'\n': case 0x2028u: case 0x2029u: + // uncomment the following to use T_MULTILINE_STRING_LITERAL + // multilineStringLiteral = true; scanChar(); continue; default: // non escape character - u = _char; + u = _state.currentChar; scanChar(); } _tokenText += u; } else { - _tokenText += _char; + _tokenText += _state.currentChar; scanChar(); } } - - _errorCode = UnclosedStringLiteral; + if (_lexMode == LexMode::LineByLine && !_code.isEmpty()) { + if (mode == TemplateContinuation) + return T_PARTIAL_TEMPLATE_MIDDLE; + else if (mode == TemplateHead) + return T_PARTIAL_TEMPLATE_HEAD; + else if (mode == SingleQuote) + return T_PARTIAL_SINGLE_QUOTE_STRING_LITERAL; + return T_PARTIAL_DOUBLE_QUOTE_STRING_LITERAL; + } + _state.errorCode = UnclosedStringLiteral; _errorMessage = QCoreApplication::translate("QQmlParser", "Unclosed string at end of line"); return T_ERROR; } int Lexer::scanNumber(QChar ch) { + auto scanOptionalNumericSeparator = [this](auto isNextCharacterValid){ + if (_state.currentChar == u'_') { + if (peekChar() == u'_') { + _state.errorCode = IllegalNumber; + _errorMessage = QCoreApplication::translate( + "QQmlParser", + "There can be at most one numeric separator beetwen digits" + ); + return false; + } + + if (!isNextCharacterValid()) { + _state.errorCode = IllegalNumber; + _errorMessage = QCoreApplication::translate( + "QQmlParser", + "A trailing numeric separator is not allowed in numeric literals" + ); + return false; + } + + scanChar(); + } + + return true; + }; + if (ch == u'0') { - if (_char == u'x' || _char == u'X') { - ch = _char; // remember the x or X to use it in the error message below. + if (_state.currentChar == u'x' || _state.currentChar == u'X') { + ch = _state.currentChar; // remember the x or X to use it in the error message below. // parse hex integer literal scanChar(); // consume 'x' - if (!isHexDigit(_char)) { - _errorCode = IllegalNumber; - _errorMessage = QCoreApplication::translate("QQmlParser", "At least one hexadecimal digit is required after '0%1'").arg(ch); + if (!isHexDigit(_state.currentChar)) { + _state.errorCode = IllegalNumber; + _errorMessage = QCoreApplication::translate( + "QQmlParser", + "At least one hexadecimal digit is required after '0%1'") + .arg(ch); return T_ERROR; } double d = 0.; while (1) { - int digit = ::hexDigit(_char); + int digit = ::hexDigit(_state.currentChar); if (digit < 0) break; d *= 16; d += digit; scanChar(); + + if (!scanOptionalNumericSeparator([this](){ return isHexDigit(peekChar()); })) + return T_ERROR; } - _tokenValue = d; + _state.tokenValue = d; return T_NUMERIC_LITERAL; - } else if (_char == u'o' || _char == u'O') { - ch = _char; // remember the o or O to use it in the error message below. + } else if (_state.currentChar == u'o' || _state.currentChar == u'O') { + ch = _state.currentChar; // remember the o or O to use it in the error message below. // parse octal integer literal scanChar(); // consume 'o' - if (!isOctalDigit(_char.unicode())) { - _errorCode = IllegalNumber; - _errorMessage = QCoreApplication::translate("QQmlParser", "At least one octal digit is required after '0%1'").arg(ch); + if (!isOctalDigit(_state.currentChar.unicode())) { + _state.errorCode = IllegalNumber; + _errorMessage = + QCoreApplication::translate( + "QQmlParser", "At least one octal digit is required after '0%1'") + .arg(ch); return T_ERROR; } double d = 0.; while (1) { - int digit = ::octalDigit(_char); + int digit = ::octalDigit(_state.currentChar); if (digit < 0) break; d *= 8; d += digit; scanChar(); + + if (!scanOptionalNumericSeparator([this](){ + return isOctalDigit(peekChar().unicode()); + })) { + return T_ERROR; + } } - _tokenValue = d; + _state.tokenValue = d; return T_NUMERIC_LITERAL; - } else if (_char == u'b' || _char == u'B') { - ch = _char; // remember the b or B to use it in the error message below. + } else if (_state.currentChar == u'b' || _state.currentChar == u'B') { + ch = _state.currentChar; // remember the b or B to use it in the error message below. // parse binary integer literal scanChar(); // consume 'b' - if (_char.unicode() != u'0' && _char.unicode() != u'1') { - _errorCode = IllegalNumber; - _errorMessage = QCoreApplication::translate("QQmlParser", "At least one binary digit is required after '0%1'").arg(ch); + if (_state.currentChar.unicode() != u'0' && _state.currentChar.unicode() != u'1') { + _state.errorCode = IllegalNumber; + _errorMessage = + QCoreApplication::translate( + "QQmlParser", "At least one binary digit is required after '0%1'") + .arg(ch); return T_ERROR; } double d = 0.; while (1) { int digit = 0; - if (_char.unicode() == u'1') + if (_state.currentChar.unicode() == u'1') digit = 1; - else if (_char.unicode() != u'0') + else if (_state.currentChar.unicode() != u'0') break; d *= 2; d += digit; scanChar(); + + if (!scanOptionalNumericSeparator([this](){ + return peekChar().unicode() == u'0' || peekChar().unicode() == u'1'; + })) { + return T_ERROR; + } } - _tokenValue = d; + _state.tokenValue = d; return T_NUMERIC_LITERAL; - } else if (_char.isDigit() && !qmlMode()) { - _errorCode = IllegalCharacter; - _errorMessage = QCoreApplication::translate("QQmlParser", "Decimal numbers can't start with '0'"); + } else if (_state.currentChar.isDigit() && !qmlMode()) { + _state.errorCode = IllegalCharacter; + _errorMessage = QCoreApplication::translate("QQmlParser", + "Decimal numbers can't start with '0'"); return T_ERROR; } } @@ -1165,37 +1352,49 @@ int Lexer::scanNumber(QChar ch) chars.append(ch.unicode()); if (ch != u'.') { - while (_char.isDigit()) { - chars.append(_char.unicode()); + if (!scanOptionalNumericSeparator([this](){ return peekChar().isDigit(); })) + return T_ERROR; + + while (_state.currentChar.isDigit()) { + chars.append(_state.currentChar.unicode()); scanChar(); // consume the digit + + if (!scanOptionalNumericSeparator([this](){ return peekChar().isDigit(); })) + return T_ERROR; } - if (_char == u'.') { - chars.append(_char.unicode()); + if (_state.currentChar == u'.') { + chars.append(_state.currentChar.unicode()); scanChar(); // consume `.' } } - while (_char.isDigit()) { - chars.append(_char.unicode()); + while (_state.currentChar.isDigit()) { + chars.append(_state.currentChar.unicode()); scanChar(); + + if (!scanOptionalNumericSeparator([this](){ return peekChar().isDigit(); })) + return T_ERROR; } - if (_char == u'e' || _char == u'E') { - if (_codePtr[0].isDigit() || ((_codePtr[0] == u'+' || _codePtr[0] == u'-') && - _codePtr[1].isDigit())) { + if (_state.currentChar == u'e' || _state.currentChar == u'E') { + if (_codePtr[0].isDigit() + || ((_codePtr[0] == u'+' || _codePtr[0] == u'-') && _codePtr[1].isDigit())) { - chars.append(_char.unicode()); + chars.append(_state.currentChar.unicode()); scanChar(); // consume `e' - if (_char == u'+' || _char == u'-') { - chars.append(_char.unicode()); + if (_state.currentChar == u'+' || _state.currentChar == u'-') { + chars.append(_state.currentChar.unicode()); scanChar(); // consume the sign } - while (_char.isDigit()) { - chars.append(_char.unicode()); + while (_state.currentChar.isDigit()) { + chars.append(_state.currentChar.unicode()); scanChar(); + + if (!scanOptionalNumericSeparator([this](){ return peekChar().isDigit(); })) + return T_ERROR; } } } @@ -1204,11 +1403,12 @@ int Lexer::scanNumber(QChar ch) const char *end = nullptr; bool ok = false; - _tokenValue = qstrntod(begin, chars.size(), &end, &ok); + _state.tokenValue = qstrntod(begin, chars.size(), &end, &ok); if (end - begin != chars.size()) { - _errorCode = IllegalExponentIndicator; - _errorMessage = QCoreApplication::translate("QQmlParser", "Illegal syntax for exponential number"); + _state.errorCode = IllegalExponentIndicator; + _errorMessage = + QCoreApplication::translate("QQmlParser", "Illegal syntax for exponential number"); return T_ERROR; } @@ -1218,47 +1418,48 @@ int Lexer::scanNumber(QChar ch) int Lexer::scanVersionNumber(QChar ch) { if (ch == u'0') { - _tokenValue = 0; + _state.tokenValue = 0; return T_VERSION_NUMBER; } int acc = 0; acc += ch.digitValue(); - while (_char.isDigit()) { + while (_state.currentChar.isDigit()) { acc *= 10; - acc += _char.digitValue(); + acc += _state.currentChar.digitValue(); scanChar(); // consume the digit } - _tokenValue = acc; + _state.tokenValue = acc; return T_VERSION_NUMBER; } bool Lexer::scanRegExp(RegExpBodyPrefix prefix) { _tokenText.resize(0); - _validTokenText = true; - _patternFlags = 0; + _state.validTokenText = true; + _state.patternFlags = 0; if (prefix == EqualPrefix) _tokenText += u'='; while (true) { - switch (_char.unicode()) { + switch (_state.currentChar.unicode()) { case u'/': scanChar(); // scan the flags - _patternFlags = 0; - while (isIdentLetter(_char)) { - int flag = regExpFlagFromChar(_char); - if (flag == 0 || _patternFlags & flag) { - _errorMessage = QCoreApplication::translate("QQmlParser", "Invalid regular expression flag '%0'") - .arg(QChar(_char)); + _state.patternFlags = 0; + while (isIdentLetter(_state.currentChar)) { + int flag = regExpFlagFromChar(_state.currentChar); + if (flag == 0 || _state.patternFlags & flag) { + _errorMessage = QCoreApplication::translate( + "QQmlParser", "Invalid regular expression flag '%0'") + .arg(QChar(_state.currentChar)); return false; } - _patternFlags |= flag; + _state.patternFlags |= flag; scanChar(); } @@ -1267,59 +1468,63 @@ bool Lexer::scanRegExp(RegExpBodyPrefix prefix) case u'\\': // regular expression backslash sequence - _tokenText += _char; + _tokenText += _state.currentChar; scanChar(); if (_codePtr > _endPtr || isLineTerminator()) { - _errorMessage = QCoreApplication::translate("QQmlParser", "Unterminated regular expression backslash sequence"); + _errorMessage = QCoreApplication::translate( + "QQmlParser", "Unterminated regular expression backslash sequence"); return false; } - _tokenText += _char; + _tokenText += _state.currentChar; scanChar(); break; case u'[': // regular expression class - _tokenText += _char; + _tokenText += _state.currentChar; scanChar(); - while (_codePtr <= _endPtr && ! isLineTerminator()) { - if (_char == u']') + while (_codePtr <= _endPtr && !isLineTerminator()) { + if (_state.currentChar == u']') break; - else if (_char == u'\\') { + else if (_state.currentChar == u'\\') { // regular expression backslash sequence - _tokenText += _char; + _tokenText += _state.currentChar; scanChar(); if (_codePtr > _endPtr || isLineTerminator()) { - _errorMessage = QCoreApplication::translate("QQmlParser", "Unterminated regular expression backslash sequence"); + _errorMessage = QCoreApplication::translate( + "QQmlParser", "Unterminated regular expression backslash sequence"); return false; } - _tokenText += _char; + _tokenText += _state.currentChar; scanChar(); } else { - _tokenText += _char; + _tokenText += _state.currentChar; scanChar(); } } - if (_char != u']') { - _errorMessage = QCoreApplication::translate("QQmlParser", "Unterminated regular expression class"); + if (_state.currentChar != u']') { + _errorMessage = QCoreApplication::translate( + "QQmlParser", "Unterminated regular expression class"); return false; } - _tokenText += _char; + _tokenText += _state.currentChar; scanChar(); // skip ] break; default: if (_codePtr > _endPtr || isLineTerminator()) { - _errorMessage = QCoreApplication::translate("QQmlParser", "Unterminated regular expression literal"); + _errorMessage = QCoreApplication::translate( + "QQmlParser", "Unterminated regular expression literal"); return false; } else { - _tokenText += _char; + _tokenText += _state.currentChar; scanChar(); } } // switch @@ -1330,7 +1535,7 @@ bool Lexer::scanRegExp(RegExpBodyPrefix prefix) bool Lexer::isLineTerminator() const { - const ushort unicode = _char.unicode(); + const ushort unicode = _state.currentChar.unicode(); return unicode == 0x000Au || unicode == 0x000Du || unicode == 0x2028u @@ -1339,7 +1544,7 @@ bool Lexer::isLineTerminator() const unsigned Lexer::isLineTerminatorSequence() const { - switch (_char.unicode()) { + switch (_state.currentChar.unicode()) { case 0x000Au: case 0x2028u: case 0x2029u: @@ -1386,10 +1591,10 @@ bool Lexer::isOctalDigit(ushort c) QString Lexer::tokenText() const { - if (_validTokenText) + if (_state.validTokenText) return _tokenText; - if (_tokenKind == T_STRING_LITERAL) + if (_state.tokenKind == T_STRING_LITERAL) return QString(_tokenStartPtr + 1, _tokenLength - 2); return QString(_tokenStartPtr, _tokenLength); @@ -1397,7 +1602,7 @@ QString Lexer::tokenText() const Lexer::Error Lexer::errorCode() const { - return _errorCode; + return _state.errorCode; } QString Lexer::errorMessage() const @@ -1407,33 +1612,31 @@ QString Lexer::errorMessage() const void Lexer::syncProhibitAutomaticSemicolon() { - if (_parenthesesState == BalancedParentheses) { + if (_state.parenthesesState == BalancedParentheses) { // we have seen something like "if (foo)", which means we should // never insert an automatic semicolon at this point, since it would // then be expanded into an empty statement (ECMA-262 7.9.1) - _prohibitAutomaticSemicolon = true; - _parenthesesState = IgnoreParentheses; + _state.prohibitAutomaticSemicolon = true; + _state.parenthesesState = IgnoreParentheses; } else { - _prohibitAutomaticSemicolon = false; + _state.prohibitAutomaticSemicolon = false; } } bool Lexer::prevTerminator() const { - return _terminator; + return _state.terminator; } bool Lexer::followsClosingBrace() const { - return _followsClosingBrace; + return _state.followsClosingBrace; } bool Lexer::canInsertAutomaticSemicolon(int token) const { - return token == T_RBRACE - || token == EOF_SYMBOL - || _terminator - || _followsClosingBrace; + return token == T_RBRACE || token == EOF_SYMBOL || _state.terminator + || _state.followsClosingBrace; } static const int uriTokens[] = { @@ -1497,12 +1700,12 @@ bool Lexer::scanDirectives(Directives *directives, DiagnosticMessage *error) error->loc.startColumn = tokenStartColumn(); }; - QScopedValueRollback<bool> directivesGuard(_handlingDirectives, true); + QScopedValueRollback<bool> directivesGuard(_state.handlingDirectives, true); Q_ASSERT(!_qmlMode); lex(); // fetch the first token - if (_tokenKind != T_DOT) + if (_state.tokenKind != T_DOT) return true; do { @@ -1511,7 +1714,7 @@ bool Lexer::scanDirectives(Directives *directives, DiagnosticMessage *error) lex(); // skip T_DOT - if (! (_tokenKind == T_IDENTIFIER || _tokenKind == T_IMPORT)) + if (!(_state.tokenKind == T_IDENTIFIER || _state.tokenKind == T_IMPORT)) return true; // expected a valid QML/JS directive const QString directiveName = tokenText(); @@ -1541,7 +1744,7 @@ bool Lexer::scanDirectives(Directives *directives, DiagnosticMessage *error) QString version; bool fileImport = false; // file or uri import - if (_tokenKind == T_STRING_LITERAL) { + if (_state.tokenKind == T_STRING_LITERAL) { // .import T_STRING_LITERAL as T_IDENTIFIER fileImport = true; @@ -1553,10 +1756,10 @@ bool Lexer::scanDirectives(Directives *directives, DiagnosticMessage *error) } lex(); - } else if (_tokenKind == T_IDENTIFIER) { + } else if (_state.tokenKind == T_IDENTIFIER) { // .import T_IDENTIFIER (. T_IDENTIFIER)* (T_VERSION_NUMBER (. T_VERSION_NUMBER)?)? as T_IDENTIFIER while (true) { - if (!isUriToken(_tokenKind)) { + if (!isUriToken(_state.tokenKind)) { setError(QCoreApplication::translate("QQmlParser","Invalid module URI")); return false; } @@ -1568,7 +1771,7 @@ bool Lexer::scanDirectives(Directives *directives, DiagnosticMessage *error) setError(QCoreApplication::translate("QQmlParser","Invalid module URI")); return false; } - if (_tokenKind != QQmlJSGrammar::T_DOT) + if (_state.tokenKind != QQmlJSGrammar::T_DOT) break; pathOrUri.append(u'.'); @@ -1580,13 +1783,13 @@ bool Lexer::scanDirectives(Directives *directives, DiagnosticMessage *error) } } - if (_tokenKind == T_VERSION_NUMBER) { + if (_state.tokenKind == T_VERSION_NUMBER) { version = tokenText(); lex(); - if (_tokenKind == T_DOT) { + if (_state.tokenKind == T_DOT) { version += u'.'; lex(); - if (_tokenKind != T_VERSION_NUMBER) { + if (_state.tokenKind != T_VERSION_NUMBER) { setError(QCoreApplication::translate( "QQmlParser", "Incomplete version number (dot but no minor)")); return false; // expected the module version number @@ -1600,7 +1803,7 @@ bool Lexer::scanDirectives(Directives *directives, DiagnosticMessage *error) // // recognize the mandatory `as' followed by the module name // - if (! (_tokenKind == T_AS && tokenStartLine() == lineNumber)) { + if (!(_state.tokenKind == T_AS && tokenStartLine() == lineNumber)) { if (fileImport) setError(QCoreApplication::translate("QQmlParser", "File import requires a qualifier")); else @@ -1639,7 +1842,56 @@ bool Lexer::scanDirectives(Directives *directives, DiagnosticMessage *error) // fetch the first token after the .pragma/.import directive lex(); - } while (_tokenKind == T_DOT); + } while (_state.tokenKind == T_DOT); return true; } + +const Lexer::State &Lexer::state() const +{ + return _state; +} +void Lexer::setState(const Lexer::State &state) +{ + _state = state; +} + +int Lexer::parseModeFlags() const { + int flags = 0; + if (qmlMode()) + flags |= QmlMode|StaticIsKeyword; + if (yieldIsKeyWord()) + flags |= YieldIsKeyword; + if (_staticIsKeyword) + flags |= StaticIsKeyword; + return flags; +} + +namespace QQmlJS { +QDebug operator<<(QDebug dbg, const Lexer::State &s) +{ + dbg << "{\n" + << " errorCode:" << int(s.errorCode) << ",\n" + << " currentChar:" << s.currentChar << ",\n" + << " tokenValue:" << s.tokenValue << ",\n" + << " parenthesesState:" << s.parenthesesState << ",\n" + << " parenthesesCount:" << s.parenthesesCount << ",\n" + << " outerTemplateBraceCount:" << s.outerTemplateBraceCount << ",\n" + << " bracesCount:" << s.bracesCount << ",\n" + << " stackToken:" << s.stackToken << ",\n" + << " patternFlags:" << s.patternFlags << ",\n" + << " tokenKind:" << s.tokenKind << ",\n" + << " importState:" << int(s.importState) << ",\n" + << " validTokenText:" << s.validTokenText << ",\n" + << " prohibitAutomaticSemicolon:" << s.prohibitAutomaticSemicolon << ",\n" + << " restrictedKeyword:" << s.restrictedKeyword << ",\n" + << " terminator:" << s.terminator << ",\n" + << " followsClosingBrace:" << s.followsClosingBrace << ",\n" + << " delimited:" << s.delimited << ",\n" + << " handlingDirectives:" << s.handlingDirectives << ",\n" + << " generatorLevel:" << s.generatorLevel << "\n}"; + return dbg; +} +} + +QT_END_NAMESPACE |