diff options
Diffstat (limited to 'src/qml/parser')
-rw-r--r-- | src/qml/parser/qqmljs.g | 8 | ||||
-rw-r--r-- | src/qml/parser/qqmljslexer.cpp | 570 | ||||
-rw-r--r-- | src/qml/parser/qqmljslexer_p.h | 106 |
3 files changed, 441 insertions, 243 deletions
diff --git a/src/qml/parser/qqmljs.g b/src/qml/parser/qqmljs.g index ab4d94728c..1df2658628 100644 --- a/src/qml/parser/qqmljs.g +++ b/src/qml/parser/qqmljs.g @@ -75,6 +75,14 @@ %token T_ERROR +-- states for line by line parsing +%token T_EOL +%token T_PARTIAL_COMMENT "non closed multiline comment" +%token T_PARTIAL_SINGLE_QUOTE_STRING_LITERAL "multiline single quote string literal" +%token T_PARTIAL_DOUBLE_QUOTE_STRING_LITERAL "multiline double quote string literal" +%token T_PARTIAL_TEMPLATE_HEAD "(template head)" +%token T_PARTIAL_TEMPLATE_MIDDLE "(template middle)" + --- feed tokens %token T_FEED_UI_PROGRAM %token T_FEED_UI_OBJECT_MEMBER diff --git a/src/qml/parser/qqmljslexer.cpp b/src/qml/parser/qqmljslexer.cpp index d44b5a29da..cc552996ea 100644 --- a/src/qml/parser/qqmljslexer.cpp +++ b/src/qml/parser/qqmljslexer.cpp @@ -15,6 +15,8 @@ #include <QtCore/qdebug.h> #include <QtCore/QScopedValueRollback> +#include <optional> + using namespace QQmlJS; static inline int regExpFlagFromChar(const QChar &ch) @@ -44,7 +46,8 @@ static inline QChar convertHex(QChar c1, QChar c2) return QChar((convertHex(c1.unicode()) << 4) + convertHex(c2.unicode())); } -Lexer::Lexer(Engine *engine) : _engine(engine), _endPtr(nullptr), _qmlMode(true) +Lexer::Lexer(Engine *engine, LexMode lexMode) + : _engine(engine), _lexMode(lexMode), _endPtr(nullptr), _qmlMode(true) { if (engine) engine->setLexer(this); @@ -60,74 +63,65 @@ QString Lexer::code() const return _code; } -void Lexer::setCode(const QString &code, int lineno, bool qmlMode) +void Lexer::setCode(const QString &code, int lineno, bool qmlMode, + Lexer::CodeContinuation codeContinuation) { + if (codeContinuation == Lexer::CodeContinuation::Continue) + _currentOffset += _code.length(); + else + _currentOffset = 0; if (_engine) _engine->setCode(code); _qmlMode = qmlMode; _code = code; - - _state.tokenText.clear(); - _state.tokenText.reserve(1024); - _state.errorMessage.clear(); - _state.tokenSpell = QStringView(); - _state.rawString = QStringView(); - - _state.codePtr = code.unicode(); - _endPtr = _state.codePtr + code.length(); - _state.tokenStartPtr = _state.codePtr; - - _state.currentChar = u'\n'; - _state.errorCode = NoError; - - _state.currentLineNumber = lineno; - _state.currentColumnNumber = 0; - _state.tokenValue = 0; - - // parentheses state - _state.parenthesesState = IgnoreParentheses; - _state.parenthesesCount = 0; - - _state.stackToken = -1; - - _state.patternFlags = 0; - _state.tokenLength = 0; - _state.tokenLine = lineno; - _state.tokenColumn = 0; - - _state.validTokenText = false; - _state.prohibitAutomaticSemicolon = false; - _state.restrictedKeyword = false; - _state.terminator = false; - _state.followsClosingBrace = false; - _state.delimited = true; + _skipLinefeed = false; + + _tokenText.clear(); + _tokenText.reserve(1024); + _errorMessage.clear(); + _tokenSpell = QStringView(); + _rawString = QStringView(); + + _codePtr = code.unicode(); + _endPtr = _codePtr + code.length(); + _tokenStartPtr = _codePtr; + + if (lineno >= 0) + _currentLineNumber = lineno; + _currentColumnNumber = 0; + _tokenLine = _currentLineNumber; + _tokenColumn = 0; + _tokenLength = 0; + + if (codeContinuation == Lexer::CodeContinuation::Reset) + _state = State {}; } void Lexer::scanChar() { - if (_state.skipLinefeed) { - Q_ASSERT(*_state.codePtr == u'\n'); - ++_state.codePtr; - _state.skipLinefeed = false; + if (_skipLinefeed) { + Q_ASSERT(*_codePtr == u'\n'); + ++_codePtr; + _skipLinefeed = false; } - _state.currentChar = *_state.codePtr++; - ++_state.currentColumnNumber; + _state.currentChar = *_codePtr++; + ++_currentColumnNumber; if (isLineTerminator()) { if (_state.currentChar == u'\r') { - if (_state.codePtr < _endPtr && *_state.codePtr == u'\n') - _state.skipLinefeed = true; + if (_codePtr < _endPtr && *_codePtr == u'\n') + _skipLinefeed = true; _state.currentChar = u'\n'; } - ++_state.currentLineNumber; - _state.currentColumnNumber = 0; + ++_currentLineNumber; + _currentColumnNumber = 0; } } QChar Lexer::peekChar() { - auto peekPtr = _state.codePtr; + auto peekPtr = _codePtr; if (peekPtr < _endPtr) return *peekPtr; return QChar(); @@ -201,12 +195,88 @@ int octalDigit(QChar c) int Lexer::lex() { const int previousTokenKind = _state.tokenKind; + int tokenKind; + bool firstPass = true; again: - _state.tokenSpell = QStringView(); - _state.rawString = QStringView(); - _state.tokenKind = scanToken(); - _state.tokenLength = _state.codePtr - _state.tokenStartPtr - 1; + tokenKind = T_ERROR; + _tokenSpell = QStringView(); + _rawString = QStringView(); + if (firstPass && _state.stackToken == -1) { + firstPass = false; + if (_codePtr > _endPtr && _lexMode == LexMode::LineByLine && !_code.isEmpty()) + return T_EOL; + + if (_state.comments == CommentState::InMultilineComment) { + scanChar(); + _tokenStartPtr = _codePtr - 1; + _tokenLine = _currentLineNumber; + _tokenColumn = _currentColumnNumber; + while (_codePtr <= _endPtr) { + if (_state.currentChar == u'*') { + scanChar(); + if (_state.currentChar == u'/') { + scanChar(); + if (_engine) { + _engine->addComment(tokenOffset() + 2, + _codePtr - _tokenStartPtr - 1 - 4, + tokenStartLine(), tokenStartColumn() + 2); + } + tokenKind = T_COMMENT; + break; + } + } else { + scanChar(); + } + } + if (tokenKind == T_ERROR) + tokenKind = T_PARTIAL_COMMENT; + } else { + // handle multiline continuation + std::optional<ScanStringMode> scanMode; + switch (previousTokenKind) { + case T_PARTIAL_SINGLE_QUOTE_STRING_LITERAL: + scanMode = ScanStringMode::SingleQuote; + break; + case T_PARTIAL_DOUBLE_QUOTE_STRING_LITERAL: + scanMode = ScanStringMode::DoubleQuote; + break; + case T_PARTIAL_TEMPLATE_HEAD: + scanMode = ScanStringMode::TemplateHead; + break; + case T_PARTIAL_TEMPLATE_MIDDLE: + scanMode = ScanStringMode::TemplateContinuation; + break; + default: + break; + } + if (scanMode) { + scanChar(); + _tokenStartPtr = _codePtr - 1; + _tokenLine = _currentLineNumber; + _tokenColumn = _currentColumnNumber; + tokenKind = scanString(*scanMode); + } + } + } + if (tokenKind == T_ERROR) + tokenKind = scanToken(); + _tokenLength = _codePtr - _tokenStartPtr - 1; + switch (tokenKind) { + // end of line and comments should not "overwrite" the old token type... + case T_EOL: + return tokenKind; + case T_COMMENT: + _state.comments = CommentState::HadComment; + return tokenKind; + case T_PARTIAL_COMMENT: + _state.comments = CommentState::InMultilineComment; + return tokenKind; + default: + _state.comments = CommentState::NoComment; + break; + } + _state.tokenKind = tokenKind; _state.delimited = false; _state.restrictedKeyword = false; @@ -297,7 +367,7 @@ uint Lexer::decodeUnicodeEscapeCharacter(bool *ok) { Q_ASSERT(_state.currentChar == u'u'); scanChar(); // skip u - if (_state.codePtr + 4 <= _endPtr && isHexDigit(_state.currentChar)) { + if (_codePtr + 4 <= _endPtr && isHexDigit(_state.currentChar)) { uint codePoint = 0; for (int i = 0; i < 4; ++i) { int digit = hexDigit(_state.currentChar); @@ -310,14 +380,14 @@ uint Lexer::decodeUnicodeEscapeCharacter(bool *ok) *ok = true; return codePoint; - } else if (_state.codePtr < _endPtr && _state.currentChar == u'{') { + } else if (_codePtr < _endPtr && _state.currentChar == u'{') { scanChar(); // skip '{' uint codePoint = 0; if (!isHexDigit(_state.currentChar)) // need at least one hex digit goto error; - while (_state.codePtr <= _endPtr) { + while (_codePtr <= _endPtr) { int digit = hexDigit(_state.currentChar); if (digit < 0) break; @@ -338,10 +408,9 @@ uint Lexer::decodeUnicodeEscapeCharacter(bool *ok) return codePoint; } - error: - _state.errorCode = IllegalUnicodeEscapeSequence; - _state.errorMessage = - QCoreApplication::translate("QQmlParser", "Illegal unicode escape sequence"); +error: + _state.errorCode = IllegalUnicodeEscapeSequence; + _errorMessage = QCoreApplication::translate("QQmlParser", "Illegal unicode escape sequence"); *ok = false; return 0; @@ -349,7 +418,7 @@ uint Lexer::decodeUnicodeEscapeCharacter(bool *ok) QChar Lexer::decodeHexEscapeCharacter(bool *ok) { - if (isHexDigit(_state.codePtr[0]) && isHexDigit(_state.codePtr[1])) { + if (isHexDigit(_codePtr[0]) && isHexDigit(_codePtr[1])) { scanChar(); const QChar c1 = _state.currentChar; @@ -434,71 +503,46 @@ int Lexer::scanToken() return scanString(TemplateContinuation); } - _state.terminator = false; + if (_state.comments == CommentState::NoComment) + _state.terminator = false; again: _state.validTokenText = false; - // handle comment can be called after a '/' has been read - // and returns true if it actually encountered a comment - auto handleComment = [this]() { - if (_state.currentChar == u'*') { - scanChar(); - while (_state.codePtr <= _endPtr) { - if (_state.currentChar == u'*') { - scanChar(); - if (_state.currentChar == u'/') { - scanChar(); - - if (_engine) { - _engine->addComment(tokenOffset() + 2, - _state.codePtr - _state.tokenStartPtr - 1 - 4, - tokenStartLine(), tokenStartColumn() + 2); - } - - return true; - } - } else { - scanChar(); - } - } - } else if (_state.currentChar == u'/') { - while (_state.codePtr <= _endPtr && !isLineTerminator()) { - scanChar(); - } - if (_engine) { - _engine->addComment(tokenOffset() + 2, - _state.codePtr - _state.tokenStartPtr - 1 - 2, tokenStartLine(), - tokenStartColumn() + 2); - } - return true; - } - return false; - }; - while (_state.currentChar.isSpace()) { if (isLineTerminator()) { + bool isAtEnd = (_codePtr + (_skipLinefeed ? 1 : 0)) == _endPtr; if (_state.restrictedKeyword) { // automatic semicolon insertion - _state.tokenLine = _state.currentLineNumber; - _state.tokenColumn = _state.currentColumnNumber; - _state.tokenStartPtr = _state.codePtr - 1; + _tokenLine = _currentLineNumber; + _tokenColumn = _currentColumnNumber; + _tokenStartPtr = _codePtr - 1; return T_SEMICOLON; - } else { + } else if (_lexMode == LexMode::WholeCode || !isAtEnd) { _state.terminator = true; syncProhibitAutomaticSemicolon(); - } + } // else we will do the previous things at the start of next line... } scanChar(); } - _state.tokenStartPtr = _state.codePtr - 1; - _state.tokenLine = _state.currentLineNumber; - _state.tokenColumn = _state.currentColumnNumber; + _tokenStartPtr = _codePtr - 1; + _tokenLine = _currentLineNumber; + _tokenColumn = _currentColumnNumber; - if (_state.codePtr > _endPtr) - return EOF_SYMBOL; + if (_codePtr >= _endPtr) { + if (_lexMode == LexMode::LineByLine) { + if (!_code.isEmpty()) { + _state.currentChar = *(_codePtr - 2); + return T_EOL; + } else { + return EOF_SYMBOL; + } + } else if (_codePtr > _endPtr) { + return EOF_SYMBOL; + } + } const QChar ch = _state.currentChar; scanChar(); @@ -594,14 +638,50 @@ again: case u':': return T_COLON; case u'/': - if (handleComment()) - goto again; - else if (_state.currentChar == u'=') { + switch (_state.currentChar.unicode()) { + case u'*': + scanChar(); + while (_codePtr <= _endPtr) { + if (_state.currentChar == u'*') { + scanChar(); + if (_state.currentChar == u'/') { + scanChar(); + if (_engine) { + _engine->addComment(tokenOffset() + 2, + _codePtr - _tokenStartPtr - 1 - 4, tokenStartLine(), + tokenStartColumn() + 2); + } + if (_lexMode == LexMode::LineByLine) + return T_COMMENT; + else + goto again; + } + } else { + scanChar(); + } + } + if (_lexMode == LexMode::LineByLine) + return T_PARTIAL_COMMENT; + else + goto again; + case u'/': + while (_codePtr <= _endPtr && !isLineTerminator()) { + scanChar(); + } + if (_engine) { + _engine->addComment(tokenOffset() + 2, _codePtr - _tokenStartPtr - 1 - 2, + tokenStartLine(), tokenStartColumn() + 2); + } + if (_lexMode == LexMode::LineByLine) + return T_COMMENT; + else + goto again; + case u'=': scanChar(); return T_DIVIDE_EQ; + default: + return T_DIVIDE_; } - return T_DIVIDE_; - case u'.': if (_state.importState == ImportState::SawImport) return T_DOT; @@ -614,8 +694,7 @@ again: return T_ELLIPSIS; } else { _state.errorCode = IllegalCharacter; - _state.errorMessage = - QCoreApplication::translate("QQmlParser", "Unexpected token '.'"); + _errorMessage = QCoreApplication::translate("QQmlParser", "Unexpected token '.'"); return T_ERROR; } } @@ -726,16 +805,19 @@ again: return scanNumber(ch); case '#': - if (_state.currentLineNumber == 1 && _state.currentColumnNumber == 2) { + if (_currentLineNumber == 1 && _currentColumnNumber == 2) { // shebang support - while (_state.codePtr <= _endPtr && !isLineTerminator()) { + while (_codePtr <= _endPtr && !isLineTerminator()) { scanChar(); } if (_engine) { - _engine->addComment(tokenOffset(), _state.codePtr - _state.tokenStartPtr - 1, - tokenStartLine(), tokenStartColumn()); + _engine->addComment(tokenOffset(), _codePtr - _tokenStartPtr - 1, tokenStartLine(), + tokenStartColumn()); } - goto again; + if (_lexMode == LexMode::LineByLine) + return T_COMMENT; + else + goto again; } Q_FALLTHROUGH(); @@ -754,26 +836,25 @@ again: } if (isIdentifierStart(c)) { if (identifierWithEscapeChars) { - _state.tokenText.resize(0); + _tokenText.resize(0); if (QChar::requiresSurrogates(c)) { - _state.tokenText += QChar(QChar::highSurrogate(c)); - _state.tokenText += QChar(QChar::lowSurrogate(c)); + _tokenText += QChar(QChar::highSurrogate(c)); + _tokenText += QChar(QChar::lowSurrogate(c)); } else { - _state.tokenText += QChar(c); + _tokenText += QChar(c); } _state.validTokenText = true; } - while (_state.codePtr <= _endPtr) { + while (_codePtr <= _endPtr) { c = _state.currentChar.unicode(); - if (QChar::isHighSurrogate(c) && QChar::isLowSurrogate(_state.codePtr->unicode())) { + if (QChar::isHighSurrogate(c) && QChar::isLowSurrogate(_codePtr->unicode())) { scanChar(); c = QChar::surrogateToUcs4(ushort(c), _state.currentChar.unicode()); - } else if (_state.currentChar == u'\\' && _state.codePtr[0] == u'u') { + } else if (_state.currentChar == u'\\' && _codePtr[0] == u'u') { if (!identifierWithEscapeChars) { identifierWithEscapeChars = true; - _state.tokenText.resize(0); - _state.tokenText.insert(0, _state.tokenStartPtr, - _state.codePtr - _state.tokenStartPtr - 1); + _tokenText.resize(0); + _tokenText.insert(0, _tokenStartPtr, _codePtr - _tokenStartPtr - 1); _state.validTokenText = true; } @@ -787,10 +868,10 @@ again: break; if (QChar::requiresSurrogates(c)) { - _state.tokenText += QChar(QChar::highSurrogate(c)); - _state.tokenText += QChar(QChar::lowSurrogate(c)); + _tokenText += QChar(QChar::highSurrogate(c)); + _tokenText += QChar(QChar::lowSurrogate(c)); } else { - _state.tokenText += QChar(c); + _tokenText += QChar(c); } continue; } @@ -800,43 +881,82 @@ again: if (identifierWithEscapeChars) { if (QChar::requiresSurrogates(c)) { - _state.tokenText += QChar(QChar::highSurrogate(c)); - _state.tokenText += QChar(QChar::lowSurrogate(c)); + _tokenText += QChar(QChar::highSurrogate(c)); + _tokenText += QChar(QChar::lowSurrogate(c)); } else { - _state.tokenText += QChar(c); + _tokenText += QChar(c); } } scanChar(); } - _state.tokenLength = _state.codePtr - _state.tokenStartPtr - 1; + _tokenLength = _codePtr - _tokenStartPtr - 1; int kind = T_IDENTIFIER; if (!identifierWithEscapeChars) - kind = classify(_state.tokenStartPtr, _state.tokenLength, parseModeFlags()); + kind = classify(_tokenStartPtr, _tokenLength, parseModeFlags()); if (kind == T_FUNCTION) { continue_skipping: - while (_state.codePtr < _endPtr && _state.currentChar.isSpace()) + while (_codePtr < _endPtr && _state.currentChar.isSpace()) scanChar(); if (_state.currentChar == u'*') { - _state.tokenLength = _state.codePtr - _state.tokenStartPtr - 1; + _tokenLength = _codePtr - _tokenStartPtr - 1; kind = T_FUNCTION_STAR; scanChar(); } else if (_state.currentChar == u'/') { scanChar(); - if (handleComment()) - goto continue_skipping; + switch (_state.currentChar.unicode()) { + case u'*': + scanChar(); + while (_codePtr <= _endPtr) { + if (_state.currentChar == u'*') { + scanChar(); + if (_state.currentChar == u'/') { + scanChar(); + if (_engine) { + _engine->addComment(tokenOffset() + 2, + _codePtr - _tokenStartPtr - 1 - 4, + tokenStartLine(), + tokenStartColumn() + 2); + } + if (_lexMode == LexMode::LineByLine) + return T_COMMENT; + goto continue_skipping; + } + } else { + scanChar(); + } + } + if (_lexMode == LexMode::LineByLine) + return T_PARTIAL_COMMENT; + else + goto continue_skipping; + case u'/': + while (_codePtr <= _endPtr && !isLineTerminator()) { + scanChar(); + } + if (_engine) { + _engine->addComment(tokenOffset() + 2, + _codePtr - _tokenStartPtr - 1 - 2, + tokenStartLine(), tokenStartColumn() + 2); + } + if (_lexMode == LexMode::LineByLine) + return T_COMMENT; + else + goto continue_skipping; + default: + break; + } } } if (_engine) { if (kind == T_IDENTIFIER && identifierWithEscapeChars) - _state.tokenSpell = _engine->newStringRef(_state.tokenText); + _tokenSpell = _engine->newStringRef(_tokenText); else - _state.tokenSpell = _engine->midRef(_state.tokenStartPtr - _code.unicode(), - _state.tokenLength); + _tokenSpell = _engine->midRef(_tokenStartPtr - _code.unicode(), _tokenLength); } return kind; @@ -852,77 +972,83 @@ again: int Lexer::scanString(ScanStringMode mode) { QChar quote = (mode == TemplateContinuation) ? QChar(TemplateHead) : QChar(mode); + // we actually use T_STRING_LITERAL also for multiline strings, should we want to + // change that we should set it to: + // _state.tokenKind == T_PARTIAL_SINGLE_QUOTE_STRING_LITERAL || + // _state.tokenKind == T_PARTIAL_DOUBLE_QUOTE_STRING_LITERAL + // here and uncomment the multilineStringLiteral = true below. bool multilineStringLiteral = false; - const QChar *startCode = _state.codePtr - 1; + const QChar *startCode = _codePtr - 1; // in case we just parsed a \r, we need to reset this flag to get things working // correctly in the loop below and afterwards - _state.skipLinefeed = false; + _skipLinefeed = false; bool first = true; if (_engine) { - while (_state.codePtr <= _endPtr) { + while (_codePtr <= _endPtr) { if (isLineTerminator()) { if ((quote == u'`' || qmlMode())) { if (first) - --_state.currentLineNumber; // will be read again in scanChar() + --_currentLineNumber; // will be read again in scanChar() break; } _state.errorCode = IllegalCharacter; - _state.errorMessage = QCoreApplication::translate( - "QQmlParser", "Stray newline in string literal"); + _errorMessage = QCoreApplication::translate("QQmlParser", + "Stray newline in string literal"); return T_ERROR; } else if (_state.currentChar == u'\\') { break; } else if (_state.currentChar == u'$' && quote == u'`') { break; } else if (_state.currentChar == quote) { - _state.tokenSpell = _engine->midRef(startCode - _code.unicode(), - _state.codePtr - startCode - 1); - _state.rawString = _state.tokenSpell; + _tokenSpell = + _engine->midRef(startCode - _code.unicode(), _codePtr - startCode - 1); + _rawString = _tokenSpell; scanChar(); if (quote == u'`') _state.bracesCount = _state.outerTemplateBraceCount.pop(); - if (mode == TemplateHead) return T_NO_SUBSTITUTION_TEMPLATE; else if (mode == TemplateContinuation) return T_TEMPLATE_TAIL; + else if (multilineStringLiteral) + return T_MULTILINE_STRING_LITERAL; else return T_STRING_LITERAL; } // don't use scanChar() here, that would transform \r sequences and the midRef() call would create the wrong result - _state.currentChar = *_state.codePtr++; - ++_state.currentColumnNumber; + _state.currentChar = *_codePtr++; + ++_currentColumnNumber; first = false; } } // rewind by one char, so things gets scanned correctly - --_state.codePtr; - --_state.currentColumnNumber; + --_codePtr; + --_currentColumnNumber; _state.validTokenText = true; - _state.tokenText = QString(startCode, _state.codePtr - startCode); + _tokenText = QString(startCode, _codePtr - startCode); auto setRawString = [&](const QChar *end) { QString raw(startCode, end - startCode - 1); raw.replace(QLatin1String("\r\n"), QLatin1String("\n")); raw.replace(u'\r', u'\n'); - _state.rawString = _engine->newStringRef(raw); + _rawString = _engine->newStringRef(raw); }; scanChar(); - while (_state.codePtr <= _endPtr) { + while (_codePtr <= _endPtr) { if (_state.currentChar == quote) { scanChar(); if (_engine) { - _state.tokenSpell = _engine->newStringRef(_state.tokenText); + _tokenSpell = _engine->newStringRef(_tokenText); if (quote == u'`') - setRawString(_state.codePtr - 1); + setRawString(_codePtr - 1); } if (quote == u'`') @@ -934,21 +1060,21 @@ int Lexer::scanString(ScanStringMode mode) return T_NO_SUBSTITUTION_TEMPLATE; return multilineStringLiteral ? T_MULTILINE_STRING_LITERAL : T_STRING_LITERAL; - } else if (quote == u'`' && _state.currentChar == u'$' && *_state.codePtr == u'{') { + } else if (quote == u'`' && _state.currentChar == u'$' && *_codePtr == u'{') { scanChar(); scanChar(); _state.bracesCount = 1; if (_engine) { - _state.tokenSpell = _engine->newStringRef(_state.tokenText); - setRawString(_state.codePtr - 2); + _tokenSpell = _engine->newStringRef(_tokenText); + setRawString(_codePtr - 2); } return (mode == TemplateHead ? T_TEMPLATE_HEAD : T_TEMPLATE_MIDDLE); } else if (_state.currentChar == u'\\') { scanChar(); - if (_state.codePtr > _endPtr) { + if (_codePtr > _endPtr) { _state.errorCode = IllegalEscapeSequence; - _state.errorMessage = QCoreApplication::translate( + _errorMessage = QCoreApplication::translate( "QQmlParser", "End of file reached at escape sequence"); return T_ERROR; } @@ -964,7 +1090,7 @@ int Lexer::scanString(ScanStringMode mode) return T_ERROR; if (QChar::requiresSurrogates(codePoint)) { // need to use a surrogate pair - _state.tokenText += QChar(QChar::highSurrogate(codePoint)); + _tokenText += QChar(QChar::highSurrogate(codePoint)); u = QChar::lowSurrogate(codePoint); } else { u = QChar(codePoint); @@ -977,7 +1103,7 @@ int Lexer::scanString(ScanStringMode mode) u = decodeHexEscapeCharacter(&ok); if (!ok) { _state.errorCode = IllegalHexadecimalEscapeSequence; - _state.errorMessage = QCoreApplication::translate( + _errorMessage = QCoreApplication::translate( "QQmlParser", "Illegal hexadecimal escape sequence"); return T_ERROR; } @@ -995,7 +1121,7 @@ int Lexer::scanString(ScanStringMode mode) case u'v': u = u'\v'; scanChar(); break; case u'0': - if (!_state.codePtr->isDigit()) { + if (!_codePtr->isDigit()) { scanChar(); u = u'\0'; break; @@ -1011,7 +1137,7 @@ int Lexer::scanString(ScanStringMode mode) case u'8': case u'9': _state.errorCode = IllegalEscapeSequence; - _state.errorMessage = QCoreApplication::translate( + _errorMessage = QCoreApplication::translate( "QQmlParser", "Octal escape sequences are not allowed"); return T_ERROR; @@ -1019,6 +1145,8 @@ int Lexer::scanString(ScanStringMode mode) case u'\n': case 0x2028u: case 0x2029u: + // uncomment the following to use T_MULTILINE_STRING_LITERAL + // multilineStringLiteral = true; scanChar(); continue; @@ -1028,16 +1156,23 @@ int Lexer::scanString(ScanStringMode mode) scanChar(); } - _state.tokenText += u; + _tokenText += u; } else { - _state.tokenText += _state.currentChar; + _tokenText += _state.currentChar; scanChar(); } } - + if (_lexMode == LexMode::LineByLine && !_code.isEmpty()) { + if (mode == TemplateContinuation) + return T_PARTIAL_TEMPLATE_MIDDLE; + else if (mode == TemplateHead) + return T_PARTIAL_TEMPLATE_HEAD; + else if (mode == SingleQuote) + return T_PARTIAL_SINGLE_QUOTE_STRING_LITERAL; + return T_PARTIAL_DOUBLE_QUOTE_STRING_LITERAL; + } _state.errorCode = UnclosedStringLiteral; - _state.errorMessage = - QCoreApplication::translate("QQmlParser", "Unclosed string at end of line"); + _errorMessage = QCoreApplication::translate("QQmlParser", "Unclosed string at end of line"); return T_ERROR; } @@ -1052,11 +1187,10 @@ int Lexer::scanNumber(QChar ch) if (!isHexDigit(_state.currentChar)) { _state.errorCode = IllegalNumber; - _state.errorMessage = - QCoreApplication::translate( - "QQmlParser", - "At least one hexadecimal digit is required after '0%1'") - .arg(ch); + _errorMessage = QCoreApplication::translate( + "QQmlParser", + "At least one hexadecimal digit is required after '0%1'") + .arg(ch); return T_ERROR; } @@ -1080,7 +1214,7 @@ int Lexer::scanNumber(QChar ch) if (!isOctalDigit(_state.currentChar.unicode())) { _state.errorCode = IllegalNumber; - _state.errorMessage = + _errorMessage = QCoreApplication::translate( "QQmlParser", "At least one octal digit is required after '0%1'") .arg(ch); @@ -1107,7 +1241,7 @@ int Lexer::scanNumber(QChar ch) if (_state.currentChar.unicode() != u'0' && _state.currentChar.unicode() != u'1') { _state.errorCode = IllegalNumber; - _state.errorMessage = + _errorMessage = QCoreApplication::translate( "QQmlParser", "At least one binary digit is required after '0%1'") .arg(ch); @@ -1130,8 +1264,8 @@ int Lexer::scanNumber(QChar ch) return T_NUMERIC_LITERAL; } else if (_state.currentChar.isDigit() && !qmlMode()) { _state.errorCode = IllegalCharacter; - _state.errorMessage = QCoreApplication::translate( - "QQmlParser", "Decimal numbers can't start with '0'"); + _errorMessage = QCoreApplication::translate("QQmlParser", + "Decimal numbers can't start with '0'"); return T_ERROR; } } @@ -1158,9 +1292,8 @@ int Lexer::scanNumber(QChar ch) } if (_state.currentChar == u'e' || _state.currentChar == u'E') { - if (_state.codePtr[0].isDigit() - || ((_state.codePtr[0] == u'+' || _state.codePtr[0] == u'-') - && _state.codePtr[1].isDigit())) { + if (_codePtr[0].isDigit() + || ((_codePtr[0] == u'+' || _codePtr[0] == u'-') && _codePtr[1].isDigit())) { chars.append(_state.currentChar.unicode()); scanChar(); // consume `e' @@ -1185,7 +1318,7 @@ int Lexer::scanNumber(QChar ch) if (end - begin != chars.size()) { _state.errorCode = IllegalExponentIndicator; - _state.errorMessage = + _errorMessage = QCoreApplication::translate("QQmlParser", "Illegal syntax for exponential number"); return T_ERROR; } @@ -1215,12 +1348,12 @@ int Lexer::scanVersionNumber(QChar ch) bool Lexer::scanRegExp(RegExpBodyPrefix prefix) { - _state.tokenText.resize(0); + _tokenText.resize(0); _state.validTokenText = true; _state.patternFlags = 0; if (prefix == EqualPrefix) - _state.tokenText += u'='; + _tokenText += u'='; while (true) { switch (_state.currentChar.unicode()) { @@ -1232,78 +1365,77 @@ bool Lexer::scanRegExp(RegExpBodyPrefix prefix) while (isIdentLetter(_state.currentChar)) { int flag = regExpFlagFromChar(_state.currentChar); if (flag == 0 || _state.patternFlags & flag) { - _state.errorMessage = - QCoreApplication::translate("QQmlParser", - "Invalid regular expression flag '%0'") - .arg(QChar(_state.currentChar)); + _errorMessage = QCoreApplication::translate( + "QQmlParser", "Invalid regular expression flag '%0'") + .arg(QChar(_state.currentChar)); return false; } _state.patternFlags |= flag; scanChar(); } - _state.tokenLength = _state.codePtr - _state.tokenStartPtr - 1; + _tokenLength = _codePtr - _tokenStartPtr - 1; return true; case u'\\': // regular expression backslash sequence - _state.tokenText += _state.currentChar; + _tokenText += _state.currentChar; scanChar(); - if (_state.codePtr > _endPtr || isLineTerminator()) { - _state.errorMessage = QCoreApplication::translate( + if (_codePtr > _endPtr || isLineTerminator()) { + _errorMessage = QCoreApplication::translate( "QQmlParser", "Unterminated regular expression backslash sequence"); return false; } - _state.tokenText += _state.currentChar; + _tokenText += _state.currentChar; scanChar(); break; case u'[': // regular expression class - _state.tokenText += _state.currentChar; + _tokenText += _state.currentChar; scanChar(); - while (_state.codePtr <= _endPtr && !isLineTerminator()) { + while (_codePtr <= _endPtr && !isLineTerminator()) { if (_state.currentChar == u']') break; else if (_state.currentChar == u'\\') { // regular expression backslash sequence - _state.tokenText += _state.currentChar; + _tokenText += _state.currentChar; scanChar(); - if (_state.codePtr > _endPtr || isLineTerminator()) { - _state.errorMessage = QCoreApplication::translate( + if (_codePtr > _endPtr || isLineTerminator()) { + _errorMessage = QCoreApplication::translate( "QQmlParser", "Unterminated regular expression backslash sequence"); return false; } - _state.tokenText += _state.currentChar; + _tokenText += _state.currentChar; scanChar(); } else { - _state.tokenText += _state.currentChar; + _tokenText += _state.currentChar; scanChar(); } } if (_state.currentChar != u']') { - _state.errorMessage = QCoreApplication::translate( + _errorMessage = QCoreApplication::translate( "QQmlParser", "Unterminated regular expression class"); return false; } - _state.tokenText += _state.currentChar; + _tokenText += _state.currentChar; scanChar(); // skip ] break; default: - if (_state.codePtr > _endPtr || isLineTerminator()) { - _state.errorMessage = QCoreApplication::translate( + if (_codePtr > _endPtr || isLineTerminator()) { + _errorMessage = QCoreApplication::translate( "QQmlParser", "Unterminated regular expression literal"); return false; } else { - _state.tokenText += _state.currentChar; + _tokenText += _state.currentChar; scanChar(); } } // switch @@ -1329,7 +1461,7 @@ unsigned Lexer::isLineTerminatorSequence() const case 0x2029u: return 1; case 0x000Du: - if (_state.codePtr->unicode() == 0x000Au) + if (_codePtr->unicode() == 0x000Au) return 2; else return 1; @@ -1371,12 +1503,12 @@ bool Lexer::isOctalDigit(ushort c) QString Lexer::tokenText() const { if (_state.validTokenText) - return _state.tokenText; + return _tokenText; if (_state.tokenKind == T_STRING_LITERAL) - return QString(_state.tokenStartPtr + 1, _state.tokenLength - 2); + return QString(_tokenStartPtr + 1, _tokenLength - 2); - return QString(_state.tokenStartPtr, _state.tokenLength); + return QString(_tokenStartPtr, _tokenLength); } Lexer::Error Lexer::errorCode() const @@ -1386,7 +1518,7 @@ Lexer::Error Lexer::errorCode() const QString Lexer::errorMessage() const { - return _state.errorMessage; + return _errorMessage; } void Lexer::syncProhibitAutomaticSemicolon() diff --git a/src/qml/parser/qqmljslexer_p.h b/src/qml/parser/qqmljslexer_p.h index 7bcc3def36..a49c1a8f76 100644 --- a/src/qml/parser/qqmljslexer_p.h +++ b/src/qml/parser/qqmljslexer_p.h @@ -93,8 +93,12 @@ public: NoQmlImport }; + enum class LexMode { WholeCode, LineByLine }; + + enum class CodeContinuation { Reset, Continue }; + public: - Lexer(Engine *engine); + Lexer(Engine *engine, LexMode lexMode = LexMode::WholeCode); int parseModeFlags() const { int flags = 0; @@ -112,7 +116,8 @@ public: void setStaticIsKeyword(bool b) { _staticIsKeyword = b; } QString code() const; - void setCode(const QString &code, int lineno, bool qmlMode = true); + void setCode(const QString &code, int lineno, bool qmlMode = true, + CodeContinuation codeContinuation = CodeContinuation::Reset); int lex(); @@ -120,17 +125,17 @@ public: bool scanDirectives(Directives *directives, DiagnosticMessage *error); int regExpFlags() const { return _state.patternFlags; } - QString regExpPattern() const { return _state.tokenText; } + QString regExpPattern() const { return _tokenText; } int tokenKind() const { return _state.tokenKind; } - int tokenOffset() const { return _state.tokenStartPtr - _code.unicode(); } - int tokenLength() const { return _state.tokenLength; } + int tokenOffset() const { return _currentOffset + _tokenStartPtr - _code.unicode(); } + int tokenLength() const { return _tokenLength; } - int tokenStartLine() const { return _state.tokenLine; } - int tokenStartColumn() const { return _state.tokenColumn; } + int tokenStartLine() const { return _tokenLine; } + int tokenStartColumn() const { return _tokenColumn; } - inline QStringView tokenSpell() const { return _state.tokenSpell; } - inline QStringView rawString() const { return _state.rawString; } + inline QStringView tokenSpell() const { return _tokenSpell; } + inline QStringView rawString() const { return _rawString; } double tokenValue() const { return _state.tokenValue; } QString tokenText() const; @@ -147,24 +152,16 @@ public: BalancedParentheses }; + enum class CommentState { NoComment, HadComment, InMultilineComment }; + void enterGeneratorBody() { ++_state.generatorLevel; } void leaveGeneratorBody() { --_state.generatorLevel; } struct State { - QString tokenText; - QString errorMessage; - QStringView tokenSpell; - QStringView rawString; - - const QChar *codePtr = nullptr; - const QChar *tokenStartPtr = nullptr; - - QChar currentChar = u'\n'; Error errorCode = NoError; - int currentLineNumber = 0; - int currentColumnNumber = 0; + QChar currentChar = u'\n'; double tokenValue = 0; // parentheses state @@ -179,9 +176,6 @@ public: int patternFlags = 0; int tokenKind = 0; - int tokenLength = 0; - int tokenLine = 0; - int tokenColumn = 0; ImportState importState = ImportState::NoQmlImport; bool validTokenText = false; @@ -190,9 +184,54 @@ public: bool terminator = false; bool followsClosingBrace = false; bool delimited = true; - bool skipLinefeed = false; bool handlingDirectives = false; + CommentState comments = CommentState::NoComment; int generatorLevel = 0; + + friend bool operator==(State const &s1, State const &s2) + { + if (s1.errorCode != s2.errorCode) + return false; + if (s1.currentChar != s2.currentChar) + return false; + if (s1.tokenValue != s2.tokenValue) + return false; + if (s1.parenthesesState != s2.parenthesesState) + return false; + if (s1.parenthesesCount != s2.parenthesesCount) + return false; + if (s1.outerTemplateBraceCount != s2.outerTemplateBraceCount) + return false; + if (s1.bracesCount != s2.bracesCount) + return false; + if (s1.stackToken != s2.stackToken) + return false; + if (s1.patternFlags != s2.patternFlags) + return false; + if (s1.tokenKind != s2.tokenKind) + return false; + if (s1.importState != s2.importState) + return false; + if (s1.validTokenText != s2.validTokenText) + return false; + if (s1.prohibitAutomaticSemicolon != s2.prohibitAutomaticSemicolon) + return false; + if (s1.restrictedKeyword != s2.restrictedKeyword) + return false; + if (s1.terminator != s2.terminator) + return false; + if (s1.followsClosingBrace != s2.followsClosingBrace) + return false; + if (s1.delimited != s2.delimited) + return false; + if (s1.handlingDirectives != s2.handlingDirectives) + return false; + if (s1.generatorLevel != s2.generatorLevel) + return false; + return true; + } + + friend bool operator!=(State const &s1, State const &s2) { return !(s1 == s2); } }; const State &state() const; @@ -229,11 +268,30 @@ private: private: Engine *_engine; + LexMode _lexMode = LexMode::WholeCode; QString _code; const QChar *_endPtr; bool _qmlMode; bool _staticIsKeyword = false; + bool _skipLinefeed = false; + + int _currentLineNumber = 0; + int _currentColumnNumber = 0; + int _currentOffset = 0; + + int _tokenLength = 0; + int _tokenLine = 0; + int _tokenColumn = 0; + + QString _tokenText; + QString _errorMessage; + QStringView _tokenSpell; + QStringView _rawString; + + const QChar *_codePtr = nullptr; + const QChar *_tokenStartPtr = nullptr; + State _state; }; |