diff options
Diffstat (limited to 'src/qml/qml/parser/qqmljslexer.cpp')
-rw-r--r-- | src/qml/qml/parser/qqmljslexer.cpp | 332 |
1 files changed, 242 insertions, 90 deletions
diff --git a/src/qml/qml/parser/qqmljslexer.cpp b/src/qml/qml/parser/qqmljslexer.cpp index ec9b718917..cb78238f99 100644 --- a/src/qml/qml/parser/qqmljslexer.cpp +++ b/src/qml/qml/parser/qqmljslexer.cpp @@ -42,10 +42,11 @@ #include "qqmljslexer_p.h" #include "qqmljsengine_p.h" #include "qqmljsmemorypool_p.h" +#include "qqmljskeywords_p.h" -#include <QtCore/QCoreApplication> -#include <QtCore/QVarLengthArray> -#include <QtCore/QDebug> +#include <QtCore/qcoreapplication.h> +#include <QtCore/qvarlengtharray.h> +#include <QtCore/qdebug.h> QT_BEGIN_NAMESPACE Q_CORE_EXPORT double qstrtod(const char *s00, char const **se, bool *ok); @@ -53,7 +54,7 @@ QT_END_NAMESPACE using namespace QQmlJS; -static int regExpFlagFromChar(const QChar &ch) +static inline int regExpFlagFromChar(const QChar &ch) { switch (ch.unicode()) { case 'g': return Lexer::RegExp_Global; @@ -63,7 +64,7 @@ static int regExpFlagFromChar(const QChar &ch) return 0; } -static unsigned char convertHex(ushort c) +static inline unsigned char convertHex(ushort c) { if (c >= '0' && c <= '9') return (c - '0'); @@ -73,12 +74,12 @@ static unsigned char convertHex(ushort c) return (c - 'A' + 10); } -static QChar convertHex(QChar c1, QChar c2) +static inline QChar convertHex(QChar c1, QChar c2) { return QChar((convertHex(c1.unicode()) << 4) + convertHex(c2.unicode())); } -static QChar convertUnicode(QChar c1, QChar c2, QChar c3, QChar c4) +static inline QChar convertUnicode(QChar c1, QChar c2, QChar c3, QChar c4) { return QChar((convertHex(c3.unicode()) << 4) + convertHex(c4.unicode()), (convertHex(c1.unicode()) << 4) + convertHex(c2.unicode())); @@ -136,6 +137,7 @@ void Lexer::setCode(const QString &code, int lineno, bool qmlMode) _tokenSpell = QStringRef(); _codePtr = code.unicode(); + _endPtr = _codePtr + code.length(); _lastLinePtr = _codePtr; _tokenLinePtr = _codePtr; _tokenStartPtr = _codePtr; @@ -177,6 +179,52 @@ void Lexer::scanChar() } } +namespace { +inline bool isBinop(int tok) +{ + switch (tok) { + case Lexer::T_AND: + case Lexer::T_AND_AND: + case Lexer::T_AND_EQ: + case Lexer::T_DIVIDE_: + case Lexer::T_DIVIDE_EQ: + case Lexer::T_EQ: + case Lexer::T_EQ_EQ: + case Lexer::T_EQ_EQ_EQ: + case Lexer::T_GE: + case Lexer::T_GT: + case Lexer::T_GT_GT: + case Lexer::T_GT_GT_EQ: + case Lexer::T_GT_GT_GT: + case Lexer::T_GT_GT_GT_EQ: + case Lexer::T_LE: + case Lexer::T_LT: + case Lexer::T_LT_LT: + case Lexer::T_LT_LT_EQ: + case Lexer::T_MINUS: + case Lexer::T_MINUS_EQ: + case Lexer::T_NOT_EQ: + case Lexer::T_NOT_EQ_EQ: + case Lexer::T_OR: + case Lexer::T_OR_EQ: + case Lexer::T_OR_OR: + case Lexer::T_PLUS: + case Lexer::T_PLUS_EQ: + case Lexer::T_REMAINDER: + case Lexer::T_REMAINDER_EQ: + case Lexer::T_RETURN: + case Lexer::T_STAR: + case Lexer::T_STAR_EQ: + case Lexer::T_XOR: + case Lexer::T_XOR_EQ: + return true; + + default: + return false; + } +} +} // anonymous namespace + int Lexer::lex() { const int previousTokenKind = _tokenKind; @@ -193,9 +241,15 @@ int Lexer::lex() switch (_tokenKind) { case T_LBRACE: case T_SEMICOLON: + case T_QUESTION: case T_COLON: + case T_TILDE: _delimited = true; break; + default: + if (isBinop(_tokenKind)) + _delimited = true; + break; case T_IF: case T_FOR: @@ -275,6 +329,80 @@ QChar Lexer::decodeUnicodeEscapeCharacter(bool *ok) return QChar(); } +QChar Lexer::decodeHexEscapeCharacter(bool *ok) +{ + if (isHexDigit(_codePtr[0]) && isHexDigit(_codePtr[1])) { + scanChar(); + + const QChar c1 = _char; + scanChar(); + + const QChar c2 = _char; + scanChar(); + + if (ok) + *ok = true; + + return convertHex(c1, c2); + } + + *ok = false; + return QChar(); +} + +static inline bool isIdentifierStart(QChar ch) +{ + // fast path for ascii + if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || + (ch.unicode() >= 'A' && ch.unicode() <= 'Z') || + ch == '$' || ch == '_') + return true; + + switch (ch.category()) { + case QChar::Number_Letter: + case QChar::Letter_Uppercase: + case QChar::Letter_Lowercase: + case QChar::Letter_Titlecase: + case QChar::Letter_Modifier: + case QChar::Letter_Other: + return true; + default: + break; + } + return false; +} + +static bool isIdentifierPart(QChar ch) +{ + // fast path for ascii + if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') || + (ch.unicode() >= 'A' && ch.unicode() <= 'Z') || + (ch.unicode() >= '0' && ch.unicode() <= '9') || + ch == '$' || ch == '_' || + ch.unicode() == 0x200c /* ZWNJ */ || ch.unicode() == 0x200d /* ZWJ */) + return true; + + switch (ch.category()) { + case QChar::Mark_NonSpacing: + case QChar::Mark_SpacingCombining: + + case QChar::Number_DecimalDigit: + case QChar::Number_Letter: + + case QChar::Letter_Uppercase: + case QChar::Letter_Lowercase: + case QChar::Letter_Titlecase: + case QChar::Letter_Modifier: + case QChar::Letter_Other: + + case QChar::Punctuation_Connector: + return true; + default: + break; + } + return false; +} + int Lexer::scanToken() { if (_stackToken != -1) { @@ -310,7 +438,7 @@ again: _tokenStartPtr = _codePtr - 1; _tokenLine = _currentLineNumber; - if (_char.isNull()) + if (_codePtr > _endPtr) return EOF_SYMBOL; const QChar ch = _char; @@ -395,7 +523,7 @@ again: case '/': if (_char == QLatin1Char('*')) { scanChar(); - while (!_char.isNull()) { + while (_codePtr <= _endPtr) { if (_char == QLatin1Char('*')) { scanChar(); if (_char == QLatin1Char('/')) { @@ -413,7 +541,7 @@ again: } } } else if (_char == QLatin1Char('/')) { - while (!_char.isNull() && !isLineTerminator()) { + while (_codePtr <= _endPtr && !isLineTerminator()) { scanChar(); } if (_engine) { @@ -555,8 +683,14 @@ again: const QChar *startCode = _codePtr; if (_engine) { - while (!_char.isNull()) { - if (isLineTerminator() || _char == QLatin1Char('\\')) { + while (_codePtr <= _endPtr) { + if (isLineTerminator()) { + if (qmlMode()) + break; + _errorCode = IllegalCharacter; + _errorMessage = QCoreApplication::translate("QQmlParser", "Stray newline in string literal"); + return T_ERROR; + } else if (_char == QLatin1Char('\\')) { break; } else if (_char == quote) { _tokenSpell = _engine->midRef(startCode - _code.unicode() - 1, _codePtr - startCode); @@ -574,7 +708,7 @@ again: while (startCode != _codePtr - 1) _tokenText += *startCode++; - while (! _char.isNull()) { + while (_codePtr <= _endPtr) { if (unsigned sequenceLength = isLineTerminatorSequence()) { multilineStringLiteral = true; _tokenText += _char; @@ -592,32 +726,29 @@ again: scanChar(); QChar u; - bool ok = false; switch (_char.unicode()) { // unicode escape sequence - case 'u': + case 'u': { + bool ok = false; u = decodeUnicodeEscapeCharacter(&ok); - if (! ok) - u = _char; - break; + if (! ok) { + _errorCode = IllegalUnicodeEscapeSequence; + _errorMessage = QCoreApplication::translate("QQmlParser", "Illegal unicode escape sequence"); + return T_ERROR; + } + } break; // hex escape sequence - case 'x': - if (isHexDigit(_codePtr[0]) && isHexDigit(_codePtr[1])) { - scanChar(); - - const QChar c1 = _char; - scanChar(); - - const QChar c2 = _char; - scanChar(); - - u = convertHex(c1, c2); - } else { - u = _char; + case 'x': { + bool ok = false; + u = decodeHexEscapeCharacter(&ok); + if (!ok) { + _errorCode = IllegalHexadecimalEscapeSequence; + _errorMessage = QCoreApplication::translate("QQmlParser", "Illegal hexadecimal escape sequence"); + return T_ERROR; } - break; + } break; // single character escape sequence case '\\': u = QLatin1Char('\\'); scanChar(); break; @@ -631,32 +762,31 @@ again: case 'v': u = QLatin1Char('\v'); scanChar(); break; case '0': - if (! _codePtr[1].isDigit()) { + if (! _codePtr->isDigit()) { scanChar(); u = QLatin1Char('\0'); - } else { - // ### parse deprecated octal escape sequence ? - u = _char; + break; } - break; + // fall through + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + _errorCode = IllegalEscapeSequence; + _errorMessage = QCoreApplication::translate("QQmlParser", "Octal escape sequences are not allowed"); + return T_ERROR; case '\r': - if (isLineTerminatorSequence() == 2) { - _tokenText += QLatin1Char('\r'); - u = QLatin1Char('\n'); - } else { - u = QLatin1Char('\r'); - } - scanChar(); - break; - case '\n': case 0x2028u: case 0x2029u: - u = _char; scanChar(); - break; - + continue; default: // non escape character @@ -687,28 +817,28 @@ again: case '9': return scanNumber(ch); - default: - if (ch.isLetter() || ch == QLatin1Char('$') || ch == QLatin1Char('_') || (ch == QLatin1Char('\\') && _char == QLatin1Char('u'))) { - bool identifierWithEscapeChars = false; - if (ch == QLatin1Char('\\')) { - identifierWithEscapeChars = true; + default: { + QChar c = ch; + bool identifierWithEscapeChars = false; + if (c == QLatin1Char('\\') && _char == QLatin1Char('u')) { + identifierWithEscapeChars = true; + bool ok = false; + c = decodeUnicodeEscapeCharacter(&ok); + if (! ok) { + _errorCode = IllegalUnicodeEscapeSequence; + _errorMessage = QCoreApplication::translate("QQmlParser", "Illegal unicode escape sequence"); + return T_ERROR; + } + } + if (isIdentifierStart(c)) { + if (identifierWithEscapeChars) { _tokenText.resize(0); - bool ok = false; - _tokenText += decodeUnicodeEscapeCharacter(&ok); + _tokenText += c; _validTokenText = true; - if (! ok) { - _errorCode = IllegalUnicodeEscapeSequence; - _errorMessage = QCoreApplication::translate("QQmlParser", "Illegal unicode escape sequence"); - return T_ERROR; - } } while (true) { - if (_char.isLetterOrNumber() || _char == QLatin1Char('$') || _char == QLatin1Char('_')) { - if (identifierWithEscapeChars) - _tokenText += _char; - - scanChar(); - } else if (_char == QLatin1Char('\\') && _codePtr[0] == QLatin1Char('u')) { + c = _char; + if (_char == QLatin1Char('\\') && _codePtr[0] == QLatin1Char('u')) { if (! identifierWithEscapeChars) { identifierWithEscapeChars = true; _tokenText.resize(0); @@ -718,31 +848,41 @@ again: scanChar(); // skip '\\' bool ok = false; - _tokenText += decodeUnicodeEscapeCharacter(&ok); + c = decodeUnicodeEscapeCharacter(&ok); if (! ok) { _errorCode = IllegalUnicodeEscapeSequence; _errorMessage = QCoreApplication::translate("QQmlParser", "Illegal unicode escape sequence"); return T_ERROR; } - } else { - _tokenLength = _codePtr - _tokenStartPtr - 1; + if (isIdentifierPart(c)) + _tokenText += c; + continue; + } else if (isIdentifierPart(c)) { + if (identifierWithEscapeChars) + _tokenText += c; - int kind = T_IDENTIFIER; + scanChar(); + continue; + } - if (! identifierWithEscapeChars) - kind = classify(_tokenStartPtr, _tokenLength, _qmlMode); + _tokenLength = _codePtr - _tokenStartPtr - 1; - if (_engine) { - if (kind == T_IDENTIFIER && identifierWithEscapeChars) - _tokenSpell = _engine->newStringRef(_tokenText); - else - _tokenSpell = _engine->midRef(_tokenStartPtr - _code.unicode(), _tokenLength); - } + int kind = T_IDENTIFIER; + + if (! identifierWithEscapeChars) + kind = classify(_tokenStartPtr, _tokenLength, _qmlMode); - return kind; + if (_engine) { + if (kind == T_IDENTIFIER && identifierWithEscapeChars) + _tokenSpell = _engine->newStringRef(_tokenText); + else + _tokenSpell = _engine->midRef(_tokenStartPtr - _code.unicode(), _tokenLength); } + + return kind; } } + } break; } @@ -753,12 +893,14 @@ again: int Lexer::scanNumber(QChar ch) { if (ch != QLatin1Char('0')) { - double integer = ch.unicode() - '0'; + QByteArray buf; + buf.reserve(64); + buf += ch.toLatin1(); QChar n = _char; const QChar *code = _codePtr; while (n.isDigit()) { - integer = integer * 10 + (n.unicode() - '0'); + buf += n.toLatin1(); n = *code++; } @@ -767,17 +909,23 @@ int Lexer::scanNumber(QChar ch) _codePtr = code - 1; scanChar(); } - _tokenValue = integer; + buf.append('\0'); + _tokenValue = strtod(buf.constData(), 0); return T_NUMERIC_LITERAL; } + } else if (_char.isDigit() && !qmlMode()) { + _errorCode = IllegalCharacter; + _errorMessage = QCoreApplication::translate("QQmlParser", "Decimal numbers can't start with '0'"); + return T_ERROR; } QVarLengthArray<char,32> chars; chars.append(ch.unicode()); if (ch == QLatin1Char('0') && (_char == QLatin1Char('x') || _char == QLatin1Char('X'))) { - // parse hex integer literal + ch = _char; // remember the x or X to use it in the error message below. + // parse hex integer literal chars.append(_char.unicode()); scanChar(); // consume `x' @@ -786,6 +934,12 @@ int Lexer::scanNumber(QChar ch) scanChar(); } + if (chars.size() < 3) { + _errorCode = IllegalHexNumber; + _errorMessage = QCoreApplication::translate("QQmlParser", "At least one hexadecimal digit is required after '0%1'").arg(ch); + return T_ERROR; + } + _tokenValue = integerFromString(chars.constData(), chars.size(), 16); return T_NUMERIC_LITERAL; } @@ -900,7 +1054,7 @@ bool Lexer::scanRegExp(RegExpBodyPrefix prefix) _tokenText += _char; scanChar(); - if (_char.isNull() || isLineTerminator()) { + if (_codePtr > _endPtr || isLineTerminator()) { _errorMessage = QCoreApplication::translate("QQmlParser", "Unterminated regular expression backslash sequence"); return false; } @@ -914,7 +1068,7 @@ bool Lexer::scanRegExp(RegExpBodyPrefix prefix) _tokenText += _char; scanChar(); - while (! _char.isNull() && ! isLineTerminator()) { + while (_codePtr <= _endPtr && ! isLineTerminator()) { if (_char == QLatin1Char(']')) break; else if (_char == QLatin1Char('\\')) { @@ -922,7 +1076,7 @@ bool Lexer::scanRegExp(RegExpBodyPrefix prefix) _tokenText += _char; scanChar(); - if (_char.isNull() || isLineTerminator()) { + if (_codePtr > _endPtr || isLineTerminator()) { _errorMessage = QCoreApplication::translate("QQmlParser", "Unterminated regular expression backslash sequence"); return false; } @@ -945,7 +1099,7 @@ bool Lexer::scanRegExp(RegExpBodyPrefix prefix) break; default: - if (_char.isNull() || isLineTerminator()) { + if (_codePtr > _endPtr || isLineTerminator()) { _errorMessage = QCoreApplication::translate("QQmlParser", "Unterminated regular expression literal"); return false; } else { @@ -1172,5 +1326,3 @@ bool Lexer::scanDirectives(Directives *directives) return true; } - -#include "qqmljskeywords_p.h" |