1 files changed, 242 insertions, 90 deletions
diff --git a/src/qml/qml/parser/qqmljslexer.cpp b/src/qml/qml/parser/qqmljslexer.cpp
index ec9b718917..cb78238f99 100644
--- a/src/qml/qml/parser/qqmljslexer.cpp
+++ b/src/qml/qml/parser/qqmljslexer.cpp
@@ -42,10 +42,11 @@
 #include "qqmljslexer_p.h"
 #include "qqmljsengine_p.h"
 #include "qqmljsmemorypool_p.h"
+#include "qqmljskeywords_p.h"
 
-#include <QtCore/QCoreApplication>
-#include <QtCore/QVarLengthArray>
-#include <QtCore/QDebug>
+#include <QtCore/qcoreapplication.h>
+#include <QtCore/qvarlengtharray.h>
+#include <QtCore/qdebug.h>
 
 QT_BEGIN_NAMESPACE
 Q_CORE_EXPORT double qstrtod(const char *s00, char const **se, bool *ok);
@@ -53,7 +54,7 @@ QT_END_NAMESPACE
 
 using namespace QQmlJS;
 
-static int regExpFlagFromChar(const QChar &ch)
+static inline int regExpFlagFromChar(const QChar &ch)
 {
     switch (ch.unicode()) {
     case 'g': return Lexer::RegExp_Global;
@@ -63,7 +64,7 @@ static int regExpFlagFromChar(const QChar &ch)
     return 0;
 }
 
-static unsigned char convertHex(ushort c)
+static inline unsigned char convertHex(ushort c)
 {
     if (c >= '0' && c <= '9')
         return (c - '0');
@@ -73,12 +74,12 @@ static unsigned char convertHex(ushort c)
         return (c - 'A' + 10);
 }
 
-static QChar convertHex(QChar c1, QChar c2)
+static inline QChar convertHex(QChar c1, QChar c2) // c1 supplies the high nibble, c2 the low nibble
 {
     return QChar((convertHex(c1.unicode()) << 4) + convertHex(c2.unicode()));
 }
 
-static QChar convertUnicode(QChar c1, QChar c2, QChar c3, QChar c4)
+static inline QChar convertUnicode(QChar c1, QChar c2, QChar c3, QChar c4)
 {
     return QChar((convertHex(c3.unicode()) << 4) + convertHex(c4.unicode()),
                  (convertHex(c1.unicode()) << 4) + convertHex(c2.unicode()));
@@ -136,6 +137,7 @@ void Lexer::setCode(const QString &code, int lineno, bool qmlMode)
     _tokenSpell = QStringRef();
 
     _codePtr = code.unicode();
+    _endPtr = _codePtr + code.length();
     _lastLinePtr = _codePtr;
     _tokenLinePtr = _codePtr;
     _tokenStartPtr = _codePtr;
@@ -177,6 +179,52 @@ void Lexer::scanChar()
     }
 }
 
+namespace {
+inline bool isBinop(int tok) // true for the tokens after which lex() sets _delimited
+{
+    switch (tok) {
+    case Lexer::T_AND:
+    case Lexer::T_AND_AND:
+    case Lexer::T_AND_EQ:
+    case Lexer::T_DIVIDE_:
+    case Lexer::T_DIVIDE_EQ:
+    case Lexer::T_EQ:
+    case Lexer::T_EQ_EQ:
+    case Lexer::T_EQ_EQ_EQ:
+    case Lexer::T_GE:
+    case Lexer::T_GT:
+    case Lexer::T_GT_GT:
+    case Lexer::T_GT_GT_EQ:
+    case Lexer::T_GT_GT_GT:
+    case Lexer::T_GT_GT_GT_EQ:
+    case Lexer::T_LE:
+    case Lexer::T_LT:
+    case Lexer::T_LT_LT:
+    case Lexer::T_LT_LT_EQ:
+    case Lexer::T_MINUS:
+    case Lexer::T_MINUS_EQ:
+    case Lexer::T_NOT_EQ:
+    case Lexer::T_NOT_EQ_EQ:
+    case Lexer::T_OR:
+    case Lexer::T_OR_EQ:
+    case Lexer::T_OR_OR:
+    case Lexer::T_PLUS:
+    case Lexer::T_PLUS_EQ:
+    case Lexer::T_REMAINDER:
+    case Lexer::T_REMAINDER_EQ:
+    case Lexer::T_RETURN: // NOTE(review): a keyword, not an operator - presumably listed on purpose; confirm
+    case Lexer::T_STAR:
+    case Lexer::T_STAR_EQ:
+    case Lexer::T_XOR:
+    case Lexer::T_XOR_EQ:
+        return true;
+
+    default:
+        return false;
+    }
+}
+} // anonymous namespace
+
 int Lexer::lex()
 {
     const int previousTokenKind = _tokenKind;
@@ -193,9 +241,15 @@ int Lexer::lex()
     switch (_tokenKind) {
     case T_LBRACE:
     case T_SEMICOLON:
+    case T_QUESTION:
     case T_COLON:
+    case T_TILDE:
         _delimited = true;
         break;
+    default:
+        if (isBinop(_tokenKind))
+            _delimited = true;
+        break;
 
     case T_IF:
     case T_FOR:
@@ -275,6 +329,80 @@ QChar Lexer::decodeUnicodeEscapeCharacter(bool *ok)
     return QChar();
 }
 
+QChar Lexer::decodeHexEscapeCharacter(bool *ok)
+{
+    // Decodes the "HH" of a "\xHH" escape. Expects _char to be the 'x'; when
+    // two hex digits do not follow, nothing is consumed. *ok (may be null) reports success.
+    if (isHexDigit(_codePtr[0]) && isHexDigit(_codePtr[1])) {
+        scanChar(); // step from 'x' onto the first digit
+        const QChar c1 = _char;
+        scanChar();
+        const QChar c2 = _char;
+        scanChar(); // move past the second digit
+
+        if (ok)
+            *ok = true;
+        return convertHex(c1, c2);
+    }
+
+    if (ok) // the success path tolerates a null ok, so the failure path must too
+        *ok = false;
+    return QChar();
+}
+
+static inline bool isIdentifierStart(QChar ch)
+{
+    // An identifier may start with an ASCII letter, '$' or '_', or with any
+    // character whose Unicode category is one of those listed in the switch.
+    const ushort code = ch.unicode();
+    const bool asciiLetter = (code >= 'a' && code <= 'z') || (code >= 'A' && code <= 'Z');
+    if (asciiLetter || code == '$' || code == '_')
+        return true; // fast path for ascii
+
+    switch (ch.category()) {
+    case QChar::Letter_Uppercase:
+    case QChar::Letter_Lowercase:
+    case QChar::Letter_Titlecase:
+    case QChar::Letter_Modifier:
+    case QChar::Letter_Other:
+    case QChar::Number_Letter:
+        return true;
+    default:
+        return false;
+    }
+}
+
+static bool isIdentifierPart(QChar ch)
+{
+    // An identifier may continue with ASCII letters and digits, '$', '_', ZWNJ
+    // and ZWJ, or any character whose Unicode category is listed in the switch.
+    const ushort code = ch.unicode();
+    const bool asciiAlnum = (code >= 'a' && code <= 'z') || (code >= 'A' && code <= 'Z')
+            || (code >= '0' && code <= '9');
+    const bool joiner = code == 0x200c /* ZWNJ */ || code == 0x200d /* ZWJ */;
+    if (asciiAlnum || code == '$' || code == '_' || joiner)
+        return true; // fast path: no category lookup needed
+
+    switch (ch.category()) {
+    case QChar::Letter_Uppercase:
+    case QChar::Letter_Lowercase:
+    case QChar::Letter_Titlecase:
+    case QChar::Letter_Modifier:
+    case QChar::Letter_Other:
+
+    case QChar::Number_DecimalDigit:
+    case QChar::Number_Letter:
+
+    case QChar::Mark_NonSpacing:
+    case QChar::Mark_SpacingCombining:
+
+    case QChar::Punctuation_Connector:
+        return true;
+    default:
+        return false;
+    }
+}
+
 int Lexer::scanToken()
 {
     if (_stackToken != -1) {
@@ -310,7 +438,7 @@ again:
     _tokenStartPtr = _codePtr - 1;
     _tokenLine = _currentLineNumber;
 
-    if (_char.isNull())
+    if (_codePtr > _endPtr)
         return EOF_SYMBOL;
 
     const QChar ch = _char;
@@ -395,7 +523,7 @@ again:
     case '/':
         if (_char == QLatin1Char('*')) {
             scanChar();
-            while (!_char.isNull()) {
+            while (_codePtr <= _endPtr) {
                 if (_char == QLatin1Char('*')) {
                     scanChar();
                     if (_char == QLatin1Char('/')) {
@@ -413,7 +541,7 @@ again:
                 }
             }
         } else if (_char == QLatin1Char('/')) {
-            while (!_char.isNull() && !isLineTerminator()) {
+            while (_codePtr <= _endPtr && !isLineTerminator()) {
                 scanChar();
             }
             if (_engine) {
@@ -555,8 +683,14 @@ again:
         const QChar *startCode = _codePtr;
 
         if (_engine) {
-            while (!_char.isNull()) {
-                if (isLineTerminator() || _char == QLatin1Char('\\')) {
+            while (_codePtr <= _endPtr) {
+                if (isLineTerminator()) {
+                    if (qmlMode())
+                        break;
+                    _errorCode = IllegalCharacter;
+                    _errorMessage = QCoreApplication::translate("QQmlParser", "Stray newline in string literal");
+                    return T_ERROR;
+                } else if (_char == QLatin1Char('\\')) {
                     break;
                 } else if (_char == quote) {
                     _tokenSpell = _engine->midRef(startCode - _code.unicode() - 1, _codePtr - startCode);
@@ -574,7 +708,7 @@ again:
         while (startCode != _codePtr - 1) 
             _tokenText += *startCode++;
 
-        while (! _char.isNull()) {
+        while (_codePtr <= _endPtr) {
             if (unsigned sequenceLength = isLineTerminatorSequence()) {
                 multilineStringLiteral = true;
                 _tokenText += _char;
@@ -592,32 +726,29 @@ again:
                 scanChar();
 
                 QChar u;
-                bool ok = false;
 
                 switch (_char.unicode()) {
                 // unicode escape sequence
-                case 'u':
+                case 'u': {
+                    bool ok = false;
                     u = decodeUnicodeEscapeCharacter(&ok);
-                    if (! ok)
-                        u = _char;
-                    break;
+                    if (! ok) {
+                        _errorCode = IllegalUnicodeEscapeSequence;
+                        _errorMessage = QCoreApplication::translate("QQmlParser", "Illegal unicode escape sequence");
+                        return T_ERROR;
+                    }
+                } break;
 
                 // hex escape sequence
-                case 'x':
-                    if (isHexDigit(_codePtr[0]) && isHexDigit(_codePtr[1])) {
-                        scanChar();
-
-                        const QChar c1 = _char;
-                        scanChar();
-
-                        const QChar c2 = _char;
-                        scanChar();
-
-                        u = convertHex(c1, c2);
-                    } else {
-                        u = _char;
+                case 'x': {
+                    bool ok = false;
+                    u = decodeHexEscapeCharacter(&ok);
+                    if (!ok) {
+                        _errorCode = IllegalHexadecimalEscapeSequence;
+                        _errorMessage = QCoreApplication::translate("QQmlParser", "Illegal hexadecimal escape sequence");
+                        return T_ERROR;
                     }
-                    break;
+                } break;
 
                 // single character escape sequence
                 case '\\': u = QLatin1Char('\\'); scanChar(); break;
@@ -631,32 +762,31 @@ again:
                 case 'v':  u = QLatin1Char('\v'); scanChar(); break;
 
                 case '0':
-                    if (! _codePtr[1].isDigit()) {
+                    if (! _codePtr->isDigit()) {
                         scanChar();
                         u = QLatin1Char('\0');
-                    } else {
-                        // ### parse deprecated octal escape sequence ?
-                        u = _char;
+                        break;
                     }
-                    break;
+                    // fall through
+                case '1':
+                case '2':
+                case '3':
+                case '4':
+                case '5':
+                case '6':
+                case '7':
+                case '8':
+                case '9':
+                    _errorCode = IllegalEscapeSequence;
+                    _errorMessage = QCoreApplication::translate("QQmlParser", "Octal escape sequences are not allowed");
+                    return T_ERROR;
 
                 case '\r':
-                    if (isLineTerminatorSequence() == 2) {
-                        _tokenText += QLatin1Char('\r');
-                        u = QLatin1Char('\n');
-                    } else {
-                        u = QLatin1Char('\r');
-                    }
-                    scanChar();
-                    break;
-
                 case '\n':
                 case 0x2028u:
                 case 0x2029u:
-                    u = _char;
                     scanChar();
-                    break;
-
+                    continue;
 
                 default:
                     // non escape character
@@ -687,28 +817,28 @@ again:
     case '9':
         return scanNumber(ch);
 
-    default:
-        if (ch.isLetter() || ch == QLatin1Char('$') || ch == QLatin1Char('_') || (ch == QLatin1Char('\\') && _char == QLatin1Char('u'))) {
-            bool identifierWithEscapeChars = false;
-            if (ch == QLatin1Char('\\')) {
-                identifierWithEscapeChars = true;
+    default: {
+        QChar c = ch;
+        bool identifierWithEscapeChars = false;
+        if (c == QLatin1Char('\\') && _char == QLatin1Char('u')) {
+            identifierWithEscapeChars = true;
+            bool ok = false;
+            c = decodeUnicodeEscapeCharacter(&ok);
+            if (! ok) {
+                _errorCode = IllegalUnicodeEscapeSequence;
+                _errorMessage = QCoreApplication::translate("QQmlParser", "Illegal unicode escape sequence");
+                return T_ERROR;
+            }
+        }
+        if (isIdentifierStart(c)) {
+            if (identifierWithEscapeChars) {
                 _tokenText.resize(0);
-                bool ok = false;
-                _tokenText += decodeUnicodeEscapeCharacter(&ok);
+                _tokenText += c;
                 _validTokenText = true;
-                if (! ok) {
-                    _errorCode = IllegalUnicodeEscapeSequence;
-                    _errorMessage = QCoreApplication::translate("QQmlParser", "Illegal unicode escape sequence");
-                    return T_ERROR;
-                }
             }
             while (true) {
-                if (_char.isLetterOrNumber() || _char == QLatin1Char('$') || _char == QLatin1Char('_')) {
-                    if (identifierWithEscapeChars)
-                        _tokenText += _char;
-
-                    scanChar();
-                } else if (_char == QLatin1Char('\\') && _codePtr[0] == QLatin1Char('u')) {
+                c = _char;
+                if (_char == QLatin1Char('\\') && _codePtr[0] == QLatin1Char('u')) {
                     if (! identifierWithEscapeChars) {
                         identifierWithEscapeChars = true;
                         _tokenText.resize(0);
@@ -718,31 +848,41 @@ again:
 
                     scanChar(); // skip '\\'
                     bool ok = false;
-                    _tokenText += decodeUnicodeEscapeCharacter(&ok);
+                    c = decodeUnicodeEscapeCharacter(&ok);
                     if (! ok) {
                         _errorCode = IllegalUnicodeEscapeSequence;
                         _errorMessage = QCoreApplication::translate("QQmlParser", "Illegal unicode escape sequence");
                         return T_ERROR;
                     }
-                } else {
-                    _tokenLength = _codePtr - _tokenStartPtr - 1;
+                    if (isIdentifierPart(c))
+                        _tokenText += c;
+                    continue;
+                } else if (isIdentifierPart(c)) {
+                    if (identifierWithEscapeChars)
+                        _tokenText += c;
 
-                    int kind = T_IDENTIFIER;
+                    scanChar();
+                    continue;
+                }
 
-                    if (! identifierWithEscapeChars)
-                        kind = classify(_tokenStartPtr, _tokenLength, _qmlMode);
+                _tokenLength = _codePtr - _tokenStartPtr - 1;
 
-                    if (_engine) {
-                        if (kind == T_IDENTIFIER && identifierWithEscapeChars)
-                            _tokenSpell = _engine->newStringRef(_tokenText);
-                        else
-                            _tokenSpell = _engine->midRef(_tokenStartPtr - _code.unicode(), _tokenLength);
-                    }
+                int kind = T_IDENTIFIER;
+
+                if (! identifierWithEscapeChars)
+                    kind = classify(_tokenStartPtr, _tokenLength, _qmlMode);
 
-                    return kind;
+                if (_engine) {
+                    if (kind == T_IDENTIFIER && identifierWithEscapeChars)
+                        _tokenSpell = _engine->newStringRef(_tokenText);
+                    else
+                        _tokenSpell = _engine->midRef(_tokenStartPtr - _code.unicode(), _tokenLength);
                 }
+
+                return kind;
             }
         }
+        }
 
         break;
     }
@@ -753,12 +893,14 @@ again:
 int Lexer::scanNumber(QChar ch)
 {
     if (ch != QLatin1Char('0')) {
-        double integer = ch.unicode() - '0';
+        QByteArray buf;
+        buf.reserve(64);
+        buf += ch.toLatin1();
 
         QChar n = _char;
         const QChar *code = _codePtr;
         while (n.isDigit()) {
-            integer = integer * 10 + (n.unicode() - '0');
+            buf += n.toLatin1();
             n = *code++;
         }
 
@@ -767,17 +909,23 @@ int Lexer::scanNumber(QChar ch)
                 _codePtr = code - 1;
                 scanChar();
             }
-            _tokenValue = integer;
+            buf.append('\0');
+            _tokenValue = strtod(buf.constData(), 0);
             return T_NUMERIC_LITERAL;
         }
+    } else if (_char.isDigit() && !qmlMode()) {
+        _errorCode = IllegalCharacter;
+        _errorMessage = QCoreApplication::translate("QQmlParser", "Decimal numbers can't start with '0'");
+        return T_ERROR;
     }
 
     QVarLengthArray<char,32> chars;
     chars.append(ch.unicode());
 
     if (ch == QLatin1Char('0') && (_char == QLatin1Char('x') || _char == QLatin1Char('X'))) {
-        // parse hex integer literal
+        ch = _char; // remember the x or X to use it in the error message below.
 
+        // parse hex integer literal
         chars.append(_char.unicode());
         scanChar(); // consume `x'
 
@@ -786,6 +934,12 @@ int Lexer::scanNumber(QChar ch)
             scanChar();
         }
 
+        if (chars.size() < 3) {
+            _errorCode = IllegalHexNumber;
+            _errorMessage = QCoreApplication::translate("QQmlParser", "At least one hexadecimal digit is required after '0%1'").arg(ch);
+            return T_ERROR;
+        }
+
         _tokenValue = integerFromString(chars.constData(), chars.size(), 16);
         return T_NUMERIC_LITERAL;
     }
@@ -900,7 +1054,7 @@ bool Lexer::scanRegExp(RegExpBodyPrefix prefix)
             _tokenText += _char;
             scanChar();
 
-            if (_char.isNull() || isLineTerminator()) {
+            if (_codePtr > _endPtr || isLineTerminator()) {
                 _errorMessage = QCoreApplication::translate("QQmlParser", "Unterminated regular expression backslash sequence");
                 return false;
             }
@@ -914,7 +1068,7 @@ bool Lexer::scanRegExp(RegExpBodyPrefix prefix)
             _tokenText += _char;
             scanChar();
 
-            while (! _char.isNull() && ! isLineTerminator()) {
+            while (_codePtr <= _endPtr && ! isLineTerminator()) {
                 if (_char == QLatin1Char(']'))
                     break;
                 else if (_char == QLatin1Char('\\')) {
@@ -922,7 +1076,7 @@ bool Lexer::scanRegExp(RegExpBodyPrefix prefix)
                     _tokenText += _char;
                     scanChar();
 
-                    if (_char.isNull() || isLineTerminator()) {
+                    if (_codePtr > _endPtr || isLineTerminator()) {
                         _errorMessage = QCoreApplication::translate("QQmlParser", "Unterminated regular expression backslash sequence");
                         return false;
                     }
@@ -945,7 +1099,7 @@ bool Lexer::scanRegExp(RegExpBodyPrefix prefix)
             break;
 
         default:
-            if (_char.isNull() || isLineTerminator()) {
+            if (_codePtr > _endPtr || isLineTerminator()) {
                 _errorMessage = QCoreApplication::translate("QQmlParser", "Unterminated regular expression literal");
                 return false;
             } else {
@@ -1172,5 +1326,3 @@ bool Lexer::scanDirectives(Directives *directives)
 
     return true;
 }
-
-#include "qqmljskeywords_p.h"