From e7d1a74d994445749275e8e5cb88b0a21938bb09 Mon Sep 17 00:00:00 2001 From: Roberto Raggi Date: Tue, 6 Dec 2011 14:09:22 +0100 Subject: Improve parsing of escape characters and regexp literals. That is, in QML/JS you can escape characters in identifiers, e.g. var c\u0061se = 25 declares a variable called `case' with value 25. In such cases qmlmin needs to preserve the escape sequence in the declaration. Also, fix possible errors when pasting keywords after regexp literals. The minifier needs to preserve the whitespace character after the regexp delimiter, e.g. /x/instanceof blah without the white space after the regexp, the `i' of `instanceof' is parsed as a regexp flag. Change-Id: I5f426ac62949e34d092d4fdb0a41243de8ff2236 Reviewed-by: Kent Hansen --- src/declarative/qml/parser/qdeclarativejslexer.cpp | 5 +++ src/declarative/qml/parser/qdeclarativejslexer_p.h | 7 +++- tests/auto/declarative/qmlmin/tst_qmlmin.cpp | 1 + tools/qmlmin/main.cpp | 47 ++++++++++++++++------ 4 files changed, 45 insertions(+), 15 deletions(-) diff --git a/src/declarative/qml/parser/qdeclarativejslexer.cpp b/src/declarative/qml/parser/qdeclarativejslexer.cpp index 9b9af3868d..5e008d27f1 100644 --- a/src/declarative/qml/parser/qdeclarativejslexer.cpp +++ b/src/declarative/qml/parser/qdeclarativejslexer.cpp @@ -113,6 +113,11 @@ Lexer::Lexer(Engine *engine) engine->setLexer(this); } +bool Lexer::qmlMode() const +{ + return _qmlMode; +} + QString Lexer::code() const { return _code; diff --git a/src/declarative/qml/parser/qdeclarativejslexer_p.h b/src/declarative/qml/parser/qdeclarativejslexer_p.h index a0f02afa60..31eeff8181 100644 --- a/src/declarative/qml/parser/qdeclarativejslexer_p.h +++ b/src/declarative/qml/parser/qdeclarativejslexer_p.h @@ -145,6 +145,8 @@ public: public: Lexer(Engine *engine); + bool qmlMode() const; + QString code() const; void setCode(const QString &code, int lineno, bool qmlMode = true); @@ -183,12 +185,13 @@ public: BalancedParentheses }; +protected: + int 
classify(const QChar *s, int n, bool qmlMode); + private: inline void scanChar(); int scanToken(); - int classify(const QChar *s, int n, bool qmlMode); - bool isLineTerminator() const; static bool isIdentLetter(QChar c); static bool isDecimalDigit(ushort c); diff --git a/tests/auto/declarative/qmlmin/tst_qmlmin.cpp b/tests/auto/declarative/qmlmin/tst_qmlmin.cpp index d6146d121e..2726a637ec 100644 --- a/tests/auto/declarative/qmlmin/tst_qmlmin.cpp +++ b/tests/auto/declarative/qmlmin/tst_qmlmin.cpp @@ -107,6 +107,7 @@ void tst_qmlmin::initTestCase() invalidFiles << "tests/auto/declarative/qdeclarativeecmascript/data/qtbug_22843.js"; invalidFiles << "tests/auto/declarative/qdeclarativeecmascript/data/qtbug_22843.library.js"; invalidFiles << "tests/auto/declarative/qdeclarativeworkerscript/data/script_error_onLoad.js"; + invalidFiles << "tests/auto/declarative/parserstress/tests/ecma_3/Unicode/regress-352044-02-n.js"; } QStringList tst_qmlmin::findFiles(const QDir &d) diff --git a/tools/qmlmin/main.cpp b/tools/qmlmin/main.cpp index ebfc5851dd..b1aeab6168 100644 --- a/tools/qmlmin/main.cpp +++ b/tools/qmlmin/main.cpp @@ -182,6 +182,13 @@ protected: *restOfRegExp += QLatin1Char('i'); if (flags & Multiline) *restOfRegExp += QLatin1Char('m'); + + if (regExpFlags() == 0) { + // Add an extra space after the regexp literal delimiter (aka '/'). + // This will avoid possible problems when pasting tokens like `instanceof' + // after the regexp literal. 
+ *restOfRegExp += QLatin1Char(' '); + } return true; } }; @@ -201,6 +208,7 @@ public: protected: bool parse(int startToken); + void escape(const QChar &ch, QString *out); }; Minify::Minify() @@ -213,6 +221,20 @@ QString Minify::minifiedCode() const return _minifiedCode; } +void Minify::escape(const QChar &ch, QString *out) +{ + out->append(QLatin1String("\\u")); + const QString hx = QString::number(ch.unicode(), 16); + switch (hx.length()) { + case 1: out->append(QLatin1String("000")); break; + case 2: out->append(QLatin1String("00")); break; + case 3: out->append(QLatin1String("0")); break; + case 4: break; + default: Q_ASSERT(!"unreachable"); + } + out->append(hx); +} + bool Minify::parse(int startToken) { int yyaction = 0; @@ -288,25 +310,24 @@ bool Minify::parse(int startToken) _minifiedCode += QLatin1Char('0'); } else if (yytoken == T_IDENTIFIER) { + QString identifier = yytokentext; + + if (classify(identifier.constData(), identifier.size(), qmlMode()) != T_IDENTIFIER) { + // the unescaped identifier is a keyword. In this case just replace + // the last character of the identifier with its escape sequence. + const QChar ch = identifier.at(identifier.length() - 1); + identifier.chop(1); + escape(ch, &identifier); + } + if (isIdentChar(lastChar)) _minifiedCode += QLatin1Char(' '); - foreach (const QChar &ch, yytokentext) { + foreach (const QChar &ch, identifier) { if (isIdentChar(ch)) _minifiedCode += ch; else { - _minifiedCode += QLatin1String("\\u"); - const QString hx = QString::number(ch.unicode(), 16); - switch (hx.length()) { - case 1: _minifiedCode += QLatin1String("000"); break; - case 2: _minifiedCode += QLatin1String("00"); break; - case 3: _minifiedCode += QLatin1String("0"); break; - case 4: break; - default: - std::cerr << "qmlmin: invalid unicode sequence" << std::endl; - return false; - } - _minifiedCode += hx; + escape(ch, &_minifiedCode); } } -- cgit v1.2.3