diff options
author | Lars Knoll <lars.knoll@qt.io> | 2018-02-06 00:04:27 +0100 |
---|---|---|
committer | Lars Knoll <lars.knoll@qt.io> | 2018-02-08 14:00:48 +0000 |
commit | b555f45c10a050be9410f9dc6286a44753cc7d98 (patch) | |
tree | bcd065a0624abec293fa587cec8209dec895e4e7 /src/qml | |
parent | f7bc003aa12270da905e8f665bc0ad221a132f89 (diff) |
Implement parsing of ECMAScript 6 Unicode escape sequences
ECMAScript 6 added the \u{XXXX} syntax to encode arbitrary
Unicode code points. Support this properly in our lexer.
Known limitation: code points outside the BMP (above U+FFFF) are not
yet handled correctly, because the decoded value is still returned as
a single QChar, i.e. one UTF-16 code unit.
Change-Id: Id46f9ec6fdbb264a5a919d84a16857afc9e8ca6e
Reviewed-by: Simon Hausmann <simon.hausmann@qt.io>
Diffstat (limited to 'src/qml')
-rw-r--r-- | src/qml/parser/qqmljslexer.cpp | 56 | ||||
-rw-r--r-- | src/qml/parser/qqmljslexer_p.h | 1 |
2 files changed, 36 insertions, 21 deletions
diff --git a/src/qml/parser/qqmljslexer.cpp b/src/qml/parser/qqmljslexer.cpp index a3382fa92e..a8c9ead80b 100644 --- a/src/qml/parser/qqmljslexer.cpp +++ b/src/qml/parser/qqmljslexer.cpp @@ -315,36 +315,52 @@ int Lexer::lex() return _tokenKind; } -bool Lexer::isUnicodeEscapeSequence(const QChar *chars) -{ - if (isHexDigit(chars[0]) && isHexDigit(chars[1]) && isHexDigit(chars[2]) && isHexDigit(chars[3])) - return true; - - return false; -} - QChar Lexer::decodeUnicodeEscapeCharacter(bool *ok) { - if (_char == QLatin1Char('u') && isUnicodeEscapeSequence(&_codePtr[0])) { - scanChar(); // skip u + Q_ASSERT(_char == QLatin1Char('u')); + scanChar(); // skip u + if (_codePtr + 4 <= _endPtr && isHexDigit(_char)) { + ushort codePoint = 0; + for (int i = 0; i < 4; ++i) { + int digit = hexDigit(_char); + if (digit < 0) + goto error; + codePoint *= 16; + codePoint += digit; + scanChar(); + } - const QChar c1 = _char; - scanChar(); + *ok = true; + return QChar(codePoint); + } else if (_codePtr < _endPtr && _char == QLatin1Char('{')) { + scanChar(); // skip '{' + uint codePoint = 0; + if (!isHexDigit(_char)) + // need at least one hex digit + goto error; - const QChar c2 = _char; - scanChar(); + while (_codePtr <= _endPtr) { + int digit = hexDigit(_char); + if (digit < 0) + break; + codePoint *= 16; + codePoint += digit; + if (codePoint > 0x10ffff) + goto error; + scanChar(); + } - const QChar c3 = _char; - scanChar(); + if (_char != QLatin1Char('}')) + goto error; - const QChar c4 = _char; - scanChar(); + scanChar(); // skip '}' - *ok = true; - return convertUnicode(c1, c2, c3, c4); + *ok = true; + return QChar(codePoint); } + error: _errorCode = IllegalUnicodeEscapeSequence; _errorMessage = QCoreApplication::translate("QQmlParser", "Illegal unicode escape sequence"); diff --git a/src/qml/parser/qqmljslexer_p.h b/src/qml/parser/qqmljslexer_p.h index 902ac5ad7c..c25b74b12d 100644 --- a/src/qml/parser/qqmljslexer_p.h +++ b/src/qml/parser/qqmljslexer_p.h @@ -199,7 +199,6 @@ 
private: static bool isDecimalDigit(ushort c); static bool isHexDigit(QChar c); static bool isOctalDigit(ushort c); - static bool isUnicodeEscapeSequence(const QChar *chars); void syncProhibitAutomaticSemicolon(); QChar decodeUnicodeEscapeCharacter(bool *ok); |