From b555f45c10a050be9410f9dc6286a44753cc7d98 Mon Sep 17 00:00:00 2001 From: Lars Knoll Date: Tue, 6 Feb 2018 00:04:27 +0100 Subject: Implement parsing of ECMAScript 6 Unicode escape sequences ECMAScript 6 added the \u{XXXX} syntax to encode arbitrary Unicode code points. Support this properly in our lexer. One issue currently is that codepoints outside the BMP will not yet be handled correctly. Change-Id: Id46f9ec6fdbb264a5a919d84a16857afc9e8ca6e Reviewed-by: Simon Hausmann --- src/qml/parser/qqmljslexer.cpp | 56 +++++++++++++++++++++++++++--------------- 1 file changed, 36 insertions(+), 20 deletions(-) (limited to 'src/qml/parser/qqmljslexer.cpp') diff --git a/src/qml/parser/qqmljslexer.cpp b/src/qml/parser/qqmljslexer.cpp index a3382fa92e..a8c9ead80b 100644 --- a/src/qml/parser/qqmljslexer.cpp +++ b/src/qml/parser/qqmljslexer.cpp @@ -315,36 +315,52 @@ int Lexer::lex() return _tokenKind; } -bool Lexer::isUnicodeEscapeSequence(const QChar *chars) -{ - if (isHexDigit(chars[0]) && isHexDigit(chars[1]) && isHexDigit(chars[2]) && isHexDigit(chars[3])) - return true; - - return false; -} - QChar Lexer::decodeUnicodeEscapeCharacter(bool *ok) { - if (_char == QLatin1Char('u') && isUnicodeEscapeSequence(&_codePtr[0])) { - scanChar(); // skip u + Q_ASSERT(_char == QLatin1Char('u')); + scanChar(); // skip u + if (_codePtr + 4 <= _endPtr && isHexDigit(_char)) { + ushort codePoint = 0; + for (int i = 0; i < 4; ++i) { + int digit = hexDigit(_char); + if (digit < 0) + goto error; + codePoint *= 16; + codePoint += digit; + scanChar(); + } - const QChar c1 = _char; - scanChar(); + *ok = true; + return QChar(codePoint); + } else if (_codePtr < _endPtr && _char == QLatin1Char('{')) { + scanChar(); // skip '{' + uint codePoint = 0; + if (!isHexDigit(_char)) + // need at least one hex digit + goto error; - const QChar c2 = _char; - scanChar(); + while (_codePtr <= _endPtr) { + int digit = hexDigit(_char); + if (digit < 0) + break; + codePoint *= 16; + codePoint += digit; + if (codePoint > 0x10ffff) + goto error; + scanChar(); + } - const QChar c3 = _char; - scanChar(); + if (_char != QLatin1Char('}')) + goto error; - const QChar c4 = _char; - scanChar(); + scanChar(); // skip '}' - *ok = true; - return convertUnicode(c1, c2, c3, c4); + *ok = true; + return QChar(codePoint); } + error: _errorCode = IllegalUnicodeEscapeSequence; _errorMessage = QCoreApplication::translate("QQmlParser", "Illegal unicode escape sequence"); -- cgit v1.2.3