aboutsummaryrefslogtreecommitdiffstats
path: root/src/qml/parser/qqmljslexer.cpp
diff options
context:
space:
mode:
authorLars Knoll <lars.knoll@qt.io>2018-02-08 13:42:50 +0100
committerLars Knoll <lars.knoll@qt.io>2018-02-09 07:55:13 +0000
commit0ba8764849eb44e62dd8ddc22d1335174ac5beb4 (patch)
tree57461dab5c93569a2289726d036943a41c0a73a5 /src/qml/parser/qqmljslexer.cpp
parent6059f6f2a485ca3dddfedc55536e4f62f349a990 (diff)
Properly parse identifiers containing chars outside the BMP
If the identifier contains characters outside the Unicode BMP, we need to correctly decode them from surrogate pairs or unicode escape sequences before testing whether they are valid characters for an identifier. Change-Id: I5d6ceaf27a353d3a708da86f08cfb9796eb8b1d3 Reviewed-by: Simon Hausmann <simon.hausmann@qt.io>
Diffstat (limited to 'src/qml/parser/qqmljslexer.cpp')
-rw-r--r--src/qml/parser/qqmljslexer.cpp117
1 files changed, 76 insertions, 41 deletions
diff --git a/src/qml/parser/qqmljslexer.cpp b/src/qml/parser/qqmljslexer.cpp
index a8c9ead80b..6a3bd6d887 100644
--- a/src/qml/parser/qqmljslexer.cpp
+++ b/src/qml/parser/qqmljslexer.cpp
@@ -315,12 +315,12 @@ int Lexer::lex()
return _tokenKind;
}
-QChar Lexer::decodeUnicodeEscapeCharacter(bool *ok)
+uint Lexer::decodeUnicodeEscapeCharacter(bool *ok)
{
Q_ASSERT(_char == QLatin1Char('u'));
scanChar(); // skip u
if (_codePtr + 4 <= _endPtr && isHexDigit(_char)) {
- ushort codePoint = 0;
+ uint codePoint = 0;
for (int i = 0; i < 4; ++i) {
int digit = hexDigit(_char);
if (digit < 0)
@@ -331,7 +331,7 @@ QChar Lexer::decodeUnicodeEscapeCharacter(bool *ok)
}
*ok = true;
- return QChar(codePoint);
+ return codePoint;
} else if (_codePtr < _endPtr && _char == QLatin1Char('{')) {
scanChar(); // skip '{'
uint codePoint = 0;
@@ -357,7 +357,7 @@ QChar Lexer::decodeUnicodeEscapeCharacter(bool *ok)
*ok = true;
- return QChar(codePoint);
+ return codePoint;
}
error:
@@ -365,7 +365,7 @@ QChar Lexer::decodeUnicodeEscapeCharacter(bool *ok)
_errorMessage = QCoreApplication::translate("QQmlParser", "Illegal unicode escape sequence");
*ok = false;
- return QChar();
+ return 0;
}
QChar Lexer::decodeHexEscapeCharacter(bool *ok)
@@ -389,15 +389,15 @@ QChar Lexer::decodeHexEscapeCharacter(bool *ok)
return QChar();
}
-static inline bool isIdentifierStart(QChar ch)
+static inline bool isIdentifierStart(uint ch)
{
// fast path for ascii
- if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') ||
- (ch.unicode() >= 'A' && ch.unicode() <= 'Z') ||
+ if ((ch >= 'a' && ch <= 'z') ||
+ (ch >= 'A' && ch <= 'Z') ||
ch == '$' || ch == '_')
return true;
- switch (ch.category()) {
+ switch (QChar::category(ch)) {
case QChar::Number_Letter:
case QChar::Letter_Uppercase:
case QChar::Letter_Lowercase:
@@ -411,17 +411,17 @@ static inline bool isIdentifierStart(QChar ch)
return false;
}
-static bool isIdentifierPart(QChar ch)
+static bool isIdentifierPart(uint ch)
{
// fast path for ascii
- if ((ch.unicode() >= 'a' && ch.unicode() <= 'z') ||
- (ch.unicode() >= 'A' && ch.unicode() <= 'Z') ||
- (ch.unicode() >= '0' && ch.unicode() <= '9') ||
+ if ((ch >= 'a' && ch <= 'z') ||
+ (ch >= 'A' && ch <= 'Z') ||
+ (ch >= '0' && ch <= '9') ||
ch == '$' || ch == '_' ||
- ch.unicode() == 0x200c /* ZWNJ */ || ch.unicode() == 0x200d /* ZWJ */)
+ ch == 0x200c /* ZWNJ */ || ch == 0x200d /* ZWJ */)
return true;
- switch (ch.category()) {
+ switch (QChar::category(ch)) {
case QChar::Mark_NonSpacing:
case QChar::Mark_SpacingCombining:
@@ -731,9 +731,16 @@ again:
// unicode escape sequence
case 'u': {
bool ok = false;
- u = decodeUnicodeEscapeCharacter(&ok);
+ uint codePoint = decodeUnicodeEscapeCharacter(&ok);
if (!ok)
return T_ERROR;
+ if (QChar::requiresSurrogates(codePoint)) {
+ // need to use a surrogate pair
+ _tokenText += QChar(QChar::highSurrogate(codePoint));
+ u = QChar::lowSurrogate(codePoint);
+ } else {
+ u = codePoint;
+ }
} break;
// hex escape sequence
@@ -815,9 +822,12 @@ again:
return scanNumber(ch);
default: {
- QChar c = ch;
+ uint c = ch.unicode();
bool identifierWithEscapeChars = false;
- if (c == QLatin1Char('\\') && _char == QLatin1Char('u')) {
+ if (QChar::isHighSurrogate(c) && QChar::isLowSurrogate(_char.unicode())) {
+ c = QChar::surrogateToUcs4(ushort(c), _char.unicode());
+ scanChar();
+ } else if (c == '\\' && _char == QLatin1Char('u')) {
identifierWithEscapeChars = true;
bool ok = false;
c = decodeUnicodeEscapeCharacter(&ok);
@@ -827,13 +837,21 @@ again:
if (isIdentifierStart(c)) {
if (identifierWithEscapeChars) {
_tokenText.resize(0);
- _tokenText += c;
+ if (QChar::requiresSurrogates(c)) {
+ _tokenText += QChar(QChar::highSurrogate(c));
+ _tokenText += QChar(QChar::lowSurrogate(c));
+ } else {
+ _tokenText += QChar(c);
+ }
_validTokenText = true;
}
- while (true) {
- c = _char;
- if (_char == QLatin1Char('\\') && _codePtr[0] == QLatin1Char('u')) {
- if (! identifierWithEscapeChars) {
+ while (_codePtr <= _endPtr) {
+ c = _char.unicode();
+ if (QChar::isHighSurrogate(c) && QChar::isLowSurrogate(_codePtr->unicode())) {
+ scanChar();
+ c = QChar::surrogateToUcs4(ushort(c), _char.unicode());
+ } else if (_char == QLatin1Char('\\') && _codePtr[0] == QLatin1Char('u')) {
+ if (!identifierWithEscapeChars) {
identifierWithEscapeChars = true;
_tokenText.resize(0);
_tokenText.insert(0, _tokenStartPtr, _codePtr - _tokenStartPtr - 1);
@@ -845,33 +863,50 @@ again:
c = decodeUnicodeEscapeCharacter(&ok);
if (!ok)
return T_ERROR;
- if (isIdentifierPart(c))
- _tokenText += c;
- continue;
- } else if (isIdentifierPart(c)) {
- if (identifierWithEscapeChars)
- _tokenText += c;
- scanChar();
+ if (!isIdentifierPart(c))
+ break;
+
+ if (identifierWithEscapeChars) {
+ if (QChar::requiresSurrogates(c)) {
+ _tokenText += QChar(QChar::highSurrogate(c));
+ _tokenText += QChar(QChar::lowSurrogate(c));
+ } else {
+ _tokenText += QChar(c);
+ }
+ }
continue;
}
- _tokenLength = _codePtr - _tokenStartPtr - 1;
+ if (!isIdentifierPart(c))
+ break;
- int kind = T_IDENTIFIER;
+ if (identifierWithEscapeChars) {
+ if (QChar::requiresSurrogates(c)) {
+ _tokenText += QChar(QChar::highSurrogate(c));
+ _tokenText += QChar(QChar::lowSurrogate(c));
+ } else {
+ _tokenText += QChar(c);
+ }
+ }
+ scanChar();
+ }
- if (! identifierWithEscapeChars)
- kind = classify(_tokenStartPtr, _tokenLength, _qmlMode);
+ _tokenLength = _codePtr - _tokenStartPtr - 1;
- if (_engine) {
- if (kind == T_IDENTIFIER && identifierWithEscapeChars)
- _tokenSpell = _engine->newStringRef(_tokenText);
- else
- _tokenSpell = _engine->midRef(_tokenStartPtr - _code.unicode(), _tokenLength);
- }
+ int kind = T_IDENTIFIER;
+
+ if (!identifierWithEscapeChars)
+ kind = classify(_tokenStartPtr, _tokenLength, _qmlMode);
- return kind;
+ if (_engine) {
+ if (kind == T_IDENTIFIER && identifierWithEscapeChars)
+ _tokenSpell = _engine->newStringRef(_tokenText);
+ else
+ _tokenSpell = _engine->midRef(_tokenStartPtr - _code.unicode(), _tokenLength);
}
+
+ return kind;
}
}