aboutsummaryrefslogtreecommitdiffstats
path: root/src/libs/3rdparty/cplusplus/Lexer.h
diff options
context:
space:
mode:
authorNikolai Kosjar <nikolai.kosjar@digia.com>2014-05-09 10:04:13 -0400
committerNikolai Kosjar <nikolai.kosjar@digia.com>2014-05-23 14:34:01 +0200
commitc6358e5d380c18f3ebff148a095ddf3a9d6b266c (patch)
tree84fc2fa9919e2d57720ae3944e2d3a94b6c28c68 /src/libs/3rdparty/cplusplus/Lexer.h
parentbb7da966b801a2884cd7cf47f640bf7ac7d775df (diff)
C++: Add utf16 indices to Macro and Document::MacroUse
In most cases we need to work with the utf16 indices. Only in cppfindreferences is the byte interface still needed, since there we read in files and work on a QByteArray to save memory. Change-Id: I6ef6a93fc1875a8c9a305c075d51a9ca034c41bb Reviewed-by: Erik Verbruggen <erik.verbruggen@digia.com>
Diffstat (limited to 'src/libs/3rdparty/cplusplus/Lexer.h')
-rw-r--r--src/libs/3rdparty/cplusplus/Lexer.h40
1 files changed, 23 insertions, 17 deletions
diff --git a/src/libs/3rdparty/cplusplus/Lexer.h b/src/libs/3rdparty/cplusplus/Lexer.h
index 8d63d2ba1d..78396a0e60 100644
--- a/src/libs/3rdparty/cplusplus/Lexer.h
+++ b/src/libs/3rdparty/cplusplus/Lexer.h
@@ -61,6 +61,28 @@ public:
LanguageFeatures languageFeatures() const { return _languageFeatures; }
void setLanguageFeatures(LanguageFeatures features) { _languageFeatures = features; }
+public:
+ static void yyinp_utf8(const char *&currentSourceChar, unsigned char &yychar, // in/out: read position and the byte at that position
+ unsigned &utf16charCounter) // in/out: incremented once or twice per call
+ { // Advances the read position past one UTF-8 code point and reloads yychar from the new position.
+ ++utf16charCounter; // every code point accounts for at least one UTF-16 code unit
+
+ // Process multi-byte UTF-8 code point (non-ASCII); yychar is treated as its lead byte
+ if (CPLUSPLUS_UNLIKELY(isByteOfMultiByteCodePoint(yychar))) {
+ unsigned trailingBytesCurrentCodePoint = 1;
+ for (unsigned char c = yychar << 2; isByteOfMultiByteCodePoint(c); c <<= 1) // probes successive bits of the lead byte with the same helper (presumably a high-bit test -- confirm); each hit counts one more trailing byte
+ ++trailingBytesCurrentCodePoint;
+ // Code points >= 0x00010000 are represented by two UTF-16 code units
+ if (trailingBytesCurrentCodePoint >= 3)
+ ++utf16charCounter;
+ yychar = *(currentSourceChar += trailingBytesCurrentCodePoint + 1); // skip lead byte plus trailing bytes; NOTE(review): no end-of-buffer check here -- confirm callers guarantee well-formed, terminated input
+
+ // Process single-byte UTF-8 code point (ASCII)
+ } else {
+ yychar = *++currentSourceChar;
+ }
+ }
+
private:
void pushLineStartOffset();
void scan_helper(Token *tok);
@@ -83,23 +105,7 @@ private:
void yyinp()
{
- ++_currentCharUtf16;
-
- // Process multi-byte UTF-8 code point (non-latin1)
- if (CPLUSPLUS_UNLIKELY(isByteOfMultiByteCodePoint(_yychar))) {
- unsigned trailingBytesCurrentCodePoint = 1;
- for (unsigned char c = _yychar << 2; isByteOfMultiByteCodePoint(c); c <<= 1)
- ++trailingBytesCurrentCodePoint;
- // Code points >= 0x00010000 are represented by two UTF16 code units
- if (trailingBytesCurrentCodePoint >= 3)
- ++_currentCharUtf16;
- _yychar = *(_currentChar += trailingBytesCurrentCodePoint + 1);
-
- // Process single-byte UTF-8 code point (latin1)
- } else {
- _yychar = *++_currentChar;
- }
-
+ yyinp_utf8(_currentChar, _yychar, _currentCharUtf16);
if (CPLUSPLUS_UNLIKELY(_yychar == '\n'))
pushLineStartOffset();
}