diff options
author | Nikolai Kosjar <nikolai.kosjar@digia.com> | 2014-02-25 13:44:11 -0300 |
---|---|---|
committer | Nikolai Kosjar <nikolai.kosjar@digia.com> | 2014-05-23 14:23:15 +0200 |
commit | 70122b3061ee3fbb07442beb0158edf849ceb98e (patch) | |
tree | e8c272ec1df948acd27378a44764dd683ab5b426 /src/libs/3rdparty/cplusplus/Token.h | |
parent | 4fefb1ca2a5270752acf00d586393f472fb1b9a3 (diff) |
C++: Support for UTF-8 in the lexer
This will save us toLatin1() conversions in CppTools (which already
holds UTF-8 encoded QByteArrays) and thus loss of information (see
QTCREATORBUG-7356). It also gives us support for non-latin1 identifiers.
API-wise the following functions are added to Token. In follow-up
patches these will become handy in combination with QStrings.
utf16chars() - equivalent of bytes()
utf16charsBegin() - equivalent of bytesBegin()
utf16charsEnd() - equivalent of bytesEnd()
Next steps:
* Adapt functions from TranslationUnit. They should work with utf16
chars in order to calculate lines and columns correctly also for
UTF-8 multi-byte code points.
* Adapt the higher level clients:
* Cpp{Tools,Editor} should expect UTF-8 encoded Literals.
* Cpp{Tools,Editor}: When dealing with identifiers on the
QString/QTextDocument layer, code points
represented by two QChars need to be respected, too.
* Ensure Macro::offsets() and Document::MacroUse::{begin,end}() report
offsets usable in CppEditor/CppTools.
Addresses QTCREATORBUG-7356.
Change-Id: I0791b5236be8215d24fb8e38a1f7cb0d279454c0
Reviewed-by: Erik Verbruggen <erik.verbruggen@digia.com>
Diffstat (limited to 'src/libs/3rdparty/cplusplus/Token.h')
-rw-r--r-- | src/libs/3rdparty/cplusplus/Token.h | 20 |
1 files changed, 11 insertions, 9 deletions
diff --git a/src/libs/3rdparty/cplusplus/Token.h b/src/libs/3rdparty/cplusplus/Token.h index 02d7f5ebe9..ec10483852 100644 --- a/src/libs/3rdparty/cplusplus/Token.h +++ b/src/libs/3rdparty/cplusplus/Token.h @@ -285,7 +285,7 @@ enum Kind { class CPLUSPLUS_EXPORT Token { public: - Token() : flags(0), byteOffset(0), ptr(0) {} + Token() : flags(0), byteOffset(0), utf16charOffset(0), ptr(0) {} inline bool is(unsigned k) const { return f.kind == k; } inline bool isNot(unsigned k) const { return f.kind != k; } @@ -298,13 +298,14 @@ public: inline bool joined() const { return f.joined; } inline bool expanded() const { return f.expanded; } inline bool generated() const { return f.generated; } - inline unsigned bytes() const { return f.bytes; } - inline unsigned bytesBegin() const - { return byteOffset; } + inline unsigned bytes() const { return f.bytes; } + inline unsigned bytesBegin() const { return byteOffset; } + inline unsigned bytesEnd() const { return byteOffset + f.bytes; } - inline unsigned bytesEnd() const - { return byteOffset + f.bytes; } + inline unsigned utf16chars() const { return f.utf16chars; } + inline unsigned utf16charsBegin() const { return utf16charOffset; } + inline unsigned utf16charsEnd() const { return utf16charOffset + f.utf16chars; } inline bool isLiteral() const { return f.kind >= T_FIRST_LITERAL && f.kind <= T_LAST_LITERAL; } @@ -354,15 +355,17 @@ public: unsigned generated : 1; // Unused... unsigned pad : 3; - // The token length in bytes. + // The token length in bytes and UTF16 chars. unsigned bytes : 16; + unsigned utf16chars : 16; }; union { - unsigned flags; + unsigned long flags; Flags f; }; unsigned byteOffset; + unsigned utf16charOffset; union { void *ptr; @@ -393,5 +396,4 @@ struct LanguageFeatures } // namespace CPlusPlus - #endif // CPLUSPLUS_TOKEN_H |