diff options
author | Nikolai Kosjar <nikolai.kosjar@digia.com> | 2014-05-09 10:04:13 -0400 |
---|---|---|
committer | Nikolai Kosjar <nikolai.kosjar@digia.com> | 2014-05-23 14:34:01 +0200 |
commit | c6358e5d380c18f3ebff148a095ddf3a9d6b266c (patch) | |
tree | 84fc2fa9919e2d57720ae3944e2d3a94b6c28c68 /src/libs | |
parent | bb7da966b801a2884cd7cf47f640bf7ac7d775df (diff) |
C++: Add utf16 indices to Macro and Document::MacroUse
In most cases we need to work with the utf16 indices. Only in
cppfindreferences the byte interface is still needed since there we read
in files and work on a QByteArray to save memory.
Change-Id: I6ef6a93fc1875a8c9a305c075d51a9ca034c41bb
Reviewed-by: Erik Verbruggen <erik.verbruggen@digia.com>
Diffstat (limited to 'src/libs')
-rw-r--r-- | src/libs/3rdparty/cplusplus/Lexer.cpp | 15 | ||||
-rw-r--r-- | src/libs/3rdparty/cplusplus/Lexer.h | 40 | ||||
-rw-r--r-- | src/libs/3rdparty/cplusplus/TranslationUnit.cpp | 2 | ||||
-rw-r--r-- | src/libs/cplusplus/CppDocument.cpp | 42 | ||||
-rw-r--r-- | src/libs/cplusplus/CppDocument.h | 67 | ||||
-rw-r--r-- | src/libs/cplusplus/FastPreprocessor.cpp | 32 | ||||
-rw-r--r-- | src/libs/cplusplus/FastPreprocessor.h | 7 | ||||
-rw-r--r-- | src/libs/cplusplus/Macro.cpp | 3 | ||||
-rw-r--r-- | src/libs/cplusplus/Macro.h | 20 | ||||
-rw-r--r-- | src/libs/cplusplus/PPToken.cpp | 1 | ||||
-rw-r--r-- | src/libs/cplusplus/PreprocessorClient.h | 38 | ||||
-rw-r--r-- | src/libs/cplusplus/pp-engine.cpp | 103 | ||||
-rw-r--r-- | src/libs/cplusplus/pp-engine.h | 6 |
13 files changed, 241 insertions, 135 deletions
diff --git a/src/libs/3rdparty/cplusplus/Lexer.cpp b/src/libs/3rdparty/cplusplus/Lexer.cpp index 8357b9e5544..2da7d3a41ca 100644 --- a/src/libs/3rdparty/cplusplus/Lexer.cpp +++ b/src/libs/3rdparty/cplusplus/Lexer.cpp @@ -36,6 +36,21 @@ using namespace CPlusPlus; \sa Token */ +/*! + \fn static void Lexer::yyinp_utf8(const char *&currentSourceChar, unsigned char &yychar, unsigned &utf16charCounter) + + Process a single unicode code point in an UTF-8 encoded source. + + \a currentSourceChar points to the UTF-8 encoded source. + \a yychar must be the byte pointed to by \a currentSourceChar. + + Points \a currentSourceChar to the byte of the next code point + and modifies \a yychar to the value pointed by the updated + \a currentSourceChar. \a utf16charCounter will be incremented by + the number of UTF-16 code units that were needed for that code + point. +*/ + Lexer::Lexer(TranslationUnit *unit) : _translationUnit(unit), _control(unit->control()), diff --git a/src/libs/3rdparty/cplusplus/Lexer.h b/src/libs/3rdparty/cplusplus/Lexer.h index 8d63d2ba1db..78396a0e600 100644 --- a/src/libs/3rdparty/cplusplus/Lexer.h +++ b/src/libs/3rdparty/cplusplus/Lexer.h @@ -61,6 +61,28 @@ public: LanguageFeatures languageFeatures() const { return _languageFeatures; } void setLanguageFeatures(LanguageFeatures features) { _languageFeatures = features; } +public: + static void yyinp_utf8(const char *&currentSourceChar, unsigned char &yychar, + unsigned &utf16charCounter) + { + ++utf16charCounter; + + // Process multi-byte UTF-8 code point (non-latin1) + if (CPLUSPLUS_UNLIKELY(isByteOfMultiByteCodePoint(yychar))) { + unsigned trailingBytesCurrentCodePoint = 1; + for (unsigned char c = yychar << 2; isByteOfMultiByteCodePoint(c); c <<= 1) + ++trailingBytesCurrentCodePoint; + // Code points >= 0x00010000 are represented by two UTF-16 code units + if (trailingBytesCurrentCodePoint >= 3) + ++utf16charCounter; + yychar = *(currentSourceChar += trailingBytesCurrentCodePoint + 1); + + // Process single-byte 
UTF-8 code point (latin1) + } else { + yychar = *++currentSourceChar; + } + } + private: void pushLineStartOffset(); void scan_helper(Token *tok); @@ -83,23 +105,7 @@ private: void yyinp() { - ++_currentCharUtf16; - - // Process multi-byte UTF-8 code point (non-latin1) - if (CPLUSPLUS_UNLIKELY(isByteOfMultiByteCodePoint(_yychar))) { - unsigned trailingBytesCurrentCodePoint = 1; - for (unsigned char c = _yychar << 2; isByteOfMultiByteCodePoint(c); c <<= 1) - ++trailingBytesCurrentCodePoint; - // Code points >= 0x00010000 are represented by two UTF16 code units - if (trailingBytesCurrentCodePoint >= 3) - ++_currentCharUtf16; - _yychar = *(_currentChar += trailingBytesCurrentCodePoint + 1); - - // Process single-byte UTF-8 code point (latin1) - } else { - _yychar = *++_currentChar; - } - + yyinp_utf8(_currentChar, _yychar, _currentCharUtf16); if (CPLUSPLUS_UNLIKELY(_yychar == '\n')) pushLineStartOffset(); } diff --git a/src/libs/3rdparty/cplusplus/TranslationUnit.cpp b/src/libs/3rdparty/cplusplus/TranslationUnit.cpp index fe309ed82f8..9bc8edecfda 100644 --- a/src/libs/3rdparty/cplusplus/TranslationUnit.cpp +++ b/src/libs/3rdparty/cplusplus/TranslationUnit.cpp @@ -264,7 +264,7 @@ void TranslationUnit::tokenize() currentExpanded = true; const std::pair<unsigned, unsigned> &p = lineColumn[lineColumnIdx]; if (p.first) - _expandedLineColumn.insert(std::make_pair(tk.bytesBegin(), p)); + _expandedLineColumn.insert(std::make_pair(tk.utf16charsBegin(), p)); else currentGenerated = true; diff --git a/src/libs/cplusplus/CppDocument.cpp b/src/libs/cplusplus/CppDocument.cpp index 61d82d945ed..4fa078d6ee8 100644 --- a/src/libs/cplusplus/CppDocument.cpp +++ b/src/libs/cplusplus/CppDocument.cpp @@ -365,25 +365,31 @@ void Document::appendMacro(const Macro &macro) _definedMacros.append(macro); } -void Document::addMacroUse(const Macro &macro, unsigned offset, unsigned length, +void Document::addMacroUse(const Macro &macro, + unsigned bytesOffset, unsigned bytesLength, + unsigned utf16charsOffset, 
unsigned utf16charLength, unsigned beginLine, const QVector<MacroArgumentReference> &actuals) { - MacroUse use(macro, offset, offset + length, beginLine); + MacroUse use(macro, + bytesOffset, bytesOffset + bytesLength, + utf16charsOffset, utf16charsOffset + utf16charLength, + beginLine); foreach (const MacroArgumentReference &actual, actuals) { - const Block arg(actual.position(), actual.position() + actual.length()); - + const Block arg(0, 0, actual.utf16charsOffset(), + actual.utf16charsOffset() + actual.utf16charsLength()); use.addArgument(arg); } _macroUses.append(use); } -void Document::addUndefinedMacroUse(const QByteArray &name, unsigned offset) +void Document::addUndefinedMacroUse(const QByteArray &name, + unsigned bytesOffset, unsigned utf16charsOffset) { QByteArray copy(name.data(), name.size()); - UndefinedMacroUse use(copy, offset); + UndefinedMacroUse use(copy, bytesOffset, utf16charsOffset); _undefinedMacroUses.append(use); } @@ -548,19 +554,23 @@ const Macro *Document::findMacroDefinitionAt(unsigned line) const return 0; } -const Document::MacroUse *Document::findMacroUseAt(unsigned offset) const +const Document::MacroUse *Document::findMacroUseAt(unsigned utf16charsOffset) const { foreach (const Document::MacroUse &use, _macroUses) { - if (use.contains(offset) && (offset < use.begin() + use.macro().name().length())) + if (use.containsUtf16charOffset(utf16charsOffset) + && (utf16charsOffset < use.utf16charsBegin() + use.macro().nameToQString().size())) { return &use; + } } return 0; } -const Document::UndefinedMacroUse *Document::findUndefinedMacroUseAt(unsigned offset) const +const Document::UndefinedMacroUse *Document::findUndefinedMacroUseAt(unsigned utf16charsOffset) const { foreach (const Document::UndefinedMacroUse &use, _undefinedMacroUses) { - if (use.contains(offset) && (offset < use.begin() + use.name().length())) + if (use.containsUtf16charOffset(utf16charsOffset) + && (utf16charsOffset < use.utf16charsBegin() + + 
QString::fromUtf8(use.name(), use.name().size()).length())) return &use; } return 0; @@ -581,21 +591,21 @@ void Document::setUtf8Source(const QByteArray &source) _translationUnit->setSource(_source.constBegin(), _source.size()); } -void Document::startSkippingBlocks(unsigned start) +void Document::startSkippingBlocks(unsigned utf16charsOffset) { - _skippedBlocks.append(Block(start, 0)); + _skippedBlocks.append(Block(0, 0, utf16charsOffset, 0)); } -void Document::stopSkippingBlocks(unsigned stop) +void Document::stopSkippingBlocks(unsigned utf16charsOffset) { if (_skippedBlocks.isEmpty()) return; - unsigned start = _skippedBlocks.back().begin(); - if (start > stop) + unsigned start = _skippedBlocks.back().utf16charsBegin(); + if (start > utf16charsOffset) _skippedBlocks.removeLast(); // Ignore this block, it's invalid. else - _skippedBlocks.back() = Block(start, stop); + _skippedBlocks.back() = Block(0, 0, start, utf16charsOffset); } bool Document::isTokenized() const diff --git a/src/libs/cplusplus/CppDocument.h b/src/libs/cplusplus/CppDocument.h index 7d0c2aa902e..527f2ed82b8 100644 --- a/src/libs/cplusplus/CppDocument.h +++ b/src/libs/cplusplus/CppDocument.h @@ -77,10 +77,12 @@ public: QString fileName() const; void appendMacro(const Macro &macro); - void addMacroUse(const Macro &macro, unsigned offset, unsigned length, - unsigned beginLine, - const QVector<MacroArgumentReference> &range); - void addUndefinedMacroUse(const QByteArray &name, unsigned offset); + void addMacroUse(const Macro &macro, + unsigned bytesOffset, unsigned bytesLength, + unsigned utf16charsOffset, unsigned utf16charLength, + unsigned beginLine, const QVector<MacroArgumentReference> &range); + void addUndefinedMacroUse(const QByteArray &name, + unsigned bytesOffset, unsigned utf16charsOffset); Control *control() const; TranslationUnit *translationUnit() const; @@ -108,8 +110,8 @@ public: void setFingerprint(const QByteArray &fingerprint) { m_fingerprint = fingerprint; } - void 
startSkippingBlocks(unsigned offset); - void stopSkippingBlocks(unsigned offset); + void startSkippingBlocks(unsigned utf16charsOffset); + void stopSkippingBlocks(unsigned utf16charsOffset); enum ParseMode { // ### keep in sync with CPlusPlus::TranslationUnit ParseTranlationUnit, @@ -207,22 +209,34 @@ public: class Block { - unsigned _begin; - unsigned _end; + unsigned _bytesBegin; + unsigned _bytesEnd; + unsigned _utf16charsBegin; + unsigned _utf16charsEnd; public: - inline Block(unsigned begin = 0, unsigned end = 0) - : _begin(begin), _end(end) - { } + inline Block(unsigned bytesBegin = 0, unsigned bytesEnd = 0, + unsigned utf16charsBegin = 0, unsigned utf16charsEnd = 0) + : _bytesBegin(bytesBegin), + _bytesEnd(bytesEnd), + _utf16charsBegin(utf16charsBegin), + _utf16charsEnd(utf16charsEnd) + {} + + inline unsigned bytesBegin() const + { return _bytesBegin; } + + inline unsigned bytesEnd() const + { return _bytesEnd; } - inline unsigned begin() const - { return _begin; } + inline unsigned utf16charsBegin() const + { return _utf16charsBegin; } - inline unsigned end() const - { return _end; } + inline unsigned utf16charsEnd() const + { return _utf16charsEnd; } - bool contains(unsigned pos) const - { return pos >= _begin && pos < _end; } + bool containsUtf16charOffset(unsigned utf16charOffset) const + { return utf16charOffset >= _utf16charsBegin && utf16charOffset < _utf16charsEnd; } }; class Include { @@ -259,8 +273,11 @@ public: unsigned _beginLine; public: - inline MacroUse(const Macro &macro, unsigned begin, unsigned end, unsigned beginLine) - : Block(begin, end), + inline MacroUse(const Macro &macro, + unsigned bytesBegin, unsigned bytesEnd, + unsigned utf16charsBegin, unsigned utf16charsEnd, + unsigned beginLine) + : Block(bytesBegin, bytesEnd, utf16charsBegin, utf16charsEnd), _macro(macro), _beginLine(beginLine) { } @@ -293,8 +310,12 @@ public: public: inline UndefinedMacroUse( const QByteArray &name, - unsigned begin) - : Block(begin, begin + name.length()), + 
unsigned bytesBegin, + unsigned utf16charsBegin) + : Block(bytesBegin, + bytesBegin + name.length(), + utf16charsBegin, + utf16charsBegin + QString::fromUtf8(name, name.size()).size()), _name(name) { } @@ -328,8 +349,8 @@ public: { return _includeGuardMacroName; } const Macro *findMacroDefinitionAt(unsigned line) const; - const MacroUse *findMacroUseAt(unsigned offset) const; - const UndefinedMacroUse *findUndefinedMacroUseAt(unsigned offset) const; + const MacroUse *findMacroUseAt(unsigned utf16charsOffset) const; + const UndefinedMacroUse *findUndefinedMacroUseAt(unsigned utf16charsOffset) const; void keepSourceAndAST(); void releaseSourceAndAST(); diff --git a/src/libs/cplusplus/FastPreprocessor.cpp b/src/libs/cplusplus/FastPreprocessor.cpp index 5c52764cf56..c49695fc8cc 100644 --- a/src/libs/cplusplus/FastPreprocessor.cpp +++ b/src/libs/cplusplus/FastPreprocessor.cpp @@ -108,37 +108,45 @@ static const Macro revision(const Snapshot &s, const Macro &m) return m; } -void FastPreprocessor::passedMacroDefinitionCheck(unsigned offset, unsigned line, const Macro &macro) +void FastPreprocessor::passedMacroDefinitionCheck(unsigned bytesOffset, unsigned utf16charsOffset, + unsigned line, const Macro &macro) { Q_ASSERT(_currentDoc); _currentDoc->addMacroUse(revision(_snapshot, macro), - offset, macro.name().length(), line, - QVector<MacroArgumentReference>()); + bytesOffset, macro.name().size(), + utf16charsOffset, macro.nameToQString().size(), + line, QVector<MacroArgumentReference>()); } -void FastPreprocessor::failedMacroDefinitionCheck(unsigned offset, const ByteArrayRef &name) +void FastPreprocessor::failedMacroDefinitionCheck(unsigned bytesOffset, unsigned utf16charsOffset, + const ByteArrayRef &name) { Q_ASSERT(_currentDoc); - _currentDoc->addUndefinedMacroUse(QByteArray(name.start(), name.size()), offset); + _currentDoc->addUndefinedMacroUse(QByteArray(name.start(), name.size()), + bytesOffset, utf16charsOffset); } -void FastPreprocessor::notifyMacroReference(unsigned 
offset, unsigned line, const Macro &macro) +void FastPreprocessor::notifyMacroReference(unsigned bytesOffset, unsigned utf16charsOffset, + unsigned line, const Macro &macro) { Q_ASSERT(_currentDoc); _currentDoc->addMacroUse(revision(_snapshot, macro), - offset, macro.name().length(), line, - QVector<MacroArgumentReference>()); + bytesOffset, macro.name().size(), + utf16charsOffset, macro.nameToQString().size(), + line, QVector<MacroArgumentReference>()); } -void FastPreprocessor::startExpandingMacro(unsigned offset, unsigned line, - const Macro &macro, - const QVector<MacroArgumentReference> &actuals) +void FastPreprocessor::startExpandingMacro(unsigned bytesOffset, unsigned utf16charsOffset, + unsigned line, const Macro &macro, + const QVector<MacroArgumentReference> &actuals) { Q_ASSERT(_currentDoc); _currentDoc->addMacroUse(revision(_snapshot, macro), - offset, macro.name().length(), line, actuals); + bytesOffset, macro.name().size(), + utf16charsOffset, macro.nameToQString().size(), + line, actuals); } diff --git a/src/libs/cplusplus/FastPreprocessor.h b/src/libs/cplusplus/FastPreprocessor.h index eabc4d03ded..df19c4dcdf5 100644 --- a/src/libs/cplusplus/FastPreprocessor.h +++ b/src/libs/cplusplus/FastPreprocessor.h @@ -61,13 +61,14 @@ public: virtual void macroAdded(const Macro &); - virtual void passedMacroDefinitionCheck(unsigned, unsigned, const Macro &); - virtual void failedMacroDefinitionCheck(unsigned, const ByteArrayRef &); + virtual void passedMacroDefinitionCheck(unsigned, unsigned, unsigned, const Macro &); + virtual void failedMacroDefinitionCheck(unsigned, unsigned, const ByteArrayRef &); - virtual void notifyMacroReference(unsigned, unsigned, const Macro &); + virtual void notifyMacroReference(unsigned, unsigned, unsigned, const Macro &); virtual void startExpandingMacro(unsigned, unsigned, + unsigned, const Macro &, const QVector<MacroArgumentReference> &); virtual void stopExpandingMacro(unsigned, const Macro &) {} diff --git a/src/libs/cplusplus/Macro.cpp 
b/src/libs/cplusplus/Macro.cpp index b957e32156f..c6b91b8b3ad 100644 --- a/src/libs/cplusplus/Macro.cpp +++ b/src/libs/cplusplus/Macro.cpp @@ -55,7 +55,8 @@ Macro::Macro() _hashcode(0), _fileRevision(0), _line(0), - _offset(0), + _bytesOffset(0), + _utf16charsOffset(0), _length(0), _state(0) { } diff --git a/src/libs/cplusplus/Macro.h b/src/libs/cplusplus/Macro.h index 0258345cfe6..01bb3dce9f1 100644 --- a/src/libs/cplusplus/Macro.h +++ b/src/libs/cplusplus/Macro.h @@ -71,6 +71,9 @@ public: QByteArray name() const { return _name; } + QString nameToQString() const + { return QString::fromUtf8(_name, _name.size()); } + void setName(const QByteArray &name) { _name = name; } @@ -107,11 +110,17 @@ public: void setLine(unsigned line) { _line = line; } - unsigned offset() const - { return _offset; } + unsigned bytesOffset() const + { return _bytesOffset; } + + void setBytesOffset(unsigned bytesOffset) + { _bytesOffset = bytesOffset; } + + unsigned utf16CharOffset() const + { return _utf16charsOffset; } - void setOffset(unsigned offset) - { _offset = offset; } + void setUtf16charOffset(unsigned utf16charOffset) + { _utf16charsOffset = utf16charOffset; } unsigned length() const { return _length; } @@ -161,7 +170,8 @@ private: unsigned _hashcode; unsigned _fileRevision; unsigned _line; - unsigned _offset; + unsigned _bytesOffset; + unsigned _utf16charsOffset; unsigned _length; union diff --git a/src/libs/cplusplus/PPToken.cpp b/src/libs/cplusplus/PPToken.cpp index fdfaacd2bdb..793dedb9b8c 100644 --- a/src/libs/cplusplus/PPToken.cpp +++ b/src/libs/cplusplus/PPToken.cpp @@ -58,5 +58,6 @@ void Internal::PPToken::squeezeSource() m_src = m_src.mid(byteOffset, f.bytes); m_src.squeeze(); byteOffset = 0; + utf16charOffset = 0; } } diff --git a/src/libs/cplusplus/PreprocessorClient.h b/src/libs/cplusplus/PreprocessorClient.h index 4802289dfa4..a990a393575 100644 --- a/src/libs/cplusplus/PreprocessorClient.h +++ b/src/libs/cplusplus/PreprocessorClient.h @@ -46,19 +46,19 @@ class 
Macro; class CPLUSPLUS_EXPORT MacroArgumentReference { - unsigned _position; - unsigned _length; + unsigned _utf16charsOffset; + unsigned _utf16charsLength; public: - explicit MacroArgumentReference(unsigned position = 0, unsigned length = 0) - : _position(position), _length(length) + explicit MacroArgumentReference(unsigned utf16charsOffset = 0, unsigned utf16charsLength = 0) + : _utf16charsOffset(utf16charsOffset), _utf16charsLength(utf16charsLength) { } - unsigned position() const - { return _position; } + unsigned utf16charsOffset() const + { return _utf16charsOffset; } - unsigned length() const - { return _length; } + unsigned utf16charsLength() const + { return _utf16charsLength; } }; class CPLUSPLUS_EXPORT Client @@ -79,24 +79,26 @@ public: virtual void macroAdded(const Macro &macro) = 0; - virtual void passedMacroDefinitionCheck(unsigned offset, unsigned line, const Macro &macro) = 0; - virtual void failedMacroDefinitionCheck(unsigned offset, const ByteArrayRef &name) = 0; + virtual void passedMacroDefinitionCheck(unsigned bytesOffset, unsigned utf16charsOffset, + unsigned line, const Macro &macro) = 0; + virtual void failedMacroDefinitionCheck(unsigned bytesOffset, unsigned utf16charsOffset, + const ByteArrayRef &name) = 0; - virtual void notifyMacroReference(unsigned offset, unsigned line, const Macro &macro) = 0; + virtual void notifyMacroReference(unsigned bytesOffset, unsigned utf16charsOffset, + unsigned line, const Macro &macro) = 0; - virtual void startExpandingMacro(unsigned offset, - unsigned line, - const Macro &macro, + virtual void startExpandingMacro(unsigned bytesOffset, unsigned utf16charsOffset, + unsigned line, const Macro &macro, const QVector<MacroArgumentReference> &actuals = QVector<MacroArgumentReference>()) = 0; - virtual void stopExpandingMacro(unsigned offset, const Macro &macro) = 0; + virtual void stopExpandingMacro(unsigned bytesOffset, const Macro &macro) = 0; // TODO: ?! /// Mark the given macro name as the include guard for the current file. 
virtual void markAsIncludeGuard(const QByteArray &macroName) = 0; - /// Start skipping from the given offset. - virtual void startSkippingBlocks(unsigned offset) = 0; - virtual void stopSkippingBlocks(unsigned offset) = 0; + /// Start skipping from the given utf16charsOffset. + virtual void startSkippingBlocks(unsigned utf16charsOffset) = 0; + virtual void stopSkippingBlocks(unsigned utf16charsOffset) = 0; virtual void sourceNeeded(unsigned line, const QString &fileName, IncludeType mode) = 0; diff --git a/src/libs/cplusplus/pp-engine.cpp b/src/libs/cplusplus/pp-engine.cpp index c6aabbe608e..fec9dd28067 100644 --- a/src/libs/cplusplus/pp-engine.cpp +++ b/src/libs/cplusplus/pp-engine.cpp @@ -277,7 +277,8 @@ inline bool isContinuationToken(const PPToken &tk) } Macro *macroDefinition(const ByteArrayRef &name, - unsigned offset, + unsigned bytesOffset, + unsigned utf16charsOffset, unsigned line, Environment *env, Client *client) @@ -285,9 +286,9 @@ Macro *macroDefinition(const ByteArrayRef &name, Macro *m = env->resolve(name); if (client) { if (m) - client->passedMacroDefinitionCheck(offset, line, *m); + client->passedMacroDefinitionCheck(bytesOffset, utf16charsOffset, line, *m); else - client->failedMacroDefinitionCheck(offset, name); + client->failedMacroDefinitionCheck(bytesOffset, utf16charsOffset, name); } return m; } @@ -304,6 +305,7 @@ public: { // WARN: `last' must be a valid iterator. 
trivial.byteOffset = last->byteOffset; + trivial.utf16charOffset = last->utf16charOffset; } inline operator bool() const @@ -422,6 +424,7 @@ protected: if ((*_lex)->is(T_IDENTIFIER)) { _value.set_long(macroDefinition(tokenSpell(), (*_lex)->byteOffset, + (*_lex)->utf16charOffset, (*_lex)->lineno, env, client) != 0); ++(*_lex); @@ -430,6 +433,7 @@ protected: if ((*_lex)->is(T_IDENTIFIER)) { _value.set_long(macroDefinition(tokenSpell(), (*_lex)->byteOffset, + (*_lex)->utf16charOffset, (*_lex)->lineno, env, client) != 0); @@ -610,7 +614,8 @@ Preprocessor::State::State() , m_markExpandedTokens(true) , m_noLines(false) , m_inCondition(false) - , m_offsetRef(0) + , m_bytesOffsetRef(0) + , m_utf16charsOffsetRef(0) , m_result(0) , m_lineRef(1) , m_currentExpansion(0) @@ -830,7 +835,9 @@ void Preprocessor::handleDefined(PPToken *tk) QByteArray result(1, '0'); const ByteArrayRef macroName = idToken.asByteArrayRef(); - if (macroDefinition(macroName, idToken.byteOffset + m_state.m_offsetRef, + if (macroDefinition(macroName, + idToken.byteOffset + m_state.m_bytesOffsetRef, + idToken.utf16charOffset + m_state.m_utf16charsOffsetRef, idToken.lineno, m_env, m_client)) { result[0] = '1'; } @@ -984,7 +991,8 @@ bool Preprocessor::handleIdentifier(PPToken *tk) if (!expandFunctionlikeMacros() // Still expand if this originally started with an object-like macro. 
&& m_state.m_expansionStatus != Expanding) { - m_client->notifyMacroReference(m_state.m_offsetRef + idTk.byteOffset, + m_client->notifyMacroReference(m_state.m_bytesOffsetRef + idTk.byteOffset, + m_state.m_utf16charsOffsetRef + idTk.utf16charOffset, idTk.lineno, *macro); return false; @@ -1044,13 +1052,14 @@ bool Preprocessor::handleIdentifier(PPToken *tk) } else { argRefs.push_back(MacroArgumentReference( - m_state.m_offsetRef + argTks.first().bytesBegin(), - argTks.last().bytesBegin() + argTks.last().bytes() - - argTks.first().bytesBegin())); + m_state.m_utf16charsOffsetRef + argTks.first().utf16charsBegin(), + argTks.last().utf16charsBegin() + argTks.last().utf16chars() + - argTks.first().utf16charsBegin())); } } - m_client->startExpandingMacro(m_state.m_offsetRef + idTk.byteOffset, + m_client->startExpandingMacro(m_state.m_bytesOffsetRef + idTk.byteOffset, + m_state.m_utf16charsOffsetRef + idTk.utf16charOffset, idTk.lineno, *macro, argRefs); @@ -1062,7 +1071,9 @@ bool Preprocessor::handleIdentifier(PPToken *tk) return false; } } else if (m_client && !idTk.generated()) { - m_client->startExpandingMacro(m_state.m_offsetRef + idTk.byteOffset, idTk.lineno, *macro); + m_client->startExpandingMacro(m_state.m_bytesOffsetRef + idTk.byteOffset, + m_state.m_utf16charsOffsetRef + idTk.utf16charOffset, + idTk.lineno, *macro); } if (body.isEmpty()) { @@ -1379,7 +1390,8 @@ void Preprocessor::preprocess(const QString &fileName, const QByteArray &source, QByteArray *result, QByteArray *includeGuardMacroName, bool noLines, bool markGeneratedTokens, bool inCondition, - unsigned offsetRef, unsigned lineRef) + unsigned bytesOffsetRef, unsigned utf16charOffsetRef, + unsigned lineRef) { if (source.isEmpty()) return; @@ -1397,7 +1409,8 @@ void Preprocessor::preprocess(const QString &fileName, const QByteArray &source, m_state.m_noLines = noLines; m_state.m_markExpandedTokens = markGeneratedTokens; m_state.m_inCondition = inCondition; - m_state.m_offsetRef = offsetRef; + 
m_state.m_bytesOffsetRef = bytesOffsetRef; + m_state.m_utf16charsOffsetRef = utf16charOffsetRef; m_state.m_lineRef = lineRef; ScopedSwap<QString> savedFileName(m_env->currentFile, fileName); @@ -1638,7 +1651,8 @@ void Preprocessor::handleDefineDirective(PPToken *tk) macro.setLine(tk->lineno); QByteArray macroName = tk->asByteArrayRef().toByteArray(); macro.setName(macroName); - macro.setOffset(tk->byteOffset); + macro.setBytesOffset(tk->byteOffset); + macro.setUtf16charOffset(tk->utf16charOffset); PPToken idToken(*tk); @@ -1682,7 +1696,8 @@ void Preprocessor::handleDefineDirective(PPToken *tk) } QVector<PPToken> bodyTokens; - unsigned previousOffset = 0; + unsigned previousBytesOffset = 0; + unsigned previousUtf16charsOffset = 0; unsigned previousLine = 0; Macro *macroReference = 0; while (isContinuationToken(*tk)) { @@ -1699,17 +1714,21 @@ void Preprocessor::handleDefineDirective(PPToken *tk) macroReference = m_env->resolve(tk->asByteArrayRef()); if (macroReference) { if (!macroReference->isFunctionLike()) { - m_client->notifyMacroReference(tk->byteOffset, tk->lineno, *macroReference); + m_client->notifyMacroReference(tk->byteOffset, tk->utf16charOffset, + tk->lineno, *macroReference); macroReference = 0; } } } else if (macroReference) { - if (tk->is(T_LPAREN)) - m_client->notifyMacroReference(previousOffset, previousLine, *macroReference); + if (tk->is(T_LPAREN)) { + m_client->notifyMacroReference(previousBytesOffset, previousUtf16charsOffset, + previousLine, *macroReference); + } macroReference = 0; } - previousOffset = tk->byteOffset; + previousBytesOffset = tk->byteOffset; + previousUtf16charsOffset = tk->utf16charOffset; previousLine = tk->lineno; // Discard comments in macro definitions (keep comments flag doesn't apply here). 
@@ -1768,20 +1787,21 @@ void Preprocessor::handleDefineDirective(PPToken *tk) QByteArray Preprocessor::expand(PPToken *tk, PPToken *lastConditionToken) { unsigned line = tk->lineno; - unsigned begin = tk->bytesBegin(); + unsigned bytesBegin = tk->bytesBegin(); PPToken lastTk; while (isContinuationToken(*tk)) { lastTk = *tk; lex(tk); } // Gather the exact spelling of the content in the source. - QByteArray condition(m_state.m_source.mid(begin, lastTk.bytesBegin() + lastTk.bytes() - - begin)); + QByteArray condition(m_state.m_source.mid(bytesBegin, lastTk.bytesBegin() + lastTk.bytes() + - bytesBegin)); // qDebug("*** Condition before: [%s]", condition.constData()); QByteArray result; result.reserve(256); - preprocess(m_state.m_currentFileName, condition, &result, 0, true, false, true, begin, line); + preprocess(m_state.m_currentFileName, condition, &result, 0, true, false, true, + bytesBegin, tk->utf16charsBegin(), line); result.squeeze(); // qDebug("*** Condition after: [%s]", result.constData()); @@ -1855,7 +1875,7 @@ void Preprocessor::handleElifDirective(PPToken *tk, const PPToken &poundToken) m_state.m_trueTest[m_state.m_ifLevel] = !startSkipping; m_state.m_skipping[m_state.m_ifLevel] = startSkipping; if (m_client && !startSkipping) - m_client->stopSkippingBlocks(poundToken.byteOffset - 1); + m_client->stopSkippingBlocks(poundToken.utf16charOffset - 1); } } } @@ -1874,7 +1894,7 @@ void Preprocessor::handleElseDirective(PPToken *tk, const PPToken &poundToken) m_state.m_skipping[m_state.m_ifLevel] = startSkipping; if (m_client && wasSkipping && !startSkipping) - m_client->stopSkippingBlocks(poundToken.byteOffset - 1); + m_client->stopSkippingBlocks(poundToken.utf16charOffset - 1); else if (m_client && !wasSkipping && startSkipping) startSkippingBlocks(poundToken); } @@ -1900,7 +1920,7 @@ void Preprocessor::handleEndIfDirective(PPToken *tk, const PPToken &poundToken) m_state.m_trueTest[m_state.m_ifLevel] = false; --m_state.m_ifLevel; if (m_client && wasSkipping && 
!m_state.m_skipping[m_state.m_ifLevel]) - m_client->stopSkippingBlocks(poundToken.byteOffset - 1); + m_client->stopSkippingBlocks(poundToken.utf16charOffset - 1); if (m_state.m_ifLevel == 0) m_state.updateIncludeGuardState(State::IncludeGuardStateHint_Endif); @@ -1918,7 +1938,8 @@ void Preprocessor::handleIfDefDirective(bool checkUndefined, PPToken *tk) bool value = false; const ByteArrayRef macroName = tk->asByteArrayRef(); - if (Macro *macro = macroDefinition(macroName, tk->byteOffset, tk->lineno, m_env, m_client)) { + if (Macro *macro = macroDefinition(macroName, tk->byteOffset, tk->utf16charOffset, + tk->lineno, m_env, m_client)) { value = true; // the macro is a feature constraint(e.g. QT_NO_XXX) @@ -1957,17 +1978,21 @@ void Preprocessor::handleUndefDirective(PPToken *tk) lex(tk); // consume "undef" token if (tk->is(T_IDENTIFIER)) { const ByteArrayRef macroName = tk->asByteArrayRef(); - const unsigned offset = tk->byteOffset + m_state.m_offsetRef; + const unsigned bytesOffset = tk->byteOffset + m_state.m_bytesOffsetRef; + const unsigned utf16charsOffset = tk->utf16charOffset + m_state.m_utf16charsOffsetRef; // Track macro use if previously defined if (m_client) { - if (const Macro *existingMacro = m_env->resolve(macroName)) - m_client->notifyMacroReference(offset, tk->lineno, *existingMacro); + if (const Macro *existingMacro = m_env->resolve(macroName)) { + m_client->notifyMacroReference(bytesOffset, utf16charsOffset, + tk->lineno, *existingMacro); + } } synchronizeOutputLines(*tk); Macro *macro = m_env->remove(macroName); if (m_client && macro) { - macro->setOffset(offset); + macro->setBytesOffset(bytesOffset); + macro->setUtf16charOffset(utf16charsOffset); m_client->macroAdded(*macro); } lex(tk); // consume macro name @@ -2035,14 +2060,18 @@ void Preprocessor::startSkippingBlocks(const Preprocessor::PPToken &tk) const if (!m_client) return; - int iter = tk.bytesEnd(); - const QByteArray &txt = tk.source(); - for (; iter < txt.size(); ++iter) { - if 
(txt.at(iter) == '\n') { - m_client->startSkippingBlocks(iter + 1); + unsigned utf16charIter = tk.utf16charsEnd(); + const char *source = tk.source().constData() + tk.bytesEnd(); + const char *sourceEnd = tk.source().constEnd(); + unsigned char yychar = *source; + + do { + if (yychar == '\n') { + m_client->startSkippingBlocks(utf16charIter + 1); return; } - } + Lexer::yyinp_utf8(source, yychar, utf16charIter); + } while (source < sourceEnd); } bool Preprocessor::atStartOfOutputLine() const diff --git a/src/libs/cplusplus/pp-engine.h b/src/libs/cplusplus/pp-engine.h index 8ff8712cab4..bb920c4aee8 100644 --- a/src/libs/cplusplus/pp-engine.h +++ b/src/libs/cplusplus/pp-engine.h @@ -95,7 +95,8 @@ private: void preprocess(const QString &filename, const QByteArray &source, QByteArray *result, QByteArray *includeGuardMacroName, bool noLines, bool markGeneratedTokens, bool inCondition, - unsigned offsetRef = 0, unsigned lineRef = 1); + unsigned bytesOffsetRef = 0, unsigned utf16charOffsetRef = 0, + unsigned lineRef = 1); enum { MAX_LEVEL = 512 }; @@ -128,7 +129,8 @@ private: bool m_noLines; bool m_inCondition; - unsigned m_offsetRef; + unsigned m_bytesOffsetRef; + unsigned m_utf16charsOffsetRef; QByteArray *m_result; unsigned m_lineRef; |