author    Leandro Melo <leandro.melo@nokia.com>  2012-06-20 15:22:02 +0200
committer Leandro Melo <leandro.melo@nokia.com>  2012-06-25 15:49:27 +0200
commit    d6ccffc06c0439f1d8248c374978f382b6bf5fe1 (patch)
tree      3fa332e4512c11942a99e1b3d69891a9884886cd /src/libs/3rdparty/cplusplus/TranslationUnit.cpp
parent    e99c139352fe1f31fb2469c6a52ab41be610ef70 (diff)
C++: Core changes in preprocessing
Summary of most relevant items:

- Preprocessor output format change. No more gen true/false. Instead, a more intuitive and natural expansion (like from a real compiler) is performed, directly corresponding to the macro invocation. Notice that information about the generated tokens is not lost, because it is now embedded in the expansion section header (in terms of lines and columns, as explained in the code). In addition, the location where the macro expansion happens is also documented for future use. (An illustrative sketch of the new format follows this list.)

- Fix line control directives and associated token line numbers. This was not detected in test cases because some of them were actually wrong: within expansions, the line information was being taken as originally computed in the macro definition, while what Creator's reporting mechanism wants and expects (just like regular compilers) is the line from the expanded version of the tokens.

- Do not allow eager expansion. This was previously being done inside define directives. However, it is not allowed and might lead to incorrect results, since argument substitution should only happen upon the macro invocation (and following nested ones). At least GCC and clang are consistent with that. See test case tst_Preprocessor:dont_eagerly_expand for a detailed explanation.

- Revive the 'expanded' token flag. This is used to mark every token that originates from a macro expansion. Notice, however, that expanded tokens are not necessarily generated tokens (although every generated token is an expanded token). Expanded tokens that are not generated are those still considered by our code model features, since they are visible in the editor. The translation unit is smart enough to calculate the line/column position of such tokens based on the information from the expansion section header. (A short usage sketch of the resulting getPosition() behaviour appears after the diff below.)

- How expansions are tracked has also changed. Now we simply add two surrounding marker tokens to each "top-level" expansion sequence. There is an enumeration that controls the expansion states. Also, no "previous" token is kept around.

- Preprocessor client methods suffered a change in signature: they now receive the line number of the action in question as a parameter. Previously, such a line could be retrieved by the client implementation by accessing the environment line. However, this is not reliable, because we try to avoid synchronizing the output/environment lines while expanding macros or handling preprocessor directives (in order to avoid unnecessary output).

- Although macros are not expanded during define directives (as mentioned above), the preprocessor client is now "notified" when it sees a macro. This is to allow usage tracking.

- Other small stuff.

This is all in one patch because the fixes are a consequence of the change in preprocessing control.

Change-Id: I8f4c6e6366f37756ec65d0a93b79f72a3ac4ed50
Reviewed-by: Roberto Raggi <roberto.raggi@nokia.com>
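To make the new output format concrete, here is a rough, hypothetical sketch of an expansion section, inferred from the parsing code in the diff below; the macro, the offset/length pair and the line:column values are purely illustrative and not taken from a real preprocessor run:

    // Source, assuming  #define PLUS(x, y) x + y  and, on line 3:
    //     int n = PLUS(a, b);
    //
    // Preprocessed output as consumed by TranslationUnit::tokenize():
    # expansion begin 42,4 3:13 ~1 3:16
    a + b
    # expansion end

The two numbers after "begin" record where the expansion happens and its length (parsed but currently unused); "3:13" and "3:16" are the editor line and column of the tokens a and b, which are expanded but not generated, while "~1" stands for one generated token (the +), for which no source position is stored.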
Diffstat (limited to 'src/libs/3rdparty/cplusplus/TranslationUnit.cpp')
-rw-r--r--  src/libs/3rdparty/cplusplus/TranslationUnit.cpp | 134
1 file changed, 115 insertions(+), 19 deletions(-)
diff --git a/src/libs/3rdparty/cplusplus/TranslationUnit.cpp b/src/libs/3rdparty/cplusplus/TranslationUnit.cpp
index 3a083d6e3f..a26a23eabc 100644
--- a/src/libs/3rdparty/cplusplus/TranslationUnit.cpp
+++ b/src/libs/3rdparty/cplusplus/TranslationUnit.cpp
@@ -27,8 +27,10 @@
#include "Literals.h"
#include "DiagnosticClient.h"
#include <stack>
+#include <vector>
#include <cstdarg>
#include <algorithm>
+#include <utility>
#ifdef _MSC_VER
# define va_copy(dst, src) ((dst) = (src))
@@ -176,27 +178,84 @@ void TranslationUnit::tokenize()
pushPreprocessorLine(0, 1, fileId());
const Identifier *lineId = control()->identifier("line");
- const Identifier *genId = control()->identifier("gen");
+ const Identifier *expansionId = control()->identifier("expansion");
+ const Identifier *beginId = control()->identifier("begin");
+ const Identifier *endId = control()->identifier("end");
+
+ // We need to track information about the expanded tokens. A vector with an additional
+ // explicit index control is used instead of a queue, mainly for performance reasons.
+ std::vector<std::pair<unsigned, unsigned> > lineColumn;
+ unsigned lineColumnIdx = 0;
- bool generated = false;
Token tk;
do {
lex(&tk);
- _Lrecognize:
+ _Lrecognize:
if (tk.is(T_POUND) && tk.newline()) {
unsigned offset = tk.offset;
lex(&tk);
- if (! tk.f.newline && tk.is(T_IDENTIFIER) && tk.identifier == genId) {
- // it's a gen directive.
+ if (! tk.f.newline && tk.is(T_IDENTIFIER) && tk.identifier == expansionId) {
+ // It's an expansion mark.
lex(&tk);
- if (! tk.f.newline && tk.is(T_TRUE)) {
- lex(&tk);
- generated = true;
- } else {
- generated = false;
+ if (!tk.f.newline && tk.is(T_IDENTIFIER)) {
+ if (tk.identifier == beginId) {
+ // Start of a macro expansion section.
+ lex(&tk);
+
+ // Gather where the expansion happens and its length.
+ unsigned macroOffset = static_cast<unsigned>(strtoul(tk.spell(), 0, 0));
+ lex(&tk);
+ lex(&tk); // Skip the separating comma
+ unsigned macroLength = static_cast<unsigned>(strtoul(tk.spell(), 0, 0));
+ lex(&tk);
+
+ // NOTE: We are currently not using the macro offset and length. They
+ // are kept here for now with future use in mind.
+ Q_UNUSED(macroOffset)
+ Q_UNUSED(macroLength)
+
+ // Now we need to gather the real lines and columns from the upcoming
+ // tokens. But notice this is only relevant for tokens which are expanded
+ // but not generated.
+ while (tk.isNot(T_EOF_SYMBOL) && !tk.f.newline) {
+ // When we get a ~ it means there's a number of generated tokens
+ // following. Otherwise, we have actual data.
+ if (tk.is(T_TILDE)) {
+ lex(&tk);
+
+ // Get the total number of generated tokens and specify "null"
+ // information for them.
+ unsigned totalGenerated =
+ static_cast<unsigned>(strtoul(tk.spell(), 0, 0));
+ const std::size_t previousSize = lineColumn.size();
+ lineColumn.resize(previousSize + totalGenerated);
+ std::fill(lineColumn.begin() + previousSize,
+ lineColumn.end(),
+ std::make_pair(0, 0));
+
+ lex(&tk);
+ } else if (tk.is(T_NUMERIC_LITERAL)) {
+ unsigned line = static_cast<unsigned>(strtoul(tk.spell(), 0, 0));
+ lex(&tk);
+ lex(&tk); // Skip the separating colon
+ unsigned column = static_cast<unsigned>(strtoul(tk.spell(), 0, 0));
+
+ // Store line and column for this non-generated token.
+ lineColumn.push_back(std::make_pair(line, column));
+
+ lex(&tk);
+ }
+ }
+ } else if (tk.identifier == endId) {
+ // End of a macro expansion.
+ lineColumn.clear();
+ lineColumnIdx = 0;
+
+ lex(&tk);
+ }
}
} else {
if (! tk.f.newline && tk.is(T_IDENTIFIER) && tk.identifier == lineId)
@@ -211,9 +270,9 @@ void TranslationUnit::tokenize()
lex(&tk);
}
}
+ while (tk.isNot(T_EOF_SYMBOL) && ! tk.f.newline)
+ lex(&tk);
}
- while (tk.isNot(T_EOF_SYMBOL) && ! tk.f.newline)
- lex(&tk);
goto _Lrecognize;
} else if (tk.f.kind == T_LBRACE) {
braces.push(_tokens->size());
@@ -225,7 +284,24 @@ void TranslationUnit::tokenize()
_comments->push_back(tk);
continue; // comments are not in the regular token stream
}
- tk.f.generated = generated;
+
+ bool currentExpanded = false;
+ bool currentGenerated = false;
+
+ if (!lineColumn.empty() && lineColumnIdx < lineColumn.size()) {
+ currentExpanded = true;
+ const std::pair<unsigned, unsigned> &p = lineColumn[lineColumnIdx];
+ if (p.first)
+ _expandedLineColumn.insert(std::make_pair(tk.offset, p));
+ else
+ currentGenerated = true;
+
+ ++lineColumnIdx;
+ }
+
+ tk.f.expanded = currentExpanded;
+ tk.f.generated = currentGenerated;
+
_tokens->push_back(tk);
} while (tk.f.kind);
@@ -355,12 +431,32 @@ void TranslationUnit::getPosition(unsigned tokenOffset,
unsigned *column,
const StringLiteral **fileName) const
{
- unsigned lineNumber = findLineNumber(tokenOffset);
- unsigned columnNumber = findColumnNumber(tokenOffset, lineNumber);
- const PPLine ppLine = findPreprocessorLine(tokenOffset);
+ unsigned lineNumber = 0;
+ unsigned columnNumber = 0;
+ const StringLiteral *file = 0;
+
+ // If this token is expanded we already have the information directly from the expansion
+ // section header. Otherwise, we need to calculate it.
+ std::map<unsigned, std::pair<unsigned, unsigned> >::const_iterator it =
+ _expandedLineColumn.find(tokenOffset);
+ if (it != _expandedLineColumn.end()) {
+ lineNumber = it->second.first;
+ columnNumber = it->second.second + 1;
+ file = _fileId;
+ } else {
+ // Identify line within the entire translation unit.
+ lineNumber = findLineNumber(tokenOffset);
+
+ // Identify column.
+ columnNumber = findColumnNumber(tokenOffset, lineNumber);
- lineNumber -= findLineNumber(ppLine.offset) + 1;
- lineNumber += ppLine.line;
+ // Adjust the line in regards to the preprocessing markers.
+ const PPLine ppLine = findPreprocessorLine(tokenOffset);
+ lineNumber -= findLineNumber(ppLine.offset) + 1;
+ lineNumber += ppLine.line;
+
+ file = ppLine.fileName;
+ }
if (line)
*line = lineNumber;
@@ -369,7 +465,7 @@ void TranslationUnit::getPosition(unsigned tokenOffset,
*column = columnNumber;
if (fileName)
- *fileName = ppLine.fileName;
+ *fileName = file;
}
bool TranslationUnit::blockErrors(bool block)
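For reference, a minimal usage sketch of the position lookup changed above, assuming a CPlusPlus::TranslationUnit pointer named unit and a token tk obtained from it (both names are illustrative, not part of this patch):

    unsigned line = 0, column = 0;
    const CPlusPlus::StringLiteral *file = 0;
    unit->getPosition(tk.offset, &line, &column, &file);
    // For tokens flagged as expanded, line/column now come straight from the
    // stored expansion-section data (_expandedLineColumn); for all other tokens
    // they are still computed from the source and adjusted by the preprocessor
    // line markers, as in the else branch above.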