// Copyright (c) 2008 Roberto Raggi // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal // in the Software without restriction, including without limitation the rights // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell // copies of the Software, and to permit persons to whom the Software is // furnished to do so, subject to the following conditions: // // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. #include "TranslationUnit.h" #include "Control.h" #include "Parser.h" #include "Lexer.h" #include "MemoryPool.h" #include "AST.h" #include "Literals.h" #include "DiagnosticClient.h" #include #include #include #include #include #if defined(__INTEL_COMPILER) && !defined(va_copy) # define va_copy __va_copy #endif using namespace CPlusPlus; const Token TranslationUnit::nullToken; TranslationUnit::TranslationUnit(Control *control, const StringLiteral *fileId) : _control(control), _fileId(fileId), _firstSourceChar(0), _lastSourceChar(0), _pool(0), _ast(0), _flags(0) { _tokens = new std::vector(); _comments = new std::vector(); _previousTranslationUnit = control->switchTranslationUnit(this); _pool = new MemoryPool(); } TranslationUnit::~TranslationUnit() { (void) _control->switchTranslationUnit(_previousTranslationUnit); release(); } Control *TranslationUnit::control() const { return _control; } const StringLiteral *TranslationUnit::fileId() const { return _fileId; } const char *TranslationUnit::fileName() const { return _fileId->chars(); } unsigned TranslationUnit::fileNameLength() const { return _fileId->size(); } const char *TranslationUnit::firstSourceChar() const { return _firstSourceChar; } const char *TranslationUnit::lastSourceChar() const { return _lastSourceChar; } unsigned TranslationUnit::sourceLength() const { return _lastSourceChar - _firstSourceChar; } void TranslationUnit::setSource(const char *source, unsigned size) { _firstSourceChar = source; _lastSourceChar = source + size; } const char *TranslationUnit::spell(unsigned index) const { if (! index) return 0; return tokenAt(index).spell(); } unsigned TranslationUnit::commentCount() const { return unsigned(_comments->size()); } const Token &TranslationUnit::commentAt(unsigned index) const { return _comments->at(index); } const Identifier *TranslationUnit::identifier(unsigned index) const { return tokenAt(index).identifier; } const Literal *TranslationUnit::literal(unsigned index) const { return tokenAt(index).literal; } const StringLiteral *TranslationUnit::stringLiteral(unsigned index) const { return tokenAt(index).string; } const NumericLiteral *TranslationUnit::numericLiteral(unsigned index) const { return tokenAt(index).number; } unsigned TranslationUnit::matchingBrace(unsigned index) const { return tokenAt(index).close_brace; } MemoryPool *TranslationUnit::memoryPool() const { return _pool; } AST *TranslationUnit::ast() const { return _ast; } bool TranslationUnit::isTokenized() const { return f._tokenized; } bool TranslationUnit::isParsed() const { return f._parsed; } void TranslationUnit::tokenize() { if (isTokenized()) return; f._tokenized = true; Lexer lex(this); lex.setLanguageFeatures(_languageFeatures); lex.setScanCommentTokens(true); std::stack braces; _tokens->push_back(nullToken); // the first token needs to be invalid! pushLineOffset(0); pushPreprocessorLine(0, 1, fileId()); const Identifier *lineId = control()->identifier("line"); const Identifier *expansionId = control()->identifier("expansion"); const Identifier *beginId = control()->identifier("begin"); const Identifier *endId = control()->identifier("end"); // We need to track information about the expanded tokens. A vector with an addition // explicit index control is used instead of queue mainly for performance reasons. std::vector > lineColumn; unsigned lineColumnIdx = 0; Token tk; do { lex(&tk); recognize: if (tk.is(T_POUND) && tk.newline()) { const unsigned utf16CharOffset = tk.utf16charOffset; lex(&tk); if (! tk.newline() && tk.is(T_IDENTIFIER) && tk.identifier == expansionId) { // It's an expansion mark. lex(&tk); if (!tk.newline() && tk.is(T_IDENTIFIER)) { if (tk.identifier == beginId) { // Start of a macro expansion section. lex(&tk); // Gather where the expansion happens and its length. //unsigned macroOffset = static_cast(strtoul(tk.spell(), 0, 0)); lex(&tk); lex(&tk); // Skip the separating comma //unsigned macroLength = static_cast(strtoul(tk.spell(), 0, 0)); lex(&tk); // NOTE: We are currently not using the macro offset and length. They // are kept here for now because of future use. //Q_UNUSED(macroOffset) //Q_UNUSED(macroLength) // Now we need to gather the real line and columns from the upcoming // tokens. But notice this is only relevant for tokens which are expanded // but not generated. while (tk.isNot(T_EOF_SYMBOL) && !tk.newline()) { // When we get a ~ it means there's a number of generated tokens // following. Otherwise, we have actual data. if (tk.is(T_TILDE)) { lex(&tk); // Get the total number of generated tokens and specify "null" // information for them. unsigned totalGenerated = static_cast(strtoul(tk.spell(), 0, 0)); const std::size_t previousSize = lineColumn.size(); lineColumn.resize(previousSize + totalGenerated); std::fill(lineColumn.begin() + previousSize, lineColumn.end(), std::make_pair(0, 0)); lex(&tk); } else if (tk.is(T_NUMERIC_LITERAL)) { unsigned line = static_cast(strtoul(tk.spell(), 0, 0)); lex(&tk); lex(&tk); // Skip the separating colon unsigned column = static_cast(strtoul(tk.spell(), 0, 0)); // Store line and column for this non-generated token. lineColumn.push_back(std::make_pair(line, column)); lex(&tk); } } } else if (tk.identifier == endId) { // End of a macro expansion. lineColumn.clear(); lineColumnIdx = 0; lex(&tk); } } } else { if (! tk.newline() && tk.is(T_IDENTIFIER) && tk.identifier == lineId) lex(&tk); if (! tk.newline() && tk.is(T_NUMERIC_LITERAL)) { unsigned line = (unsigned) strtoul(tk.spell(), 0, 0); lex(&tk); if (! tk.newline() && tk.is(T_STRING_LITERAL)) { const StringLiteral *fileName = control()->stringLiteral(tk.string->chars(), tk.string->size()); pushPreprocessorLine(utf16CharOffset, line, fileName); lex(&tk); } } while (tk.isNot(T_EOF_SYMBOL) && ! tk.newline()) lex(&tk); } goto recognize; } else if (tk.kind() == T_LBRACE) { braces.push(unsigned(_tokens->size())); } else if (tk.kind() == T_RBRACE && ! braces.empty()) { const unsigned open_brace_index = braces.top(); braces.pop(); if (open_brace_index < tokenCount()) (*_tokens)[open_brace_index].close_brace = unsigned(_tokens->size()); } else if (tk.isComment()) { _comments->push_back(tk); continue; // comments are not in the regular token stream } bool currentExpanded = false; bool currentGenerated = false; if (!lineColumn.empty() && lineColumnIdx < lineColumn.size()) { currentExpanded = true; const std::pair &p = lineColumn[lineColumnIdx]; if (p.first) _expandedLineColumn.insert(std::make_pair(tk.utf16charsBegin(), p)); else currentGenerated = true; ++lineColumnIdx; } tk.f.expanded = currentExpanded; tk.f.generated = currentGenerated; _tokens->push_back(tk); } while (tk.kind()); for (; ! braces.empty(); braces.pop()) { unsigned open_brace_index = braces.top(); (*_tokens)[open_brace_index].close_brace = unsigned(_tokens->size()); } } bool TranslationUnit::skipFunctionBody() const { return f._skipFunctionBody; } void TranslationUnit::setSkipFunctionBody(bool skipFunctionBody) { f._skipFunctionBody = skipFunctionBody; } bool TranslationUnit::parse(ParseMode mode) { if (isParsed()) return false; if (! isTokenized()) tokenize(); f._parsed = true; Parser parser(this); bool parsed = false; switch (mode) { case ParseTranlationUnit: { TranslationUnitAST *node = 0; parsed = parser.parseTranslationUnit(node); _ast = node; } break; case ParseDeclaration: { DeclarationAST *node = 0; parsed = parser.parseDeclaration(node); _ast = node; } break; case ParseExpression: { ExpressionAST *node = 0; parsed = parser.parseExpression(node); _ast = node; } break; case ParseDeclarator: { DeclaratorAST *node = 0; parsed = parser.parseDeclarator(node, /*decl_specifier_list =*/ 0); _ast = node; } break; case ParseStatement: { StatementAST *node = 0; parsed = parser.parseStatement(node); _ast = node; } break; default: break; } // switch return parsed; } void TranslationUnit::pushLineOffset(unsigned offset) { _lineOffsets.push_back(offset); } void TranslationUnit::pushPreprocessorLine(unsigned utf16charOffset, unsigned line, const StringLiteral *fileName) { _ppLines.push_back(PPLine(utf16charOffset, line, fileName)); } unsigned TranslationUnit::findLineNumber(unsigned utf16charOffset) const { std::vector::const_iterator it = std::lower_bound(_lineOffsets.begin(), _lineOffsets.end(), utf16charOffset); if (it != _lineOffsets.begin()) --it; return it - _lineOffsets.begin(); } TranslationUnit::PPLine TranslationUnit::findPreprocessorLine(unsigned utf16charOffset) const { std::vector::const_iterator it = std::lower_bound(_ppLines.begin(), _ppLines.end(), PPLine(utf16charOffset)); if (it != _ppLines.begin()) --it; return *it; } unsigned TranslationUnit::findColumnNumber(unsigned utf16CharOffset, unsigned lineNumber) const { if (! utf16CharOffset) return 0; return utf16CharOffset - _lineOffsets[lineNumber]; } void TranslationUnit::getTokenPosition(unsigned index, unsigned *line, unsigned *column, const StringLiteral **fileName) const { return getPosition(tokenAt(index).utf16charsBegin(), line, column, fileName); } void TranslationUnit::getTokenStartPosition(unsigned index, unsigned *line, unsigned *column, const StringLiteral **fileName) const { return getPosition(tokenAt(index).utf16charsBegin(), line, column, fileName); } void TranslationUnit::getTokenEndPosition(unsigned index, unsigned *line, unsigned *column, const StringLiteral **fileName) const { return getPosition(tokenAt(index).utf16charsEnd(), line, column, fileName); } void TranslationUnit::getPosition(unsigned utf16charOffset, unsigned *line, unsigned *column, const StringLiteral **fileName) const { unsigned lineNumber = 0; unsigned columnNumber = 0; const StringLiteral *file = 0; // If this token is expanded we already have the information directly from the expansion // section header. Otherwise, we need to calculate it. TokenLineColumn::const_iterator it = _expandedLineColumn.find(utf16charOffset); if (it != _expandedLineColumn.end()) { lineNumber = it->second.first; columnNumber = it->second.second + 1; file = _fileId; } else { // Identify line within the entire translation unit. lineNumber = findLineNumber(utf16charOffset); // Identify column. columnNumber = findColumnNumber(utf16charOffset, lineNumber); // Adjust the line in regards to the preprocessing markers. const PPLine ppLine = findPreprocessorLine(utf16charOffset); lineNumber -= findLineNumber(ppLine.utf16charOffset) + 1; lineNumber += ppLine.line; file = ppLine.fileName; } if (line) *line = lineNumber; if (column) *column = columnNumber; if (fileName) *fileName = file; } void TranslationUnit::message(DiagnosticClient::Level level, unsigned index, const char *format, va_list args) { if (f._blockErrors) return; index = std::min(index, tokenCount() - 1); unsigned line = 0, column = 0; const StringLiteral *fileName = 0; getTokenPosition(index, &line, &column, &fileName); if (DiagnosticClient *client = control()->diagnosticClient()) client->report(level, fileName, line, column, format, args); } void TranslationUnit::warning(unsigned index, const char *format, ...) { if (f._blockErrors) return; va_list args, ap; va_start(args, format); va_copy(ap, args); message(DiagnosticClient::Warning, index, format, args); va_end(ap); va_end(args); } void TranslationUnit::error(unsigned index, const char *format, ...) { if (f._blockErrors) return; va_list args, ap; va_start(args, format); va_copy(ap, args); message(DiagnosticClient::Error, index, format, args); va_end(ap); va_end(args); } void TranslationUnit::fatal(unsigned index, const char *format, ...) { if (f._blockErrors) return; va_list args, ap; va_start(args, format); va_copy(ap, args); message(DiagnosticClient::Fatal, index, format, args); va_end(ap); va_end(args); } unsigned TranslationUnit::findPreviousLineOffset(unsigned tokenIndex) const { unsigned lineOffset = _lineOffsets[findLineNumber(tokenAt(tokenIndex).utf16charsBegin())]; return lineOffset; } bool TranslationUnit::maybeSplitGreaterGreaterToken(unsigned tokenIndex) { if (tokenIndex >= tokenCount()) return false; Token &tok = (*_tokens)[tokenIndex]; if (tok.kind() != T_GREATER_GREATER) return false; tok.f.kind = T_GREATER; tok.f.bytes = 1; tok.f.utf16chars = 1; Token newGreater; newGreater.f.kind = T_GREATER; newGreater.f.expanded = tok.expanded(); newGreater.f.generated = tok.generated(); newGreater.f.bytes = 1; newGreater.f.utf16chars = 1; newGreater.byteOffset = tok.byteOffset + 1; newGreater.utf16charOffset = tok.utf16charOffset + 1; TokenLineColumn::const_iterator it = _expandedLineColumn.find(tok.bytesBegin()); _tokens->insert(_tokens->begin() + tokenIndex + 1, newGreater); if (it != _expandedLineColumn.end()) { const std::pair newPosition(it->second.first, it->second.second + 1); _expandedLineColumn.insert(std::make_pair(newGreater.bytesBegin(), newPosition)); } return true; } void TranslationUnit::releaseTokensAndComments() { delete _tokens; _tokens = 0; delete _comments; _comments = 0; } void TranslationUnit::resetAST() { delete _pool; _pool = 0; _ast = 0; } void TranslationUnit::release() { resetAST(); releaseTokensAndComments(); }