diff options
Diffstat (limited to 'clangd/index/SymbolCollector.cpp')
-rw-r--r-- | clangd/index/SymbolCollector.cpp | 271 |
1 files changed, 179 insertions, 92 deletions
diff --git a/clangd/index/SymbolCollector.cpp b/clangd/index/SymbolCollector.cpp index d3ce7712..af1938da 100644 --- a/clangd/index/SymbolCollector.cpp +++ b/clangd/index/SymbolCollector.cpp @@ -1,9 +1,8 @@ //===--- SymbolCollector.cpp -------------------------------------*- C++-*-===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// @@ -12,8 +11,10 @@ #include "CanonicalIncludes.h" #include "CodeComplete.h" #include "CodeCompletionStrings.h" +#include "ExpectedTypes.h" #include "Logger.h" #include "SourceCode.h" +#include "SymbolLocation.h" #include "URI.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclBase.h" @@ -23,7 +24,9 @@ #include "clang/Basic/SourceManager.h" #include "clang/Basic/Specifiers.h" #include "clang/Index/IndexSymbol.h" +#include "clang/Index/IndexingAction.h" #include "clang/Index/USRGeneration.h" +#include "clang/Lex/Preprocessor.h" #include "llvm/Support/Casting.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" @@ -126,36 +129,6 @@ bool shouldCollectIncludePath(index::SymbolKind Kind) { } } -/// Gets a canonical include (URI of the header or <header> or "header") for -/// header of \p Loc. -/// Returns None if fails to get include header for \p Loc. -llvm::Optional<std::string> -getIncludeHeader(llvm::StringRef QName, const SourceManager &SM, - SourceLocation Loc, const SymbolCollector::Options &Opts) { - std::vector<std::string> Headers; - // Collect the #include stack. - while (true) { - if (!Loc.isValid()) - break; - auto FilePath = SM.getFilename(Loc); - if (FilePath.empty()) - break; - Headers.push_back(FilePath); - if (SM.isInMainFile(Loc)) - break; - Loc = SM.getIncludeLoc(SM.getFileID(Loc)); - } - if (Headers.empty()) - return None; - llvm::StringRef Header = Headers[0]; - if (Opts.Includes) { - Header = Opts.Includes->mapHeader(Headers, QName); - if (Header.startswith("<") || Header.startswith("\"")) - return Header.str(); - } - return toURI(SM, Header, Opts); -} - // Return the symbol range of the token at \p TokLoc. std::pair<SymbolLocation::Position, SymbolLocation::Position> getTokenRange(SourceLocation TokLoc, const SourceManager &SM, @@ -210,7 +183,7 @@ getTokenLocation(SourceLocation TokLoc, const SourceManager &SM, // the first seen declaration as canonical declaration is not a good enough // heuristic. bool isPreferredDeclaration(const NamedDecl &ND, index::SymbolRoleSet Roles) { - const auto& SM = ND.getASTContext().getSourceManager(); + const auto &SM = ND.getASTContext().getSourceManager(); return (Roles & static_cast<unsigned>(index::SymbolRole::Definition)) && isa<TagDecl>(&ND) && !SM.isWrittenInMainFile(SM.getExpansionLoc(ND.getLocation())); @@ -220,13 +193,6 @@ RefKind toRefKind(index::SymbolRoleSet Roles) { return static_cast<RefKind>(static_cast<unsigned>(RefKind::All) & Roles); } -template <class T> bool explicitTemplateSpecialization(const NamedDecl &ND) { - if (const auto *TD = dyn_cast<T>(&ND)) - if (TD->getTemplateSpecializationKind() == TSK_ExplicitSpecialization) - return true; - return false; -} - } // namespace SymbolCollector::SymbolCollector(Options Opts) : Opts(std::move(Opts)) {} @@ -242,8 +208,6 @@ bool SymbolCollector::shouldCollectSymbol(const NamedDecl &ND, const ASTContext &ASTCtx, const Options &Opts, bool IsMainFileOnly) { - if (ND.isImplicit()) - return false; // Skip anonymous declarations, e.g (anonymous enum/class/struct). if (ND.getDeclName().isEmpty()) return false; @@ -278,10 +242,6 @@ bool SymbolCollector::shouldCollectSymbol(const NamedDecl &ND, if (!isa<RecordDecl>(DeclCtx)) return false; } - if (explicitTemplateSpecialization<FunctionDecl>(ND) || - explicitTemplateSpecialization<CXXRecordDecl>(ND) || - explicitTemplateSpecialization<VarDecl>(ND)) - return false; // Avoid indexing internal symbols in protobuf generated headers. if (isPrivateProtoDecl(ND)) @@ -297,6 +257,11 @@ bool SymbolCollector::handleDeclOccurence( assert(ASTCtx && PP.get() && "ASTContext and Preprocessor must be set."); assert(CompletionAllocator && CompletionTUInfo); assert(ASTNode.OrigD); + // Indexing API puts cannonical decl into D, which might not have a valid + // source location for implicit/built-in decls. Fallback to original decl in + // such cases. + if (D->getLocation().isInvalid()) + D = ASTNode.OrigD; // If OrigD is an declaration associated with a friend declaration and it's // not a definition, skip it. Note that OrigD is the occurrence that the // collector is currently visiting. @@ -304,6 +269,10 @@ bool SymbolCollector::handleDeclOccurence( Decl::FriendObjectKind::FOK_None) && !(Roles & static_cast<unsigned>(index::SymbolRole::Definition))) return true; + // Skip non-semantic references, we should start processing these when we + // decide to implement renaming with index support. + if ((Roles & static_cast<unsigned>(index::SymbolRole::NameReference))) + return true; // A declaration created for a friend declaration should not be used as the // canonical declaration in the index. Use OrigD instead, unless we've already // picked a replacement for D @@ -332,9 +301,11 @@ bool SymbolCollector::handleDeclOccurence( // ND is the canonical (i.e. first) declaration. If it's in the main file, // then no public declaration was visible, so assume it's main-file only. - bool IsMainFileOnly = SM.isWrittenInMainFile(SM.getExpansionLoc( - ND->getBeginLoc())); - if (!shouldCollectSymbol(*ND, *ASTCtx, Opts, IsMainFileOnly)) + bool IsMainFileOnly = + SM.isWrittenInMainFile(SM.getExpansionLoc(ND->getBeginLoc())); + // In C, printf is a redecl of an implicit builtin! So check OrigD instead. + if (ASTNode.OrigD->isImplicit() || + !shouldCollectSymbol(*ND, *ASTCtx, Opts, IsMainFileOnly)) return true; // Do not store references to main-file symbols. if (CollectRef && !IsMainFileOnly && !isa<NamespaceDecl>(ND) && @@ -348,19 +319,25 @@ bool SymbolCollector::handleDeclOccurence( if (!ID) return true; - const NamedDecl &OriginalDecl = *cast<NamedDecl>(ASTNode.OrigD); + // FIXME: ObjCPropertyDecl are not properly indexed here: + // - ObjCPropertyDecl may have an OrigD of ObjCPropertyImplDecl, which is + // not a NamedDecl. + auto *OriginalDecl = dyn_cast<NamedDecl>(ASTNode.OrigD); + if (!OriginalDecl) + return true; + const Symbol *BasicSymbol = Symbols.find(*ID); if (!BasicSymbol) // Regardless of role, ND is the canonical declaration. BasicSymbol = addDeclaration(*ND, std::move(*ID), IsMainFileOnly); - else if (isPreferredDeclaration(OriginalDecl, Roles)) + else if (isPreferredDeclaration(*OriginalDecl, Roles)) // If OriginalDecl is preferred, replace the existing canonical // declaration (e.g. a class forward declaration). There should be at most // one duplicate as we expect to see only one preferred declaration per // TU, because in practice they are definitions. - BasicSymbol = addDeclaration(OriginalDecl, std::move(*ID), IsMainFileOnly); + BasicSymbol = addDeclaration(*OriginalDecl, std::move(*ID), IsMainFileOnly); if (Roles & static_cast<unsigned>(index::SymbolRole::Definition)) - addDefinition(OriginalDecl, *BasicSymbol); + addDefinition(*OriginalDecl, *BasicSymbol); return true; } @@ -374,11 +351,18 @@ bool SymbolCollector::handleMacroOccurence(const IdentifierInfo *Name, const auto &SM = PP->getSourceManager(); auto DefLoc = MI->getDefinitionLoc(); - if (SM.isInMainFile(SM.getExpansionLoc(DefLoc))) + + // Builtin macros don't have useful locations and aren't needed in completion. + if (MI->isBuiltinMacro()) return true; - // Header guards are not interesting in index. Builtin macros don't have - // useful locations and are not needed for code completions. - if (MI->isUsedForHeaderGuard() || MI->isBuiltinMacro()) + + // Skip main-file symbols if we are not collecting them. + bool IsMainFileSymbol = SM.isInMainFile(SM.getExpansionLoc(DefLoc)); + if (IsMainFileSymbol && !Opts.CollectMainFileSymbols) + return false; + + // Also avoid storing predefined macros like __DBL_MIN__. + if (SM.isWrittenInBuiltinFile(DefLoc)) return true; // Mark the macro as referenced if this is a reference coming from the main @@ -405,7 +389,10 @@ bool SymbolCollector::handleMacroOccurence(const IdentifierInfo *Name, Symbol S; S.ID = std::move(*ID); S.Name = Name->getName(); - S.Flags |= Symbol::IndexedForCodeCompletion; + if (!IsMainFileSymbol) { + S.Flags |= Symbol::IndexedForCodeCompletion; + S.Flags |= Symbol::VisibleOutsideFile; + } S.SymInfo = index::getSymbolInfoForMacro(*MI); std::string FileURI; // FIXME: use the result to filter out symbols. @@ -420,22 +407,25 @@ bool SymbolCollector::handleMacroOccurence(const IdentifierInfo *Name, std::string Signature; std::string SnippetSuffix; getSignature(*CCS, &Signature, &SnippetSuffix); - - std::string Include; - if (Opts.CollectIncludePath && shouldCollectIncludePath(S.SymInfo.Kind)) { - if (auto Header = getIncludeHeader(Name->getName(), SM, - SM.getExpansionLoc(DefLoc), Opts)) - Include = std::move(*Header); - } S.Signature = Signature; S.CompletionSnippetSuffix = SnippetSuffix; - if (!Include.empty()) - S.IncludeHeaders.emplace_back(Include, 1); + IndexedMacros.insert(Name); + setIncludeLocation(S, DefLoc); Symbols.insert(S); return true; } +void SymbolCollector::setIncludeLocation(const Symbol &S, + SourceLocation Loc) { + if (Opts.CollectIncludePath) + if (shouldCollectIncludePath(S.SymInfo.Kind)) + // Use the expansion location to get the #include header since this is + // where the symbol is exposed. + IncludeFiles[S.ID] = + PP->getSourceManager().getDecomposedExpansionLoc(Loc).first; +} + void SymbolCollector::finish() { // At the end of the TU, add 1 to the refcount of all referenced symbols. auto IncRef = [this](const SymbolID &ID) { @@ -452,6 +442,14 @@ void SymbolCollector::finish() { } if (Opts.CollectMacro) { assert(PP); + // First, drop header guards. We can't identify these until EOF. + for (const IdentifierInfo *II : IndexedMacros) { + if (const auto *MI = PP->getMacroDefinition(II).getMacroInfo()) + if (auto ID = getSymbolID(*II, MI, PP->getSourceManager())) + if (MI->isUsedForHeaderGuard()) + Symbols.erase(*ID); + } + // Now increment refcounts. for (const IdentifierInfo *II : ReferencedMacros) { if (const auto *MI = PP->getMacroDefinition(II).getMacroInfo()) if (auto ID = getSymbolID(*II, MI, PP->getSourceManager())) @@ -459,6 +457,21 @@ void SymbolCollector::finish() { } } + // Fill in IncludeHeaders. + // We delay this until end of TU so header guards are all resolved. + // Symbols in slabs aren' mutable, so insert() has to walk all the strings :-( + llvm::SmallString<256> QName; + for (const auto &Entry : IncludeFiles) + if (const Symbol *S = Symbols.find(Entry.first)) { + QName = S->Scope; + QName.append(S->Name); + if (auto Header = getIncludeHeader(QName, Entry.second)) { + Symbol NewSym = *S; + NewSym.IncludeHeaders.push_back({*Header, 1}); + Symbols.insert(NewSym); + } + } + const auto &SM = ASTCtx->getSourceManager(); llvm::DenseMap<FileID, std::string> URICache; auto GetURI = [&](FileID FID) -> llvm::Optional<std::string> { @@ -476,7 +489,7 @@ void SymbolCollector::finish() { } return Found->second; }; - + // Populate Refs slab from DeclRefs. if (auto MainFileURI = GetURI(SM.getMainFileID())) { for (const auto &It : DeclRefs) { if (auto ID = getSymbolID(It.first)) { @@ -503,10 +516,11 @@ void SymbolCollector::finish() { ReferencedMacros.clear(); DeclRefs.clear(); FilesToIndexCache.clear(); + HeaderIsSelfContainedCache.clear(); + IncludeFiles.clear(); } -const Symbol *SymbolCollector::addDeclaration(const NamedDecl &ND, - SymbolID ID, +const Symbol *SymbolCollector::addDeclaration(const NamedDecl &ND, SymbolID ID, bool IsMainFileOnly) { auto &Ctx = ND.getASTContext(); auto &SM = Ctx.getSourceManager(); @@ -514,9 +528,11 @@ const Symbol *SymbolCollector::addDeclaration(const NamedDecl &ND, Symbol S; S.ID = std::move(ID); std::string QName = printQualifiedName(ND); - std::tie(S.Scope, S.Name) = splitQualifiedName(QName); // FIXME: this returns foo:bar: for objective-C methods, we prefer only foo: // for consistency with CodeCompletionString and a clean name/signature split. + std::tie(S.Scope, S.Name) = splitQualifiedName(QName); + std::string TemplateSpecializationArgs = printTemplateSpecializationArgs(ND); + S.TemplateSpecializationArgs = TemplateSpecializationArgs; // We collect main-file symbols, but do not use them for code completion. if (!IsMainFileOnly && isIndexedForCodeCompletion(ND, Ctx)) @@ -528,6 +544,7 @@ const Symbol *SymbolCollector::addDeclaration(const NamedDecl &ND, S.SymInfo = index::getSymbolInfo(&ND); std::string FileURI; auto Loc = findNameLoc(&ND); + assert(Loc.isValid() && "Invalid source location for NamedDecl"); // FIXME: use the result to filter out symbols. shouldIndexFile(SM, SM.getFileID(Loc), Opts, &FilesToIndexCache); if (auto DeclLoc = @@ -550,18 +567,13 @@ const Symbol *SymbolCollector::addDeclaration(const NamedDecl &ND, std::string Documentation = formatDocumentation(*CCS, getDocComment(Ctx, SymbolCompletion, /*CommentsFromHeaders=*/true)); - // For symbols not indexed for completion (class members), we also store their - // docs in the index, because Sema doesn't load the docs from the preamble, we - // rely on the index to get the docs. - // FIXME: this can be optimized by only storing the docs in dynamic index -- - // dynamic index should index these symbols when Sema completes a member - // completion. - S.Documentation = Documentation; if (!(S.Flags & Symbol::IndexedForCodeCompletion)) { + if (Opts.StoreAllDocumentation) + S.Documentation = Documentation; Symbols.insert(S); return Symbols.find(S.ID); } - + S.Documentation = Documentation; std::string Signature; std::string SnippetSuffix; getSignature(*CCS, &Signature, &SnippetSuffix); @@ -570,17 +582,6 @@ const Symbol *SymbolCollector::addDeclaration(const NamedDecl &ND, std::string ReturnType = getReturnType(*CCS); S.ReturnType = ReturnType; - std::string Include; - if (Opts.CollectIncludePath && shouldCollectIncludePath(S.SymInfo.Kind)) { - // Use the expansion location to get the #include header since this is - // where the symbol is exposed. - if (auto Header = getIncludeHeader( - QName, SM, SM.getExpansionLoc(ND.getLocation()), Opts)) - Include = std::move(*Header); - } - if (!Include.empty()) - S.IncludeHeaders.emplace_back(Include, 1); - llvm::Optional<OpaqueType> TypeStorage; if (S.Flags & Symbol::IndexedForCodeCompletion) { TypeStorage = OpaqueType::fromCompletionResult(*ASTCtx, SymbolCompletion); @@ -589,6 +590,7 @@ const Symbol *SymbolCollector::addDeclaration(const NamedDecl &ND, } Symbols.insert(S); + setIncludeLocation(S, ND.getLocation()); return Symbols.find(S.ID); } @@ -611,5 +613,90 @@ void SymbolCollector::addDefinition(const NamedDecl &ND, Symbols.insert(S); } +/// Gets a canonical include (URI of the header or <header> or "header") for +/// header of \p FID (which should usually be the *expansion* file). +/// Returns None if includes should not be inserted for this file. +llvm::Optional<std::string> +SymbolCollector::getIncludeHeader(llvm::StringRef QName, FileID FID) { + const SourceManager &SM = ASTCtx->getSourceManager(); + const FileEntry *FE = SM.getFileEntryForID(FID); + if (!FE || FE->getName().empty()) + return llvm::None; + llvm::StringRef Filename = FE->getName(); + // If a file is mapped by canonical headers, use that mapping, regardless + // of whether it's an otherwise-good header (header guards etc). + if (Opts.Includes) { + llvm::StringRef Canonical = Opts.Includes->mapHeader(Filename, QName); + // If we had a mapping, always use it. + if (Canonical.startswith("<") || Canonical.startswith("\"")) + return Canonical.str(); + if (Canonical != Filename) + return toURI(SM, Canonical, Opts); + } + if (!isSelfContainedHeader(FID)) { + // A .inc or .def file is often included into a real header to define + // symbols (e.g. LLVM tablegen files). + if (Filename.endswith(".inc") || Filename.endswith(".def")) + return getIncludeHeader(QName, SM.getFileID(SM.getIncludeLoc(FID))); + // Conservatively refuse to insert #includes to files without guards. + return llvm::None; + } + // Standard case: just insert the file itself. + return toURI(SM, Filename, Opts); +} + +bool SymbolCollector::isSelfContainedHeader(FileID FID) { + // The real computation (which will be memoized). + auto Compute = [&] { + const SourceManager &SM = ASTCtx->getSourceManager(); + const FileEntry *FE = SM.getFileEntryForID(FID); + if (!FE) + return false; + if (!PP->getHeaderSearchInfo().isFileMultipleIncludeGuarded(FE)) + return false; + // This pattern indicates that a header can't be used without + // particular preprocessor state, usually set up by another header. + if (isDontIncludeMeHeader(SM.getBufferData(FID))) + return false; + return true; + }; + + auto R = HeaderIsSelfContainedCache.try_emplace(FID, false); + if (R.second) + R.first->second = Compute(); + return R.first->second; +} + +// Is Line an #if or #ifdef directive? +static bool isIf(llvm::StringRef Line) { + Line = Line.ltrim(); + if (!Line.consume_front("#")) + return false; + Line = Line.ltrim(); + return Line.startswith("if"); +} +// Is Line an #error directive mentioning includes? +static bool isErrorAboutInclude(llvm::StringRef Line) { + Line = Line.ltrim(); + if (!Line.consume_front("#")) + return false; + Line = Line.ltrim(); + if (! Line.startswith("error")) + return false; + return Line.contains_lower("includ"); // Matches "include" or "including". +} + +bool SymbolCollector::isDontIncludeMeHeader(llvm::StringRef Content) { + llvm::StringRef Line; + // Only sniff up to 100 lines or 10KB. + Content = Content.take_front(100*100); + for (unsigned I = 0; I < 100 && !Content.empty(); ++I) { + std::tie(Line, Content) = Content.split('\n'); + if (isIf(Line) && isErrorAboutInclude(Content.split('\n').first)) + return true; + } + return false; +} + } // namespace clangd } // namespace clang |