summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKadir Cetinkaya <kadircet@google.com>2019-01-10 17:03:04 +0000
committerKadir Cetinkaya <kadircet@google.com>2019-01-10 17:03:04 +0000
commit49e26a148ee0cf103a8de81f3fa7b91e146231d4 (patch)
tree7b5d4f150fff62ebaaf8862a1accef70137f1db5
parentb6a07261c461a738f1bdbd5eba3450badfc60d77 (diff)
[clangd] Introduce loading of shards within auto-index
Summary: Whenever a change happens on a CDB, load shards associated with that CDB before issuing re-index actions. Reviewers: ilya-biryukov Reviewed By: ilya-biryukov Subscribers: ioeric, MaskRay, jkorous, arphaman, cfe-commits Differential Revision: https://reviews.llvm.org/D55224 git-svn-id: https://llvm.org/svn/llvm-project/clang-tools-extra/trunk@350847 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--clangd/index/Background.cpp268
-rw-r--r--clangd/index/Background.h16
-rw-r--r--test/clangd/background-index.test3
-rw-r--r--unittests/clangd/BackgroundIndexTests.cpp81
4 files changed, 298 insertions, 70 deletions
diff --git a/clangd/index/Background.cpp b/clangd/index/Background.cpp
index 6a3eace6..88e76777 100644
--- a/clangd/index/Background.cpp
+++ b/clangd/index/Background.cpp
@@ -115,6 +115,19 @@ createFileFilter(const llvm::StringMap<FileDigest> &FileDigests,
};
}
+// We cannot use vfs->makeAbsolute because Cmd.FileName is either absolute or
+// relative to Cmd.Directory, which might not be the same as current working
+// directory.
+llvm::SmallString<128> getAbsolutePath(const tooling::CompileCommand &Cmd) {
+ llvm::SmallString<128> AbsolutePath;
+ if (llvm::sys::path::is_absolute(Cmd.Filename)) {
+ AbsolutePath = Cmd.Filename;
+ } else {
+ AbsolutePath = Cmd.Directory;
+ llvm::sys::path::append(AbsolutePath, Cmd.Filename);
+ }
+ return AbsolutePath;
+}
} // namespace
BackgroundIndex::BackgroundIndex(
@@ -204,40 +217,33 @@ void BackgroundIndex::enqueue(const std::vector<std::string> &ChangedFiles) {
[this, ChangedFiles] {
trace::Span Tracer("BackgroundIndexEnqueue");
// We're doing this asynchronously, because we'll read shards here too.
- // FIXME: read shards here too.
-
log("Enqueueing {0} commands for indexing", ChangedFiles.size());
SPAN_ATTACH(Tracer, "files", int64_t(ChangedFiles.size()));
- // We shuffle the files because processing them in a random order should
- // quickly give us good coverage of headers in the project.
- std::vector<unsigned> Permutation(ChangedFiles.size());
- std::iota(Permutation.begin(), Permutation.end(), 0);
- std::mt19937 Generator(std::random_device{}());
- std::shuffle(Permutation.begin(), Permutation.end(), Generator);
-
- for (const unsigned I : Permutation)
- enqueue(ChangedFiles[I]);
+ auto NeedsReIndexing = loadShards(std::move(ChangedFiles));
+ // Run indexing for files that need to be updated.
+ std::shuffle(NeedsReIndexing.begin(), NeedsReIndexing.end(),
+ std::mt19937(std::random_device{}()));
+ for (auto &Elem : NeedsReIndexing)
+ enqueue(std::move(Elem.first), Elem.second);
},
ThreadPriority::Normal);
}
-void BackgroundIndex::enqueue(const std::string &File) {
- ProjectInfo Project;
- if (auto Cmd = CDB.getCompileCommand(File, &Project)) {
- auto *Storage = IndexStorageFactory(Project.SourceRoot);
- // Set priority to low, since background indexing is a long running
- // task we do not want to eat up cpu when there are any other high
- // priority threads.
- enqueueTask(Bind(
- [this, File, Storage](tooling::CompileCommand Cmd) {
- Cmd.CommandLine.push_back("-resource-dir=" + ResourceDir);
- if (auto Error = index(std::move(Cmd), Storage))
- log("Indexing {0} failed: {1}", File, std::move(Error));
- },
- std::move(*Cmd)),
- ThreadPriority::Low);
- }
+void BackgroundIndex::enqueue(tooling::CompileCommand Cmd,
+ BackgroundIndexStorage *Storage) {
+ enqueueTask(Bind(
+ [this, Storage](tooling::CompileCommand Cmd) {
+ Cmd.CommandLine.push_back("-resource-dir=" + ResourceDir);
+ // We can't use llvm::StringRef here since we are going to
+ // move from Cmd during the call below.
+ const std::string FileName = Cmd.Filename;
+ if (auto Error = index(std::move(Cmd), Storage))
+ elog("Indexing {0} failed: {1}", FileName,
+ std::move(Error));
+ },
+ std::move(Cmd)),
+ ThreadPriority::Low);
}
void BackgroundIndex::enqueueTask(Task T, ThreadPriority Priority) {
@@ -299,22 +305,22 @@ void BackgroundIndex::update(llvm::StringRef MainFile, IndexFileIn Index,
}
// Build and store new slabs for each updated file.
- for (const auto &F : Files) {
- llvm::StringRef Path = F.first();
- vlog("Update symbols in {0}", Path);
+ for (const auto &I : *Index.Sources) {
+ std::string Path = URICache.resolve(I.first());
SymbolSlab::Builder Syms;
RefSlab::Builder Refs;
- for (const auto *S : F.second.Symbols)
- Syms.insert(*S);
- for (const auto *R : F.second.Refs)
- Refs.insert(RefToIDs[R], *R);
-
+ auto FileIt = Files.find(Path);
+ if (FileIt != Files.end()) {
+ auto &F = *FileIt;
+ for (const auto *S : F.second.Symbols)
+ Syms.insert(*S);
+ for (const auto *R : F.second.Refs)
+ Refs.insert(RefToIDs[R], *R);
+ }
auto SS = llvm::make_unique<SymbolSlab>(std::move(Syms).build());
auto RS = llvm::make_unique<RefSlab>(std::move(Refs).build());
auto IG = llvm::make_unique<IncludeGraph>(
getSubGraph(URI::create(Path), Index.Sources.getValue()));
-
- auto Hash = FilesToUpdate.lookup(Path);
// We need to store shards before updating the index, since the latter
// consumes slabs.
if (IndexStorage) {
@@ -327,13 +333,19 @@ void BackgroundIndex::update(llvm::StringRef MainFile, IndexFileIn Index,
elog("Failed to write background-index shard for file {0}: {1}", Path,
std::move(Error));
}
-
- std::lock_guard<std::mutex> Lock(DigestsMu);
- // This can override a newer version that is added in another thread,
- // if this thread sees the older version but finishes later. This should be
- // rare in practice.
- IndexedFileDigests[Path] = Hash;
- IndexedSymbols.update(Path, std::move(SS), std::move(RS));
+ {
+ std::lock_guard<std::mutex> Lock(DigestsMu);
+ auto Hash = I.second.Digest;
+ // Skip if file is already up to date.
+ auto DigestIt = IndexedFileDigests.try_emplace(Path);
+ if (!DigestIt.second && DigestIt.first->second == Hash)
+ continue;
+ DigestIt.first->second = Hash;
+ // This can override a newer version that is added in another thread, if
+ // this thread sees the older version but finishes later. This should be
+ // rare in practice.
+ IndexedSymbols.update(Path, std::move(SS), std::move(RS));
+ }
}
}
@@ -342,11 +354,11 @@ void BackgroundIndex::buildIndex() {
while (true) {
{
std::unique_lock<std::mutex> Lock(IndexMu);
- if (ShouldStop) // Avoid waiting if stopped.
+ if (ShouldStop) // Avoid waiting if stopped.
break;
// Wait until this is notified to stop or `BuildIndexPeriodMs` has past.
IndexCV.wait_for(Lock, std::chrono::milliseconds(BuildIndexPeriodMs));
- if (ShouldStop) // Avoid rebuilding index if stopped.
+ if (ShouldStop) // Avoid rebuilding index if stopped.
break;
}
if (!SymbolsUpdatedSinceLastIndex.exchange(false))
@@ -365,13 +377,7 @@ llvm::Error BackgroundIndex::index(tooling::CompileCommand Cmd,
BackgroundIndexStorage *IndexStorage) {
trace::Span Tracer("BackgroundIndex");
SPAN_ATTACH(Tracer, "file", Cmd.Filename);
- llvm::SmallString<128> AbsolutePath;
- if (llvm::sys::path::is_absolute(Cmd.Filename)) {
- AbsolutePath = Cmd.Filename;
- } else {
- AbsolutePath = Cmd.Directory;
- llvm::sys::path::append(AbsolutePath, Cmd.Filename);
- }
+ auto AbsolutePath = getAbsolutePath(Cmd);
auto FS = FSProvider.getFileSystem();
auto Buf = FS->getBufferForFile(AbsolutePath);
@@ -383,11 +389,6 @@ llvm::Error BackgroundIndex::index(tooling::CompileCommand Cmd,
llvm::StringMap<FileDigest> DigestsSnapshot;
{
std::lock_guard<std::mutex> Lock(DigestsMu);
- if (IndexedFileDigests.lookup(AbsolutePath) == Hash) {
- vlog("No need to index {0}, already up to date", AbsolutePath);
- return llvm::Error::success();
- }
-
DigestsSnapshot = IndexedFileDigests;
}
@@ -437,8 +438,8 @@ llvm::Error BackgroundIndex::index(tooling::CompileCommand Cmd,
"IndexingAction failed: has uncompilable errors");
}
- assert(Index.Symbols && Index.Refs && Index.Sources
- && "Symbols, Refs and Sources must be set.");
+ assert(Index.Symbols && Index.Refs && Index.Sources &&
+ "Symbols, Refs and Sources must be set.");
log("Indexed {0} ({1} symbols, {2} refs, {3} files)",
Inputs.CompileCommand.Filename, Index.Symbols->size(),
@@ -448,12 +449,6 @@ llvm::Error BackgroundIndex::index(tooling::CompileCommand Cmd,
SPAN_ATTACH(Tracer, "sources", int(Index.Sources->size()));
update(AbsolutePath, std::move(Index), FilesToUpdate, IndexStorage);
- {
- // Make sure hash for the main file is always updated even if there is no
- // index data in it.
- std::lock_guard<std::mutex> Lock(DigestsMu);
- IndexedFileDigests[AbsolutePath] = Hash;
- }
if (BuildIndexPeriodMs > 0)
SymbolsUpdatedSinceLastIndex = true;
@@ -464,5 +459,146 @@ llvm::Error BackgroundIndex::index(tooling::CompileCommand Cmd,
return llvm::Error::success();
}
+std::vector<BackgroundIndex::Source>
+BackgroundIndex::loadShard(const tooling::CompileCommand &Cmd,
+ BackgroundIndexStorage *IndexStorage,
+ llvm::StringSet<> &LoadedShards) {
+ struct ShardInfo {
+ std::string AbsolutePath;
+ std::unique_ptr<IndexFileIn> Shard;
+ FileDigest Digest;
+ };
+ std::vector<ShardInfo> IntermediateSymbols;
+ // Make sure we don't have duplicate elements in the queue. Keys are absolute
+ // paths.
+ llvm::StringSet<> InQueue;
+ auto FS = FSProvider.getFileSystem();
+ // Dependencies of this TU, paired with the information about whether they
+ // need to be re-indexed or not.
+ std::vector<Source> Dependencies;
+ std::string AbsolutePath = getAbsolutePath(Cmd).str();
+ // Up until we load the shard related to a dependency it needs to be
+ // re-indexed.
+ Dependencies.emplace_back(AbsolutePath, true);
+ InQueue.insert(AbsolutePath);
+ // Goes over each dependency.
+ for (size_t CurrentDependency = 0; CurrentDependency < Dependencies.size();
+ CurrentDependency++) {
+ llvm::StringRef CurDependencyPath = Dependencies[CurrentDependency].Path;
+ // If we have already seen this shard before(either loaded or failed) don't
+ // re-try again. Since the information in the shard won't change from one TU
+ // to another.
+ if (!LoadedShards.try_emplace(CurDependencyPath).second) {
+ // If the dependency needs to be re-indexed, first occurence would already
+ // have detected that, so we don't need to issue it again.
+ Dependencies[CurrentDependency].NeedsReIndexing = false;
+ continue;
+ }
+
+ auto Shard = IndexStorage->loadShard(CurDependencyPath);
+ if (!Shard || !Shard->Sources) {
+ // File will be returned as requiring re-indexing to caller.
+ vlog("Failed to load shard: {0}", CurDependencyPath);
+ continue;
+ }
+ // These are the edges in the include graph for current dependency.
+ for (const auto &I : *Shard->Sources) {
+ auto U = URI::parse(I.getKey());
+ if (!U)
+ continue;
+ auto AbsolutePath = URI::resolve(*U, CurDependencyPath);
+ if (!AbsolutePath)
+ continue;
+ // Add file as dependency if haven't seen before.
+ if (InQueue.try_emplace(*AbsolutePath).second)
+ Dependencies.emplace_back(*AbsolutePath, true);
+ // The node contains symbol information only for current file, the rest is
+ // just edges.
+ if (*AbsolutePath != CurDependencyPath)
+ continue;
+
+ // We found source file info for current dependency.
+ assert(I.getValue().Digest != FileDigest{{0}} && "Digest is empty?");
+ ShardInfo SI;
+ SI.AbsolutePath = CurDependencyPath;
+ SI.Shard = std::move(Shard);
+ SI.Digest = I.getValue().Digest;
+ IntermediateSymbols.push_back(std::move(SI));
+ // Check if the source needs re-indexing.
+ // Get the digest, skip it if file doesn't exist.
+ auto Buf = FS->getBufferForFile(CurDependencyPath);
+ if (!Buf) {
+ elog("Couldn't get buffer for file: {0}: {1}", CurDependencyPath,
+ Buf.getError().message());
+ continue;
+ }
+ // If digests match then dependency doesn't need re-indexing.
+ Dependencies[CurrentDependency].NeedsReIndexing =
+ digest(Buf->get()->getBuffer()) != I.getValue().Digest;
+ }
+ }
+ // Load shard information into background-index.
+ {
+ std::lock_guard<std::mutex> Lock(DigestsMu);
+ // This can override a newer version that is added in another thread,
+ // if this thread sees the older version but finishes later. This
+ // should be rare in practice.
+ for (const ShardInfo &SI : IntermediateSymbols) {
+ auto SS =
+ SI.Shard->Symbols
+ ? llvm::make_unique<SymbolSlab>(std::move(*SI.Shard->Symbols))
+ : nullptr;
+ auto RS = SI.Shard->Refs
+ ? llvm::make_unique<RefSlab>(std::move(*SI.Shard->Refs))
+ : nullptr;
+ IndexedFileDigests[SI.AbsolutePath] = SI.Digest;
+ IndexedSymbols.update(SI.AbsolutePath, std::move(SS), std::move(RS));
+ }
+ }
+
+ return Dependencies;
+}
+
+// Goes over each changed file and loads them from index. Returns the list of
+// TUs that had out-of-date/no shards.
+std::vector<std::pair<tooling::CompileCommand, BackgroundIndexStorage *>>
+BackgroundIndex::loadShards(std::vector<std::string> ChangedFiles) {
+ std::vector<std::pair<tooling::CompileCommand, BackgroundIndexStorage *>>
+ NeedsReIndexing;
+ // Keeps track of the files that will be reindexed, to make sure we won't
+ // re-index same dependencies more than once. Keys are AbsolutePaths.
+ llvm::StringSet<> FilesToIndex;
+ // Keeps track of the loaded shards to make sure we don't perform redundant
+ // disk IO. Keys are absolute paths.
+ llvm::StringSet<> LoadedShards;
+ for (const auto &File : ChangedFiles) {
+ ProjectInfo PI;
+ auto Cmd = CDB.getCompileCommand(File, &PI);
+ if (!Cmd)
+ continue;
+ BackgroundIndexStorage *IndexStorage = IndexStorageFactory(PI.SourceRoot);
+ auto Dependencies = loadShard(*Cmd, IndexStorage, LoadedShards);
+ for (const auto &Dependency : Dependencies) {
+ if (!Dependency.NeedsReIndexing || FilesToIndex.count(Dependency.Path))
+ continue;
+ // FIXME: Currently, we simply schedule indexing on a TU whenever any of
+ // its dependencies needs re-indexing. We might do it smarter by figuring
+ // out a minimal set of TUs that will cover all the stale dependencies.
+ vlog("Enqueueing TU {0} because its dependency {1} needs re-indexing.",
+ Cmd->Filename, Dependency.Path);
+ NeedsReIndexing.push_back({std::move(*Cmd), IndexStorage});
+ // Mark all of this TU's dependencies as to-be-indexed so that we won't
+ // try to re-index those.
+ for (const auto &Dependency : Dependencies)
+ FilesToIndex.insert(Dependency.Path);
+ break;
+ }
+ }
+ vlog("Loaded all shards");
+ reset(IndexedSymbols.buildIndex(IndexType::Light, DuplicateHandling::Merge));
+
+ return NeedsReIndexing;
+}
+
} // namespace clangd
} // namespace clang
diff --git a/clangd/index/Background.h b/clangd/index/Background.h
index 3318f4fc..c9b290c6 100644
--- a/clangd/index/Background.h
+++ b/clangd/index/Background.h
@@ -81,7 +81,6 @@ public:
// The indexing happens in a background thread, so the symbols will be
// available sometime later.
void enqueue(const std::vector<std::string> &ChangedFiles);
- void enqueue(const std::string &File);
// Cause background threads to stop after ther current task, any remaining
// tasks will be discarded.
@@ -118,6 +117,21 @@ private:
std::mutex DigestsMu;
BackgroundIndexStorage::Factory IndexStorageFactory;
+ struct Source {
+ std::string Path;
+ bool NeedsReIndexing;
+ Source(llvm::StringRef Path, bool NeedsReIndexing)
+ : Path(Path), NeedsReIndexing(NeedsReIndexing) {}
+ };
+ // Loads the shards for a single TU and all of its dependencies. Returns the
+ // list of sources and whether they need to be re-indexed.
+ std::vector<Source> loadShard(const tooling::CompileCommand &Cmd,
+ BackgroundIndexStorage *IndexStorage,
+ llvm::StringSet<> &LoadedShards);
+ // Tries to load shards for the ChangedFiles.
+ std::vector<std::pair<tooling::CompileCommand, BackgroundIndexStorage *>>
+ loadShards(std::vector<std::string> ChangedFiles);
+ void enqueue(tooling::CompileCommand Cmd, BackgroundIndexStorage *Storage);
// queue management
using Task = std::function<void()>;
diff --git a/test/clangd/background-index.test b/test/clangd/background-index.test
index 7826ef27..34c419ac 100644
--- a/test/clangd/background-index.test
+++ b/test/clangd/background-index.test
@@ -16,6 +16,5 @@
# RUN: ls %t/.clangd-index/foo.cpp.*.idx
# Test the index is read from disk: delete code and restart clangd.
-# FIXME: This test currently fails as we don't read the index yet.
# RUN: rm %t/foo.cpp
-# RUN: clangd -background-index -lit-test < %t/definition.jsonrpc | not FileCheck %t/definition.jsonrpc
+# RUN: clangd -background-index -lit-test < %t/definition.jsonrpc | FileCheck %t/definition.jsonrpc
diff --git a/unittests/clangd/BackgroundIndexTests.cpp b/unittests/clangd/BackgroundIndexTests.cpp
index c30968b5..c66f4429 100644
--- a/unittests/clangd/BackgroundIndexTests.cpp
+++ b/unittests/clangd/BackgroundIndexTests.cpp
@@ -9,6 +9,7 @@
using testing::_;
using testing::AllOf;
+using testing::Contains;
using testing::ElementsAre;
using testing::Not;
using testing::UnorderedElementsAre;
@@ -146,7 +147,7 @@ TEST_F(BackgroundIndexTest, IndexTwoFiles) {
FileURI("unittest:///root/B.cc")}));
}
-TEST_F(BackgroundIndexTest, ShardStorageWriteTest) {
+TEST_F(BackgroundIndexTest, ShardStorageTest) {
MockFSProvider FS;
FS.Files[testPath("root/A.h")] = R"cpp(
void common();
@@ -175,6 +176,16 @@ TEST_F(BackgroundIndexTest, ShardStorageWriteTest) {
EXPECT_EQ(CacheHits, 0U);
EXPECT_EQ(Storage.size(), 2U);
+ {
+ OverlayCDB CDB(/*Base=*/nullptr);
+ BackgroundIndex Idx(Context::empty(), "", FS, CDB,
+ [&](llvm::StringRef) { return &MSS; });
+ CDB.setCompileCommand(testPath("root"), Cmd);
+ ASSERT_TRUE(Idx.blockUntilIdleForTest());
+ }
+ EXPECT_EQ(CacheHits, 2U); // Check both A.cc and A.h loaded from cache.
+ EXPECT_EQ(Storage.size(), 2U);
+
auto ShardHeader = MSS.loadShard(testPath("root/A.h"));
EXPECT_NE(ShardHeader, nullptr);
EXPECT_THAT(
@@ -278,5 +289,73 @@ TEST_F(BackgroundIndexTest, DISABLED_PeriodicalIndex) {
EXPECT_THAT(runFuzzyFind(Idx, ""), ElementsAre(Named("Y")));
}
+TEST_F(BackgroundIndexTest, ShardStorageLoad) {
+ MockFSProvider FS;
+ FS.Files[testPath("root/A.h")] = R"cpp(
+ void common();
+ void f_b();
+ class A_CC {};
+ )cpp";
+ FS.Files[testPath("root/A.cc")] =
+ "#include \"A.h\"\nvoid g() { (void)common; }";
+
+ llvm::StringMap<std::string> Storage;
+ size_t CacheHits = 0;
+ MemoryShardStorage MSS(Storage, CacheHits);
+
+ tooling::CompileCommand Cmd;
+ Cmd.Filename = testPath("root/A.cc");
+ Cmd.Directory = testPath("root");
+ Cmd.CommandLine = {"clang++", testPath("root/A.cc")};
+ // Check nothing is loaded from Storage, but A.cc and A.h has been stored.
+ {
+ OverlayCDB CDB(/*Base=*/nullptr);
+ BackgroundIndex Idx(Context::empty(), "", FS, CDB,
+ [&](llvm::StringRef) { return &MSS; });
+ CDB.setCompileCommand(testPath("root/A.cc"), Cmd);
+ ASSERT_TRUE(Idx.blockUntilIdleForTest());
+ }
+
+ // Change header.
+ FS.Files[testPath("root/A.h")] = R"cpp(
+ void common();
+ void f_b();
+ class A_CC {};
+ class A_CCnew {};
+ )cpp";
+ {
+ OverlayCDB CDB(/*Base=*/nullptr);
+ BackgroundIndex Idx(Context::empty(), "", FS, CDB,
+ [&](llvm::StringRef) { return &MSS; });
+ CDB.setCompileCommand(testPath("root"), Cmd);
+ ASSERT_TRUE(Idx.blockUntilIdleForTest());
+ }
+ EXPECT_EQ(CacheHits, 2U); // Check both A.cc and A.h loaded from cache.
+
+ // Check if the new symbol has arrived.
+ auto ShardHeader = MSS.loadShard(testPath("root/A.h"));
+ EXPECT_NE(ShardHeader, nullptr);
+ EXPECT_THAT(*ShardHeader->Symbols, Contains(Named("A_CCnew")));
+
+ // Change source.
+ FS.Files[testPath("root/A.cc")] =
+ "#include \"A.h\"\nvoid g() { (void)common; }\nvoid f_b() {}";
+ {
+ CacheHits = 0;
+ OverlayCDB CDB(/*Base=*/nullptr);
+ BackgroundIndex Idx(Context::empty(), "", FS, CDB,
+ [&](llvm::StringRef) { return &MSS; });
+ CDB.setCompileCommand(testPath("root"), Cmd);
+ ASSERT_TRUE(Idx.blockUntilIdleForTest());
+ }
+ EXPECT_EQ(CacheHits, 2U); // Check both A.cc and A.h loaded from cache.
+
+ // Check if the new symbol has arrived.
+ auto ShardSource = MSS.loadShard(testPath("root/A.cc"));
+ EXPECT_NE(ShardHeader, nullptr);
+ EXPECT_THAT(*ShardSource->Symbols,
+ Contains(AllOf(Named("f_b"), Declared(), Defined())));
+}
+
} // namespace clangd
} // namespace clang