diff options
author | Kaushik Lingarkar <kaushikl@codeaurora.org> | 2021-11-09 13:34:15 -0800 |
---|---|---|
committer | Kaushik Lingarkar <kaushikl@codeaurora.org> | 2021-11-16 08:50:57 -0800 |
commit | 35fcc7c5373286596fb4e117157777e2d14f5850 (patch) | |
tree | 9e95ffd252165261c8326ea5cdbd278915e87576 | |
parent | 02f5982146b5fff4508cfea45723642c2470e33f (diff) |
AllChangesIndexer: Avoid scanning for change refs in each slice
Change refs are being scanned when a repos size is estimated. Avoid
re-scanning them in each project slice. With this change, we are
also able to remove duplicate code (to obtain number of changes in
a repository) in AllChangesIndexer that was pretty much identical
to ChangeNotes.
On a test-site with ~160k changes across 4 repos on NFS, with loose
refs equal to number of changes(created by notedb migration) and
with caches (change_kind, diff, diff_summary) populated, this change
helps bring down the reindex time from ~45mins to ~15mins. Also, if
the repos are repacked, this change does not degrade the performance.
On larger test-sites (~3.5m changes and ~15k projects) with similar
setup and state as the 160k site, this change brings down reindex
time from ~22hrs to ~6.5 hrs.
Change-Id: I4bf1d1918ffedb6ea901bf7cc8537f60696e2d58
-rw-r--r-- | java/com/google/gerrit/server/index/change/AllChangesIndexer.java | 68 | ||||
-rw-r--r-- | java/com/google/gerrit/server/notedb/ChangeNotes.java | 51 |
2 files changed, 59 insertions, 60 deletions
diff --git a/java/com/google/gerrit/server/index/change/AllChangesIndexer.java b/java/com/google/gerrit/server/index/change/AllChangesIndexer.java index 309c9154ed..4e403cbfd2 100644 --- a/java/com/google/gerrit/server/index/change/AllChangesIndexer.java +++ b/java/com/google/gerrit/server/index/change/AllChangesIndexer.java @@ -22,13 +22,11 @@ import static com.google.gerrit.server.git.QueueProvider.QueueType.BATCH; import com.google.common.base.Stopwatch; import com.google.common.flogger.FluentLogger; -import com.google.common.primitives.Ints; import com.google.common.util.concurrent.ListenableFuture; import com.google.common.util.concurrent.ListeningExecutorService; import com.google.common.util.concurrent.UncheckedExecutionException; import com.google.gerrit.entities.Change; import com.google.gerrit.entities.Project; -import com.google.gerrit.entities.RefNames; import com.google.gerrit.index.SiteIndexer; import com.google.gerrit.server.git.GitRepositoryManager; import com.google.gerrit.server.git.MultiProgressMonitor; @@ -37,6 +35,7 @@ import com.google.gerrit.server.index.IndexExecutor; import com.google.gerrit.server.index.OnlineReindexMode; import com.google.gerrit.server.notedb.ChangeNotes; import com.google.gerrit.server.notedb.ChangeNotes.Factory.ChangeNotesResult; +import com.google.gerrit.server.notedb.ChangeNotes.Factory.ScanResult; import com.google.gerrit.server.project.ProjectCache; import com.google.gerrit.server.query.change.ChangeData; import com.google.inject.Inject; @@ -44,11 +43,9 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.List; -import java.util.Objects; import java.util.concurrent.Callable; import java.util.concurrent.RejectedExecutionException; import java.util.concurrent.atomic.AtomicBoolean; -import org.eclipse.jgit.errors.RepositoryNotFoundException; import org.eclipse.jgit.lib.ProgressMonitor; import org.eclipse.jgit.lib.Repository; import org.eclipse.jgit.lib.TextProgressMonitor; @@ -89,11 +86,13 @@ public class AllChangesIndexer extends SiteIndexer<Change.Id, ChangeData, Change private final Project.NameKey name; private final int slice; private final int slices; + private final ScanResult sr; - ProjectSlice(Project.NameKey name, int slice, int slices) { + ProjectSlice(Project.NameKey name, int slice, int slices, ScanResult sr) { this.name = name; this.slice = slice; this.slices = slices; + this.sr = sr; } public Project.NameKey getName() { @@ -107,6 +106,10 @@ public class AllChangesIndexer extends SiteIndexer<Change.Id, ChangeData, Change public int getSlices() { return slices; } + + public ScanResult getScanResult() { + return sr; + } } @Override @@ -133,7 +136,8 @@ public class AllChangesIndexer extends SiteIndexer<Change.Id, ChangeData, Change // which had 2 big projects, many middle sized ones, and lots of smaller ones, the // splitting of repos into smaller parts reduced indexing time from 1.5 hours to 55 minutes // in 2020. - int size = estimateSize(repo); + ScanResult sr = ChangeNotes.Factory.scanChangeIds(repo); + int size = sr.all().size(); if (size == 0) { pm.update(1); continue; @@ -144,7 +148,7 @@ public class AllChangesIndexer extends SiteIndexer<Change.Id, ChangeData, Change verboseWriter.println("Submitting " + name + " for indexing in " + slices + " slices"); } for (int slice = 0; slice < slices; slice++) { - projectSlices.add(new ProjectSlice(name, slice, slices)); + projectSlices.add(new ProjectSlice(name, slice, slices, sr)); } } catch (IOException e) { logger.atSevere().withCause(e).log("Error collecting project %s", name); @@ -169,19 +173,6 @@ public class AllChangesIndexer extends SiteIndexer<Change.Id, ChangeData, Change return indexAll(index, projectSlices); } - private int estimateSize(Repository repo) throws IOException { - // Estimate size based on IDs that show up in ref names. This is not perfect, since patch set - // refs may exist for changes whose metadata was never successfully stored. But that's ok, as - // the estimate is just used as a heuristic for sorting projects. - long size = - repo.getRefDatabase().getRefsByPrefix(RefNames.REFS_CHANGES).stream() - .map(r -> Change.Id.fromRef(r.getName())) - .filter(Objects::nonNull) - .distinct() - .count(); - return Ints.saturatedCast(size); - } - private SiteIndexer.Result indexAll(ChangeIndex index, List<ProjectSlice> projectSlices) { Stopwatch sw = Stopwatch.createStarted(); MultiProgressMonitor mpm = new MultiProgressMonitor(progressOut, "Reindexing changes"); @@ -204,6 +195,7 @@ public class AllChangesIndexer extends SiteIndexer<Change.Id, ChangeData, Change name, slice, slices, + projectSlice.getScanResult(), doneTask, failedTask)); String description = "project " + name + " (" + slice + "/" + slices + ")"; @@ -242,7 +234,13 @@ public class AllChangesIndexer extends SiteIndexer<Change.Id, ChangeData, Change public Callable<Void> reindexProject( ChangeIndexer indexer, Project.NameKey project, Task done, Task failed) { - return reindexProject(indexer, project, 0, 1, done, failed); + try (Repository repo = repoManager.openRepository(project)) { + return reindexProject( + indexer, project, 0, 1, ChangeNotes.Factory.scanChangeIds(repo), done, failed); + } catch (IOException e) { + logger.atSevere().log(e.getMessage()); + return null; + } } public Callable<Void> reindexProject( @@ -250,9 +248,10 @@ public class AllChangesIndexer extends SiteIndexer<Change.Id, ChangeData, Change Project.NameKey project, int slice, int slices, + ScanResult sr, Task done, Task failed) { - return new ProjectIndexer(indexer, project, slice, slices, done, failed); + return new ProjectIndexer(indexer, project, slice, slices, sr, done, failed); } private class ProjectIndexer implements Callable<Void> { @@ -260,6 +259,7 @@ public class AllChangesIndexer extends SiteIndexer<Change.Id, ChangeData, Change private final Project.NameKey project; private final int slice; private final int slices; + private final ScanResult sr; private final ProgressMonitor done; private final ProgressMonitor failed; @@ -268,32 +268,28 @@ public class AllChangesIndexer extends SiteIndexer<Change.Id, ChangeData, Change Project.NameKey project, int slice, int slices, + ScanResult sr, ProgressMonitor done, ProgressMonitor failed) { this.indexer = indexer; this.project = project; this.slice = slice; this.slices = slices; + this.sr = sr; this.done = done; this.failed = failed; } @Override public Void call() throws Exception { - try (Repository repo = repoManager.openRepository(project)) { - OnlineReindexMode.begin(); - - // Order of scanning changes is undefined. This is ok if we assume that packfile locality is - // not important for indexing, since sites should have a fully populated DiffSummary cache. - // It does mean that reindexing after invalidating the DiffSummary cache will be expensive, - // but the goal is to invalidate that cache as infrequently as we possibly can. And besides, - // we don't have concrete proof that improving packfile locality would help. - notesFactory.scan(repo, project, id -> (id.get() % slices) == slice).forEach(r -> index(r)); - } catch (RepositoryNotFoundException rnfe) { - logger.atSevere().log(rnfe.getMessage()); - } finally { - OnlineReindexMode.end(); - } + OnlineReindexMode.begin(); + // Order of scanning changes is undefined. This is ok if we assume that packfile locality is + // not important for indexing, since sites should have a fully populated DiffSummary cache. + // It does mean that reindexing after invalidating the DiffSummary cache will be expensive, + // but the goal is to invalidate that cache as infrequently as we possibly can. And besides, + // we don't have concrete proof that improving packfile locality would help. + notesFactory.scan(sr, project, id -> (id.get() % slices) == slice).forEach(r -> index(r)); + OnlineReindexMode.end(); return null; } diff --git a/java/com/google/gerrit/server/notedb/ChangeNotes.java b/java/com/google/gerrit/server/notedb/ChangeNotes.java index 36a61cc020..51acf16e52 100644 --- a/java/com/google/gerrit/server/notedb/ChangeNotes.java +++ b/java/com/google/gerrit/server/notedb/ChangeNotes.java @@ -106,6 +106,29 @@ public class ChangeNotes extends AbstractChangeNotes<ChangeNotes> { this.projectCache = projectCache; } + @AutoValue + public abstract static class ScanResult { + abstract ImmutableSet<Change.Id> fromPatchSetRefs(); + + abstract ImmutableSet<Change.Id> fromMetaRefs(); + + public SetView<Change.Id> all() { + return Sets.union(fromPatchSetRefs(), fromMetaRefs()); + } + } + + public static ScanResult scanChangeIds(Repository repo) throws IOException { + ImmutableSet.Builder<Change.Id> fromPs = ImmutableSet.builder(); + ImmutableSet.Builder<Change.Id> fromMeta = ImmutableSet.builder(); + for (Ref r : repo.getRefDatabase().getRefsByPrefix(RefNames.REFS_CHANGES)) { + Change.Id id = Change.Id.fromRef(r.getName()); + if (id != null) { + (r.getName().endsWith(RefNames.META_SUFFIX) ? fromMeta : fromPs).add(id); + } + } + return new AutoValue_ChangeNotes_Factory_ScanResult(fromPs.build(), fromMeta.build()); + } + public ChangeNotes createChecked(Change c) { return createChecked(c.getProject(), c.getId()); } @@ -213,8 +236,11 @@ public class ChangeNotes extends AbstractChangeNotes<ChangeNotes> { public Stream<ChangeNotesResult> scan( Repository repo, Project.NameKey project, Predicate<Change.Id> changeIdPredicate) throws IOException { - ScanResult sr = scanChangeIds(repo); + return scan(scanChangeIds(repo), project, changeIdPredicate); + } + public Stream<ChangeNotesResult> scan( + ScanResult sr, Project.NameKey project, Predicate<Change.Id> changeIdPredicate) { Stream<Change.Id> idStream = sr.all().stream(); if (changeIdPredicate != null) { idStream = idStream.filter(changeIdPredicate); @@ -286,29 +312,6 @@ public class ChangeNotes extends AbstractChangeNotes<ChangeNotes> { @Nullable abstract ChangeNotes maybeNotes(); } - - @AutoValue - abstract static class ScanResult { - abstract ImmutableSet<Change.Id> fromPatchSetRefs(); - - abstract ImmutableSet<Change.Id> fromMetaRefs(); - - SetView<Change.Id> all() { - return Sets.union(fromPatchSetRefs(), fromMetaRefs()); - } - } - - private static ScanResult scanChangeIds(Repository repo) throws IOException { - ImmutableSet.Builder<Change.Id> fromPs = ImmutableSet.builder(); - ImmutableSet.Builder<Change.Id> fromMeta = ImmutableSet.builder(); - for (Ref r : repo.getRefDatabase().getRefsByPrefix(RefNames.REFS_CHANGES)) { - Change.Id id = Change.Id.fromRef(r.getName()); - if (id != null) { - (r.getName().endsWith(RefNames.META_SUFFIX) ? fromMeta : fromPs).add(id); - } - } - return new AutoValue_ChangeNotes_Factory_ScanResult(fromPs.build(), fromMeta.build()); - } } private final boolean shouldExist; |