summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKaushik Lingarkar <kaushikl@codeaurora.org>2021-11-09 13:34:15 -0800
committerKaushik Lingarkar <kaushikl@codeaurora.org>2021-11-16 08:50:57 -0800
commit35fcc7c5373286596fb4e117157777e2d14f5850 (patch)
tree9e95ffd252165261c8326ea5cdbd278915e87576
parent02f5982146b5fff4508cfea45723642c2470e33f (diff)
AllChangesIndexer: Avoid scanning for change refs in each slice
Change refs are being scanned when a repos size is estimated. Avoid re-scanning them in each project slice. With this change, we are also able to remove duplicate code (to obtain number of changes in a repository) in AllChangesIndexer that was pretty much identical to ChangeNotes. On a test-site with ~160k changes across 4 repos on NFS, with loose refs equal to number of changes(created by notedb migration) and with caches (change_kind, diff, diff_summary) populated, this change helps bring down the reindex time from ~45mins to ~15mins. Also, if the repos are repacked, this change does not degrade the performance. On larger test-sites (~3.5m changes and ~15k projects) with similar setup and state as the 160k site, this change brings down reindex time from ~22hrs to ~6.5 hrs. Change-Id: I4bf1d1918ffedb6ea901bf7cc8537f60696e2d58
-rw-r--r--java/com/google/gerrit/server/index/change/AllChangesIndexer.java68
-rw-r--r--java/com/google/gerrit/server/notedb/ChangeNotes.java51
2 files changed, 59 insertions, 60 deletions
diff --git a/java/com/google/gerrit/server/index/change/AllChangesIndexer.java b/java/com/google/gerrit/server/index/change/AllChangesIndexer.java
index 309c9154ed..4e403cbfd2 100644
--- a/java/com/google/gerrit/server/index/change/AllChangesIndexer.java
+++ b/java/com/google/gerrit/server/index/change/AllChangesIndexer.java
@@ -22,13 +22,11 @@ import static com.google.gerrit.server.git.QueueProvider.QueueType.BATCH;
import com.google.common.base.Stopwatch;
import com.google.common.flogger.FluentLogger;
-import com.google.common.primitives.Ints;
import com.google.common.util.concurrent.ListenableFuture;
import com.google.common.util.concurrent.ListeningExecutorService;
import com.google.common.util.concurrent.UncheckedExecutionException;
import com.google.gerrit.entities.Change;
import com.google.gerrit.entities.Project;
-import com.google.gerrit.entities.RefNames;
import com.google.gerrit.index.SiteIndexer;
import com.google.gerrit.server.git.GitRepositoryManager;
import com.google.gerrit.server.git.MultiProgressMonitor;
@@ -37,6 +35,7 @@ import com.google.gerrit.server.index.IndexExecutor;
import com.google.gerrit.server.index.OnlineReindexMode;
import com.google.gerrit.server.notedb.ChangeNotes;
import com.google.gerrit.server.notedb.ChangeNotes.Factory.ChangeNotesResult;
+import com.google.gerrit.server.notedb.ChangeNotes.Factory.ScanResult;
import com.google.gerrit.server.project.ProjectCache;
import com.google.gerrit.server.query.change.ChangeData;
import com.google.inject.Inject;
@@ -44,11 +43,9 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
-import java.util.Objects;
import java.util.concurrent.Callable;
import java.util.concurrent.RejectedExecutionException;
import java.util.concurrent.atomic.AtomicBoolean;
-import org.eclipse.jgit.errors.RepositoryNotFoundException;
import org.eclipse.jgit.lib.ProgressMonitor;
import org.eclipse.jgit.lib.Repository;
import org.eclipse.jgit.lib.TextProgressMonitor;
@@ -89,11 +86,13 @@ public class AllChangesIndexer extends SiteIndexer<Change.Id, ChangeData, Change
private final Project.NameKey name;
private final int slice;
private final int slices;
+ private final ScanResult sr;
- ProjectSlice(Project.NameKey name, int slice, int slices) {
+ ProjectSlice(Project.NameKey name, int slice, int slices, ScanResult sr) {
this.name = name;
this.slice = slice;
this.slices = slices;
+ this.sr = sr;
}
public Project.NameKey getName() {
@@ -107,6 +106,10 @@ public class AllChangesIndexer extends SiteIndexer<Change.Id, ChangeData, Change
public int getSlices() {
return slices;
}
+
+ public ScanResult getScanResult() {
+ return sr;
+ }
}
@Override
@@ -133,7 +136,8 @@ public class AllChangesIndexer extends SiteIndexer<Change.Id, ChangeData, Change
// which had 2 big projects, many middle sized ones, and lots of smaller ones, the
// splitting of repos into smaller parts reduced indexing time from 1.5 hours to 55 minutes
// in 2020.
- int size = estimateSize(repo);
+ ScanResult sr = ChangeNotes.Factory.scanChangeIds(repo);
+ int size = sr.all().size();
if (size == 0) {
pm.update(1);
continue;
@@ -144,7 +148,7 @@ public class AllChangesIndexer extends SiteIndexer<Change.Id, ChangeData, Change
verboseWriter.println("Submitting " + name + " for indexing in " + slices + " slices");
}
for (int slice = 0; slice < slices; slice++) {
- projectSlices.add(new ProjectSlice(name, slice, slices));
+ projectSlices.add(new ProjectSlice(name, slice, slices, sr));
}
} catch (IOException e) {
logger.atSevere().withCause(e).log("Error collecting project %s", name);
@@ -169,19 +173,6 @@ public class AllChangesIndexer extends SiteIndexer<Change.Id, ChangeData, Change
return indexAll(index, projectSlices);
}
- private int estimateSize(Repository repo) throws IOException {
- // Estimate size based on IDs that show up in ref names. This is not perfect, since patch set
- // refs may exist for changes whose metadata was never successfully stored. But that's ok, as
- // the estimate is just used as a heuristic for sorting projects.
- long size =
- repo.getRefDatabase().getRefsByPrefix(RefNames.REFS_CHANGES).stream()
- .map(r -> Change.Id.fromRef(r.getName()))
- .filter(Objects::nonNull)
- .distinct()
- .count();
- return Ints.saturatedCast(size);
- }
-
private SiteIndexer.Result indexAll(ChangeIndex index, List<ProjectSlice> projectSlices) {
Stopwatch sw = Stopwatch.createStarted();
MultiProgressMonitor mpm = new MultiProgressMonitor(progressOut, "Reindexing changes");
@@ -204,6 +195,7 @@ public class AllChangesIndexer extends SiteIndexer<Change.Id, ChangeData, Change
name,
slice,
slices,
+ projectSlice.getScanResult(),
doneTask,
failedTask));
String description = "project " + name + " (" + slice + "/" + slices + ")";
@@ -242,7 +234,13 @@ public class AllChangesIndexer extends SiteIndexer<Change.Id, ChangeData, Change
public Callable<Void> reindexProject(
ChangeIndexer indexer, Project.NameKey project, Task done, Task failed) {
- return reindexProject(indexer, project, 0, 1, done, failed);
+ try (Repository repo = repoManager.openRepository(project)) {
+ return reindexProject(
+ indexer, project, 0, 1, ChangeNotes.Factory.scanChangeIds(repo), done, failed);
+ } catch (IOException e) {
+ logger.atSevere().log(e.getMessage());
+ return null;
+ }
}
public Callable<Void> reindexProject(
@@ -250,9 +248,10 @@ public class AllChangesIndexer extends SiteIndexer<Change.Id, ChangeData, Change
Project.NameKey project,
int slice,
int slices,
+ ScanResult sr,
Task done,
Task failed) {
- return new ProjectIndexer(indexer, project, slice, slices, done, failed);
+ return new ProjectIndexer(indexer, project, slice, slices, sr, done, failed);
}
private class ProjectIndexer implements Callable<Void> {
@@ -260,6 +259,7 @@ public class AllChangesIndexer extends SiteIndexer<Change.Id, ChangeData, Change
private final Project.NameKey project;
private final int slice;
private final int slices;
+ private final ScanResult sr;
private final ProgressMonitor done;
private final ProgressMonitor failed;
@@ -268,32 +268,28 @@ public class AllChangesIndexer extends SiteIndexer<Change.Id, ChangeData, Change
Project.NameKey project,
int slice,
int slices,
+ ScanResult sr,
ProgressMonitor done,
ProgressMonitor failed) {
this.indexer = indexer;
this.project = project;
this.slice = slice;
this.slices = slices;
+ this.sr = sr;
this.done = done;
this.failed = failed;
}
@Override
public Void call() throws Exception {
- try (Repository repo = repoManager.openRepository(project)) {
- OnlineReindexMode.begin();
-
- // Order of scanning changes is undefined. This is ok if we assume that packfile locality is
- // not important for indexing, since sites should have a fully populated DiffSummary cache.
- // It does mean that reindexing after invalidating the DiffSummary cache will be expensive,
- // but the goal is to invalidate that cache as infrequently as we possibly can. And besides,
- // we don't have concrete proof that improving packfile locality would help.
- notesFactory.scan(repo, project, id -> (id.get() % slices) == slice).forEach(r -> index(r));
- } catch (RepositoryNotFoundException rnfe) {
- logger.atSevere().log(rnfe.getMessage());
- } finally {
- OnlineReindexMode.end();
- }
+ OnlineReindexMode.begin();
+ // Order of scanning changes is undefined. This is ok if we assume that packfile locality is
+ // not important for indexing, since sites should have a fully populated DiffSummary cache.
+ // It does mean that reindexing after invalidating the DiffSummary cache will be expensive,
+ // but the goal is to invalidate that cache as infrequently as we possibly can. And besides,
+ // we don't have concrete proof that improving packfile locality would help.
+ notesFactory.scan(sr, project, id -> (id.get() % slices) == slice).forEach(r -> index(r));
+ OnlineReindexMode.end();
return null;
}
diff --git a/java/com/google/gerrit/server/notedb/ChangeNotes.java b/java/com/google/gerrit/server/notedb/ChangeNotes.java
index 36a61cc020..51acf16e52 100644
--- a/java/com/google/gerrit/server/notedb/ChangeNotes.java
+++ b/java/com/google/gerrit/server/notedb/ChangeNotes.java
@@ -106,6 +106,29 @@ public class ChangeNotes extends AbstractChangeNotes<ChangeNotes> {
this.projectCache = projectCache;
}
+ @AutoValue
+ public abstract static class ScanResult {
+ abstract ImmutableSet<Change.Id> fromPatchSetRefs();
+
+ abstract ImmutableSet<Change.Id> fromMetaRefs();
+
+ public SetView<Change.Id> all() {
+ return Sets.union(fromPatchSetRefs(), fromMetaRefs());
+ }
+ }
+
+ public static ScanResult scanChangeIds(Repository repo) throws IOException {
+ ImmutableSet.Builder<Change.Id> fromPs = ImmutableSet.builder();
+ ImmutableSet.Builder<Change.Id> fromMeta = ImmutableSet.builder();
+ for (Ref r : repo.getRefDatabase().getRefsByPrefix(RefNames.REFS_CHANGES)) {
+ Change.Id id = Change.Id.fromRef(r.getName());
+ if (id != null) {
+ (r.getName().endsWith(RefNames.META_SUFFIX) ? fromMeta : fromPs).add(id);
+ }
+ }
+ return new AutoValue_ChangeNotes_Factory_ScanResult(fromPs.build(), fromMeta.build());
+ }
+
public ChangeNotes createChecked(Change c) {
return createChecked(c.getProject(), c.getId());
}
@@ -213,8 +236,11 @@ public class ChangeNotes extends AbstractChangeNotes<ChangeNotes> {
public Stream<ChangeNotesResult> scan(
Repository repo, Project.NameKey project, Predicate<Change.Id> changeIdPredicate)
throws IOException {
- ScanResult sr = scanChangeIds(repo);
+ return scan(scanChangeIds(repo), project, changeIdPredicate);
+ }
+ public Stream<ChangeNotesResult> scan(
+ ScanResult sr, Project.NameKey project, Predicate<Change.Id> changeIdPredicate) {
Stream<Change.Id> idStream = sr.all().stream();
if (changeIdPredicate != null) {
idStream = idStream.filter(changeIdPredicate);
@@ -286,29 +312,6 @@ public class ChangeNotes extends AbstractChangeNotes<ChangeNotes> {
@Nullable
abstract ChangeNotes maybeNotes();
}
-
- @AutoValue
- abstract static class ScanResult {
- abstract ImmutableSet<Change.Id> fromPatchSetRefs();
-
- abstract ImmutableSet<Change.Id> fromMetaRefs();
-
- SetView<Change.Id> all() {
- return Sets.union(fromPatchSetRefs(), fromMetaRefs());
- }
- }
-
- private static ScanResult scanChangeIds(Repository repo) throws IOException {
- ImmutableSet.Builder<Change.Id> fromPs = ImmutableSet.builder();
- ImmutableSet.Builder<Change.Id> fromMeta = ImmutableSet.builder();
- for (Ref r : repo.getRefDatabase().getRefsByPrefix(RefNames.REFS_CHANGES)) {
- Change.Id id = Change.Id.fromRef(r.getName());
- if (id != null) {
- (r.getName().endsWith(RefNames.META_SUFFIX) ? fromMeta : fromPs).add(id);
- }
- }
- return new AutoValue_ChangeNotes_Factory_ScanResult(fromPs.build(), fromMeta.build());
- }
}
private final boolean shouldExist;