summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSaša Živkov <sasa.zivkov@sap.com>2020-08-06 17:32:55 +0200
committerMatthias Sohn <matthias.sohn@sap.com>2020-10-07 00:10:19 +0200
commit8d41f4191e16f5c6401e7984ed874914f32ccbd7 (patch)
treeb8b1ab0bf37ac031409a7fe3efce1967ccb9fb19
parent02e2cf13bd179cca0f60592b90c083d51e0fd1ff (diff)
setNoteDbPrimary: update in chunks, one DB connection per chunk
Before this change we used one connection to migrate one change. We observed an excessive number of DB connections using netstat during the execution of the setNoteDbPrimary method. The number of connections reached 27K and then opening a new connection started to fail and the migration started to fail. I assume that this is caused by the exhaustion of the local port range: we open/close connections in quick succession and the operating system doesn't have enough time to release local ports. By updating a chunk of changes from a single thread, we make sure to use only one DB connection for one chunk. This should reduce the rate at which DB connections are open/closed and the overall number of connections open during the migration. Change-Id: Ie4a1b4d41b92824c87a0ae39b13a13d9ccb4ca3c
-rw-r--r--java/com/google/gerrit/server/notedb/PrimaryStorageMigrator.java73
-rw-r--r--java/com/google/gerrit/server/notedb/rebuild/NoteDbMigrator.java63
-rw-r--r--javatests/com/google/gerrit/acceptance/server/notedb/NoteDbPrimaryIT.java5
3 files changed, 77 insertions, 64 deletions
diff --git a/java/com/google/gerrit/server/notedb/PrimaryStorageMigrator.java b/java/com/google/gerrit/server/notedb/PrimaryStorageMigrator.java
index 7b427b478f..0fe3d68b68 100644
--- a/java/com/google/gerrit/server/notedb/PrimaryStorageMigrator.java
+++ b/java/com/google/gerrit/server/notedb/PrimaryStorageMigrator.java
@@ -15,6 +15,7 @@
package com.google.gerrit.server.notedb;
import static com.google.common.base.Preconditions.checkState;
+import static com.google.gerrit.reviewdb.server.ReviewDbUtil.unwrapDb;
import static java.util.concurrent.TimeUnit.MILLISECONDS;
import static java.util.concurrent.TimeUnit.NANOSECONDS;
import static java.util.concurrent.TimeUnit.SECONDS;
@@ -27,6 +28,7 @@ import com.github.rholder.retry.WaitStrategies;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Stopwatch;
import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.Iterables;
import com.google.common.flogger.FluentLogger;
import com.google.gerrit.common.Nullable;
import com.google.gerrit.extensions.restapi.RestApiException;
@@ -45,6 +47,7 @@ import com.google.gerrit.server.index.change.ChangeField;
import com.google.gerrit.server.notedb.NoteDbChangeState.PrimaryStorage;
import com.google.gerrit.server.notedb.NoteDbChangeState.RefState;
import com.google.gerrit.server.notedb.rebuild.ChangeRebuilder;
+import com.google.gerrit.server.project.ProjectCache;
import com.google.gerrit.server.query.change.ChangeData;
import com.google.gerrit.server.query.change.InternalChangeQuery;
import com.google.gerrit.server.update.BatchUpdate;
@@ -61,6 +64,7 @@ import com.google.inject.Provider;
import com.google.inject.Singleton;
import java.io.IOException;
import java.sql.Timestamp;
+import java.util.Collection;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
@@ -104,6 +108,7 @@ public class PrimaryStorageMigrator {
private final long skewMs;
private final long timeoutMs;
private final Retryer<NoteDbChangeState> testEnsureRebuiltRetryer;
+ private ProjectCache projectCache;
@Inject
PrimaryStorageMigrator(
@@ -116,7 +121,8 @@ public class PrimaryStorageMigrator {
Provider<InternalChangeQuery> queryProvider,
ChangeUpdate.Factory updateFactory,
InternalUser.Factory internalUserFactory,
- RetryHelper retryHelper) {
+ RetryHelper retryHelper,
+ ProjectCache projectCache) {
this(
cfg,
db,
@@ -128,7 +134,8 @@ public class PrimaryStorageMigrator {
queryProvider,
updateFactory,
internalUserFactory,
- retryHelper);
+ retryHelper,
+ projectCache);
}
@VisibleForTesting
@@ -143,7 +150,8 @@ public class PrimaryStorageMigrator {
Provider<InternalChangeQuery> queryProvider,
ChangeUpdate.Factory updateFactory,
InternalUser.Factory internalUserFactory,
- RetryHelper retryHelper) {
+ RetryHelper retryHelper,
+ ProjectCache projectCache) {
this.db = db;
this.repoManager = repoManager;
this.allUsers = allUsers;
@@ -154,6 +162,7 @@ public class PrimaryStorageMigrator {
this.updateFactory = updateFactory;
this.internalUserFactory = internalUserFactory;
this.retryHelper = retryHelper;
+ this.projectCache = projectCache;
skewMs = NoteDbChangeState.getReadOnlySkew(cfg);
String s = "notedb";
@@ -166,6 +175,64 @@ public class PrimaryStorageMigrator {
MILLISECONDS);
}
+ public boolean migrateToNoteDbPrimary(Collection<Change.Id> changes) {
+ boolean result = true;
+ for (Change.Id id : changes) {
+ try {
+ try {
+ migrateToNoteDbPrimary(id);
+ } catch (NoNoteDbStateException e) {
+ if (canSkipPrimaryStorageMigration(db(), id)) {
+ logger.atWarning().withCause(e).log(
+ "Change %s previously failed to rebuild;" + " skipping primary storage migration",
+ id);
+ } else {
+ throw e;
+ }
+ }
+ } catch (Exception e) {
+ logger.atSevere().withCause(e).log("Error migrating primary storage for %s", id);
+ result = false;
+ }
+ }
+ return result;
+ }
+
+ /**
+ * Checks whether a change is so corrupt that it can be completely skipped by the primary storage
+ * migration step.
+ *
+ * <p>To get to the point where this method is called from {@link #setNoteDbPrimary}, it means we
+ * attempted to rebuild it, and encountered an error that was then caught in {@link
+ * #rebuildProjectSlice} and skipped. As a result, there is no {@code noteDbState} field in the
+ * change by the time we get to {@link #setNoteDbPrimary}, so {@code migrateToNoteDbPrimary}
+ * throws an exception.
+ *
+ * <p>We have to do this hacky double-checking because we don't have a way for the rebuilding
+ * phase to communicate to the primary storage migration phase that the change is skippable. It
+ * would be possible to store this info in some field in this class, but there is no guarantee
+ * that the rebuild and primary storage migration phases are run in the same JVM invocation.
+ *
+ * <p>In an ideal world, we could do this through the {@link
+ * com.google.gerrit.server.notedb.NoteDbChangeState.PrimaryStorage} enum, having a separate value
+ * for errors. However, that would be an invasive change touching many non-migration-related parts
+ * of the NoteDb migration code, which is too risky to attempt in the stable branch where this bug
+ * had to be fixed.
+ *
+ * <p>As of this writing, there are only two cases where this happens: when a change has no patch
+ * sets, or the project doesn't exist.
+ */
+ private boolean canSkipPrimaryStorageMigration(ReviewDb db, Change.Id id) {
+ try {
+ return Iterables.isEmpty(unwrapDb(db).patchSets().byChange(id))
+ || projectCache.get(unwrapDb(db).changes().get(id).getProject()) == null;
+ } catch (Exception e) {
+ logger.atSevere().withCause(e).log(
+ "Error checking if change %s can be skipped, assuming no", id);
+ return false;
+ }
+ }
+
/**
* Migrate a change's primary storage from ReviewDb to NoteDb.
*
diff --git a/java/com/google/gerrit/server/notedb/rebuild/NoteDbMigrator.java b/java/com/google/gerrit/server/notedb/rebuild/NoteDbMigrator.java
index 88eed740df..30b2e12b18 100644
--- a/java/com/google/gerrit/server/notedb/rebuild/NoteDbMigrator.java
+++ b/java/com/google/gerrit/server/notedb/rebuild/NoteDbMigrator.java
@@ -32,7 +32,6 @@ import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Stopwatch;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableListMultimap;
-import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.MultimapBuilder;
import com.google.common.collect.Ordering;
@@ -69,7 +68,6 @@ import com.google.gerrit.server.notedb.NoteDbTable;
import com.google.gerrit.server.notedb.NoteDbUpdateManager;
import com.google.gerrit.server.notedb.NotesMigrationState;
import com.google.gerrit.server.notedb.PrimaryStorageMigrator;
-import com.google.gerrit.server.notedb.PrimaryStorageMigrator.NoNoteDbStateException;
import com.google.gerrit.server.notedb.RepoSequence;
import com.google.gerrit.server.notedb.rebuild.ChangeRebuilder.NoPatchSetsException;
import com.google.gerrit.server.project.NoSuchChangeException;
@@ -489,7 +487,6 @@ public class NoteDbMigrator implements AutoCloseable {
private final ImmutableList<Change.Id> changes;
private final OutputStream progressOut;
private final NotesMigrationState stopAtState;
- private final ProjectCache projectCache;
private final boolean trial;
private final boolean forceRebuild;
private final int sequenceGap;
@@ -556,7 +553,6 @@ public class NoteDbMigrator implements AutoCloseable {
this.changes = changes;
this.progressOut = progressOut;
this.stopAtState = stopAtState;
- this.projectCache = projectCache;
this.trial = trial;
this.forceRebuild = forceRebuild;
this.sequenceGap = sequenceGap;
@@ -710,34 +706,16 @@ public class NoteDbMigrator implements AutoCloseable {
try (ContextHelper contextHelper = new ContextHelper()) {
List<ListenableFuture<Boolean>> futures =
- allChanges.stream()
+ Lists.partition(allChanges, PROJECT_SLICE_MAX_REFS).stream()
.map(
- id ->
+ partition ->
executor.submit(
() -> {
try (ManualRequestContext ctx = contextHelper.open()) {
- try {
- primaryStorageMigrator.migrateToNoteDbPrimary(id);
- } catch (NoNoteDbStateException e) {
- if (canSkipPrimaryStorageMigration(
- ctx.getReviewDbProvider().get(), id)) {
- logger.atWarning().withCause(e).log(
- "Change %s previously failed to rebuild;"
- + " skipping primary storage migration",
- id);
- } else {
- throw e;
- }
- }
- return true;
- } catch (Exception e) {
- logger.atSevere().withCause(e).log(
- "Error migrating primary storage for %s", id);
- return false;
+ return primaryStorageMigrator.migrateToNoteDbPrimary(partition);
}
}))
.collect(toList());
-
boolean ok = futuresToBoolean(futures, "Error migrating primary storage");
double t = sw.elapsed(TimeUnit.MILLISECONDS) / 1000d;
logger.atInfo().log(
@@ -751,41 +729,6 @@ public class NoteDbMigrator implements AutoCloseable {
return disableReviewDb(prev);
}
- /**
- * Checks whether a change is so corrupt that it can be completely skipped by the primary storage
- * migration step.
- *
- * <p>To get to the point where this method is called from {@link #setNoteDbPrimary}, it means we
- * attempted to rebuild it, and encountered an error that was then caught in {@link
- * #rebuildProjectSlice} and skipped. As a result, there is no {@code noteDbState} field in the
- * change by the time we get to {@link #setNoteDbPrimary}, so {@code migrateToNoteDbPrimary}
- * throws an exception.
- *
- * <p>We have to do this hacky double-checking because we don't have a way for the rebuilding
- * phase to communicate to the primary storage migration phase that the change is skippable. It
- * would be possible to store this info in some field in this class, but there is no guarantee
- * that the rebuild and primary storage migration phases are run in the same JVM invocation.
- *
- * <p>In an ideal world, we could do this through the {@link
- * com.google.gerrit.server.notedb.NoteDbChangeState.PrimaryStorage} enum, having a separate value
- * for errors. However, that would be an invasive change touching many non-migration-related parts
- * of the NoteDb migration code, which is too risky to attempt in the stable branch where this bug
- * had to be fixed.
- *
- * <p>As of this writing, there are only two cases where this happens: when a change has no patch
- * sets, or the project doesn't exist.
- */
- private boolean canSkipPrimaryStorageMigration(ReviewDb db, Change.Id id) {
- try {
- return Iterables.isEmpty(unwrapDb(db).patchSets().byChange(id))
- || projectCache.get(unwrapDb(db).changes().get(id).getProject()) == null;
- } catch (Exception e) {
- logger.atSevere().withCause(e).log(
- "Error checking if change %s can be skipped, assuming no", id);
- return false;
- }
- }
-
private NotesMigrationState disableReviewDb(NotesMigrationState prev) throws IOException {
return saveState(prev, NOTE_DB, c -> setAutoMigrate(c, false));
}
diff --git a/javatests/com/google/gerrit/acceptance/server/notedb/NoteDbPrimaryIT.java b/javatests/com/google/gerrit/acceptance/server/notedb/NoteDbPrimaryIT.java
index 486a0de55a..50125488eb 100644
--- a/javatests/com/google/gerrit/acceptance/server/notedb/NoteDbPrimaryIT.java
+++ b/javatests/com/google/gerrit/acceptance/server/notedb/NoteDbPrimaryIT.java
@@ -60,6 +60,7 @@ import com.google.gerrit.server.notedb.NoteDbChangeState;
import com.google.gerrit.server.notedb.NoteDbChangeState.PrimaryStorage;
import com.google.gerrit.server.notedb.PrimaryStorageMigrator;
import com.google.gerrit.server.notedb.TestChangeRebuilderWrapper;
+import com.google.gerrit.server.project.ProjectCache;
import com.google.gerrit.server.update.RetryHelper;
import com.google.gerrit.server.util.time.TimeUtil;
import com.google.gerrit.testing.ConfigSuite;
@@ -101,6 +102,7 @@ public class NoteDbPrimaryIT extends AbstractDaemonTest {
@Inject private ChangeUpdate.Factory updateFactory;
@Inject private InternalUser.Factory internalUserFactory;
@Inject private RetryHelper retryHelper;
+ @Inject private ProjectCache projectCache;
private PrimaryStorageMigrator migrator;
@@ -125,7 +127,8 @@ public class NoteDbPrimaryIT extends AbstractDaemonTest {
queryProvider,
updateFactory,
internalUserFactory,
- retryHelper);
+ retryHelper,
+ projectCache);
}
@After