summaryrefslogtreecommitdiffstats
path: root/java/com/google/gerrit/server/notedb/PrimaryStorageMigrator.java
diff options
context:
space:
mode:
Diffstat (limited to 'java/com/google/gerrit/server/notedb/PrimaryStorageMigrator.java')
-rw-r--r--java/com/google/gerrit/server/notedb/PrimaryStorageMigrator.java510
1 files changed, 510 insertions, 0 deletions
diff --git a/java/com/google/gerrit/server/notedb/PrimaryStorageMigrator.java b/java/com/google/gerrit/server/notedb/PrimaryStorageMigrator.java
new file mode 100644
index 0000000000..7b427b478f
--- /dev/null
+++ b/java/com/google/gerrit/server/notedb/PrimaryStorageMigrator.java
@@ -0,0 +1,510 @@
+// Copyright (C) 2017 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.google.gerrit.server.notedb;
+
+import static com.google.common.base.Preconditions.checkState;
+import static java.util.concurrent.TimeUnit.MILLISECONDS;
+import static java.util.concurrent.TimeUnit.NANOSECONDS;
+import static java.util.concurrent.TimeUnit.SECONDS;
+
+import com.github.rholder.retry.RetryException;
+import com.github.rholder.retry.Retryer;
+import com.github.rholder.retry.RetryerBuilder;
+import com.github.rholder.retry.StopStrategies;
+import com.github.rholder.retry.WaitStrategies;
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Stopwatch;
+import com.google.common.collect.ImmutableMap;
+import com.google.common.flogger.FluentLogger;
+import com.google.gerrit.common.Nullable;
+import com.google.gerrit.extensions.restapi.RestApiException;
+import com.google.gerrit.reviewdb.client.Account;
+import com.google.gerrit.reviewdb.client.Change;
+import com.google.gerrit.reviewdb.client.Project;
+import com.google.gerrit.reviewdb.client.RefNames;
+import com.google.gerrit.reviewdb.server.ReviewDb;
+import com.google.gerrit.reviewdb.server.ReviewDbUtil;
+import com.google.gerrit.server.InternalUser;
+import com.google.gerrit.server.config.AllUsersName;
+import com.google.gerrit.server.config.GerritServerConfig;
+import com.google.gerrit.server.git.GitRepositoryManager;
+import com.google.gerrit.server.git.RepoRefCache;
+import com.google.gerrit.server.index.change.ChangeField;
+import com.google.gerrit.server.notedb.NoteDbChangeState.PrimaryStorage;
+import com.google.gerrit.server.notedb.NoteDbChangeState.RefState;
+import com.google.gerrit.server.notedb.rebuild.ChangeRebuilder;
+import com.google.gerrit.server.query.change.ChangeData;
+import com.google.gerrit.server.query.change.InternalChangeQuery;
+import com.google.gerrit.server.update.BatchUpdate;
+import com.google.gerrit.server.update.BatchUpdateOp;
+import com.google.gerrit.server.update.ChangeContext;
+import com.google.gerrit.server.update.RetryHelper;
+import com.google.gerrit.server.update.UpdateException;
+import com.google.gerrit.server.util.time.TimeUtil;
+import com.google.gwtorm.server.AtomicUpdate;
+import com.google.gwtorm.server.OrmException;
+import com.google.gwtorm.server.OrmRuntimeException;
+import com.google.inject.Inject;
+import com.google.inject.Provider;
+import com.google.inject.Singleton;
+import java.io.IOException;
+import java.sql.Timestamp;
+import java.util.List;
+import java.util.Objects;
+import java.util.Optional;
+import java.util.Set;
+import java.util.TreeSet;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.atomic.AtomicBoolean;
+import org.eclipse.jgit.errors.RepositoryNotFoundException;
+import org.eclipse.jgit.lib.Config;
+import org.eclipse.jgit.lib.ObjectId;
+import org.eclipse.jgit.lib.Ref;
+import org.eclipse.jgit.lib.Repository;
+
+/** Helper to migrate the {@link PrimaryStorage} of individual changes. */
+@Singleton
+public class PrimaryStorageMigrator {
+ private static final FluentLogger logger = FluentLogger.forEnclosingClass();
+
+ /**
+ * Exception thrown during migration if the change has no {@code noteDbState} field at the
+ * beginning of the migration.
+ */
+ public static class NoNoteDbStateException extends RuntimeException {
+ private static final long serialVersionUID = 1L;
+
+ private NoNoteDbStateException(Change.Id id) {
+ super("change " + id + " has no note_db_state; rebuild it first");
+ }
+ }
+
+ private final AllUsersName allUsers;
+ private final ChangeNotes.Factory changeNotesFactory;
+ private final ChangeRebuilder rebuilder;
+ private final ChangeUpdate.Factory updateFactory;
+ private final GitRepositoryManager repoManager;
+ private final InternalUser.Factory internalUserFactory;
+ private final Provider<InternalChangeQuery> queryProvider;
+ private final Provider<ReviewDb> db;
+ private final RetryHelper retryHelper;
+
+ private final long skewMs;
+ private final long timeoutMs;
+ private final Retryer<NoteDbChangeState> testEnsureRebuiltRetryer;
+
+ @Inject
+ PrimaryStorageMigrator(
+ @GerritServerConfig Config cfg,
+ Provider<ReviewDb> db,
+ GitRepositoryManager repoManager,
+ AllUsersName allUsers,
+ ChangeRebuilder rebuilder,
+ ChangeNotes.Factory changeNotesFactory,
+ Provider<InternalChangeQuery> queryProvider,
+ ChangeUpdate.Factory updateFactory,
+ InternalUser.Factory internalUserFactory,
+ RetryHelper retryHelper) {
+ this(
+ cfg,
+ db,
+ repoManager,
+ allUsers,
+ rebuilder,
+ null,
+ changeNotesFactory,
+ queryProvider,
+ updateFactory,
+ internalUserFactory,
+ retryHelper);
+ }
+
+ @VisibleForTesting
+ public PrimaryStorageMigrator(
+ Config cfg,
+ Provider<ReviewDb> db,
+ GitRepositoryManager repoManager,
+ AllUsersName allUsers,
+ ChangeRebuilder rebuilder,
+ @Nullable Retryer<NoteDbChangeState> testEnsureRebuiltRetryer,
+ ChangeNotes.Factory changeNotesFactory,
+ Provider<InternalChangeQuery> queryProvider,
+ ChangeUpdate.Factory updateFactory,
+ InternalUser.Factory internalUserFactory,
+ RetryHelper retryHelper) {
+ this.db = db;
+ this.repoManager = repoManager;
+ this.allUsers = allUsers;
+ this.rebuilder = rebuilder;
+ this.testEnsureRebuiltRetryer = testEnsureRebuiltRetryer;
+ this.changeNotesFactory = changeNotesFactory;
+ this.queryProvider = queryProvider;
+ this.updateFactory = updateFactory;
+ this.internalUserFactory = internalUserFactory;
+ this.retryHelper = retryHelper;
+ skewMs = NoteDbChangeState.getReadOnlySkew(cfg);
+
+ String s = "notedb";
+ timeoutMs =
+ cfg.getTimeUnit(
+ s,
+ null,
+ "primaryStorageMigrationTimeout",
+ MILLISECONDS.convert(60, SECONDS),
+ MILLISECONDS);
+ }
+
+ /**
+ * Migrate a change's primary storage from ReviewDb to NoteDb.
+ *
+ * <p>This method will return only if the primary storage of the change is NoteDb afterwards. (It
+ * may return early if the primary storage was already NoteDb.)
+ *
+ * <p>If this method throws an exception, then the primary storage of the change is probably not
+ * NoteDb. (It is possible that the primary storage of the change is NoteDb in this case, but
+ * there was an error reading the state.) Moreover, after an exception, the change may be
+ * read-only until a lease expires. If the caller chooses to retry, they should wait until the
+ * read-only lease expires; this method will fail relatively quickly if called on a read-only
+ * change.
+ *
+ * <p>Note that if the change is read-only after this method throws an exception, that does not
+ * necessarily guarantee that the read-only lease was acquired during that particular method
+ * invocation; this call may have in fact failed because another thread acquired the lease first.
+ *
+ * @param id change ID.
+ * @throws OrmException if a ReviewDb-level error occurs.
+ * @throws IOException if a repo-level error occurs.
+ */
+ public void migrateToNoteDbPrimary(Change.Id id) throws OrmException, IOException {
+ // Since there are multiple non-atomic steps in this method, we need to
+ // consider what happens when there is another writer concurrent with the
+ // thread executing this method.
+ //
+ // Let:
+ // * OR = other writer writes noteDbState & new data to ReviewDb (in one
+ // transaction)
+ // * ON = other writer writes to NoteDb
+ // * MRO = migrator sets state to read-only
+ // * MR = ensureRebuilt writes rebuilt noteDbState to ReviewDb (but does not
+ // otherwise update ReviewDb in this transaction)
+ // * MN = ensureRebuilt writes rebuilt state to NoteDb
+ //
+ // Consider all the interleavings of these operations.
+ //
+ // * OR,ON,MRO,...
+ // Other writer completes before migrator begins; this is not a concurrent
+ // write.
+ // * MRO,...,OR,...
+ // OR will fail, since it atomically checks that the noteDbState is not
+ // read-only before proceeding. This results in an exception, but not a
+ // concurrent write.
+ //
+ // Thus all the "interesting" interleavings start with OR,MRO, and differ on
+ // where ON falls relative to MR/MN.
+ //
+ // * OR,MRO,ON,MR,MN
+ // The other NoteDb write succeeds despite the noteDbState being
+ // read-only. Because the read-only state from MRO includes the update
+ // from OR, the change is up-to-date at this point. Thus MR,MN is a no-op.
+ // The end result is an up-to-date, read-only change.
+ //
+ // * OR,MRO,MR,ON,MN
+ // The change is out-of-date when ensureRebuilt begins, because OR
+ // succeeded but the corresponding ON has not happened yet. ON will
+ // succeed, because there have been no intervening NoteDb writes. MN will
+ // fail, because ON updated the state in NoteDb to something other than
+ // what MR claimed. This leaves the change in an out-of-date, read-only
+ // state.
+ //
+ // If this method threw an exception in this case, the change would
+ // eventually switch back to read-write when the read-only lease expires,
+ // so this situation is recoverable. However, it would be inconvenient for
+ // a change to be read-only for so long.
+ //
+ // Thus, as an optimization, we have a retry loop that attempts
+ // ensureRebuilt while still holding the same read-only lease. This
+ // effectively results in the interleaving OR,MR,ON,MR,MN; in contrast
+ // with the previous case, here, MR/MN actually rebuilds the change. In
+ // the case of a write failure, MR/MN might fail and get retried again. If
+ // it exceeds the maximum number of retries, an exception is thrown.
+ //
+ // * OR,MRO,MR,MN,ON
+ // The change is out-of-date when ensureRebuilt begins. The change is
+ // rebuilt, leaving a new state in NoteDb. ON will fail, because the old
+ // NoteDb state has changed since the ref state was read when the update
+ // began (prior to OR). This results in an exception from ON, but the end
+ // result is still an up-to-date, read-only change. The end user that
+ // initiated the other write observes an error, but this is no different
+ // from other errors that need retrying, e.g. due to a backend write
+ // failure.
+
+ Stopwatch sw = Stopwatch.createStarted();
+ Change readOnlyChange = setReadOnlyInReviewDb(id); // MRO
+ if (readOnlyChange == null) {
+ return; // Already migrated.
+ }
+
+ NoteDbChangeState rebuiltState;
+ try {
+ // MR,MN
+ rebuiltState =
+ ensureRebuiltRetryer(sw)
+ .call(
+ () ->
+ ensureRebuilt(
+ readOnlyChange.getProject(),
+ id,
+ NoteDbChangeState.parse(readOnlyChange)));
+ } catch (RetryException | ExecutionException e) {
+ throw new OrmException(e);
+ }
+
+ // At this point, the noteDbState in ReviewDb is read-only, and it is
+ // guaranteed to match the state actually in NoteDb. Now it is safe to set
+ // the primary storage to NoteDb.
+
+ setPrimaryStorageNoteDb(id, rebuiltState);
+ logger.atFine().log(
+ "Migrated change %s to NoteDb primary in %sms", id, sw.elapsed(MILLISECONDS));
+ }
+
+ private Change setReadOnlyInReviewDb(Change.Id id) throws OrmException {
+ AtomicBoolean alreadyMigrated = new AtomicBoolean(false);
+ Change result =
+ db().changes()
+ .atomicUpdate(
+ id,
+ new AtomicUpdate<Change>() {
+ @Override
+ public Change update(Change change) {
+ NoteDbChangeState state = NoteDbChangeState.parse(change);
+ if (state == null) {
+ // Could rebuild the change here, but that's more complexity, and this
+ // normally shouldn't happen.
+ //
+ // Known cases where this happens are described in and handled by
+ // NoteDbMigrator#canSkipPrimaryStorageMigration.
+ throw new NoNoteDbStateException(id);
+ }
+ // If the change is already read-only, then the lease is held by another
+ // (likely failed) migrator thread. Fail early, as we can't take over
+ // the lease.
+ NoteDbChangeState.checkNotReadOnly(change, skewMs);
+ if (state.getPrimaryStorage() != PrimaryStorage.NOTE_DB) {
+ Timestamp now = TimeUtil.nowTs();
+ Timestamp until = new Timestamp(now.getTime() + timeoutMs);
+ change.setNoteDbState(state.withReadOnlyUntil(until).toString());
+ } else {
+ alreadyMigrated.set(true);
+ }
+ return change;
+ }
+ });
+ return alreadyMigrated.get() ? null : result;
+ }
+
+ private Retryer<NoteDbChangeState> ensureRebuiltRetryer(Stopwatch sw) {
+ if (testEnsureRebuiltRetryer != null) {
+ return testEnsureRebuiltRetryer;
+ }
+ // Retry the ensureRebuilt step with backoff until half the timeout has
+ // expired, leaving the remaining half for the rest of the steps.
+ long remainingNanos = (MILLISECONDS.toNanos(timeoutMs) / 2) - sw.elapsed(NANOSECONDS);
+ remainingNanos = Math.max(remainingNanos, 0);
+ return RetryerBuilder.<NoteDbChangeState>newBuilder()
+ .retryIfException(e -> (e instanceof IOException) || (e instanceof OrmException))
+ .withWaitStrategy(
+ WaitStrategies.join(
+ WaitStrategies.exponentialWait(250, MILLISECONDS),
+ WaitStrategies.randomWait(50, MILLISECONDS)))
+ .withStopStrategy(StopStrategies.stopAfterDelay(remainingNanos, NANOSECONDS))
+ .build();
+ }
+
+ private NoteDbChangeState ensureRebuilt(
+ Project.NameKey project, Change.Id id, NoteDbChangeState readOnlyState)
+ throws IOException, OrmException, RepositoryNotFoundException {
+ try (Repository changeRepo = repoManager.openRepository(project);
+ Repository allUsersRepo = repoManager.openRepository(allUsers)) {
+ if (!readOnlyState.isUpToDate(new RepoRefCache(changeRepo), new RepoRefCache(allUsersRepo))) {
+ NoteDbUpdateManager.Result r = rebuilder.rebuildEvenIfReadOnly(db(), id);
+ checkState(
+ r.newState().getReadOnlyUntil().equals(readOnlyState.getReadOnlyUntil()),
+ "state after rebuilding has different read-only lease: %s != %s",
+ r.newState(),
+ readOnlyState);
+ readOnlyState = r.newState();
+ }
+ }
+ return readOnlyState;
+ }
+
+ private void setPrimaryStorageNoteDb(Change.Id id, NoteDbChangeState expectedState)
+ throws OrmException {
+ db().changes()
+ .atomicUpdate(
+ id,
+ new AtomicUpdate<Change>() {
+ @Override
+ public Change update(Change change) {
+ NoteDbChangeState state = NoteDbChangeState.parse(change);
+ if (!Objects.equals(state, expectedState)) {
+ throw new OrmRuntimeException(badState(state, expectedState));
+ }
+ Timestamp until = state.getReadOnlyUntil().get();
+ if (TimeUtil.nowTs().after(until)) {
+ throw new OrmRuntimeException(
+ "read-only lease on change " + id + " expired at " + until);
+ }
+ change.setNoteDbState(NoteDbChangeState.NOTE_DB_PRIMARY_STATE);
+ return change;
+ }
+ });
+ }
+
+ private ReviewDb db() {
+ return ReviewDbUtil.unwrapDb(db.get());
+ }
+
+ private String badState(NoteDbChangeState actual, NoteDbChangeState expected) {
+ return "state changed unexpectedly: " + actual + " != " + expected;
+ }
+
+ public void migrateToReviewDbPrimary(Change.Id id, @Nullable Project.NameKey project)
+ throws OrmException, IOException {
+ // Migrating back to ReviewDb primary is much simpler than the original migration to NoteDb
+ // primary, because when NoteDb is primary, each write only goes to one storage location rather
+ // than both. We only need to consider whether a concurrent writer (OR) conflicts with the first
+ // setReadOnlyInNoteDb step (MR) in this method.
+ //
+ // If OR wins, then either:
+ // * MR will set read-only after OR is completed, which is not a concurrent write.
+ // * MR will fail to set read-only with a lock failure. The caller will have to retry, but the
+ // change is not in a read-only state, so behavior is not degraded in the meantime.
+ //
+ // If MR wins, then either:
+ // * OR will fail with a read-only exception (via AbstractChangeNotes#apply).
+ // * OR will fail with a lock failure.
+ //
+ // In all of these scenarios, the change is read-only if and only if MR succeeds.
+ //
+ // There will be no concurrent writes to ReviewDb for this change until
+ // setPrimaryStorageReviewDb completes, because ReviewDb writes are not attempted when primary
+ // storage is NoteDb. After the primary storage changes back, it is possible for subsequent
+ // NoteDb writes to conflict with the releaseReadOnlyLeaseInNoteDb step, but at this point,
+ // since ReviewDb is primary, we are back to ignoring them.
+ Stopwatch sw = Stopwatch.createStarted();
+ if (project == null) {
+ project = getProject(id);
+ }
+ ObjectId newMetaId = setReadOnlyInNoteDb(project, id);
+ rebuilder.rebuildReviewDb(db(), project, id);
+ setPrimaryStorageReviewDb(id, newMetaId);
+ releaseReadOnlyLeaseInNoteDb(project, id);
+ logger.atFine().log(
+ "Migrated change %s to ReviewDb primary in %sms", id, sw.elapsed(MILLISECONDS));
+ }
+
+ private ObjectId setReadOnlyInNoteDb(Project.NameKey project, Change.Id id)
+ throws OrmException, IOException {
+ Timestamp now = TimeUtil.nowTs();
+ Timestamp until = new Timestamp(now.getTime() + timeoutMs);
+ ChangeUpdate update =
+ updateFactory.create(
+ changeNotesFactory.createChecked(db.get(), project, id), internalUserFactory.create());
+ update.setReadOnlyUntil(until);
+ return update.commit();
+ }
+
+ private void setPrimaryStorageReviewDb(Change.Id id, ObjectId newMetaId)
+ throws OrmException, IOException {
+ ImmutableMap.Builder<Account.Id, ObjectId> draftIds = ImmutableMap.builder();
+ try (Repository repo = repoManager.openRepository(allUsers)) {
+ for (Ref draftRef :
+ repo.getRefDatabase().getRefsByPrefix(RefNames.refsDraftCommentsPrefix(id))) {
+ Account.Id accountId = Account.Id.fromRef(draftRef.getName());
+ if (accountId != null) {
+ draftIds.put(accountId, draftRef.getObjectId().copy());
+ }
+ }
+ }
+ NoteDbChangeState newState =
+ new NoteDbChangeState(
+ id,
+ PrimaryStorage.REVIEW_DB,
+ Optional.of(RefState.create(newMetaId, draftIds.build())),
+ Optional.empty());
+ db().changes()
+ .atomicUpdate(
+ id,
+ new AtomicUpdate<Change>() {
+ @Override
+ public Change update(Change change) {
+ if (PrimaryStorage.of(change) != PrimaryStorage.NOTE_DB) {
+ throw new OrmRuntimeException(
+ "change " + id + " is not NoteDb primary: " + change.getNoteDbState());
+ }
+ change.setNoteDbState(newState.toString());
+ return change;
+ }
+ });
+ }
+
+ private void releaseReadOnlyLeaseInNoteDb(Project.NameKey project, Change.Id id)
+ throws OrmException {
+ // Use a BatchUpdate since ReviewDb is primary at this point, so it needs to reflect the update.
+ // (In practice retrying won't happen, since we aren't using fused updates at this point.)
+ try {
+ retryHelper.execute(
+ updateFactory -> {
+ try (BatchUpdate bu =
+ updateFactory.create(
+ db.get(), project, internalUserFactory.create(), TimeUtil.nowTs())) {
+ bu.addOp(
+ id,
+ new BatchUpdateOp() {
+ @Override
+ public boolean updateChange(ChangeContext ctx) {
+ ctx.getUpdate(ctx.getChange().currentPatchSetId())
+ .setReadOnlyUntil(new Timestamp(0));
+ return true;
+ }
+ });
+ bu.execute();
+ return null;
+ }
+ });
+ } catch (RestApiException | UpdateException e) {
+ throw new OrmException(e);
+ }
+ }
+
+ private Project.NameKey getProject(Change.Id id) throws OrmException {
+ List<ChangeData> cds =
+ queryProvider.get().setRequestedFields(ChangeField.PROJECT).byLegacyChangeId(id);
+ Set<Project.NameKey> projects = new TreeSet<>();
+ for (ChangeData cd : cds) {
+ projects.add(cd.project());
+ }
+ if (projects.size() != 1) {
+ throw new OrmException(
+ "zero or multiple projects found for change "
+ + id
+ + ", must specify project explicitly: "
+ + projects);
+ }
+ return projects.iterator().next();
+ }
+}