diff options
author | Luca Milanesio <luca.milanesio@gmail.com> | 2017-03-15 23:53:12 +0000 |
---|---|---|
committer | Sebastian Schuberth <sschuberth@gmail.com> | 2017-05-29 10:22:50 +0200 |
commit | 033b6ec2b74ac506f6cb02bb25aecc4a51cdfde7 (patch) | |
tree | 843cc7c3926f1ab1f06cb3580e30c15bf0523391 | |
parent | 39bf331502ec3f6d6143c99905f8d5565946ff41 (diff) |
Set max retries to avoid queue congestion
When servers have *a lot* of remote slaves, some of them
unstable and potentially offline, a maximum retry policy is
needed to prevent push events from staying in the replication
queue and being rescheduled forever.
Keep the configuration backward-compatible by setting maxRetries
to zero by default, which means the retry limit is disabled.
Change-Id: I060cc7bc3a4d1089b0815db02d2e1430f83a2015
4 files changed, 45 insertions, 4 deletions
diff --git a/src/main/java/com/googlesource/gerrit/plugins/replication/Destination.java b/src/main/java/com/googlesource/gerrit/plugins/replication/Destination.java index 06cbe33..ed361a9 100644 --- a/src/main/java/com/googlesource/gerrit/plugins/replication/Destination.java +++ b/src/main/java/com/googlesource/gerrit/plugins/replication/Destination.java @@ -383,8 +383,9 @@ public class Destination { case TRANSPORT_ERROR: case REPOSITORY_MISSING: default: - pushOp.setToRetry(); - pool.schedule(pushOp, config.getRetryDelay(), TimeUnit.MINUTES); + if (pushOp.setToRetry()) { + pool.schedule(pushOp, config.getRetryDelay(), TimeUnit.MINUTES); + } break; } } @@ -549,6 +550,10 @@ public class Destination { return config.getRemoteConfig().getName(); } + public int getMaxRetries() { + return config.getMaxRetries(); + } + private static boolean matches(URIish uri, String urlMatch) { if (urlMatch == null || urlMatch.equals("") || urlMatch.equals("*")) { return true; diff --git a/src/main/java/com/googlesource/gerrit/plugins/replication/DestinationConfiguration.java b/src/main/java/com/googlesource/gerrit/plugins/replication/DestinationConfiguration.java index 0d7d3ce..f79f616 100644 --- a/src/main/java/com/googlesource/gerrit/plugins/replication/DestinationConfiguration.java +++ b/src/main/java/com/googlesource/gerrit/plugins/replication/DestinationConfiguration.java @@ -35,6 +35,7 @@ class DestinationConfiguration { private final ImmutableList<String> projects; private final ImmutableList<String> authGroupNames; private final RemoteConfig remoteConfig; + private final int maxRetries; DestinationConfiguration(RemoteConfig remoteConfig, Config cfg) { this.remoteConfig = remoteConfig; @@ -62,6 +63,9 @@ class DestinationConfiguration { cfg.getBoolean("remote", name, "replicateHiddenProjects", false); remoteNameStyle = MoreObjects.firstNonNull( cfg.getString("remote", name, "remoteNameStyle"), "slash"); + maxRetries = + getInt( + remoteConfig, cfg, "replicationMaxRetries", 
cfg.getInt("replication", "maxRetries", 0)); } public int getDelay() { @@ -120,6 +124,10 @@ class DestinationConfiguration { return remoteConfig; } + public int getMaxRetries() { + return maxRetries; + } + private static int getInt( RemoteConfig rc, Config cfg, String name, int defValue) { return cfg.getInt("remote", rc.getName(), name, defValue); diff --git a/src/main/java/com/googlesource/gerrit/plugins/replication/PushOne.java b/src/main/java/com/googlesource/gerrit/plugins/replication/PushOne.java index a251832..16c8577 100644 --- a/src/main/java/com/googlesource/gerrit/plugins/replication/PushOne.java +++ b/src/main/java/com/googlesource/gerrit/plugins/replication/PushOne.java @@ -110,6 +110,7 @@ class PushOne implements ProjectRunnable, CanceledWhileRunning { private Repository git; private boolean retrying; private int retryCount; + private final int maxRetries; private boolean canceled; private final Multimap<String,ReplicationState> stateMap = LinkedListMultimap.create(); @@ -155,6 +156,7 @@ class PushOne implements ProjectRunnable, CanceledWhileRunning { createdAt = System.nanoTime(); metrics = m; canceledWhileRunning = new AtomicBoolean(false); + maxRetries = p.getMaxRetries(); } @Override @@ -199,9 +201,10 @@ class PushOne implements ProjectRunnable, CanceledWhileRunning { return retrying; } - void setToRetry() { + boolean setToRetry() { retrying = true; retryCount++; + return retryCount <= maxRetries; } void canceledByReplication() { diff --git a/src/main/resources/Documentation/config.md b/src/main/resources/Documentation/config.md index 709d61f..9fae6a3 100644 --- a/src/main/resources/Documentation/config.md +++ b/src/main/resources/Documentation/config.md @@ -104,6 +104,18 @@ replication.lockErrorMaxRetries Default: 0 (disabled, i.e. never retry) +replication.maxRetries +: Maximum number of times to retry a push operation that previously + failed. 
+ + When a push operation reaches its maximum number of retries + the replication event is discarded from the queue and the remote + destinations could be out of sync. + + Can be overridden at remote-level by setting replicationMaxRetries. + + By default, push are retried indefinitely. + remote.NAME.url : Address of the remote server to push to. Multiple URLs may be specified within a single remote block, listing different @@ -210,12 +222,25 @@ remote.NAME.replicationRetry If a remote push operation fails because a remote server was offline, all push operations to the same destination URL are - blocked, and the remote push is continuously retried. + blocked, and the remote push is continuously retried unless + the replicationMaxRetries value is set. This is a Gerrit specific extension to the Git remote block. By default, 1 minute. +remote.NAME.replicationMaxRetries +: Maximum number of times to retry a push operation that previously + failed. + + When a push operation reaches its maximum number of retries + the replication event is discarded from the queue and the remote + destinations could be out of sync. + + This is a Gerrit specific extension to the Git remote block. + + By default, use replication.maxRetries. + remote.NAME.threads : Number of worker threads to dedicate to pushing to the repositories described by this remote. Each thread can push |