summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLuca Milanesio <luca.milanesio@gmail.com>2017-03-15 23:53:12 +0000
committerLuca Milanesio <luca.milanesio@gmail.com>2017-03-18 22:38:19 +0000
commit7bdacabfcc4c9b17b773f2783bf406c02ad738fb (patch)
treeec6d973062d6532d74155b02d5b912b8303545da
parent305c864db28eb0c77c8499bc04c87de3f849cf3c (diff)
Set max retries to avoid queue congestion
When servers have *a lot* of remote slaves, some of them unstable and potentially offline, a maximum retry policy is needed to prevent push events from staying in the replication queue and being rescheduled forever. Keep the configuration backward-compatible by setting maxRetries to zero by default, which means the limit is disabled. Change-Id: I060cc7bc3a4d1089b0815db02d2e1430f83a2015
-rw-r--r--src/main/java/com/googlesource/gerrit/plugins/replication/Destination.java9
-rw-r--r--src/main/java/com/googlesource/gerrit/plugins/replication/DestinationConfiguration.java8
-rw-r--r--src/main/java/com/googlesource/gerrit/plugins/replication/PushOne.java5
-rw-r--r--src/main/resources/Documentation/config.md27
4 files changed, 45 insertions, 4 deletions
diff --git a/src/main/java/com/googlesource/gerrit/plugins/replication/Destination.java b/src/main/java/com/googlesource/gerrit/plugins/replication/Destination.java
index 3978f27..de705f0 100644
--- a/src/main/java/com/googlesource/gerrit/plugins/replication/Destination.java
+++ b/src/main/java/com/googlesource/gerrit/plugins/replication/Destination.java
@@ -393,8 +393,9 @@ public class Destination {
case TRANSPORT_ERROR:
case REPOSITORY_MISSING:
default:
- pushOp.setToRetry();
- pool.schedule(pushOp, config.getRetryDelay(), TimeUnit.MINUTES);
+ if (pushOp.setToRetry()) {
+ pool.schedule(pushOp, config.getRetryDelay(), TimeUnit.MINUTES);
+ }
break;
}
}
@@ -554,6 +555,10 @@ public class Destination {
return config.getRemoteConfig().getName();
}
+ public int getMaxRetries() {
+ return config.getMaxRetries();
+ }
+
private static boolean matches(URIish uri, String urlMatch) {
if (urlMatch == null || urlMatch.equals("") || urlMatch.equals("*")) {
return true;
diff --git a/src/main/java/com/googlesource/gerrit/plugins/replication/DestinationConfiguration.java b/src/main/java/com/googlesource/gerrit/plugins/replication/DestinationConfiguration.java
index d1d18d5..fc109bf 100644
--- a/src/main/java/com/googlesource/gerrit/plugins/replication/DestinationConfiguration.java
+++ b/src/main/java/com/googlesource/gerrit/plugins/replication/DestinationConfiguration.java
@@ -34,6 +34,7 @@ class DestinationConfiguration {
private final ImmutableList<String> projects;
private final ImmutableList<String> authGroupNames;
private final RemoteConfig remoteConfig;
+ private final int maxRetries;
DestinationConfiguration(RemoteConfig remoteConfig, Config cfg) {
this.remoteConfig = remoteConfig;
@@ -53,6 +54,9 @@ class DestinationConfiguration {
replicateHiddenProjects = cfg.getBoolean("remote", name, "replicateHiddenProjects", false);
remoteNameStyle =
MoreObjects.firstNonNull(cfg.getString("remote", name, "remoteNameStyle"), "slash");
+ maxRetries =
+ getInt(
+ remoteConfig, cfg, "replicationMaxRetries", cfg.getInt("replication", "maxRetries", 0));
}
public int getDelay() {
@@ -111,6 +115,10 @@ class DestinationConfiguration {
return remoteConfig;
}
+ public int getMaxRetries() {
+ return maxRetries;
+ }
+
private static int getInt(RemoteConfig rc, Config cfg, String name, int defValue) {
return cfg.getInt("remote", rc.getName(), name, defValue);
}
diff --git a/src/main/java/com/googlesource/gerrit/plugins/replication/PushOne.java b/src/main/java/com/googlesource/gerrit/plugins/replication/PushOne.java
index 7682f70..473ebc5 100644
--- a/src/main/java/com/googlesource/gerrit/plugins/replication/PushOne.java
+++ b/src/main/java/com/googlesource/gerrit/plugins/replication/PushOne.java
@@ -107,6 +107,7 @@ class PushOne implements ProjectRunnable, CanceledWhileRunning {
private Repository git;
private boolean retrying;
private int retryCount;
+ private final int maxRetries;
private boolean canceled;
private final ListMultimap<String, ReplicationState> stateMap = LinkedListMultimap.create();
private final int maxLockRetries;
@@ -152,6 +153,7 @@ class PushOne implements ProjectRunnable, CanceledWhileRunning {
createdAt = System.nanoTime();
metrics = m;
canceledWhileRunning = new AtomicBoolean(false);
+ maxRetries = p.getMaxRetries();
}
@Override
@@ -196,9 +198,10 @@ class PushOne implements ProjectRunnable, CanceledWhileRunning {
return retrying;
}
- void setToRetry() {
+ /**
+  * Marks this push operation as retrying and counts the attempt.
+  *
+  * @return {@code true} if the operation should be rescheduled. This is always the case
+  *     when {@code maxRetries} is 0, the documented default meaning "retry indefinitely";
+  *     otherwise only while the number of retries has not exceeded {@code maxRetries}.
+  */
+ boolean setToRetry() {
retrying = true;
retryCount++;
+ // A maxRetries of 0 disables the limit; without this check the default
+ // configuration would drop every failed push after its first attempt.
+ return maxRetries == 0 || retryCount <= maxRetries;
}
void canceledByReplication() {
diff --git a/src/main/resources/Documentation/config.md b/src/main/resources/Documentation/config.md
index cfdd91d..50664dd 100644
--- a/src/main/resources/Documentation/config.md
+++ b/src/main/resources/Documentation/config.md
@@ -104,6 +104,18 @@ replication.lockErrorMaxRetries
Default: 0 (disabled, i.e. never retry)
+replication.maxRetries
+: Maximum number of times to retry a push operation that previously
+ failed.
+
+ When a push operation reaches its maximum number of retries
+ the replication event is discarded from the queue and the remote
+ destinations could be out of sync.
+
+ Can be overridden per remote by setting remote.NAME.replicationMaxRetries.
+
+ Default: 0 (disabled, i.e. pushes are retried indefinitely)
+
remote.NAME.url
: Address of the remote server to push to. Multiple URLs may be
specified within a single remote block, listing different
@@ -210,12 +222,25 @@ remote.NAME.replicationRetry
If a remote push operation fails because a remote server was
offline, all push operations to the same destination URL are
- blocked, and the remote push is continuously retried.
+ blocked, and the remote push is continuously retried unless
+ the replicationMaxRetries value is set.
This is a Gerrit specific extension to the Git remote block.
By default, 1 minute.
+remote.NAME.replicationMaxRetries
+: Maximum number of times to retry a push operation that previously
+ failed.
+
+ When a push operation reaches its maximum number of retries
+ the replication event is discarded from the queue and the remote
+ destinations could be out of sync.
+
+ This is a Gerrit specific extension to the Git remote block.
+
+ By default, the value of replication.maxRetries is used.
+
remote.NAME.threads
: Number of worker threads to dedicate to pushing to the
repositories described by this remote. Each thread can push