summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKaushik Lingarkar <kaushik.lingarkar@linaro.org>2022-08-12 13:27:37 -0700
committerKaushik Lingarkar <kaushik.lingarkar@linaro.org>2022-08-29 19:41:19 -0700
commitf84e5180fc32210db3d85a75d088f5ce841e5c68 (patch)
tree109b24b2b6b26aa88610d3b13c7c285b9ceeae6d
parent7fdf41436979af925c00ed5e45a6348b810075fa (diff)
Provide configuration to paginate index queries with increasing size
A 'pageSizeMultiplier' config entry is added to allow paginating index queries with increasing size. Default multiplier is set to 1 to keep the current pagination unchanged. 'maxPageSize' has also been added to limit the maximum results a repeated index query can request. Increasing page sizes can help with performance of queries such as [1] where the visibility is being queried for an account that cannot see most/all changes. On a site with 1M abandoned changes, queries like [1] finish in: 45.1 mins with paginationType=OFFSET and pageSizeMultipler=1 8.5 mins with paginationType=OFFSET and pageSizeMultipler=10 9.1 mins with paginationType=SEARCH_AFTER and pageSizeMultipler=1 8.2 mins with paginationType=SEARCH_AFTER and pageSizeMultipler=10 This change also acts as a stepping stone for the next one in the series where no-limit will be updated to paginate without any regressions. [1] gerrit query 'status:abandoned visibleto:guest' Release-Notes: Index searches can now paginate with increasing size Change-Id: Iee014d1391c15d7ef31770b4fca538122944eb6b
-rw-r--r--Documentation/config-gerrit.txt47
-rw-r--r--java/com/google/gerrit/index/IndexConfig.java22
-rw-r--r--java/com/google/gerrit/index/query/AndSource.java26
-rw-r--r--java/com/google/gerrit/index/query/IndexedQuery.java8
-rw-r--r--java/com/google/gerrit/index/query/Paginated.java4
-rw-r--r--java/com/google/gerrit/pgm/init/InitIndex.java1
6 files changed, 99 insertions, 9 deletions
diff --git a/Documentation/config-gerrit.txt b/Documentation/config-gerrit.txt
index 121ac8fd8a..dbe390957e 100644
--- a/Documentation/config-gerrit.txt
+++ b/Documentation/config-gerrit.txt
@@ -3159,6 +3159,53 @@ this threshold times the requested limit will result in an error. Set to
+
Defaults to no limit.
+[[index.pageSizeMultiplier]]index.pageSizeMultiplier::
++
+When index queries are repeated to obtain more results, this multiplier
+will be used to determine the limit for the next query. Using a page
+multiplier allows queries to start off small and thus provide good
+latency for queries which may end up only having very few results, and
+then scaling up to have better throughput to handle queries with larger
+result sets without incurring the overhead of making as many queries as
+would be required with a smaller limit. This strategy of using a multiplier
+attempts to create a balance between latency and throughput by dynamically
+adjusting the query size to the number of results being returned by each
+query in the pagination.
++
+The larger the multiplier, the better the throughput on large queries, and
+it also improves latency on large queries by scaling up quickly. However, a
+larger multiplier can hurt latencies a bit by making the "last" query in a
+series longer than needed. The impact of this depends on how much the backend
+latency goes up when specifying a large limit and few results are returned.
+Setting link:#index.maxPageSize[index.maxPageSize] that isn't too large, can
+likely help reduce the impacts of this.
++
+A large multiplier with an appropriate link:#index.maxPageSize[index.maxPageSize]
+should benefit external index backends such as Elasticsearch more, which have
+a relatively high query latency.
++
+For example, if the limit of the previous query was 500 and pageSizeMultiplier
+is configured to 5, the next query will have a limit of 2500.
++
+Defaults to 1 which effectively turns this feature off.
+
+[[index.maxPageSize]]index.maxPageSize::
++
+Maximum size to allow when index queries are repeated to obtain more results. Note
+that, link:#index.maxLimit[index.maxLimit] will be used to limit page size if it
+is configured to a value lower than maxPageSize.
++
+For example, if the limit of previous query was 500, pageSizeMultiplier is
+configured to 5 and maxPageSize to 2000, the next query will have a limit of
+2000 (instead of 2500).
++
+When `index.type` is set to `ELASTICSEARCH`, this value should not exceed
+the `index.max_result_window` value configured on the Elasticsearch
+server. If a value is not configured during site initialization, defaults to
+10000, which is the default value of `index.max_result_window` in Elasticsearch.
++
+Defaults to no limit.
+
[[index.maxTerms]]index.maxTerms::
+
Maximum number of leaf terms to allow in a query. Too-large queries may
diff --git a/java/com/google/gerrit/index/IndexConfig.java b/java/com/google/gerrit/index/IndexConfig.java
index b1b019860c..420a057d56 100644
--- a/java/com/google/gerrit/index/IndexConfig.java
+++ b/java/com/google/gerrit/index/IndexConfig.java
@@ -30,6 +30,7 @@ import org.eclipse.jgit.lib.Config;
@AutoValue
public abstract class IndexConfig {
private static final int DEFAULT_MAX_TERMS = 1024;
+ private static final int DEFAULT_PAGE_SIZE_MULTIPLIER = 1;
public static IndexConfig createDefault() {
return builder().build();
@@ -40,6 +41,8 @@ public abstract class IndexConfig {
setIfPresent(cfg, "maxLimit", b::maxLimit);
setIfPresent(cfg, "maxPages", b::maxPages);
setIfPresent(cfg, "maxTerms", b::maxTerms);
+ setIfPresent(cfg, "pageSizeMultiplier", b::pageSizeMultiplier);
+ setIfPresent(cfg, "maxPageSize", b::maxPageSize);
setTypeOrDefault(cfg, b::type);
setPaginationTypeOrDefault(cfg, b::paginationType);
return b;
@@ -67,6 +70,8 @@ public abstract class IndexConfig {
.maxLimit(Integer.MAX_VALUE)
.maxPages(Integer.MAX_VALUE)
.maxTerms(DEFAULT_MAX_TERMS)
+ .pageSizeMultiplier(DEFAULT_PAGE_SIZE_MULTIPLIER)
+ .maxPageSize(Integer.MAX_VALUE)
.type(IndexType.getDefault())
.separateChangeSubIndexes(false)
.paginationType(PaginationType.OFFSET);
@@ -94,6 +99,10 @@ public abstract class IndexConfig {
public abstract Builder paginationType(PaginationType type);
+ public abstract Builder pageSizeMultiplier(int pageSizeMultiplier);
+
+ public abstract Builder maxPageSize(int maxPageSize);
+
abstract IndexConfig autoBuild();
public IndexConfig build() {
@@ -101,6 +110,8 @@ public abstract class IndexConfig {
checkLimit(cfg.maxLimit(), "maxLimit");
checkLimit(cfg.maxPages(), "maxPages");
checkLimit(cfg.maxTerms(), "maxTerms");
+ checkLimit(cfg.pageSizeMultiplier(), "pageSizeMultiplier");
+ checkLimit(cfg.maxPageSize(), "maxPageSize");
return cfg;
}
}
@@ -139,4 +150,15 @@ public abstract class IndexConfig {
* results.
*/
public abstract PaginationType paginationType();
+
+ /**
+ * @return multiplier to be used to determine the limit when queries are repeated to obtain the
+ * next set of results.
+ */
+ public abstract int pageSizeMultiplier();
+
+ /**
+ * @return maximum allowed limit when repeating index queries to obtain the next set of results.
+ */
+ public abstract int maxPageSize();
}
diff --git a/java/com/google/gerrit/index/query/AndSource.java b/java/com/google/gerrit/index/query/AndSource.java
index ca1acf13b0..c99e60ae5a 100644
--- a/java/com/google/gerrit/index/query/AndSource.java
+++ b/java/com/google/gerrit/index/query/AndSource.java
@@ -20,6 +20,7 @@ import static com.google.common.collect.ImmutableList.toImmutableList;
import com.google.common.collect.FluentIterable;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
+import com.google.common.collect.Ordering;
import com.google.gerrit.exceptions.StorageException;
import com.google.gerrit.index.IndexConfig;
import com.google.gerrit.index.PaginationType;
@@ -115,13 +116,16 @@ public class AndSource<T> extends AndPredicate<T>
//
@SuppressWarnings("unchecked")
Paginated<T> p = (Paginated<T>) source;
+ final int limit = p.getOptions().limit();
Object searchAfter = resultSet.searchAfter();
- while (skipped && r.size() < p.getOptions().limit() + start) {
+ int pageSize = limit;
+ while (skipped && r.size() < limit + start) {
skipped = false;
+ pageSize = getNextPageSize(pageSize);
ResultSet<T> next =
indexConfig.paginationType().equals(PaginationType.SEARCH_AFTER)
- ? p.restart(searchAfter)
- : p.restart(nextStart);
+ ? p.restart(searchAfter, pageSize)
+ : p.restart(nextStart, pageSize);
for (T data : buffer(next)) {
if (match(data)) {
r.add(data);
@@ -207,4 +211,20 @@ public class AndSource<T> extends AndPredicate<T>
private DataSource<T> toDataSource(Predicate<T> pred) {
return (DataSource<T>) pred;
}
+
+ private int getNextPageSize(int pageSize) {
+ List<Integer> possiblePageSizes = new ArrayList<>(3);
+ try {
+ possiblePageSizes.add(Math.multiplyExact(pageSize, indexConfig.pageSizeMultiplier()));
+ } catch (ArithmeticException e) {
+ possiblePageSizes.add(Integer.MAX_VALUE);
+ }
+ if (indexConfig.maxPageSize() > 0) {
+ possiblePageSizes.add(indexConfig.maxPageSize());
+ }
+ if (indexConfig.maxLimit() > 0) {
+ possiblePageSizes.add(indexConfig.maxLimit());
+ }
+ return Ordering.natural().min(possiblePageSizes);
+ }
}
diff --git a/java/com/google/gerrit/index/query/IndexedQuery.java b/java/com/google/gerrit/index/query/IndexedQuery.java
index ded0074cf7..ed98e623f8 100644
--- a/java/com/google/gerrit/index/query/IndexedQuery.java
+++ b/java/com/google/gerrit/index/query/IndexedQuery.java
@@ -87,14 +87,14 @@ public class IndexedQuery<I, T> extends Predicate<T> implements DataSource<T>, P
}
@Override
- public ResultSet<T> restart(int start) {
- opts = opts.withStart(start);
+ public ResultSet<T> restart(int start, int limit) {
+ opts = opts.withStart(start).withLimit(limit);
return search();
}
@Override
- public ResultSet<T> restart(Object searchAfter) {
- opts = opts.withSearchAfter(searchAfter);
+ public ResultSet<T> restart(Object searchAfter, int limit) {
+ opts = opts.withSearchAfter(searchAfter).withLimit(limit);
return search();
}
diff --git a/java/com/google/gerrit/index/query/Paginated.java b/java/com/google/gerrit/index/query/Paginated.java
index 475bcfe4fd..32bde39bf1 100644
--- a/java/com/google/gerrit/index/query/Paginated.java
+++ b/java/com/google/gerrit/index/query/Paginated.java
@@ -19,7 +19,7 @@ import com.google.gerrit.index.QueryOptions;
public interface Paginated<T> {
QueryOptions getOptions();
- ResultSet<T> restart(int start);
+ ResultSet<T> restart(int start, int limit);
- ResultSet<T> restart(Object searchAfter);
+ ResultSet<T> restart(Object searchAfter, int limit);
}
diff --git a/java/com/google/gerrit/pgm/init/InitIndex.java b/java/com/google/gerrit/pgm/init/InitIndex.java
index 83d9261042..d78d8a1bb5 100644
--- a/java/com/google/gerrit/pgm/init/InitIndex.java
+++ b/java/com/google/gerrit/pgm/init/InitIndex.java
@@ -63,6 +63,7 @@ class InitIndex implements InitStep {
elasticsearch.string("Index Prefix", "prefix", "gerrit_");
elasticsearch.string("Server", "server", "http://localhost:9200");
index.string("Result window size", "maxLimit", "10000");
+ index.string("Result page size", "maxPageSize", "10000");
}
if ((site.isNew || isEmptySite()) && type.isLucene()) {