diff options
author | Kaushik Lingarkar <kaushik.lingarkar@linaro.org> | 2022-08-12 13:27:37 -0700 |
---|---|---|
committer | Kaushik Lingarkar <kaushik.lingarkar@linaro.org> | 2022-08-29 19:41:19 -0700 |
commit | f84e5180fc32210db3d85a75d088f5ce841e5c68 (patch) | |
tree | 109b24b2b6b26aa88610d3b13c7c285b9ceeae6d | |
parent | 7fdf41436979af925c00ed5e45a6348b810075fa (diff) |
Provide configuration to paginate index queries with increasing size
A 'pageSizeMultiplier' config entry is added to allow paginating
index queries with increasing size. Default multiplier is set to
1 to keep the current pagination unchanged. 'maxPageSize' has
also been added to limit the maximum results a repeated index
query can request.
Increasing page sizes can help with performance of queries such
as [1] where the visibility is being queried for an account that
cannot see most/all changes. On a site with 1M abandoned changes,
queries like [1] finish in:
45.1 mins with paginationType=OFFSET and pageSizeMultipler=1
8.5 mins with paginationType=OFFSET and pageSizeMultipler=10
9.1 mins with paginationType=SEARCH_AFTER and pageSizeMultipler=1
8.2 mins with paginationType=SEARCH_AFTER and pageSizeMultipler=10
This change also acts as a stepping stone for the next one in the
series where no-limit will be updated to paginate without any
regressions.
[1] gerrit query 'status:abandoned visibleto:guest'
Release-Notes: Index searches can now paginate with increasing size
Change-Id: Iee014d1391c15d7ef31770b4fca538122944eb6b
-rw-r--r-- | Documentation/config-gerrit.txt | 47 | ||||
-rw-r--r-- | java/com/google/gerrit/index/IndexConfig.java | 22 | ||||
-rw-r--r-- | java/com/google/gerrit/index/query/AndSource.java | 26 | ||||
-rw-r--r-- | java/com/google/gerrit/index/query/IndexedQuery.java | 8 | ||||
-rw-r--r-- | java/com/google/gerrit/index/query/Paginated.java | 4 | ||||
-rw-r--r-- | java/com/google/gerrit/pgm/init/InitIndex.java | 1 |
6 files changed, 99 insertions, 9 deletions
diff --git a/Documentation/config-gerrit.txt b/Documentation/config-gerrit.txt index 121ac8fd8a..dbe390957e 100644 --- a/Documentation/config-gerrit.txt +++ b/Documentation/config-gerrit.txt @@ -3159,6 +3159,53 @@ this threshold times the requested limit will result in an error. Set to + Defaults to no limit. +[[index.pageSizeMultiplier]]index.pageSizeMultiplier:: ++ +When index queries are repeated to obtain more results, this multiplier +will be used to determine the limit for the next query. Using a page +multiplier allows queries to start off small and thus provide good +latency for queries which may end up only having very few results, and +then scaling up to have better throughput to handle queries with larger +result sets without incurring the overhead of making as many queries as +would be required with a smaller limit. This strategy of using a multiplier +attempts to create a balance between latency and throughput by dynamically +adjusting the query size to the number of results being returned by each +query in the pagination. ++ +The larger the multiplier, the better the throughput on large queries, and +it also improves latency on large queries by scaling up quickly. However, a +larger multiplier can hurt latencies a bit by making the "last" query in a +series longer than needed. The impact of this depends on how much the backend +latency goes up when specifying a large limit and few results are returned. +Setting link:#index.maxPageSize[index.maxPageSize] that isn't too large, can +likely help reduce the impacts of this. ++ +A large multiplier with an appropriate link:#index.maxPageSize[index.maxPageSize] +should benefit external index backends such as Elasticsearch more, which have +a relatively high query latency. ++ +For example, if the limit of the previous query was 500 and pageSizeMultiplier +is configured to 5, the next query will have a limit of 2500. ++ +Defaults to 1 which effectively turns this feature off. + +[[index.maxPageSize]]index.maxPageSize:: ++ +Maximum size to allow when index queries are repeated to obtain more results. Note +that, link:#index.maxLimit[index.maxLimit] will be used to limit page size if it +is configured to a value lower than maxPageSize. ++ +For example, if the limit of previous query was 500, pageSizeMultiplier is +configured to 5 and maxPageSize to 2000, the next query will have a limit of +2000 (instead of 2500). ++ +When `index.type` is set to `ELASTICSEARCH`, this value should not exceed +the `index.max_result_window` value configured on the Elasticsearch +server. If a value is not configured during site initialization, defaults to +10000, which is the default value of `index.max_result_window` in Elasticsearch. ++ +Defaults to no limit. + [[index.maxTerms]]index.maxTerms:: + Maximum number of leaf terms to allow in a query. Too-large queries may diff --git a/java/com/google/gerrit/index/IndexConfig.java b/java/com/google/gerrit/index/IndexConfig.java index b1b019860c..420a057d56 100644 --- a/java/com/google/gerrit/index/IndexConfig.java +++ b/java/com/google/gerrit/index/IndexConfig.java @@ -30,6 +30,7 @@ import org.eclipse.jgit.lib.Config; @AutoValue public abstract class IndexConfig { private static final int DEFAULT_MAX_TERMS = 1024; + private static final int DEFAULT_PAGE_SIZE_MULTIPLIER = 1; public static IndexConfig createDefault() { return builder().build(); @@ -40,6 +41,8 @@ public abstract class IndexConfig { setIfPresent(cfg, "maxLimit", b::maxLimit); setIfPresent(cfg, "maxPages", b::maxPages); setIfPresent(cfg, "maxTerms", b::maxTerms); + setIfPresent(cfg, "pageSizeMultiplier", b::pageSizeMultiplier); + setIfPresent(cfg, "maxPageSize", b::maxPageSize); setTypeOrDefault(cfg, b::type); setPaginationTypeOrDefault(cfg, b::paginationType); return b; @@ -67,6 +70,8 @@ public abstract class IndexConfig { .maxLimit(Integer.MAX_VALUE) .maxPages(Integer.MAX_VALUE) .maxTerms(DEFAULT_MAX_TERMS) + .pageSizeMultiplier(DEFAULT_PAGE_SIZE_MULTIPLIER) + .maxPageSize(Integer.MAX_VALUE) .type(IndexType.getDefault()) .separateChangeSubIndexes(false) .paginationType(PaginationType.OFFSET); @@ -94,6 +99,10 @@ public abstract class IndexConfig { public abstract Builder paginationType(PaginationType type); + public abstract Builder pageSizeMultiplier(int pageSizeMultiplier); + + public abstract Builder maxPageSize(int maxPageSize); + abstract IndexConfig autoBuild(); public IndexConfig build() { @@ -101,6 +110,8 @@ public abstract class IndexConfig { checkLimit(cfg.maxLimit(), "maxLimit"); checkLimit(cfg.maxPages(), "maxPages"); checkLimit(cfg.maxTerms(), "maxTerms"); + checkLimit(cfg.pageSizeMultiplier(), "pageSizeMultiplier"); + checkLimit(cfg.maxPageSize(), "maxPageSize"); return cfg; } } @@ -139,4 +150,15 @@ public abstract class IndexConfig { * results. */ public abstract PaginationType paginationType(); + + /** + * @return multiplier to be used to determine the limit when queries are repeated to obtain the + * next set of results. + */ + public abstract int pageSizeMultiplier(); + + /** + * @return maximum allowed limit when repeating index queries to obtain the next set of results. + */ + public abstract int maxPageSize(); } diff --git a/java/com/google/gerrit/index/query/AndSource.java b/java/com/google/gerrit/index/query/AndSource.java index ca1acf13b0..c99e60ae5a 100644 --- a/java/com/google/gerrit/index/query/AndSource.java +++ b/java/com/google/gerrit/index/query/AndSource.java @@ -20,6 +20,7 @@ import static com.google.common.collect.ImmutableList.toImmutableList; import com.google.common.collect.FluentIterable; import com.google.common.collect.ImmutableList; import com.google.common.collect.Iterables; +import com.google.common.collect.Ordering; import com.google.gerrit.exceptions.StorageException; import com.google.gerrit.index.IndexConfig; import com.google.gerrit.index.PaginationType; @@ -115,13 +116,16 @@ public class AndSource<T> extends AndPredicate<T> // @SuppressWarnings("unchecked") Paginated<T> p = (Paginated<T>) source; + final int limit = p.getOptions().limit(); Object searchAfter = resultSet.searchAfter(); - while (skipped && r.size() < p.getOptions().limit() + start) { + int pageSize = limit; + while (skipped && r.size() < limit + start) { skipped = false; + pageSize = getNextPageSize(pageSize); ResultSet<T> next = indexConfig.paginationType().equals(PaginationType.SEARCH_AFTER) - ? p.restart(searchAfter) - : p.restart(nextStart); + ? p.restart(searchAfter, pageSize) + : p.restart(nextStart, pageSize); for (T data : buffer(next)) { if (match(data)) { r.add(data); @@ -207,4 +211,20 @@ public class AndSource<T> extends AndPredicate<T> private DataSource<T> toDataSource(Predicate<T> pred) { return (DataSource<T>) pred; } + + private int getNextPageSize(int pageSize) { + List<Integer> possiblePageSizes = new ArrayList<>(3); + try { + possiblePageSizes.add(Math.multiplyExact(pageSize, indexConfig.pageSizeMultiplier())); + } catch (ArithmeticException e) { + possiblePageSizes.add(Integer.MAX_VALUE); + } + if (indexConfig.maxPageSize() > 0) { + possiblePageSizes.add(indexConfig.maxPageSize()); + } + if (indexConfig.maxLimit() > 0) { + possiblePageSizes.add(indexConfig.maxLimit()); + } + return Ordering.natural().min(possiblePageSizes); + } } diff --git a/java/com/google/gerrit/index/query/IndexedQuery.java b/java/com/google/gerrit/index/query/IndexedQuery.java index ded0074cf7..ed98e623f8 100644 --- a/java/com/google/gerrit/index/query/IndexedQuery.java +++ b/java/com/google/gerrit/index/query/IndexedQuery.java @@ -87,14 +87,14 @@ public class IndexedQuery<I, T> extends Predicate<T> implements DataSource<T>, P } @Override - public ResultSet<T> restart(int start) { - opts = opts.withStart(start); + public ResultSet<T> restart(int start, int limit) { + opts = opts.withStart(start).withLimit(limit); return search(); } @Override - public ResultSet<T> restart(Object searchAfter) { - opts = opts.withSearchAfter(searchAfter); + public ResultSet<T> restart(Object searchAfter, int limit) { + opts = opts.withSearchAfter(searchAfter).withLimit(limit); return search(); } diff --git a/java/com/google/gerrit/index/query/Paginated.java b/java/com/google/gerrit/index/query/Paginated.java index 475bcfe4fd..32bde39bf1 100644 --- a/java/com/google/gerrit/index/query/Paginated.java +++ b/java/com/google/gerrit/index/query/Paginated.java @@ -19,7 +19,7 @@ import com.google.gerrit.index.QueryOptions; public interface Paginated<T> { QueryOptions getOptions(); - ResultSet<T> restart(int start); + ResultSet<T> restart(int start, int limit); - ResultSet<T> restart(Object searchAfter); + ResultSet<T> restart(Object searchAfter, int limit); } diff --git a/java/com/google/gerrit/pgm/init/InitIndex.java b/java/com/google/gerrit/pgm/init/InitIndex.java index 83d9261042..d78d8a1bb5 100644 --- a/java/com/google/gerrit/pgm/init/InitIndex.java +++ b/java/com/google/gerrit/pgm/init/InitIndex.java @@ -63,6 +63,7 @@ class InitIndex implements InitStep { elasticsearch.string("Index Prefix", "prefix", "gerrit_"); elasticsearch.string("Server", "server", "http://localhost:9200"); index.string("Result window size", "maxLimit", "10000"); + index.string("Result page size", "maxPageSize", "10000"); } if ((site.isNew || isEmptySite()) && type.isLucene()) { |