From f84e5180fc32210db3d85a75d088f5ce841e5c68 Mon Sep 17 00:00:00 2001 From: Kaushik Lingarkar Date: Fri, 12 Aug 2022 13:27:37 -0700 Subject: Provide configuration to paginate index queries with increasing size A 'pageSizeMultiplier' config entry is added to allow paginating index queries with increasing size. Default multiplier is set to 1 to keep the current pagination unchanged. 'maxPageSize' has also been added to limit the maximum results a repeated index query can request. Increasing page sizes can help with performance of queries such as [1] where the visibility is being queried for an account that cannot see most/all changes. On a site with 1M abandoned changes, queries like [1] finish in: 45.1 mins with paginationType=OFFSET and pageSizeMultipler=1 8.5 mins with paginationType=OFFSET and pageSizeMultipler=10 9.1 mins with paginationType=SEARCH_AFTER and pageSizeMultipler=1 8.2 mins with paginationType=SEARCH_AFTER and pageSizeMultipler=10 This change also acts as a stepping stone for the next one in the series where no-limit will be updated to paginate without any regressions. [1] gerrit query 'status:abandoned visibleto:guest' Release-Notes: Index searches can now paginate with increasing size Change-Id: Iee014d1391c15d7ef31770b4fca538122944eb6b --- Documentation/config-gerrit.txt | 47 ++++++++++++++++++++++ java/com/google/gerrit/index/IndexConfig.java | 22 ++++++++++ java/com/google/gerrit/index/query/AndSource.java | 26 ++++++++++-- .../google/gerrit/index/query/IndexedQuery.java | 8 ++-- java/com/google/gerrit/index/query/Paginated.java | 4 +- java/com/google/gerrit/pgm/init/InitIndex.java | 1 + 6 files changed, 99 insertions(+), 9 deletions(-) diff --git a/Documentation/config-gerrit.txt b/Documentation/config-gerrit.txt index 121ac8fd8a..dbe390957e 100644 --- a/Documentation/config-gerrit.txt +++ b/Documentation/config-gerrit.txt @@ -3159,6 +3159,53 @@ this threshold times the requested limit will result in an error. Set to + Defaults to no limit. +[[index.pageSizeMultiplier]]index.pageSizeMultiplier:: ++ +When index queries are repeated to obtain more results, this multiplier +will be used to determine the limit for the next query. Using a page +multiplier allows queries to start off small and thus provide good +latency for queries which may end up only having very few results, and +then scaling up to have better throughput to handle queries with larger +result sets without incurring the overhead of making as many queries as +would be required with a smaller limit. This strategy of using a multiplier +attempts to create a balance between latency and throughput by dynamically +adjusting the query size to the number of results being returned by each +query in the pagination. ++ +The larger the multiplier, the better the throughput on large queries, and +it also improves latency on large queries by scaling up quickly. However, a +larger multiplier can hurt latencies a bit by making the "last" query in a +series longer than needed. The impact of this depends on how much the backend +latency goes up when specifying a large limit and few results are returned. +Setting link:#index.maxPageSize[index.maxPageSize] that isn't too large, can +likely help reduce the impacts of this. ++ +A large multiplier with an appropriate link:#index.maxPageSize[index.maxPageSize] +should benefit external index backends such as Elasticsearch more, which have +a relatively high query latency. ++ +For example, if the limit of the previous query was 500 and pageSizeMultiplier +is configured to 5, the next query will have a limit of 2500. ++ +Defaults to 1 which effectively turns this feature off. + +[[index.maxPageSize]]index.maxPageSize:: ++ +Maximum size to allow when index queries are repeated to obtain more results. Note +that, link:#index.maxLimit[index.maxLimit] will be used to limit page size if it +is configured to a value lower than maxPageSize. ++ +For example, if the limit of previous query was 500, pageSizeMultiplier is +configured to 5 and maxPageSize to 2000, the next query will have a limit of +2000 (instead of 2500). ++ +When `index.type` is set to `ELASTICSEARCH`, this value should not exceed +the `index.max_result_window` value configured on the Elasticsearch +server. If a value is not configured during site initialization, defaults to +10000, which is the default value of `index.max_result_window` in Elasticsearch. ++ +Defaults to no limit. + [[index.maxTerms]]index.maxTerms:: + Maximum number of leaf terms to allow in a query. Too-large queries may diff --git a/java/com/google/gerrit/index/IndexConfig.java b/java/com/google/gerrit/index/IndexConfig.java index b1b019860c..420a057d56 100644 --- a/java/com/google/gerrit/index/IndexConfig.java +++ b/java/com/google/gerrit/index/IndexConfig.java @@ -30,6 +30,7 @@ import org.eclipse.jgit.lib.Config; @AutoValue public abstract class IndexConfig { private static final int DEFAULT_MAX_TERMS = 1024; + private static final int DEFAULT_PAGE_SIZE_MULTIPLIER = 1; public static IndexConfig createDefault() { return builder().build(); @@ -40,6 +41,8 @@ public abstract class IndexConfig { setIfPresent(cfg, "maxLimit", b::maxLimit); setIfPresent(cfg, "maxPages", b::maxPages); setIfPresent(cfg, "maxTerms", b::maxTerms); + setIfPresent(cfg, "pageSizeMultiplier", b::pageSizeMultiplier); + setIfPresent(cfg, "maxPageSize", b::maxPageSize); setTypeOrDefault(cfg, b::type); setPaginationTypeOrDefault(cfg, b::paginationType); return b; @@ -67,6 +70,8 @@ public abstract class IndexConfig { .maxLimit(Integer.MAX_VALUE) .maxPages(Integer.MAX_VALUE) .maxTerms(DEFAULT_MAX_TERMS) + .pageSizeMultiplier(DEFAULT_PAGE_SIZE_MULTIPLIER) + .maxPageSize(Integer.MAX_VALUE) .type(IndexType.getDefault()) .separateChangeSubIndexes(false) .paginationType(PaginationType.OFFSET); @@ -94,6 +99,10 @@ public abstract class IndexConfig { public abstract Builder paginationType(PaginationType type); + public abstract Builder pageSizeMultiplier(int pageSizeMultiplier); + + public abstract Builder maxPageSize(int maxPageSize); + abstract IndexConfig autoBuild(); public IndexConfig build() { @@ -101,6 +110,8 @@ public abstract class IndexConfig { checkLimit(cfg.maxLimit(), "maxLimit"); checkLimit(cfg.maxPages(), "maxPages"); checkLimit(cfg.maxTerms(), "maxTerms"); + checkLimit(cfg.pageSizeMultiplier(), "pageSizeMultiplier"); + checkLimit(cfg.maxPageSize(), "maxPageSize"); return cfg; } } @@ -139,4 +150,15 @@ public abstract class IndexConfig { * results. */ public abstract PaginationType paginationType(); + + /** + * @return multiplier to be used to determine the limit when queries are repeated to obtain the + * next set of results. + */ + public abstract int pageSizeMultiplier(); + + /** + * @return maximum allowed limit when repeating index queries to obtain the next set of results. + */ + public abstract int maxPageSize(); } diff --git a/java/com/google/gerrit/index/query/AndSource.java b/java/com/google/gerrit/index/query/AndSource.java index ca1acf13b0..c99e60ae5a 100644 --- a/java/com/google/gerrit/index/query/AndSource.java +++ b/java/com/google/gerrit/index/query/AndSource.java @@ -20,6 +20,7 @@ import static com.google.common.collect.ImmutableList.toImmutableList; import com.google.common.collect.FluentIterable; import com.google.common.collect.ImmutableList; import com.google.common.collect.Iterables; +import com.google.common.collect.Ordering; import com.google.gerrit.exceptions.StorageException; import com.google.gerrit.index.IndexConfig; import com.google.gerrit.index.PaginationType; @@ -115,13 +116,16 @@ public class AndSource extends AndPredicate // @SuppressWarnings("unchecked") Paginated p = (Paginated) source; + final int limit = p.getOptions().limit(); Object searchAfter = resultSet.searchAfter(); - while (skipped && r.size() < p.getOptions().limit() + start) { + int pageSize = limit; + while (skipped && r.size() < limit + start) { skipped = false; + pageSize = getNextPageSize(pageSize); ResultSet next = indexConfig.paginationType().equals(PaginationType.SEARCH_AFTER) - ? p.restart(searchAfter) - : p.restart(nextStart); + ? p.restart(searchAfter, pageSize) + : p.restart(nextStart, pageSize); for (T data : buffer(next)) { if (match(data)) { r.add(data); @@ -207,4 +211,20 @@ public class AndSource extends AndPredicate private DataSource toDataSource(Predicate pred) { return (DataSource) pred; } + + private int getNextPageSize(int pageSize) { + List possiblePageSizes = new ArrayList<>(3); + try { + possiblePageSizes.add(Math.multiplyExact(pageSize, indexConfig.pageSizeMultiplier())); + } catch (ArithmeticException e) { + possiblePageSizes.add(Integer.MAX_VALUE); + } + if (indexConfig.maxPageSize() > 0) { + possiblePageSizes.add(indexConfig.maxPageSize()); + } + if (indexConfig.maxLimit() > 0) { + possiblePageSizes.add(indexConfig.maxLimit()); + } + return Ordering.natural().min(possiblePageSizes); + } } diff --git a/java/com/google/gerrit/index/query/IndexedQuery.java b/java/com/google/gerrit/index/query/IndexedQuery.java index ded0074cf7..ed98e623f8 100644 --- a/java/com/google/gerrit/index/query/IndexedQuery.java +++ b/java/com/google/gerrit/index/query/IndexedQuery.java @@ -87,14 +87,14 @@ public class IndexedQuery extends Predicate implements DataSource, P } @Override - public ResultSet restart(int start) { - opts = opts.withStart(start); + public ResultSet restart(int start, int limit) { + opts = opts.withStart(start).withLimit(limit); return search(); } @Override - public ResultSet restart(Object searchAfter) { - opts = opts.withSearchAfter(searchAfter); + public ResultSet restart(Object searchAfter, int limit) { + opts = opts.withSearchAfter(searchAfter).withLimit(limit); return search(); } diff --git a/java/com/google/gerrit/index/query/Paginated.java b/java/com/google/gerrit/index/query/Paginated.java index 475bcfe4fd..32bde39bf1 100644 --- a/java/com/google/gerrit/index/query/Paginated.java +++ b/java/com/google/gerrit/index/query/Paginated.java @@ -19,7 +19,7 @@ import com.google.gerrit.index.QueryOptions; public interface Paginated { QueryOptions getOptions(); - ResultSet restart(int start); + ResultSet restart(int start, int limit); - ResultSet restart(Object searchAfter); + ResultSet restart(Object searchAfter, int limit); } diff --git a/java/com/google/gerrit/pgm/init/InitIndex.java b/java/com/google/gerrit/pgm/init/InitIndex.java index 83d9261042..d78d8a1bb5 100644 --- a/java/com/google/gerrit/pgm/init/InitIndex.java +++ b/java/com/google/gerrit/pgm/init/InitIndex.java @@ -63,6 +63,7 @@ class InitIndex implements InitStep { elasticsearch.string("Index Prefix", "prefix", "gerrit_"); elasticsearch.string("Server", "server", "http://localhost:9200"); index.string("Result window size", "maxLimit", "10000"); + index.string("Result page size", "maxPageSize", "10000"); } if ((site.isNew || isEmptySite()) && type.isLucene()) { -- cgit v1.2.3