diff options
author | Diego Zambelli Sessona <diego.sessona@gmail.com> | 2023-07-10 14:22:29 +0100 |
---|---|---|
committer | Luca Milanesio <luca.milanesio@gmail.com> | 2023-07-28 07:36:47 +0000 |
commit | b79ed753852e297d1dc133ab94d4fe6b1e5c6d99 (patch) | |
tree | a06ea5c52435b8f35f8c51e4d73ec49e1afef797 | |
parent | 55662623f29fcd5348b006351c362287de96ee50 (diff) |
Add configuration to disable internal indexes pagination
Add NONE option to disable index backend pagination,
allowing the user deciding between performance
and data consistency.
This option needs to be honoured by the indexing backend
and this change introduces the correct implementation
for Lucene.
We should also create the corresponding change on the
elasticsearch libModule for throwing an IAE to highlight
that the backend doesn't support NONE pagination type.
API pagination is not affected by this change and is
always preserved.
When having pagination, we could encounter some problems
in the result set, when moving from one page to the
next of the index results:
* duplicated entries (when new changes are created)
* missing entries (when changes are removed or set to
private)
* wrong entries status (when changes are switching
their state during the page switching)
Having duplicates or missing results could have side effects,
depending on the use-case of the consumer of the data.
For example, since indexes are used to populate some LoadingCache
(SearchingChangeCache [1]), inconsistent results returned by
the indexing backend may cause the cache loading function
to fail or keep an inconsistent state which doesn't reflect
the underlying data.
For this case, a workaround [2] has been put in place in master
and 3.8, however, it will not work for the missing
or inconsistent entries.
[1]: https://gerrit.googlesource.com/gerrit/+/refs/heads/stable-3.6/java/com/google/gerrit/server/git/SearchingChangeCacheImpl.java#157
[2]: https://gerrit-review.googlesource.com/c/gerrit/+/349995
Bug: Issue 287484350
Release-Notes: Introduce NONE pagination type and its support in Lucene indexing backend
Change-Id: I40f87895d9dac951ae30c5562b2ddbf28b34a41a
Co-Authored-With: Fabio Ponciroli <ponch78@gmail.com>
(cherry picked from commit a0da72341b8159c5341af711e6b8991ef1aa7875)
9 files changed, 68 insertions, 7 deletions
diff --git a/Documentation/config-gerrit.txt b/Documentation/config-gerrit.txt index 1ca7870550..87e476113f 100644 --- a/Documentation/config-gerrit.txt +++ b/Documentation/config-gerrit.txt @@ -3167,6 +3167,8 @@ obtain the next set of results. Supported values are: + Index queries are repeated with a non-zero offset to obtain the next set of results. +_Note: Results may be inaccurate if the data-set is changing during the query +execution._ + * `SEARCH_AFTER` + @@ -3174,6 +3176,18 @@ Index queries are repeated using a search-after object. This type is supported for Lucene and ElasticSearch. Other index backends can provide their custom implementations for search-after. Note that, `SEARCH_AFTER` does not impact using offsets in Gerrit query APIs. +_Note: Depending on the index backend and its settings, results may be +inaccurate if the data-set is changing during the query execution._ ++ +* `NONE` ++ +Index queries are executed returning all results, without internal +pagination. +_Note: Since the entire set of indexing results is kept in memory +during the API execution, this option may lead to higher memory utilisation +and overall reduced performance. +Bear in mind that some indexing backends may not support unbounded queries; +therefore, the NONE option is unavailable._ + Defaults to `OFFSET`. @@ -3228,6 +3242,8 @@ a relatively high query latency. For example, if the limit of the previous query was 500 and pageSizeMultiplier is configured to 5, the next query will have a limit of 2500. + +_Note: ignored when paginationType is `NONE`_ ++ Defaults to 1 which effectively turns this feature off. [[index.maxPageSize]]index.maxPageSize:: @@ -3245,6 +3261,8 @@ the `index.max_result_window` value configured on the Elasticsearch server. If a value is not configured during site initialization, defaults to 10000, which is the default value of `index.max_result_window` in Elasticsearch. + +_Note: ignored when paginationType is `NONE`_ ++ Defaults to no limit. [[index.maxTerms]]index.maxTerms:: diff --git a/java/com/google/gerrit/index/PaginationType.java b/java/com/google/gerrit/index/PaginationType.java index e7e34fdc0a..f18f2899a5 100644 --- a/java/com/google/gerrit/index/PaginationType.java +++ b/java/com/google/gerrit/index/PaginationType.java @@ -25,5 +25,8 @@ public enum PaginationType { * <p>For example, Lucene implementation uses the last doc from the previous search as * search-after object and uses the IndexSearcher.searchAfter API to get the next set of results. */ - SEARCH_AFTER + SEARCH_AFTER, + + /** Index queries are executed returning all results, without internal pagination. */ + NONE } diff --git a/java/com/google/gerrit/index/query/PaginatingSource.java b/java/com/google/gerrit/index/query/PaginatingSource.java index fd3a2187e6..4eceb319e7 100644 --- a/java/com/google/gerrit/index/query/PaginatingSource.java +++ b/java/com/google/gerrit/index/query/PaginatingSource.java @@ -63,7 +63,13 @@ public class PaginatingSource<T> implements DataSource<T> { pageResultSize++; } - if (last != null && source instanceof Paginated) { + if (last != null + && source instanceof Paginated + // TODO: this fix is only for the stable branches and the real refactoring would be to + // restore the logic + // for the filtering in AndSource (L58 - 64) as per + // https://gerrit-review.googlesource.com/c/gerrit/+/345634/9 + && !indexConfig.paginationType().equals(PaginationType.NONE)) { // Restart source and continue if we have not filled the // full limit the caller wants. // diff --git a/java/com/google/gerrit/index/query/QueryProcessor.java b/java/com/google/gerrit/index/query/QueryProcessor.java index e9f795f66e..4b14290b6f 100644 --- a/java/com/google/gerrit/index/query/QueryProcessor.java +++ b/java/com/google/gerrit/index/query/QueryProcessor.java @@ -25,6 +25,7 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Ordering; import com.google.common.flogger.FluentLogger; +import com.google.common.primitives.Ints; import com.google.gerrit.common.Nullable; import com.google.gerrit.exceptions.StorageException; import com.google.gerrit.index.Index; @@ -264,7 +265,11 @@ public abstract class QueryProcessor<T> { start, initialPageSize, pageSizeMultiplier, - limit, + // Always bump limit by 1, even if this results in exceeding the permitted + // max for this user. The only way to see if there are more entities is to + // ask for one more result from the query. + // NOTE: This is consistent to the behaviour before the introduction of pagination.` + Ints.saturatedCast((long) limit + 1), getRequestedFields()); logger.atFine().log("Query options: " + opts); Predicate<T> pred = rewriter.rewrite(q, opts); diff --git a/java/com/google/gerrit/lucene/AbstractLuceneIndex.java b/java/com/google/gerrit/lucene/AbstractLuceneIndex.java index 6cede89aa5..08e9357018 100644 --- a/java/com/google/gerrit/lucene/AbstractLuceneIndex.java +++ b/java/com/google/gerrit/lucene/AbstractLuceneIndex.java @@ -25,6 +25,7 @@ import com.google.common.collect.ImmutableSet; import com.google.common.collect.ListMultimap; import com.google.common.collect.Sets; import com.google.common.flogger.FluentLogger; +import com.google.common.primitives.Ints; import com.google.common.util.concurrent.AbstractFuture; import com.google.common.util.concurrent.Futures; import com.google.common.util.concurrent.ListenableFuture; @@ -36,6 +37,7 @@ import com.google.gerrit.exceptions.StorageException; import com.google.gerrit.index.FieldDef; import com.google.gerrit.index.FieldType; import com.google.gerrit.index.Index; +import com.google.gerrit.index.PaginationType; import com.google.gerrit.index.QueryOptions; import com.google.gerrit.index.Schema; import com.google.gerrit.index.Schema.Values; @@ -419,6 +421,10 @@ public abstract class AbstractLuceneIndex<K, V> implements Index<K, V> { return f.isStored() ? Field.Store.YES : Field.Store.NO; } + static int getLimitBasedOnPaginationType(QueryOptions opts, int pagesize) { + return PaginationType.NONE == opts.config().paginationType() ? opts.limit() : pagesize; + } + private final class NrtFuture extends AbstractFuture<Void> { private final long gen; @@ -538,7 +544,9 @@ public abstract class AbstractLuceneIndex<K, V> implements Index<K, V> { ScoreDoc scoreDoc = null; try { searcher = acquire(); - int realLimit = opts.start() + opts.pageSize(); + int realLimit = + Ints.saturatedCast( + (long) getLimitBasedOnPaginationType(opts, opts.pageSize()) + opts.start()); TopFieldDocs docs = opts.searchAfter() != null ? searcher.searchAfter( diff --git a/java/com/google/gerrit/lucene/LuceneChangeIndex.java b/java/com/google/gerrit/lucene/LuceneChangeIndex.java index 542b9084a3..ca73fdcff4 100644 --- a/java/com/google/gerrit/lucene/LuceneChangeIndex.java +++ b/java/com/google/gerrit/lucene/LuceneChangeIndex.java @@ -468,6 +468,7 @@ public class LuceneChangeIndex implements ChangeIndex { if (Integer.MAX_VALUE - opts.pageSize() < opts.start()) { realPageSize = Integer.MAX_VALUE; } + int queryLimit = AbstractLuceneIndex.getLimitBasedOnPaginationType(opts, realPageSize); List<TopFieldDocs> hits = new ArrayList<>(); int searchAfterHitsCount = 0; for (int i = 0; i < indexes.size(); i++) { @@ -491,11 +492,10 @@ public class LuceneChangeIndex implements ChangeIndex { subIndex, Iterables.getLast(Arrays.asList(subIndexHits.scoreDocs), searchAfter)); } } else { - hits.add(searchers[i].search(query, realPageSize, sort)); + hits.add(searchers[i].search(query, queryLimit, sort)); } } - TopDocs docs = - TopDocs.merge(sort, realPageSize, hits.stream().toArray(TopFieldDocs[]::new)); + TopDocs docs = TopDocs.merge(sort, queryLimit, hits.stream().toArray(TopFieldDocs[]::new)); List<Document> result = new ArrayList<>(docs.scoreDocs.length); for (int i = opts.start(); i < docs.scoreDocs.length; i++) { diff --git a/javatests/com/google/gerrit/server/query/change/AbstractQueryChangesTest.java b/javatests/com/google/gerrit/server/query/change/AbstractQueryChangesTest.java index 0e4db30591..602154c337 100644 --- a/javatests/com/google/gerrit/server/query/change/AbstractQueryChangesTest.java +++ b/javatests/com/google/gerrit/server/query/change/AbstractQueryChangesTest.java @@ -85,6 +85,7 @@ import com.google.gerrit.extensions.restapi.RestApiException; import com.google.gerrit.httpd.raw.IndexPreloadingUtil; import com.google.gerrit.index.FieldDef; import com.google.gerrit.index.IndexConfig; +import com.google.gerrit.index.PaginationType; import com.google.gerrit.index.Schema; import com.google.gerrit.index.query.IndexPredicate; import com.google.gerrit.index.query.Predicate; @@ -108,6 +109,7 @@ import com.google.gerrit.server.change.ChangeInserter; import com.google.gerrit.server.change.ChangeTriplet; import com.google.gerrit.server.change.NotifyResolver; import com.google.gerrit.server.change.PatchSetInserter; +import com.google.gerrit.server.config.AllProjectsName; import com.google.gerrit.server.config.AllUsersName; import com.google.gerrit.server.git.meta.MetaDataUpdate; import com.google.gerrit.server.group.testing.TestGroupBackend; @@ -169,6 +171,7 @@ public abstract class AbstractQueryChangesTest extends GerritServerTests { @Inject protected AccountManager accountManager; @Inject protected AllUsersName allUsersName; @Inject protected BatchUpdate.Factory updateFactory; + @Inject protected AllProjectsName allProjectsName; @Inject protected ChangeInserter.Factory changeFactory; @Inject protected Provider<ChangeQueryBuilder> queryBuilderProvider; @Inject protected GerritApi gApi; @@ -4010,4 +4013,8 @@ public abstract class AbstractQueryChangesTest extends GerritServerTests { protected Schema<ChangeData> getSchema() { return indexes.getSearchIndex().getSchema(); } + + PaginationType getCurrentPaginationType() { + return config.getEnum("index", null, "paginationType", PaginationType.OFFSET); + } } diff --git a/javatests/com/google/gerrit/server/query/change/LuceneQueryChangesLatestIndexVersionTest.java b/javatests/com/google/gerrit/server/query/change/LuceneQueryChangesLatestIndexVersionTest.java index 45879433e6..bfd1bd6baa 100644 --- a/javatests/com/google/gerrit/server/query/change/LuceneQueryChangesLatestIndexVersionTest.java +++ b/javatests/com/google/gerrit/server/query/change/LuceneQueryChangesLatestIndexVersionTest.java @@ -30,4 +30,11 @@ public class LuceneQueryChangesLatestIndexVersionTest extends LuceneQueryChanges config.setString("index", null, "paginationType", "SEARCH_AFTER"); return config; } + + @ConfigSuite.Config + public static Config nonePaginationType() { + Config config = defaultConfig(); + config.setString("index", null, "paginationType", "NONE"); + return config; + } } diff --git a/javatests/com/google/gerrit/server/query/change/LuceneQueryChangesPreviousIndexVersionTest.java b/javatests/com/google/gerrit/server/query/change/LuceneQueryChangesPreviousIndexVersionTest.java index 178269730c..f93a2a7492 100644 --- a/javatests/com/google/gerrit/server/query/change/LuceneQueryChangesPreviousIndexVersionTest.java +++ b/javatests/com/google/gerrit/server/query/change/LuceneQueryChangesPreviousIndexVersionTest.java @@ -40,4 +40,11 @@ public class LuceneQueryChangesPreviousIndexVersionTest extends LuceneQueryChang config.setString("index", null, "paginationType", "SEARCH_AFTER"); return config; } + + @ConfigSuite.Config + public static Config nonePaginationType() { + Config config = againstPreviousIndexVersion(); + config.setString("index", null, "paginationType", "NONE"); + return config; + } } |