diff options
author | Shawn O. Pearce <sop@google.com> | 2010-08-21 15:37:57 -0700 |
---|---|---|
committer | Shawn O. Pearce <sop@google.com> | 2010-08-21 15:37:57 -0700 |
commit | 8c11f0c458c47997cec704775ba66e2240e88384 (patch) | |
tree | e8a7835a5141cec3429b109202ad382a06862192 | |
parent | 3b175d5800f06db0001119a04efeb370c12056c2 (diff) |
Optimize RegexFilePredicate for common matches
A common pattern is file:^some/directory/.*, which we can efficiently
match by sorting the paths and using the common prefix to narrow our
search space to only the portion that has a chance of matching with us.
Change-Id: I12cad5e5d5e8af3e1fe902b1e70828491e5cc048
Signed-off-by: Shawn O. Pearce <sop@google.com>
4 files changed, 148 insertions, 16 deletions
diff --git a/gerrit-server/src/main/java/com/google/gerrit/server/mail/CommentSender.java b/gerrit-server/src/main/java/com/google/gerrit/server/mail/CommentSender.java index b5e9259241..e501a3e690 100644 --- a/gerrit-server/src/main/java/com/google/gerrit/server/mail/CommentSender.java +++ b/gerrit-server/src/main/java/com/google/gerrit/server/mail/CommentSender.java @@ -26,6 +26,7 @@ import org.eclipse.jgit.errors.RepositoryNotFoundException; import org.eclipse.jgit.lib.Repository; import java.io.IOException; +import java.util.Arrays; import java.util.Collections; import java.util.HashSet; import java.util.List; @@ -54,7 +55,9 @@ public class CommentSender extends ReplyToChangeSender { paths.add(p.getFileName()); } } - changeData.setCurrentFilePaths(paths); + String[] names = paths.toArray(new String[paths.size()]); + Arrays.sort(names); + changeData.setCurrentFilePaths(names); } @Override diff --git a/gerrit-server/src/main/java/com/google/gerrit/server/query/change/ChangeData.java b/gerrit-server/src/main/java/com/google/gerrit/server/query/change/ChangeData.java index d5bf1e061d..930b2d6960 100644 --- a/gerrit-server/src/main/java/com/google/gerrit/server/query/change/ChangeData.java +++ b/gerrit-server/src/main/java/com/google/gerrit/server/query/change/ChangeData.java @@ -29,6 +29,7 @@ import com.google.gwtorm.client.OrmException; import com.google.inject.Provider; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.List; @@ -39,7 +40,7 @@ public class ChangeData { private Collection<PatchSet> patches; private Collection<PatchSetApproval> approvals; private Collection<PatchSetApproval> currentApprovals; - private Collection<String> currentFiles; + private String[] currentFiles; private Collection<PatchLineComment> comments; private Collection<TrackingId> trackingIds; private CurrentUser visibleTo; @@ -53,11 +54,11 @@ public class ChangeData { change = c; } - public void setCurrentFilePaths(Collection<String> filePaths) { + public void setCurrentFilePaths(String[] filePaths) { currentFiles = filePaths; } - public Collection<String> currentFilePaths(Provider<ReviewDb> db, + public String[] currentFilePaths(Provider<ReviewDb> db, PatchListCache cache) throws OrmException { if (currentFiles == null) { Change c = change(db); @@ -88,7 +89,8 @@ public class ChangeData { break; } } - currentFiles = r; + currentFiles = r.toArray(new String[r.size()]); + Arrays.sort(currentFiles); } return currentFiles; } diff --git a/gerrit-server/src/main/java/com/google/gerrit/server/query/change/RegexFilePredicate.java b/gerrit-server/src/main/java/com/google/gerrit/server/query/change/RegexFilePredicate.java index cb4f5603b8..6bccec1498 100644 --- a/gerrit-server/src/main/java/com/google/gerrit/server/query/change/RegexFilePredicate.java +++ b/gerrit-server/src/main/java/com/google/gerrit/server/query/change/RegexFilePredicate.java @@ -20,35 +20,75 @@ import com.google.gerrit.server.query.OperatorPredicate; import com.google.gwtorm.client.OrmException; import com.google.inject.Provider; -import java.util.Collection; -import java.util.regex.Pattern; -import java.util.regex.PatternSyntaxException; +import dk.brics.automaton.Automaton; +import dk.brics.automaton.RegExp; +import dk.brics.automaton.RunAutomaton; + +import java.util.Arrays; class RegexFilePredicate extends OperatorPredicate<ChangeData> { private final Provider<ReviewDb> db; private final PatchListCache cache; - private final Pattern pattern; + private final RunAutomaton pattern; + + private final String prefixBegin; + private final String prefixEnd; + private final int prefixLen; + private final boolean prefixOnly; RegexFilePredicate(Provider<ReviewDb> db, PatchListCache plc, String re) { super(ChangeQueryBuilder.FIELD_FILE, re); this.db = db; this.cache = plc; - try { - this.pattern = Pattern.compile(re); - } catch (PatternSyntaxException e) { - throw new IllegalArgumentException(e.getMessage()); + + if (re.startsWith("^")) { + re = re.substring(1); + } + + if (re.endsWith("$") && !re.endsWith("\\$")) { + re = re.substring(0, re.length() - 1); + } + + Automaton automaton = new RegExp(re).toAutomaton(); + prefixBegin = automaton.getCommonPrefix(); + prefixLen = prefixBegin.length(); + + if (0 < prefixLen) { + char max = (char) (prefixBegin.charAt(prefixLen - 1) + 1); + prefixEnd = prefixBegin.substring(0, prefixLen - 1) + max; + prefixOnly = re.equals(prefixBegin + ".*"); + } else { + prefixEnd = ""; + prefixOnly = false; } + + pattern = prefixOnly ? null : new RunAutomaton(automaton); } @Override public boolean match(ChangeData object) throws OrmException { - Collection<String> files = object.currentFilePaths(db, cache); + String[] files = object.currentFilePaths(db, cache); if (files != null) { - for (String path : files) { - if (pattern.matcher(path).find()) { + int begin, end; + + if (0 < prefixLen) { + begin = find(files, prefixBegin); + end = find(files, prefixEnd); + } else { + begin = 0; + end = files.length; + } + + if (prefixOnly) { + return begin < end; + } + + while (begin < end) { + if (pattern.run(files[begin++])) { return true; } } + return false; } else { @@ -60,6 +100,11 @@ class RegexFilePredicate extends OperatorPredicate<ChangeData> { } } + private static int find(String[] files, String p) { + int r = Arrays.binarySearch(files, p); + return r < 0 ? -(r + 1) : r; + } + @Override public int getCost() { return 1; diff --git a/gerrit-server/src/test/java/com/google/gerrit/server/query/change/RegexFilePredicateTest.java b/gerrit-server/src/test/java/com/google/gerrit/server/query/change/RegexFilePredicateTest.java new file mode 100644 index 0000000000..3a390b5978 --- /dev/null +++ b/gerrit-server/src/test/java/com/google/gerrit/server/query/change/RegexFilePredicateTest.java @@ -0,0 +1,82 @@ +// Copyright (C) 2010 The Android Open Source Project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package com.google.gerrit.server.query.change; + +import com.google.gerrit.reviewdb.Change; +import com.google.gwtorm.client.OrmException; + +import junit.framework.TestCase; + +import java.util.Arrays; + +public class RegexFilePredicateTest extends TestCase { + public void testPrefixOnlyOptimization() throws OrmException { + RegexFilePredicate p = predicate("^a/b/.*"); + assertTrue(p.match(change("a/b/source.c"))); + assertFalse(p.match(change("source.c"))); + + assertTrue(p.match(change("a/b/source.c", "a/c/test"))); + assertFalse(p.match(change("a/bb/source.c"))); + } + + public void testPrefixReducesSearchSpace() throws OrmException { + RegexFilePredicate p = predicate("^a/b/.*\\.[ch]"); + assertTrue(p.match(change("a/b/source.c"))); + assertFalse(p.match(change("a/b/source.res"))); + assertFalse(p.match(change("source.res"))); + + assertTrue(p.match(change("a/b/a.a", "a/b/a.d", "a/b/a.h"))); + } + + public void testFileExtension_Constant() throws OrmException { + RegexFilePredicate p = predicate("^.*\\.res"); + assertTrue(p.match(change("test.res"))); + assertTrue(p.match(change("foo/bar/test.res"))); + assertFalse(p.match(change("test.res.bar"))); + } + + public void testFileExtension_CharacterGroup() throws OrmException { + RegexFilePredicate p = predicate("^.*\\.[ch]"); + assertTrue(p.match(change("test.c"))); + assertTrue(p.match(change("test.h"))); + assertFalse(p.match(change("test.C"))); + } + + public void testEndOfString() throws OrmException { + assertTrue(predicate("^a$").match(change("a"))); + assertFalse(predicate("^a$").match(change("a$"))); + + assertFalse(predicate("^a\\$").match(change("a"))); + assertTrue(predicate("^a\\$").match(change("a$"))); + } + + public void testExactMatch() throws OrmException { + RegexFilePredicate p = predicate("^foo.c"); + assertTrue(p.match(change("foo.c"))); + assertFalse(p.match(change("foo.cc"))); + assertFalse(p.match(change("bar.c"))); + } + + private static RegexFilePredicate predicate(String pattern) { + return new RegexFilePredicate(null, null, pattern); + } + + private static ChangeData change(String... files) { + Arrays.sort(files); + ChangeData cd = new ChangeData(new Change.Id(1)); + cd.setCurrentFilePaths(files); + return cd; + } +} |