summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorShawn O. Pearce <sop@google.com>2010-08-21 15:37:57 -0700
committerShawn O. Pearce <sop@google.com>2010-08-21 15:37:57 -0700
commit8c11f0c458c47997cec704775ba66e2240e88384 (patch)
treee8a7835a5141cec3429b109202ad382a06862192
parent3b175d5800f06db0001119a04efeb370c12056c2 (diff)
Optimize RegexFilePredicate for common matches
A common pattern is file:^some/directory/.*, which we can efficiently match by sorting the paths and using the common prefix to narrow our search space to only the portion that has a chance of matching with us. Change-Id: I12cad5e5d5e8af3e1fe902b1e70828491e5cc048 Signed-off-by: Shawn O. Pearce <sop@google.com>
-rw-r--r--gerrit-server/src/main/java/com/google/gerrit/server/mail/CommentSender.java5
-rw-r--r--gerrit-server/src/main/java/com/google/gerrit/server/query/change/ChangeData.java10
-rw-r--r--gerrit-server/src/main/java/com/google/gerrit/server/query/change/RegexFilePredicate.java67
-rw-r--r--gerrit-server/src/test/java/com/google/gerrit/server/query/change/RegexFilePredicateTest.java82
4 files changed, 148 insertions, 16 deletions
diff --git a/gerrit-server/src/main/java/com/google/gerrit/server/mail/CommentSender.java b/gerrit-server/src/main/java/com/google/gerrit/server/mail/CommentSender.java
index b5e9259241..e501a3e690 100644
--- a/gerrit-server/src/main/java/com/google/gerrit/server/mail/CommentSender.java
+++ b/gerrit-server/src/main/java/com/google/gerrit/server/mail/CommentSender.java
@@ -26,6 +26,7 @@ import org.eclipse.jgit.errors.RepositoryNotFoundException;
import org.eclipse.jgit.lib.Repository;
import java.io.IOException;
+import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
@@ -54,7 +55,9 @@ public class CommentSender extends ReplyToChangeSender {
paths.add(p.getFileName());
}
}
- changeData.setCurrentFilePaths(paths);
+ String[] names = paths.toArray(new String[paths.size()]);
+ Arrays.sort(names);
+ changeData.setCurrentFilePaths(names);
}
@Override
diff --git a/gerrit-server/src/main/java/com/google/gerrit/server/query/change/ChangeData.java b/gerrit-server/src/main/java/com/google/gerrit/server/query/change/ChangeData.java
index d5bf1e061d..930b2d6960 100644
--- a/gerrit-server/src/main/java/com/google/gerrit/server/query/change/ChangeData.java
+++ b/gerrit-server/src/main/java/com/google/gerrit/server/query/change/ChangeData.java
@@ -29,6 +29,7 @@ import com.google.gwtorm.client.OrmException;
import com.google.inject.Provider;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
@@ -39,7 +40,7 @@ public class ChangeData {
private Collection<PatchSet> patches;
private Collection<PatchSetApproval> approvals;
private Collection<PatchSetApproval> currentApprovals;
- private Collection<String> currentFiles;
+ private String[] currentFiles;
private Collection<PatchLineComment> comments;
private Collection<TrackingId> trackingIds;
private CurrentUser visibleTo;
@@ -53,11 +54,11 @@ public class ChangeData {
change = c;
}
- public void setCurrentFilePaths(Collection<String> filePaths) {
+ public void setCurrentFilePaths(String[] filePaths) {
currentFiles = filePaths;
}
- public Collection<String> currentFilePaths(Provider<ReviewDb> db,
+ public String[] currentFilePaths(Provider<ReviewDb> db,
PatchListCache cache) throws OrmException {
if (currentFiles == null) {
Change c = change(db);
@@ -88,7 +89,8 @@ public class ChangeData {
break;
}
}
- currentFiles = r;
+ currentFiles = r.toArray(new String[r.size()]);
+ Arrays.sort(currentFiles);
}
return currentFiles;
}
diff --git a/gerrit-server/src/main/java/com/google/gerrit/server/query/change/RegexFilePredicate.java b/gerrit-server/src/main/java/com/google/gerrit/server/query/change/RegexFilePredicate.java
index cb4f5603b8..6bccec1498 100644
--- a/gerrit-server/src/main/java/com/google/gerrit/server/query/change/RegexFilePredicate.java
+++ b/gerrit-server/src/main/java/com/google/gerrit/server/query/change/RegexFilePredicate.java
@@ -20,35 +20,75 @@ import com.google.gerrit.server.query.OperatorPredicate;
import com.google.gwtorm.client.OrmException;
import com.google.inject.Provider;
-import java.util.Collection;
-import java.util.regex.Pattern;
-import java.util.regex.PatternSyntaxException;
+import dk.brics.automaton.Automaton;
+import dk.brics.automaton.RegExp;
+import dk.brics.automaton.RunAutomaton;
+
+import java.util.Arrays;
class RegexFilePredicate extends OperatorPredicate<ChangeData> {
private final Provider<ReviewDb> db;
private final PatchListCache cache;
- private final Pattern pattern;
+ private final RunAutomaton pattern;
+
+ private final String prefixBegin;
+ private final String prefixEnd;
+ private final int prefixLen;
+ private final boolean prefixOnly;
RegexFilePredicate(Provider<ReviewDb> db, PatchListCache plc, String re) {
super(ChangeQueryBuilder.FIELD_FILE, re);
this.db = db;
this.cache = plc;
- try {
- this.pattern = Pattern.compile(re);
- } catch (PatternSyntaxException e) {
- throw new IllegalArgumentException(e.getMessage());
+
+ if (re.startsWith("^")) {
+ re = re.substring(1);
+ }
+
+ if (re.endsWith("$") && !re.endsWith("\\$")) {
+ re = re.substring(0, re.length() - 1);
+ }
+
+ Automaton automaton = new RegExp(re).toAutomaton();
+ prefixBegin = automaton.getCommonPrefix();
+ prefixLen = prefixBegin.length();
+
+ if (0 < prefixLen) {
+ char max = (char) (prefixBegin.charAt(prefixLen - 1) + 1);
+ prefixEnd = prefixBegin.substring(0, prefixLen - 1) + max;
+ prefixOnly = re.equals(prefixBegin + ".*");
+ } else {
+ prefixEnd = "";
+ prefixOnly = false;
}
+
+ pattern = prefixOnly ? null : new RunAutomaton(automaton);
}
@Override
public boolean match(ChangeData object) throws OrmException {
- Collection<String> files = object.currentFilePaths(db, cache);
+ String[] files = object.currentFilePaths(db, cache);
if (files != null) {
- for (String path : files) {
- if (pattern.matcher(path).find()) {
+ int begin, end;
+
+ if (0 < prefixLen) {
+ begin = find(files, prefixBegin);
+ end = find(files, prefixEnd);
+ } else {
+ begin = 0;
+ end = files.length;
+ }
+
+ if (prefixOnly) {
+ return begin < end;
+ }
+
+ while (begin < end) {
+ if (pattern.run(files[begin++])) {
return true;
}
}
+
return false;
} else {
@@ -60,6 +100,11 @@ class RegexFilePredicate extends OperatorPredicate<ChangeData> {
}
}
+ private static int find(String[] files, String p) {
+ int r = Arrays.binarySearch(files, p);
+ return r < 0 ? -(r + 1) : r;
+ }
+
@Override
public int getCost() {
return 1;
diff --git a/gerrit-server/src/test/java/com/google/gerrit/server/query/change/RegexFilePredicateTest.java b/gerrit-server/src/test/java/com/google/gerrit/server/query/change/RegexFilePredicateTest.java
new file mode 100644
index 0000000000..3a390b5978
--- /dev/null
+++ b/gerrit-server/src/test/java/com/google/gerrit/server/query/change/RegexFilePredicateTest.java
@@ -0,0 +1,82 @@
+// Copyright (C) 2010 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.google.gerrit.server.query.change;
+
+import com.google.gerrit.reviewdb.Change;
+import com.google.gwtorm.client.OrmException;
+
+import junit.framework.TestCase;
+
+import java.util.Arrays;
+
+public class RegexFilePredicateTest extends TestCase {
+ public void testPrefixOnlyOptimization() throws OrmException {
+ RegexFilePredicate p = predicate("^a/b/.*");
+ assertTrue(p.match(change("a/b/source.c")));
+ assertFalse(p.match(change("source.c")));
+
+ assertTrue(p.match(change("a/b/source.c", "a/c/test")));
+ assertFalse(p.match(change("a/bb/source.c")));
+ }
+
+ public void testPrefixReducesSearchSpace() throws OrmException {
+ RegexFilePredicate p = predicate("^a/b/.*\\.[ch]");
+ assertTrue(p.match(change("a/b/source.c")));
+ assertFalse(p.match(change("a/b/source.res")));
+ assertFalse(p.match(change("source.res")));
+
+ assertTrue(p.match(change("a/b/a.a", "a/b/a.d", "a/b/a.h")));
+ }
+
+ public void testFileExtension_Constant() throws OrmException {
+ RegexFilePredicate p = predicate("^.*\\.res");
+ assertTrue(p.match(change("test.res")));
+ assertTrue(p.match(change("foo/bar/test.res")));
+ assertFalse(p.match(change("test.res.bar")));
+ }
+
+ public void testFileExtension_CharacterGroup() throws OrmException {
+ RegexFilePredicate p = predicate("^.*\\.[ch]");
+ assertTrue(p.match(change("test.c")));
+ assertTrue(p.match(change("test.h")));
+ assertFalse(p.match(change("test.C")));
+ }
+
+ public void testEndOfString() throws OrmException {
+ assertTrue(predicate("^a$").match(change("a")));
+ assertFalse(predicate("^a$").match(change("a$")));
+
+ assertFalse(predicate("^a\\$").match(change("a")));
+ assertTrue(predicate("^a\\$").match(change("a$")));
+ }
+
+ public void testExactMatch() throws OrmException {
+ RegexFilePredicate p = predicate("^foo.c");
+ assertTrue(p.match(change("foo.c")));
+ assertFalse(p.match(change("foo.cc")));
+ assertFalse(p.match(change("bar.c")));
+ }
+
+ private static RegexFilePredicate predicate(String pattern) {
+ return new RegexFilePredicate(null, null, pattern);
+ }
+
+ private static ChangeData change(String... files) {
+ Arrays.sort(files);
+ ChangeData cd = new ChangeData(new Change.Id(1));
+ cd.setCurrentFilePaths(files);
+ return cd;
+ }
+}