Skip to content

Commit

Permalink
support checking for match after the prefix (#1099)
Browse files Browse the repository at this point in the history
For patterns where the prefix is extracted and verified
before values are checked against the full matcher, we do
not need to recheck the prefix. This change adds a method
to the pattern matcher to allow bypassing the prefix and
just matching against the remaining part of the pattern.
  • Loading branch information
brharrington authored Dec 2, 2023
1 parent 44bd818 commit 62b1342
Show file tree
Hide file tree
Showing 5 changed files with 50 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,15 @@ public interface PatternMatcher {
*/
boolean matches(String str);

/**
* Returns true if the passed in string matches the pattern after the prefix. This method
* can be used to more efficiently check the value if the {@link #prefix()} was already
* verified by the caller. Implementations may skip the prefix check entirely, so the
* result is only meaningful for values that are known to start with the prefix.
*
* <p>The default implementation does not skip anything, it just delegates to
* {@link #matches(String)}.
*
* @param str
*     Value to check. It is expected to start with {@link #prefix()}.
* @return
*     True if the value matches the part of the pattern that follows the prefix.
*/
default boolean matchesAfterPrefix(String str) {
return matches(str);
}

/**
* Returns a fixed string prefix for the pattern if one is available. This can be used
* with indexed data to help select a subset of values that are possible matches. If the
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,20 @@ public int matches(String str, int start, int length) {
return pos;
}

/**
 * Checks the value against the sub-matchers that follow the leading prefix matcher. The
 * prefix itself is not rechecked, the caller is trusted to have verified it already.
 * If the first sub-matcher is not a {@link StartsWithMatcher}, then there is no prefix to
 * skip and the regular full match is performed instead.
 */
@Override
public boolean matchesAfterPrefix(String str) {
  // No fixed prefix at the front of the sequence, fall back to the full check.
  if (!(matchers[0] instanceof StartsWithMatcher)) {
    return matches(str);
  }

  final int length = str.length();

  // Begin right after the prefix rather than at the start of the string.
  int offset = matchers[0].prefix().length();

  // Each matcher reports the position to continue from; a negative position means the
  // match failed and stops the loop.
  int idx = 1;
  while (idx < matchers.length && offset >= 0) {
    offset = matchers[idx].matches(str, offset, length - offset);
    ++idx;
  }
  return offset >= 0;
}

@Override
public String prefix() {
return matchers[0].prefix();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,11 @@ public int matches(String str, int start, int length) {
return matched ? pattern.length() : Constants.NO_MATCH;
}

/**
* Always returns true. The {@code prefix()} of this matcher is the pattern itself, so once
* the caller has verified the prefix there is nothing left to check. The value is not
* inspected.
*/
@Override
public boolean matchesAfterPrefix(String str) {
// The prefix is trusted to have been verified by the caller, see PatternMatcher docs.
return true;
}

@Override
public String prefix() {
return pattern;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,18 @@ public void prefix() {
Assertions.assertEquals("abc", PatternMatcher.compile("^[a][b][c]+").prefix());
}

@Test
public void matchesAfterPrefix() {
  // StartsWithMatcher, bar case should match because it will not get checked and we trust
  // that the caller has already verified the prefix.
  Assertions.assertTrue(PatternMatcher.compile("^abc").matchesAfterPrefix("abcdef"));
  Assertions.assertTrue(PatternMatcher.compile("^abc").matchesAfterPrefix("bardef"));

  // SeqMatcher, the character class must be part of the pattern (not the input) so that
  // the compiled matcher is a prefix followed by another matcher. As above, the bar case
  // should match because the prefix is skipped rather than rechecked.
  // NOTE(review): the bar case assumes "^abc[d-f]" compiles to a SeqMatcher whose first
  // element is a StartsWithMatcher -- confirm against the optimizer output.
  Assertions.assertTrue(PatternMatcher.compile("^abc[d-f]").matchesAfterPrefix("abcdef"));
  Assertions.assertTrue(PatternMatcher.compile("^abc[d-f]").matchesAfterPrefix("bardef"));

  // The part of the pattern after the prefix still has to match.
  Assertions.assertFalse(PatternMatcher.compile("^abc[d-f]").matchesAfterPrefix("abcxyz"));
}

@Test
public void startAnchor() {
testRE("^abc", "abcdef");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -499,7 +499,7 @@ public void forEachMatch(Function<String, String> tags, Consumer<T> consumer) {
if (!otherChecks.isEmpty()) {
List<QueryIndex<T>> tmp = new ArrayList<>();
otherChecksTree.forEach(v, kq -> {
if (kq.matches(v)) {
if (matches(kq, v)) {
QueryIndex<T> idx = otherChecks.get(kq);
if (idx != null) {
tmp.add(idx);
Expand Down Expand Up @@ -536,6 +536,15 @@ public void forEachMatch(Function<String, String> tags, Consumer<T> consumer) {
}
}

/**
 * Check a value against a key query. For regex queries only the part of the pattern after
 * the prefix is checked, which relies on the prefix having been verified before this
 * method is called. All other query types use their regular match.
 */
private boolean matches(Query.KeyQuery kq, String value) {
  if (!(kq instanceof Query.Regex)) {
    return kq.matches(value);
  }
  final Query.Regex regex = (Query.Regex) kq;
  return regex.pattern().matchesAfterPrefix(value);
}

/**
* Find hot spots in the index where there is a large set of linear matches, e.g. a bunch
* of regex queries for a given key.
Expand Down

0 comments on commit 62b1342

Please sign in to comment.