Skip to content

Commit

Permalink
support simplifying regex to expression (#982)
Browse files Browse the repository at this point in the history
Adds method to `PatternMatcher` that attempts to rewrite
the pattern to a set of simple pattern matches that can
be combined with AND, OR, and NOT to have the same matching
behavior as the original regex pattern. This can be useful
when working with data stores that have more restricted
pattern matching support such as RE2.
  • Loading branch information
brharrington authored Aug 2, 2022
1 parent d7e2f54 commit edb23a4
Show file tree
Hide file tree
Showing 10 changed files with 946 additions and 4 deletions.
3 changes: 3 additions & 0 deletions dependencies.properties
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,10 @@ com.amazonaws:aws-java-sdk-cloudwatch = 1.12.272
com.fasterxml.jackson.core:jackson-core = 2.13.3
com.fasterxml.jackson.core:jackson-databind = 2.13.3
com.fasterxml.jackson.dataformat:jackson-dataformat-smile = 2.13.3
com.github.ben-manes.caffeine:caffeine = 2.9.3
com.google.inject.extensions:guice-servlet = 5.1.0
com.google.inject:guice = 5.1.0
com.google.re2j:re2j = 1.7
com.netflix.archaius:archaius2-core = 2.3.16
com.netflix.frigga:frigga = 0.25.0
com.netflix.governator:governator = 1.17.12
Expand All @@ -23,6 +25,7 @@ org.apache.logging.log4j:log4j-core = 2.18.0
org.apache.logging.log4j:log4j-jcl = 2.18.0
org.apache.logging.log4j:log4j-jul = 2.18.0
org.apache.logging.log4j:log4j-slf4j-impl = 2.18.0
org.hsqldb:hsqldb = 2.6.1
org.junit.jupiter:junit-jupiter-engine = 5.9.0
org.slf4j:slf4j-api = 1.7.36
org.slf4j:slf4j-nop = 1.7.36
Expand Down
7 changes: 4 additions & 3 deletions spectator-api/build.gradle
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@

dependencies {
testImplementation files("$projectDir/src/test/lib/compatibility-0.68.0.jar")
testImplementation "org.hsqldb:hsqldb:2.6.1"
jmh "com.google.re2j:re2j:1.7"
jmh "com.github.ben-manes.caffeine:caffeine:2.9.3"
testImplementation "com.google.re2j:re2j"
testImplementation "org.hsqldb:hsqldb"
jmh "com.google.re2j:re2j"
jmh "com.github.ben-manes.caffeine:caffeine"
}

javadoc {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,291 @@
/*
* Copyright 2014-2022 Netflix, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.netflix.spectator.impl;

import java.util.List;
import java.util.Objects;
import java.util.StringJoiner;

/**
* Represents an expression of simpler patterns combined with AND, OR, and NOT clauses.
* This can be used for rewriting complex regular expressions to simpler patterns for
* data stores that either support a more limited set of patterns or have better optimizations
* in place for simpler operations like starts with or contains.
*/
public interface PatternExpr {

/**
* Returns true if the expression matches the value. This is a helper mostly used for testing
* to ensure the matching logic is consistent with the original regular expression.
*
* @param value
* String to check against this expression.
* @return
* True if the expression matches {@code value}, false otherwise.
*/
boolean matches(String value);

/**
 * Renders this expression as a query string using the supplied encoder. A typical use is an
 * encoder that produces a fragment for the WHERE clause of a SQL query.
 *
 * @param encoder
 *     Encoder that supplies the text for each part of the expression.
 * @return
 *     The text accumulated by {@link #buildQueryString(Encoder, StringBuilder)} when it is
 *     invoked with a fresh, empty builder.
 */
default String toQueryString(Encoder encoder) {
  final StringBuilder buffer = new StringBuilder();
  this.buildQueryString(encoder, buffer);
  return buffer.toString();
}

/**
 * Appends the query string form of this expression to {@code builder}. The expression tree
 * is walked recursively and the encoder supplies the text for each node:
 *
 * <ul>
 *   <li>{@link Regex}: the result of {@link Encoder#regex(PatternMatcher)}.</li>
 *   <li>{@link Not}: the sub-expression wrapped in the NOT start and end text.</li>
 *   <li>{@link And} / {@link Or}: with a single clause, just that clause with no wrapper;
 *       with two or more clauses, the clauses joined by the separator and wrapped in the
 *       start and end text; with no clauses, nothing is appended.</li>
 * </ul>
 *
 * Any other implementation of this interface appends nothing.
 *
 * @param encoder
 *     Encoder that supplies the text for each part of the expression.
 * @param builder
 *     Destination that the encoded text is appended to.
 */
default void buildQueryString(Encoder encoder, StringBuilder builder) {
  if (this instanceof Regex) {
    // Leaf node: the encoder renders the underlying matcher directly.
    builder.append(encoder.regex(((Regex) this).matcher()));
    return;
  }

  if (this instanceof Not) {
    final PatternExpr inner = ((Not) this).expr();
    builder.append(encoder.startNot());
    inner.buildQueryString(encoder, builder);
    builder.append(encoder.endNot());
    return;
  }

  if (this instanceof And) {
    final List<PatternExpr> clauses = ((And) this).exprs();
    final int count = clauses.size();
    if (count == 1) {
      // A single clause needs no AND wrapper.
      clauses.get(0).buildQueryString(encoder, builder);
    } else if (count > 1) {
      builder.append(encoder.startAnd());
      for (int idx = 0; idx < count; ++idx) {
        if (idx > 0) {
          builder.append(encoder.separatorAnd());
        }
        clauses.get(idx).buildQueryString(encoder, builder);
      }
      builder.append(encoder.endAnd());
    }
    return;
  }

  if (this instanceof Or) {
    final List<PatternExpr> clauses = ((Or) this).exprs();
    final int count = clauses.size();
    if (count == 1) {
      // A single clause needs no OR wrapper.
      clauses.get(0).buildQueryString(encoder, builder);
    } else if (count > 1) {
      builder.append(encoder.startOr());
      for (int idx = 0; idx < count; ++idx) {
        if (idx > 0) {
          builder.append(encoder.separatorOr());
        }
        clauses.get(idx).buildQueryString(encoder, builder);
      }
      builder.append(encoder.endOr());
    }
  }
}

/**
* Encoder to map a pattern expression to an expression for some other language.
* Each method returns a piece of text; the default
* {@link PatternExpr#buildQueryString(Encoder, StringBuilder)} implementation appends
* those pieces to its builder in the order the expression tree is visited.
*/
interface Encoder {

/**
* Encode a simple pattern match.
*
* @param matcher
* Matcher held by the {@link Regex} node being encoded.
* @return
* Text to append for this match.
*/
String regex(PatternMatcher matcher);

/**
* Encode the start for a chain of clauses combined with AND. The default traversal only
* requests this when the AND has two or more clauses, before the first clause.
*/
String startAnd();

/**
* Encode the separator for a chain of clauses combined with AND. The default traversal
* appends it between consecutive clauses.
*/
String separatorAnd();

/**
* Encode the end for a chain of clauses combined with AND. The default traversal only
* requests this when the AND has two or more clauses, after the last clause.
*/
String endAnd();

/**
* Encode the start for a chain of clauses combined with OR. The default traversal only
* requests this when the OR has two or more clauses, before the first clause.
*/
String startOr();

/**
* Encode the separator for a chain of clauses combined with OR. The default traversal
* appends it between consecutive clauses.
*/
String separatorOr();

/**
* Encode the end for a chain of clauses combined with OR. The default traversal only
* requests this when the OR has two or more clauses, after the last clause.
*/
String endOr();

/** Encode the start for a NOT clause. Appended before the negated sub-expression. */
String startNot();

/** Encode the end for a NOT clause. Appended after the negated sub-expression. */
String endNot();
}

/**
* A simple expression that performs a single pattern match.
*
* @param matcher
* Matcher to wrap. It is passed through {@code Preconditions.checkNotNull} by the
* {@link Regex} constructor, so a null value is presumably rejected (confirm against
* {@code Preconditions}).
* @return
* A new {@link Regex} expression wrapping {@code matcher}.
*/
static PatternExpr simple(PatternMatcher matcher) {
return new Regex(matcher);
}

/**
 * An expression that performs a logical AND of the listed sub-expressions.
 *
 * @param exprs
 *     Sub-expressions to combine. May be null, in which case null is returned. A non-null
 *     list is checked with {@code Preconditions.checkArg} to have at least one element.
 * @return
 *     Null if {@code exprs} is null, the sole element if the list has exactly one entry,
 *     otherwise a new {@link And} over the list.
 */
static PatternExpr and(List<PatternExpr> exprs) {
  if (exprs == null) {
    return null;
  }
  final int count = exprs.size();
  Preconditions.checkArg(count > 0, "exprs list cannot be empty");
  if (count == 1) {
    // No need to wrap a single clause.
    return exprs.get(0);
  }
  return new And(exprs);
}

/**
 * An expression that performs a logical OR of the listed sub-expressions.
 *
 * @param exprs
 *     Sub-expressions to combine. May be null, in which case null is returned. A non-null
 *     list is checked with {@code Preconditions.checkArg} to have at least one element.
 * @return
 *     Null if {@code exprs} is null, the sole element if the list has exactly one entry,
 *     otherwise a new {@link Or} over the list.
 */
static PatternExpr or(List<PatternExpr> exprs) {
  if (exprs == null) {
    return null;
  }
  final int count = exprs.size();
  Preconditions.checkArg(count > 0, "exprs list cannot be empty");
  if (count == 1) {
    // No need to wrap a single clause.
    return exprs.get(0);
  }
  return new Or(exprs);
}

/**
* An expression that inverts the match of the sub-expression.
*
* @param expr
* Sub-expression to negate. It is passed through {@code Preconditions.checkNotNull} by
* the {@link Not} constructor, so a null value is presumably rejected (confirm against
* {@code Preconditions}).
* @return
* A new {@link Not} expression wrapping {@code expr}.
*/
static PatternExpr not(PatternExpr expr) {
return new Not(expr);
}

/**
 * Leaf expression that delegates matching to a single {@link PatternMatcher}. Equality and
 * hashing are based on the wrapped matcher.
 */
final class Regex implements PatternExpr {

  private final PatternMatcher matcher;

  /** Create a new instance; the matcher is validated with {@code checkNotNull}. */
  Regex(PatternMatcher matcher) {
    this.matcher = Preconditions.checkNotNull(matcher, "matcher");
  }

  /** Returns the matcher wrapped by this expression. */
  public PatternMatcher matcher() {
    return this.matcher;
  }

  @Override public boolean matches(String str) {
    final PatternMatcher delegate = this.matcher;
    return delegate.matches(str);
  }

  /** Returns the string form of the matcher surrounded by single quotes. */
  @Override public String toString() {
    return new StringBuilder()
        .append('\'')
        .append(matcher)
        .append('\'')
        .toString();
  }

  @Override public boolean equals(Object other) {
    if (other == this) {
      return true;
    }
    return other instanceof Regex && matcher.equals(((Regex) other).matcher);
  }

  @Override public int hashCode() {
    return Objects.hash(matcher);
  }
}

/**
 * Expression that matches only if every sub-expression matches. The list passed to the
 * constructor is stored by reference, not copied. Equality and hashing are based on the
 * list of sub-expressions.
 */
final class And implements PatternExpr {

  private final List<PatternExpr> exprs;

  /** Create a new instance; the list is validated with {@code checkNotNull}. */
  And(List<PatternExpr> exprs) {
    this.exprs = Preconditions.checkNotNull(exprs, "exprs");
  }

  /** Returns the sub-expressions combined by this AND. */
  public List<PatternExpr> exprs() {
    return this.exprs;
  }

  /** Evaluates the clauses in order and stops at the first one that does not match. */
  @Override public boolean matches(String str) {
    boolean allMatched = true;
    for (PatternExpr clause : exprs) {
      if (!clause.matches(str)) {
        allMatched = false;
        break;
      }
    }
    return allMatched;
  }

  /** Returns the clauses joined with {@code " AND "} and wrapped in parentheses. */
  @Override public String toString() {
    final StringJoiner text = new StringJoiner(" AND ", "(", ")");
    for (PatternExpr clause : exprs) {
      text.add(clause.toString());
    }
    return text.toString();
  }

  @Override public boolean equals(Object other) {
    if (other == this) {
      return true;
    }
    return other instanceof And && exprs.equals(((And) other).exprs);
  }

  @Override public int hashCode() {
    return Objects.hash(exprs);
  }
}

/**
 * Expression that matches if at least one sub-expression matches. The list passed to the
 * constructor is stored by reference, not copied. Equality and hashing are based on the
 * list of sub-expressions.
 */
final class Or implements PatternExpr {

  private final List<PatternExpr> exprs;

  /** Create a new instance; the list is validated with {@code checkNotNull}. */
  Or(List<PatternExpr> exprs) {
    this.exprs = Preconditions.checkNotNull(exprs, "exprs");
  }

  /** Returns the sub-expressions combined by this OR. */
  public List<PatternExpr> exprs() {
    return this.exprs;
  }

  /** Evaluates the clauses in order and stops at the first one that matches. */
  @Override public boolean matches(String str) {
    boolean anyMatched = false;
    for (PatternExpr clause : exprs) {
      if (clause.matches(str)) {
        anyMatched = true;
        break;
      }
    }
    return anyMatched;
  }

  /** Returns the clauses joined with {@code " OR "} and wrapped in parentheses. */
  @Override public String toString() {
    final StringJoiner text = new StringJoiner(" OR ", "(", ")");
    for (PatternExpr clause : exprs) {
      text.add(clause.toString());
    }
    return text.toString();
  }

  @Override public boolean equals(Object other) {
    if (other == this) {
      return true;
    }
    return other instanceof Or && exprs.equals(((Or) other).exprs);
  }

  @Override public int hashCode() {
    return Objects.hash(exprs);
  }
}

/**
 * Expression that inverts the result of a single sub-expression. Equality and hashing are
 * based on the wrapped sub-expression.
 */
final class Not implements PatternExpr {

  private final PatternExpr expr;

  /** Create a new instance; the sub-expression is validated with {@code checkNotNull}. */
  Not(PatternExpr expr) {
    this.expr = Preconditions.checkNotNull(expr, "expr");
  }

  /** Returns the sub-expression that is negated. */
  public PatternExpr expr() {
    return this.expr;
  }

  @Override public boolean matches(String str) {
    final boolean innerMatched = expr.matches(str);
    return !innerMatched;
  }

  /** Returns the string form of the sub-expression prefixed with {@code "NOT "}. */
  @Override public String toString() {
    return "NOT ".concat(String.valueOf(expr));
  }

  @Override public boolean equals(Object other) {
    if (other == this) {
      return true;
    }
    return other instanceof Not && expr.equals(((Not) other).expr);
  }

  @Override public int hashCode() {
    return Objects.hash(expr);
  }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,23 @@ default List<PatternMatcher> expandOrClauses(int max) {
return Collections.singletonList(this);
}

/**
* Attempts to rewrite this pattern to a set of simple pattern matches that can be combined
* with AND, OR, and NOT to have the same matching behavior as the original regex pattern.
* This can be useful when working with data stores that have more restricted pattern matching
* support such as RE2.
*
* <p>This default implementation performs no rewriting and always returns null.
*
* @param max
* Maximum size of the expanded OR list which is needed as part of simplifying the
* overall expression. See {@link #expandOrClauses(int)} for more details.
* @return
* Expression that represents a set of simple pattern matches, or null if it is not
* possible to simplify the expression.
*/
default PatternExpr toPatternExpr(int max) {
// No simplification is attempted here; callers must handle the null result.
return null;
}

/**
* Returns a pattern that can be used with a SQL LIKE clause or null if this expression
* cannot be expressed as a SQL pattern. Can be used to more optimally map the pattern
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
*/
package com.netflix.spectator.impl.matcher;

import com.netflix.spectator.impl.PatternExpr;
import com.netflix.spectator.impl.PatternMatcher;

import java.util.ArrayList;
Expand Down Expand Up @@ -67,6 +68,11 @@ default List<PatternMatcher> expandOrClauses(int max) {
return results;
}

/**
* Delegates to {@code PatternUtils.toPatternExpr}, passing this matcher and {@code max}
* through unchanged.
*/
@Override
default PatternExpr toPatternExpr(int max) {
return PatternUtils.toPatternExpr(this, max);
}

@Override
default String toSqlPattern() {
return PatternUtils.toSqlPattern(this);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,12 @@ private Matcher repeat(Matcher matcher) {
String[] numbers = tokens.subSequence(start, current - 1).toString().split(",");
int min = Integer.parseInt(numbers[0]);
int max = (numbers.length > 1) ? Integer.parseInt(numbers[1]) : min;
if (min < 0) {
throw error("illegal repetition, min < 0");
}
if (min > max) {
throw error("illegal repetition, min > max");
}
return new RepeatMatcher(matcher, min, max);
}

Expand Down
Loading

0 comments on commit edb23a4

Please sign in to comment.