Skip to content

Commit

Permalink
Cache matches “” for xml regex
Browse files Browse the repository at this point in the history
  • Loading branch information
alanpaxton authored and adamretter committed Dec 1, 2024
1 parent ac8b9f0 commit dae66ea
Show file tree
Hide file tree
Showing 4 changed files with 50 additions and 18 deletions.
48 changes: 43 additions & 5 deletions exist-core/src/main/java/org/exist/util/XmlRegexFactory.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import javax.annotation.Nullable;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.atomic.AtomicReference;

/**
* A simple Xml Regular Expression Pattern Factory.
Expand All @@ -35,7 +36,7 @@ public class XmlRegexFactory {
private static final XmlRegexFactory instance = new XmlRegexFactory();
private static final ThreadLocal<List<String>> REGULAR_EXPRESSION_COMPILATION_WARNINGS = ThreadLocal.withInitial(() -> new ArrayList<>(1));

private final Cache<String, Either<XPathException, RegularExpression>> cache;
private final Cache<String, Either<XPathException, RegularExpressionEntry>> cache;

private XmlRegexFactory() {
this.cache = Caffeine.newBuilder()
Expand All @@ -47,13 +48,13 @@ public static XmlRegexFactory getInstance() {
return instance;
}

public RegularExpression getXmlRegex(final Expression regexExpr, final String pattern, @Nullable final String flags) throws XPathException {
public RegularExpressionEntry getXmlRegex(final Expression regexExpr, final String pattern, @Nullable final String flags) throws XPathException {
final String key = pattern + flags;
final Either<XPathException, RegularExpression> maybeRegex = cache.get(key, _key -> compile(regexExpr, pattern, flags));
final Either<XPathException, RegularExpressionEntry> maybeRegex = cache.get(key, _key -> compile(regexExpr, pattern, flags));
return Either.valueOrThrow(maybeRegex);
}

private static Either<XPathException, RegularExpression> compile(final Expression regexExpr, final String pattern, @Nullable final String flags) {
private static Either<XPathException, RegularExpressionEntry> compile(final Expression regexExpr, final String pattern, @Nullable final String flags) {
try {
final List<String> warnings = REGULAR_EXPRESSION_COMPILATION_WARNINGS.get();
if (!warnings.isEmpty()) {
Expand All @@ -69,7 +70,7 @@ private static Either<XPathException, RegularExpression> compile(final Expressio
LOG.warn(warning);
}

return Either.Right(regularExpression);
return Either.Right(new RegularExpressionEntry(regularExpression));

} catch (final net.sf.saxon.trans.XPathException e) {
return Either.Left(translateRegexException(regexExpr, e));
Expand Down Expand Up @@ -98,4 +99,41 @@ public static XPathException translateRegexException(final Expression regexExpr,
return new XPathException(regexExpr, xpathException.getMessage());
}
}

/**
 * Cache entry that pairs a compiled Regular Expression with a lazily
 * computed answer to the frequently asked question
 * "does this expression match the empty string?".
 */
public static class RegularExpressionEntry {
    private final RegularExpression regularExpression;

    // Holds null until the empty-string check has been evaluated once;
    // afterwards holds the remembered result.
    private final AtomicReference<Boolean> matchesEmpty = new AtomicReference<>();

    private RegularExpressionEntry(final RegularExpression regularExpression) {
        this.regularExpression = regularExpression;
    }

    /**
     * Get the wrapped Regular Expression.
     *
     * @return the Regular Expression held by this entry.
     */
    public RegularExpression getRegularExpression() {
        return regularExpression;
    }

    /**
     * Return whether the regular expression matches an empty string.
     *
     * The result is evaluated on first use and remembered for later calls.
     *
     * @return true if the regular expression matches an empty string, false otherwise.
     */
    public boolean matchesEmpty() {
        final Boolean remembered = matchesEmpty.get();
        if (remembered != null) {
            return remembered;
        }

        // Not yet known: evaluate now. No lock is taken, so several callers may
        // each compute the value; only a store into a still-unset slot succeeds.
        final Boolean computed = regularExpression.matches("");
        matchesEmpty.compareAndSet(null, computed);
        return computed;
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,8 @@
*/
package org.exist.xquery.functions.fn;

import java.util.ArrayList;
import java.util.List;

import net.sf.saxon.Configuration;
import net.sf.saxon.om.Item;
import net.sf.saxon.regex.RegexIterator;
import net.sf.saxon.regex.RegularExpression;
import org.exist.dom.QName;
import org.exist.dom.memtree.MemTreeBuilder;
import org.exist.util.XmlRegexFactory;
Expand Down Expand Up @@ -137,13 +132,13 @@ public Sequence eval(final Sequence[] args, final Sequence contextSequence) thro
}

private void analyzeString(final MemTreeBuilder builder, final String input, String pattern, @Nullable final String flags) throws XPathException {
final RegularExpression regularExpression = XML_REGEX_FACTORY.getXmlRegex(this, pattern, flags);
if (regularExpression.matches("")) {
final XmlRegexFactory.RegularExpressionEntry regularExpressionEntry = XML_REGEX_FACTORY.getXmlRegex(this, pattern, flags);
if (regularExpressionEntry.matchesEmpty()) {
throw new XPathException(this, ErrorCodes.FORX0003, "regular expression could match empty string");
}

try {
final RegexIterator regexIterator = regularExpression.analyze(input);
final RegexIterator regexIterator = regularExpressionEntry.getRegularExpression().analyze(input);
Item item;
while ((item = regexIterator.next()) != null) {
if (regexIterator.isMatching()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -522,7 +522,7 @@ private Sequence evalGeneric(final Sequence contextSequence, final Item contextI
}

private boolean matchXmlRegex(final String string, final String pattern, @Nullable final String flags) throws XPathException {
final RegularExpression regex = XML_REGEX_FACTORY.getXmlRegex(this, pattern, flags);
final RegularExpression regex = XML_REGEX_FACTORY.getXmlRegex(this, pattern, flags).getRegularExpression();
return regex.containsMatch(string);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@
package org.exist.xquery.functions.fn;

import net.sf.saxon.functions.Replace;
import net.sf.saxon.regex.RegularExpression;
import org.exist.dom.QName;
import org.exist.util.XmlRegexFactory;
import org.exist.xquery.*;
Expand Down Expand Up @@ -122,8 +121,8 @@ public Sequence eval(final Sequence[] args, final Sequence contextSequence) thro
final String pattern = args[1].itemAt(0).getStringValue();
final String replace = args[2].itemAt(0).getStringValue();

final RegularExpression regularExpression = XML_REGEX_FACTORY.getXmlRegex(this, pattern, flags);
if (regularExpression.matches("")) {
final XmlRegexFactory.RegularExpressionEntry regularExpressionEntry = XML_REGEX_FACTORY.getXmlRegex(this, pattern, flags);
if (regularExpressionEntry.matchesEmpty()) {
throw new XPathException(this, ErrorCodes.FORX0003, "regular expression could match empty string");
}

Expand All @@ -135,7 +134,7 @@ public Sequence eval(final Sequence[] args, final Sequence contextSequence) thro
}

try {
final CharSequence res = regularExpression.replace(string, replace);
final CharSequence res = regularExpressionEntry.getRegularExpression().replace(string, replace);
result = new StringValue(this, res.toString());
} catch (final net.sf.saxon.trans.XPathException e) {
throw XmlRegexFactory.translateRegexException(this, e);
Expand Down

0 comments on commit dae66ea

Please sign in to comment.