From dae66ea997618a4e910c0c4982825d7f3dc7a83a Mon Sep 17 00:00:00 2001 From: Alan Paxton Date: Wed, 18 Oct 2023 15:45:59 +0100 Subject: [PATCH] =?UTF-8?q?Cache=20matches=20=E2=80=9C=E2=80=9D=20for=20xm?= =?UTF-8?q?l=20regex?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../java/org/exist/util/XmlRegexFactory.java | 48 +++++++++++++++++-- .../xquery/functions/fn/FunAnalyzeString.java | 11 ++--- .../exist/xquery/functions/fn/FunMatches.java | 2 +- .../exist/xquery/functions/fn/FunReplace.java | 7 ++- 4 files changed, 50 insertions(+), 18 deletions(-) diff --git a/exist-core/src/main/java/org/exist/util/XmlRegexFactory.java b/exist-core/src/main/java/org/exist/util/XmlRegexFactory.java index 0b92271574d..a6d9bdadedc 100644 --- a/exist-core/src/main/java/org/exist/util/XmlRegexFactory.java +++ b/exist-core/src/main/java/org/exist/util/XmlRegexFactory.java @@ -18,6 +18,7 @@ import javax.annotation.Nullable; import java.util.ArrayList; import java.util.List; +import java.util.concurrent.atomic.AtomicReference; /** * A simple Xml Regular Expression Pattern Factory. 
@@ -35,7 +36,7 @@ public class XmlRegexFactory { private static final XmlRegexFactory instance = new XmlRegexFactory(); private static final ThreadLocal<List<String>> REGULAR_EXPRESSION_COMPILATION_WARNINGS = ThreadLocal.withInitial(() -> new ArrayList<>(1)); - private final Cache<String, Either<XPathException, RegularExpression>> cache; + private final Cache<String, Either<XPathException, RegularExpressionEntry>> cache; private XmlRegexFactory() { this.cache = Caffeine.newBuilder() @@ -47,13 +48,13 @@ public static XmlRegexFactory getInstance() { return instance; } - public RegularExpression getXmlRegex(final Expression regexExpr, final String pattern, @Nullable final String flags) throws XPathException { + public RegularExpressionEntry getXmlRegex(final Expression regexExpr, final String pattern, @Nullable final String flags) throws XPathException { final String key = pattern + flags; - final Either<XPathException, RegularExpression> maybeRegex = cache.get(key, _key -> compile(regexExpr, pattern, flags)); + final Either<XPathException, RegularExpressionEntry> maybeRegex = cache.get(key, _key -> compile(regexExpr, pattern, flags)); return Either.valueOrThrow(maybeRegex); } - private static Either<XPathException, RegularExpression> compile(final Expression regexExpr, final String pattern, @Nullable final String flags) { + private static Either<XPathException, RegularExpressionEntry> compile(final Expression regexExpr, final String pattern, @Nullable final String flags) { try { final List<String> warnings = REGULAR_EXPRESSION_COMPILATION_WARNINGS.get(); if (!warnings.isEmpty()) { @@ -69,7 +70,7 @@ private static Either<XPathException, RegularExpression> compile(final Expressio LOG.warn(warning); } - return Either.Right(regularExpression); + return Either.Right(new RegularExpressionEntry(regularExpression)); } catch (final net.sf.saxon.trans.XPathException e) { return Either.Left(translateRegexException(regexExpr, e)); @@ -98,4 +99,41 @@ public static XPathException translateRegexException(final Expression regexExpr, return new XPathException(regexExpr, xpathException.getMessage()); } } + + /** + * Regular Expression Entry. + * + * As well as the Regular Expression, it caches the common check for match of "". 
+ */ + public static class RegularExpressionEntry { + private final RegularExpression regularExpression; + private final AtomicReference<Boolean> matchesEmpty = new AtomicReference<>(); + + private RegularExpressionEntry(final RegularExpression regularExpression) { + this.regularExpression = regularExpression; + } + + /** + * Get the Regular Expression. + * + * @return the Regular Expression. + * */ + public RegularExpression getRegularExpression() { + return this.regularExpression; + } + + /** + * Return whether the regular expression matches an empty string. + * + * @return true if the regular expression matches an empty string, false otherwise. + */ + public boolean matchesEmpty() { + @Nullable Boolean local = matchesEmpty.get(); + if (local == null) { + local = regularExpression.matches(""); + matchesEmpty.compareAndSet(null, local); + } + return local; + } + } } diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunAnalyzeString.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunAnalyzeString.java index f31e86309d5..7e66f120b84 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunAnalyzeString.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunAnalyzeString.java @@ -30,13 +30,8 @@ */ package org.exist.xquery.functions.fn; -import java.util.ArrayList; -import java.util.List; - -import net.sf.saxon.Configuration; import net.sf.saxon.om.Item; import net.sf.saxon.regex.RegexIterator; -import net.sf.saxon.regex.RegularExpression; import org.exist.dom.QName; import org.exist.dom.memtree.MemTreeBuilder; import org.exist.util.XmlRegexFactory; @@ -137,13 +132,13 @@ public Sequence eval(final Sequence[] args, final Sequence contextSequence) thro } private void analyzeString(final MemTreeBuilder builder, final String input, String pattern, @Nullable final String flags) throws XPathException { - final RegularExpression regularExpression = XML_REGEX_FACTORY.getXmlRegex(this, pattern, flags); - if 
(regularExpression.matches("")) { + final XmlRegexFactory.RegularExpressionEntry regularExpressionEntry = XML_REGEX_FACTORY.getXmlRegex(this, pattern, flags); + if (regularExpressionEntry.matchesEmpty()) { throw new XPathException(this, ErrorCodes.FORX0003, "regular expression could match empty string"); } try { - final RegexIterator regexIterator = regularExpression.analyze(input); + final RegexIterator regexIterator = regularExpressionEntry.getRegularExpression().analyze(input); Item item; while ((item = regexIterator.next()) != null) { if (regexIterator.isMatching()) { diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunMatches.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunMatches.java index a4cdd04e4bd..04ef8b55c18 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunMatches.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunMatches.java @@ -522,7 +522,7 @@ private Sequence evalGeneric(final Sequence contextSequence, final Item contextI } private boolean matchXmlRegex(final String string, final String pattern, @Nullable final String flags) throws XPathException { - final RegularExpression regex = XML_REGEX_FACTORY.getXmlRegex(this, pattern, flags); + final RegularExpression regex = XML_REGEX_FACTORY.getXmlRegex(this, pattern, flags).getRegularExpression(); return regex.containsMatch(string); } diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunReplace.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunReplace.java index d50a8f37eb9..3e6bdce4268 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunReplace.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunReplace.java @@ -31,7 +31,6 @@ package org.exist.xquery.functions.fn; import net.sf.saxon.functions.Replace; -import net.sf.saxon.regex.RegularExpression; import org.exist.dom.QName; import org.exist.util.XmlRegexFactory; import org.exist.xquery.*; @@ -122,8 +121,8 @@ 
public Sequence eval(final Sequence[] args, final Sequence contextSequence) thro final String pattern = args[1].itemAt(0).getStringValue(); final String replace = args[2].itemAt(0).getStringValue(); - final RegularExpression regularExpression = XML_REGEX_FACTORY.getXmlRegex(this, pattern, flags); - if (regularExpression.matches("")) { + final XmlRegexFactory.RegularExpressionEntry regularExpressionEntry = XML_REGEX_FACTORY.getXmlRegex(this, pattern, flags); + if (regularExpressionEntry.matchesEmpty()) { throw new XPathException(this, ErrorCodes.FORX0003, "regular expression could match empty string"); } @@ -135,7 +134,7 @@ public Sequence eval(final Sequence[] args, final Sequence contextSequence) thro } try { - final CharSequence res = regularExpression.replace(string, replace); + final CharSequence res = regularExpressionEntry.getRegularExpression().replace(string, replace); result = new StringValue(this, res.toString()); } catch (final net.sf.saxon.trans.XPathException e) { throw XmlRegexFactory.translateRegexException(this, e);