Skip to content

Commit

Permalink
[optimise] Extend 'Cache Xml Regex' to include fn:analyze-string
Browse files (browse the repository at this point in the history)
  • Loading branch information
adamretter committed Dec 1, 2024
1 parent a0e642a commit 71d4dd0
Showing 1 changed file with 22 additions and 28 deletions.
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,13 @@
/*
* Copyright (C) 2014 Evolved Binary Ltd
*
* Changes made by Evolved Binary are proprietary and are not Open Source.
*
* NOTE: Parts of this file contain code from The eXist-db Authors.
* The original license header is included below.
*
* ----------------------------------------------------------------------------
*
* eXist-db Open Source Native XML Database
* Copyright (C) 2001 The eXist-db Authors
*
Expand Down Expand Up @@ -30,6 +39,7 @@
import net.sf.saxon.regex.RegularExpression;
import org.exist.dom.QName;
import org.exist.dom.memtree.MemTreeBuilder;
import org.exist.util.XmlRegexFactory;
import org.exist.xquery.*;
import org.exist.xquery.value.FunctionParameterSequenceType;
import org.exist.xquery.value.FunctionReturnSequenceType;
Expand All @@ -39,6 +49,7 @@
import org.exist.xquery.value.Type;
import org.xml.sax.helpers.AttributesImpl;

import javax.annotation.Nullable;
import javax.xml.XMLConstants;

/**
Expand All @@ -48,6 +59,8 @@
*/
public class FunAnalyzeString extends BasicFunction {

private static final XmlRegexFactory XML_REGEX_FACTORY = XmlRegexFactory.getInstance();

private final static QName fnAnalyzeString = new QName("analyze-string", Function.BUILTIN_FUNCTION_NS);

private final static QName QN_MATCH = new QName("match", Function.BUILTIN_FUNCTION_NS);
Expand Down Expand Up @@ -109,7 +122,7 @@ public Sequence eval(final Sequence[] args, final Sequence contextSequence) thro
}
if (input != null && !input.isEmpty()) {
final String pattern = args[1].itemAt(0).getStringValue();
String flags = "";
@Nullable String flags = null;
if (args.length == 3) {
flags = args[2].itemAt(0).getStringValue();
}
Expand All @@ -123,19 +136,13 @@ public Sequence eval(final Sequence[] args, final Sequence contextSequence) thro
}
}

private void analyzeString(final MemTreeBuilder builder, final String input, String pattern, final String flags) throws XPathException {
final Configuration config = context.getBroker().getBrokerPool().getSaxonConfiguration();

final List<String> warnings = new ArrayList<>(1);
private void analyzeString(final MemTreeBuilder builder, final String input, String pattern, @Nullable final String flags) throws XPathException {
final RegularExpression regularExpression = XML_REGEX_FACTORY.getXmlRegex(this, pattern, flags);
if (regularExpression.matches("")) {
throw new XPathException(this, ErrorCodes.FORX0003, "regular expression could match empty string");
}

try {
final RegularExpression regularExpression = config.compileRegularExpression(pattern, flags, "XP30", warnings);
if (regularExpression.matches("")) {
throw new XPathException(this, ErrorCodes.FORX0003, "regular expression could match empty string");
}

//TODO(AR) cache the regular expression... might be possible through Saxon config

final RegexIterator regexIterator = regularExpression.analyze(input);
Item item;
while ((item = regexIterator.next()) != null) {
Expand All @@ -145,21 +152,8 @@ private void analyzeString(final MemTreeBuilder builder, final String input, Str
nonMatch(builder, item);
}
}

for (final String warning : warnings) {
LOG.warn(warning);
}
} catch (final net.sf.saxon.trans.XPathException e) {
switch (e.getErrorCodeLocalPart()) {
case "FORX0001":
throw new XPathException(this, ErrorCodes.FORX0001, e.getMessage());
case "FORX0002":
throw new XPathException(this, ErrorCodes.FORX0002, e.getMessage());
case "FORX0003":
throw new XPathException(this, ErrorCodes.FORX0003, e.getMessage());
default:
throw new XPathException(this, e.getMessage());
}
throw XmlRegexFactory.translateRegexException(this, e);
}
}

Expand All @@ -172,15 +166,15 @@ public void characters(final CharSequence s) {
}

@Override
public void onGroupStart(final int groupNumber) throws net.sf.saxon.trans.XPathException {
public void onGroupStart(final int groupNumber) {
final AttributesImpl attributes = new AttributesImpl();
attributes.addAttribute("", QN_NR.getLocalPart(), QN_NR.getLocalPart(), "int", Integer.toString(groupNumber));

builder.startElement(QN_GROUP, attributes);
}

@Override
public void onGroupEnd(final int groupNumber) throws net.sf.saxon.trans.XPathException {
public void onGroupEnd(final int groupNumber) {
builder.endElement();
}
});
Expand Down

0 comments on commit 71d4dd0

Please sign in to comment.