Skip to content

Commit

Permalink
Add ftAnyAllOption
Browse files Browse the repository at this point in the history
  • Loading branch information
rhubner committed Oct 18, 2024
1 parent 919dcc0 commit 1959fbb
Show file tree
Hide file tree
Showing 9 changed files with 169 additions and 26 deletions.
11 changes: 10 additions & 1 deletion exist-core/src/main/antlr/org/exist/xquery/parser/XQuery.g
Original file line number Diff line number Diff line change
Expand Up @@ -1094,13 +1094,22 @@ ftPrimary throws XPathException


// Search words of a full-text selection, optionally followed by an
// any/all/phrase option (see ftAnyAllOption).
ftWords throws XPathException
: ftWordsValue (ftAnyAllOption)?
;

// The value searched for: either a string-concatenation expression or an
// arbitrary expression enclosed in curly braces.
ftWordsValue throws XPathException
: stringConcatExpr | (LCURLY! expr RCURLY!)
;

// Optional keyword(s) that may follow the search words:
//   "any" ["word"]  |  "all" ["words"]  |  "phrase"
// Each alternative is guarded by a syntactic predicate on its leading keyword.
ftAnyAllOption throws XPathException
:
("any") => ("any" ("word")?)
|
("all") => ("all" ("words")?)
|
("phrase") => ("phrase")
;


stringConcatExpr throws XPathException
{ boolean isConcat = false; }
Expand Down
53 changes: 47 additions & 6 deletions exist-core/src/main/antlr/org/exist/xquery/parser/XQueryTree.g
Original file line number Diff line number Diff line change
Expand Up @@ -3270,24 +3270,65 @@ throws PermissionDeniedException, EXistException, XPathException
}
|
{
Expression literal = null;
}
literal = literalExpr [null]
Expression le = null;
AnyAllOptions anyAllOptions = null;
} // parameter on literalExpr is not used. Why? Historical?
(le=literalExpr[null]) (anyAllOptions = ftAnyAllOption)?
{
match = StringMatch.newInstance((LiteralValue)literal);
match = new FtExpressionMatch(le, anyAllOptions);
}
|
{
AnyAllOptions anyAllOptions = null;
PathExpr seqPath = new PathExpr(context);
seqPath.setASTNode(ftPrimary_AST_in);
}
expr [seqPath]
expr [seqPath] (anyAllOptions = ftAnyAllOption)?
{
match = new FtExpressionMatch(seqPath);
match = new FtExpressionMatch(seqPath, anyAllOptions);
}
;


// Tree-walker rule that maps the matched keyword tokens onto an AnyAllOptions constant:
//   "any"    -> ANY      "any" "word"  -> ANY_WORD
//   "all"    -> ALL      "all" "words" -> ALL_WORDS
//   "phrase" -> PHRASE
ftAnyAllOption
returns [AnyAllOptions option]
throws PermissionDeniedException, EXistException, XPathException
{
option = null;
}
:
(
"any"
{
option = xyz.elemental.xquery.AnyAllOptions.ANY;
}
// An optional trailing "word" replaces ANY with ANY_WORD.
( "word"
{
option = xyz.elemental.xquery.AnyAllOptions.ANY_WORD;
}
)?
)
|
(
"all"
{
option = xyz.elemental.xquery.AnyAllOptions.ALL;
}
// An optional trailing "words" replaces ALL with ALL_WORDS.
( "words"
{
option = xyz.elemental.xquery.AnyAllOptions.ALL_WORDS;
}
)?
)
|
"phrase"
{
option = xyz.elemental.xquery.AnyAllOptions.PHRASE;
}
;



generalComp [PathExpr path]
returns [Expression step]
throws PermissionDeniedException, EXistException, XPathException
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
package xyz.elemental.xquery;

/**
 * Options that may follow the search words of a full-text match:
 * {@code any}, {@code any word}, {@code all}, {@code all words} and {@code phrase}.
 */
public enum AnyAllOptions {
/** Keyword {@code any}. */
ANY,
/** Keywords {@code any word}. */
ANY_WORD,
/** Keyword {@code all}. */
ALL,
/** Keywords {@code all words}. */
ALL_WORDS,
/** Keyword {@code phrase}. */
PHRASE
}
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ public Float evalScore(Sequence contextSequence, Item contextItem) throws XPathE
var luceneQuery = ftSelection.evaluateToQuery(contextSequence, contextItem);

if (luceneQuery.isEmpty()) { //If the sequence is empty, the FTWords yields no matches, Section 3.2
score = 0f;
return 0f;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,44 +5,105 @@
*/
package xyz.elemental.xquery;

import org.apache.lucene.index.Term;
import org.apache.lucene.search.*;
import org.exist.xquery.Expression;
import org.exist.xquery.PathExpr;
import org.exist.xquery.XPathException;
import org.exist.xquery.value.Item;
import org.exist.xquery.value.Sequence;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.Optional;


public class FtExpressionMatch extends FTMatch {

final Expression expression;
final Optional<AnyAllOptions> anyAllOptions;

public FtExpressionMatch(PathExpr expression) {
/**
 * Creates a match whose search strings are obtained by evaluating {@code expression}.
 *
 * @param expression    expression that is evaluated to produce the strings to search for
 * @param anyAllOptions any/all/phrase option; may be {@code null}, in which case an
 *                      empty {@link Optional} is stored
 */
public FtExpressionMatch(Expression expression, AnyAllOptions anyAllOptions) {
this.expression = expression;
this.anyAllOptions = Optional.ofNullable(anyAllOptions);
}

/**
 * Produces the Lucene query for this match by dispatching on the any/all option.
 * When no option was supplied, {@link AnyAllOptions#ANY} is assumed.
 *
 * @return the query, or an empty {@link Optional} when the selected strategy yields none
 */
@Override
public Optional<Query> evaluateToQuery(Sequence contextSequence, Item contextItem) throws XPathException {
    final AnyAllOptions mode = anyAllOptions.orElse(AnyAllOptions.ANY);
    final Optional<Query> query = switch (mode) {
        case ANY_WORD -> evaluateToQueryAnyAllWords(contextSequence, contextItem, BooleanClause.Occur.SHOULD);
        case ALL_WORDS -> evaluateToQueryAnyAllWords(contextSequence, contextItem, BooleanClause.Occur.MUST);
        case PHRASE -> evaluateToQueryPhrase(contextSequence, contextItem);
        case ALL -> evaluateToQueryAnyAll(contextSequence, contextItem, true);
        case ANY -> evaluateToQueryAnyAll(contextSequence, contextItem, false);
    };
    return query;
}

/**
 * Tokenizes every item produced by the expression, de-duplicates the tokens and
 * combines one {@link TermQuery} per distinct token using {@code occur}.
 *
 * @param occur how each token clause participates in the combined query
 * @return empty when no token was produced, a single {@link TermQuery} for one
 *         distinct token, otherwise a {@link BooleanQuery} over all distinct tokens
 */
public Optional<Query> evaluateToQueryAnyAllWords(Sequence contextSequence, Item contextItem, BooleanClause.Occur occur) throws XPathException {
    final var evaluated = expression.eval(contextSequence, contextItem);
    final var distinctTokens = new HashSet<String>();

    final var items = evaluated.iterate();
    while (items.hasNext()) {
        distinctTokens.addAll(LuceneQueryProducer.tokenize(items.nextItem().getStringValue()));
    }

    if (distinctTokens.isEmpty()) {
        return Optional.empty();
    }
    if (distinctTokens.size() == 1) {
        final String onlyToken = distinctTokens.iterator().next();
        return Optional.of(new TermQuery(new Term(FIELD_NAME, onlyToken)));
    }

    final var combined = new BooleanQuery.Builder();
    for (final String token : distinctTokens) {
        combined.add(new TermQuery(new Term(FIELD_NAME, token)), occur);
    }
    return Optional.of(combined.build());
}

public Optional<Query> evaluateToQueryPhrase(Sequence contextSequence, Item contextItem) throws XPathException {
var exprSeqResult = expression.eval(contextSequence, contextItem);

var gg = new ArrayList<Query>();
var tokens = new ArrayList<String>();

for (var iterator = exprSeqResult.iterate(); iterator.hasNext(); ) {
var stringValue = iterator.nextItem().getStringValue();
tokens.addAll(LuceneQueryProducer.tokenize(stringValue));
}

if (tokens.isEmpty()) {
return Optional.empty();
} else if (tokens.size() == 1) {
return Optional.of(new TermQuery(new Term(FIELD_NAME, tokens.get(0))));
} else {
return Optional.of(new PhraseQuery(FIELD_NAME, tokens.toArray(new String[0])));
}

for(var iterator = exprSeqResult.iterate(); iterator.hasNext(); ) {
}

public Optional<Query> evaluateToQueryAnyAll(Sequence contextSequence, Item contextItem, boolean noMatchOnEmpty) throws XPathException {
var exprSeqResult = expression.eval(contextSequence, contextItem);

var queries = new ArrayList<Query>();

for (var iterator = exprSeqResult.iterate(); iterator.hasNext(); ) {
var stringifies = iterator.nextItem().getStringValue();
var maybeQuery = LuceneQueryProducer.stringToQuery(stringifies);
maybeQuery.ifPresent(gg::add);
if (noMatchOnEmpty && maybeQuery.isEmpty()) {
return Optional.empty();
}
maybeQuery.ifPresent(queries::add);
}

if(gg.isEmpty()) {
if (queries.isEmpty()) {
return Optional.empty();
}else if (gg.size() == 1) {
return Optional.of(gg.get(0));
}else {
} else if (queries.size() == 1) {
return Optional.of(queries.get(0));
} else {
var builder = new BooleanQuery.Builder();
gg.forEach(x -> builder.add(x, BooleanClause.Occur.SHOULD));
queries.forEach(x -> builder.add(x, BooleanClause.Occur.SHOULD));
return Optional.of(builder.build());
}
}
Expand All @@ -53,5 +114,4 @@ public String toString() {
}



}
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;

public interface LuceneQueryProducer {
Expand All @@ -37,18 +38,23 @@ public interface LuceneQueryProducer {
*/
public static final Analyzer analyzer = new StandardAnalyzer(EnglishAnalyzer.ENGLISH_STOP_WORDS_SET);

public static Optional<Query> stringToQuery(String phrase) {
/**
 * Splits {@code phrase} into terms using the shared {@code analyzer}
 * (a {@code StandardAnalyzer} configured with the English stop-word set).
 *
 * @param phrase text to analyse
 * @return the terms emitted by the analyzer, in order; empty if it emits none
 * @throws RuntimeException wrapping the {@link IOException} if analysis fails
 */
public static List<String> tokenize(String phrase) {
    var tokens = new ArrayList<String>();

    try (var tokenStream = analyzer.tokenStream(FIELD_NAME, phrase)) {
        var charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
        tokenStream.reset();
        while (tokenStream.incrementToken()) {
            tokens.add(charTermAttribute.toString());
        }
        // The TokenStream consumer workflow requires end() after the last
        // incrementToken() and before close().
        tokenStream.end();
    } catch (IOException e) {
        throw new RuntimeException("Unable to tokenize String", e);
    }

    return tokens;
}

public static Optional<Query> stringToQuery(String phrase) {
var tokens = tokenize(phrase);

if(tokens.isEmpty()) {
return Optional.empty();
}else {
Expand Down
11 changes: 8 additions & 3 deletions exist-core/src/main/java/xyz/elemental/xquery/StringMatch.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,23 +7,28 @@

import org.apache.lucene.search.*;
import org.exist.xquery.LiteralValue;
import org.exist.xquery.XPathException;
import org.exist.xquery.XQuery;
import org.exist.xquery.value.Item;
import org.exist.xquery.value.Sequence;
import org.exist.xquery.value.StringValue;

import javax.xml.xquery.XQQueryException;
import java.util.Optional;

public class StringMatch extends FTMatch {

private final String term;
private final Optional<AnyAllOptions> anyOptions;

public StringMatch(String term) {
/**
 * Creates a match for a literal search string.
 *
 * @param term       the string to search for
 * @param anyOptions any/all/phrase option; {@code null} is accepted and treated
 *                   the same as {@link Optional#empty()}
 */
public StringMatch(String term, Optional<AnyAllOptions> anyOptions) {
    this.term = term;
    // Some callers pass null for "no option"; never store a null Optional.
    this.anyOptions = anyOptions == null ? Optional.empty() : anyOptions;
}

public static final StringMatch newInstance(LiteralValue literalValue) {
/**
 * Creates a {@link StringMatch} from a string literal.
 *
 * @param literalValue  literal whose value is cast to {@link StringValue}
 * @param anyAllOptions any/all/phrase option; may be {@code null}
 * @return a match for the literal's string value
 */
public static final StringMatch newInstance(LiteralValue literalValue, AnyAllOptions anyAllOptions) {
    final var stringValue = (StringValue) literalValue.getValue();
    return new StringMatch(stringValue.getStringValue(), Optional.ofNullable(anyAllOptions));
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,18 @@ void parseFtOrFtOr() throws Exception {
}


// Parse-only smoke test for the "any word" option after a "contains text" selection.
// There is no assertion on the resulting tree: the test passes as long as parse()
// does not throw.
@Test
void parserAnyWord() throws Exception {
final var query = """
xquery version "3.1";
for $w in ('aaaa', 'bbbb', 'ccccc')
where $w contains text {"aaaa", "bbbb", "cccc"} any word
return <text>{ $w }</text>
""";
parse(query);
}



private PathExpr parse(String query) throws TokenStreamException, XPathException, RecognitionException {
// parse the query into the internal syntax tree
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ public class StringMatchTest {

@Test
public void simpleTest() {
var m = new StringMatch("Hello WorlD");
var m = new StringMatch("Hello WorlD", null);
var result = m.evaluateToQuery(null, null);
assertThat(result)
.isPresent()
Expand All @@ -24,7 +24,7 @@ public void simpleTest() {

@Test
public void testStopWord() {
var m = new StringMatch("a as at Hello");
var m = new StringMatch("a as at Hello", null);
var result = m.evaluateToQuery(null, null);
assertThat(result)
.isPresent()
Expand Down

0 comments on commit 1959fbb

Please sign in to comment.