Patched argument parsing bug.
jackluo923 committed Apr 30, 2024
1 parent 8df04e6 commit 3129017
Showing 5 changed files with 29 additions and 11 deletions.
@@ -116,24 +116,24 @@ private static List<String> parseEntryAsString(@Nullable Map<String, String> col
   public static Analyzer getAnalyzer(TextIndexConfig config) throws ReflectiveOperationException {
     String luceneAnalyzerClassName = config.getLuceneAnalyzerClass();
     List<String> luceneAnalyzerClassArgs = config.getLuceneAnalyzerClassArgs();
-    List<String> luceneAnalyzerClassArgsTypes = config.getLuceneAnalyzerClassArgTypes();
+    List<String> luceneAnalyzerClassArgTypes = config.getLuceneAnalyzerClassArgTypes();
 
     if (null == luceneAnalyzerClassName || luceneAnalyzerClassName.isEmpty()
         || (luceneAnalyzerClassName.equals(StandardAnalyzer.class.getName())
-        && luceneAnalyzerClassArgs.isEmpty() && luceneAnalyzerClassArgsTypes.isEmpty())) {
+        && luceneAnalyzerClassArgs.isEmpty() && luceneAnalyzerClassArgTypes.isEmpty())) {
       // When there is no analyzer defined, or when StandardAnalyzer (default) is used without arguments,
       // use existing logic to obtain an instance of StandardAnalyzer with customized stop words
       return TextIndexUtils.getStandardAnalyzerWithCustomizedStopWords(
           config.getStopWordsInclude(), config.getStopWordsExclude());
     } else {
       // Custom analyzer + custom configs via reflection
-      if (luceneAnalyzerClassArgs.size() != luceneAnalyzerClassArgsTypes.size()) {
+      if (luceneAnalyzerClassArgs.size() != luceneAnalyzerClassArgTypes.size()) {
         throw new ReflectiveOperationException("Mismatch of the number of analyzer arguments and arguments types.");
       }
 
       // Generate args type list
       List<Class<?>> argClasses = new ArrayList<>();
-      for (String argType : luceneAnalyzerClassArgsTypes) {
+      for (String argType : luceneAnalyzerClassArgTypes) {
        argClasses.add(parseSupportedTypes(argType));
       }
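The rename above only touches a local variable; the surrounding method builds a Lucene Analyzer reflectively from a class name plus parallel argument and argument-type lists. Below is a minimal sketch of that pattern under assumptions (the class and helper names here are hypothetical, not Pinot's actual implementation):

```java
// Sketch only: reflective construction of a Lucene Analyzer from a class name,
// string arguments, and their resolved parameter types. Helper names are assumptions.
import java.lang.reflect.Constructor;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;

final class AnalyzerReflectionSketch {
  static Analyzer instantiate(String className, List<String> args, List<Class<?>> argClasses)
      throws ReflectiveOperationException {
    Class<?> clazz = Class.forName(className);
    Constructor<?> ctor = clazz.getConstructor(argClasses.toArray(new Class<?>[0]));
    // Convert each string argument into its declared parameter type before invoking.
    Object[] converted = new Object[args.size()];
    for (int i = 0; i < args.size(); i++) {
      converted[i] = convert(args.get(i), argClasses.get(i));
    }
    return (Analyzer) ctor.newInstance(converted);
  }

  // Hypothetical converter covering a few common argument types.
  private static Object convert(String value, Class<?> type) {
    if (type == String.class) {
      return value;
    }
    if (type == int.class || type == Integer.class) {
      return Integer.valueOf(value);
    }
    if (type == boolean.class || type == Boolean.class) {
      return Boolean.valueOf(value);
    }
    throw new IllegalArgumentException("Unsupported analyzer argument type: " + type);
  }
}
```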
@@ -29,7 +29,9 @@
 import org.apache.lucene.analysis.core.KeywordTokenizer;
 import org.apache.lucene.queryparser.classic.QueryParser;
 import org.apache.lucene.search.SearcherManager;
+import org.apache.pinot.segment.local.segment.index.text.TextIndexConfigBuilder;
 import org.apache.pinot.segment.spi.index.TextIndexConfig;
+import org.apache.pinot.spi.config.table.FSTType;
 import org.roaringbitmap.buffer.ImmutableRoaringBitmap;
 import org.roaringbitmap.buffer.MutableRoaringBitmap;
 import org.testng.annotations.AfterClass;
@@ -164,8 +166,21 @@ private String[][] getRepeatedData() {
 
   private void configureIndex(String analyzerClass, String analyzerClassArgs, String analyzerClassArgTypes,
       String queryParserClass) {
-    TextIndexConfig config = new TextIndexConfig(false, null, null, false, false, null, null, true, 500,
-        analyzerClass, analyzerClassArgs, analyzerClassArgTypes, queryParserClass, false);
+    FSTType fstType = null;
+    TextIndexConfigBuilder builder = new TextIndexConfigBuilder(fstType);
+    if (null != analyzerClass) {
+      builder.withLuceneAnalyzerClass(analyzerClass);
+    }
+    if (null != analyzerClassArgs) {
+      builder.withLuceneAnalyzerClassArgs(analyzerClassArgs);
+    }
+    if (null != analyzerClassArgTypes) {
+      builder.withLuceneAnalyzerClassArgTypes(analyzerClassArgTypes);
+    }
+    if (null != queryParserClass) {
+      builder.withLuceneQueryParserClass(queryParserClass);
+    }
+    TextIndexConfig config = builder.build();
 
     // Note that segment name must be unique on each query setup, otherwise `testQueryCancellationIsSuccessful` method
     // will cause unit test to fail due to inability to release a lock.
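The test now goes through TextIndexConfigBuilder instead of the positional TextIndexConfig constructor. For reference, a hedged sketch of using the same setters directly; the analyzer class name and the CSV-encoded argument and type strings below are illustrative assumptions, not values from this commit:

```java
import org.apache.pinot.segment.local.segment.index.text.TextIndexConfigBuilder;
import org.apache.pinot.segment.spi.index.TextIndexConfig;
import org.apache.pinot.spi.config.table.FSTType;

class TextIndexConfigSketch {
  // Sketch under assumptions: "com.example.MyAnalyzer" is a hypothetical analyzer class,
  // and the CSV argument/type strings are illustrative. The setters are the ones
  // exercised by configureIndex above.
  static TextIndexConfig buildExampleConfig() {
    FSTType fstType = null;  // no FST type needed for this sketch
    TextIndexConfigBuilder builder = new TextIndexConfigBuilder(fstType);
    builder.withLuceneAnalyzerClass("com.example.MyAnalyzer");
    builder.withLuceneAnalyzerClassArgs("500,true");                                // constructor args, CSV-encoded
    builder.withLuceneAnalyzerClassArgTypes("java.lang.Integer,java.lang.Boolean"); // their types, CSV-encoded
    builder.withLuceneQueryParserClass("org.apache.lucene.queryparser.classic.QueryParser");
    return builder.build();
  }
}
```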
@@ -218,7 +218,7 @@ public AbstractBuilder(TextIndexConfig other) {
     public TextIndexConfig build() {
       return new TextIndexConfig(false, _fstType, _rawValueForTextIndex, _enableQueryCache, _useANDForMultiTermQueries,
           _stopWordsInclude, _stopWordsExclude, _luceneUseCompoundFile, _luceneMaxBufferSizeMB, _luceneAnalyzerClass,
-          CsvParser.serialize(_luceneAnalyzerClassArgs, false, false),
+          CsvParser.serialize(_luceneAnalyzerClassArgs, true, false),
           CsvParser.serialize(_luceneAnalyzerClassArgTypes, false, false),
           _luceneQueryParserClass, _enablePrefixSuffixMatchingInPhraseQueries);
     }
@@ -266,7 +266,7 @@ public AbstractBuilder withLuceneAnalyzerClassArgs(List<String> luceneAnalyzerCl
     }
 
     public AbstractBuilder withLuceneAnalyzerClassArgTypes(String luceneAnalyzerClassArgTypes) {
-      _luceneAnalyzerClassArgs = CsvParser.parse(luceneAnalyzerClassArgTypes, false, true);
+      _luceneAnalyzerClassArgTypes = CsvParser.parse(luceneAnalyzerClassArgTypes, false, true);
       return this;
     }
 
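The two hunks above carry the core of the fix: analyzer arguments are now serialized with comma escaping enabled, and withLuceneAnalyzerClassArgTypes stores into _luceneAnalyzerClassArgTypes instead of overwriting _luceneAnalyzerClassArgs. To make the escaping contract concrete, here is a small self-contained demo of an escaped-comma CSV round trip; it mimics the intended behavior rather than calling Pinot's CsvParser, and the values are illustrative:

```java
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;

// Self-contained illustration (not Pinot's CsvParser): elements containing commas are
// written as "\," so that parsing can split only on unescaped commas and recover them.
public class EscapedCsvRoundTripDemo {
  static String serialize(List<String> items) {
    return items.stream()
        .map(s -> s.replace(",", "\\,"))       // escapeComma = true
        .collect(Collectors.joining(","));
  }

  static List<String> parse(String input) {
    return Arrays.stream(input.split("(?<!\\\\),"))  // split on commas not preceded by '\'
        .map(s -> s.replace("\\,", ","))             // unescape
        .collect(Collectors.toList());
  }

  public static void main(String[] args) {
    List<String> analyzerArgs = Arrays.asList("a,b", "c");
    String encoded = serialize(analyzerArgs);                 // "a\,b,c"
    System.out.println(parse(encoded).equals(analyzerArgs));  // prints: true
  }
}
```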
@@ -21,6 +21,7 @@
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.List;
+import java.util.regex.Matcher;
 import java.util.stream.Collectors;
 import java.util.stream.Stream;
 import javax.annotation.Nullable;
@@ -70,7 +71,7 @@ public static List<String> parse(@Nullable String input, boolean escapeComma, bo
   public static String serialize(List<String> input, boolean escapeComma, boolean trim) {
     Stream<String> tokenStream = input.stream();
     if (escapeComma) {
-      tokenStream = tokenStream.map(s -> s.replaceAll(",", "\\,"));
+      tokenStream = tokenStream.map(s -> s.replaceAll(",", Matcher.quoteReplacement("\\,")));
     }
     if (trim) {
       tokenStream = tokenStream.map(String::trim);
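This change addresses a String.replaceAll pitfall: the replacement argument is not literal text, and a backslash in it escapes the following character, so the intended backslash never reached the output. Matcher.quoteReplacement makes the replacement literal. A short demo of the difference:

```java
import java.util.regex.Matcher;

public class ReplacementEscapeDemo {
  public static void main(String[] args) {
    String s = "a,b";
    // Buggy form: in the replacement string, "\\," (the characters '\' and ',')
    // is read as an escaped comma, so the backslash is silently dropped.
    System.out.println(s.replaceAll(",", "\\,"));                            // prints: a,b
    // Fixed form: quoteReplacement treats the replacement text literally.
    System.out.println(s.replaceAll(",", Matcher.quoteReplacement("\\,")));  // prints: a\,b
  }
}
```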
@@ -27,8 +27,10 @@ public class CsvParserTest {
   @Test
   public void testEscapeTrueTrimFalse() {
     String input = " \\,.\n\t()[]{}\"':=-_$\\?@&|#+/,:=[]$@&|#";
-    List<String> expectedOutput = Arrays.asList(" ,.\n\t()[]{}\"':=-_$\\?@&|#+/", ":=[]$@&|#");
-    Assert.assertEquals(CsvParser.parse(input, true, false), expectedOutput);
+    List<String> actualParsedOutput = CsvParser.parse(input, true, false);
+    List<String> expectedParsedOutput = Arrays.asList(" ,.\n\t()[]{}\"':=-_$\\?@&|#+/", ":=[]$@&|#");
+    Assert.assertEquals(actualParsedOutput, expectedParsedOutput);
+    Assert.assertEquals(CsvParser.serialize(actualParsedOutput, true, false), input);
   }
 
   @Test