From 540b38146d13f8776273aa93da5061008bfd707d Mon Sep 17 00:00:00 2001
From: Xiaobing <61892277+klsince@users.noreply.github.com>
Date: Mon, 25 Mar 2024 19:54:44 -0700
Subject: [PATCH 01/50] refine when to registerSegment while doing addSegment
and replaceSegment for upsert tables for better data consistency (#12709)
---
.../realtime/RealtimeTableDataManager.java | 18 ++++++++++++++----
1 file changed, 14 insertions(+), 4 deletions(-)
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/RealtimeTableDataManager.java b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/RealtimeTableDataManager.java
index 9e4ae84dba1..0c62ab9b4d7 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/RealtimeTableDataManager.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/RealtimeTableDataManager.java
@@ -548,10 +548,9 @@ private void handleUpsert(ImmutableSegment immutableSegment) {
immutableSegment.getSegmentMetadata().getTotalDocs());
_serverMetrics.addValueToTableGauge(_tableNameWithType, ServerGauge.SEGMENT_COUNT, 1L);
ImmutableSegmentDataManager newSegmentManager = new ImmutableSegmentDataManager(immutableSegment);
- // Register the new segment after it is fully initialized by partitionUpsertMetadataManager, e.g. to fill up its
- // validDocId bitmap. Otherwise, the query can return wrong results, if accessing the premature segment.
if (partitionUpsertMetadataManager.isPreloading()) {
- // Preloading segment is ensured to be handled by a single thread, so no need to take a lock.
+ // Preloading segment is ensured to be handled by a single thread, so no need to take the segment upsert lock.
+ // Besides, preloading happens before the table partition is made ready for any queries.
partitionUpsertMetadataManager.preloadSegment(immutableSegment);
registerSegment(segmentName, newSegmentManager);
_logger.info("Preloaded immutable segment: {} to upsert-enabled table: {}", segmentName, _tableNameWithType);
@@ -574,10 +573,21 @@ private void handleUpsert(ImmutableSegment immutableSegment) {
try {
SegmentDataManager oldSegmentManager = _segmentDataManagerMap.get(segmentName);
if (oldSegmentManager == null) {
- partitionUpsertMetadataManager.addSegment(immutableSegment);
+ // When adding a new segment, we should register it 'before' it is fully initialized by
+ // partitionUpsertMetadataManager. Because when processing docs in the new segment, the docs in the other
+ // segments may be invalidated, making the queries see less valid docs than expected. We should let query
+ // access the new segment asap even though its validDocId bitmap is still being filled by
+ // partitionUpsertMetadataManager.
registerSegment(segmentName, newSegmentManager);
+ partitionUpsertMetadataManager.addSegment(immutableSegment);
_logger.info("Added new immutable segment: {} to upsert-enabled table: {}", segmentName, _tableNameWithType);
} else {
+ // When replacing a segment, we should register the new segment 'after' it is fully initialized by
+ // partitionUpsertMetadataManager to fill up its validDocId bitmap. Otherwise, the queries will lose the access
+ // to the valid docs in the old segment immediately, but the validDocId bitmap of the new segment is still
+ // being filled by partitionUpsertMetadataManager, making the queries see less valid docs than expected.
+ // When replacing a segment, the new and old segments are assumed to have same set of valid docs for data
+ // consistency, otherwise the new segment should be named differently to go through the addSegment flow above.
IndexSegment oldSegment = oldSegmentManager.getSegment();
partitionUpsertMetadataManager.replaceSegment(immutableSegment, oldSegment);
registerSegment(segmentName, newSegmentManager);
From 57f50d3dec2a2023effa67ca044f61dfd6561dad Mon Sep 17 00:00:00 2001
From: Saurabh Dubey
Date: Tue, 26 Mar 2024 09:22:29 +0530
Subject: [PATCH 02/50] Json extract index filter support (#12683)
---------
Co-authored-by: Saurabh Dubey
Co-authored-by: Saurabh Dubey
---
.../function/TransformFunctionType.java | 3 +-
.../JsonExtractIndexTransformFunction.java | 28 +++++----
...JsonExtractIndexTransformFunctionTest.java | 17 ++++++
.../impl/json/MutableJsonIndexImpl.java | 28 ++++++++-
.../json/ImmutableJsonIndexReader.java | 46 ++++++++++++++-
.../local/segment/index/JsonIndexTest.java | 59 ++++++++++++-------
.../spi/index/reader/JsonIndexReader.java | 3 +-
7 files changed, 147 insertions(+), 37 deletions(-)
diff --git a/pinot-common/src/main/java/org/apache/pinot/common/function/TransformFunctionType.java b/pinot-common/src/main/java/org/apache/pinot/common/function/TransformFunctionType.java
index 20bc26854cf..88c269c6aad 100644
--- a/pinot-common/src/main/java/org/apache/pinot/common/function/TransformFunctionType.java
+++ b/pinot-common/src/main/java/org/apache/pinot/common/function/TransformFunctionType.java
@@ -117,7 +117,8 @@ public enum TransformFunctionType {
ReturnTypes.cascade(opBinding -> positionalReturnTypeInferenceFromStringLiteral(opBinding, 2,
SqlTypeName.VARCHAR), SqlTypeTransforms.FORCE_NULLABLE),
OperandTypes.family(ImmutableList.of(SqlTypeFamily.ANY, SqlTypeFamily.CHARACTER, SqlTypeFamily.CHARACTER,
- SqlTypeFamily.CHARACTER), ordinal -> ordinal > 2), "json_extract_index"),
+ SqlTypeFamily.CHARACTER, SqlTypeFamily.CHARACTER), ordinal -> ordinal > 2), "json_extract_index"),
+
JSON_EXTRACT_KEY("jsonExtractKey", ReturnTypes.TO_ARRAY,
OperandTypes.family(ImmutableList.of(SqlTypeFamily.ANY, SqlTypeFamily.CHARACTER)), "json_extract_key"),
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/JsonExtractIndexTransformFunction.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/JsonExtractIndexTransformFunction.java
index 160ed36b0ff..12e38ea5d60 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/JsonExtractIndexTransformFunction.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/JsonExtractIndexTransformFunction.java
@@ -53,11 +53,11 @@ public String getName() {
@Override
public void init(List arguments, Map columnContextMap) {
- // Check that there are exactly 3 or 4 arguments
- if (arguments.size() < 3 || arguments.size() > 4) {
+ // Check that there are exactly 3 or 4 or 5 arguments
+ if (arguments.size() < 3 || arguments.size() > 5) {
throw new IllegalArgumentException(
- "Expected 3/4 arguments for transform function: jsonExtractIndex(jsonFieldName, 'jsonPath', 'resultsType',"
- + " ['defaultValue'])");
+ "Expected 3/4/5 arguments for transform function: jsonExtractIndex(jsonFieldName, 'jsonPath', 'resultsType',"
+ + " ['defaultValue'], ['jsonFilterExpression'])");
}
TransformFunction firstArgument = arguments.get(0);
@@ -76,13 +76,12 @@ public void init(List arguments, Map c
if (!(secondArgument instanceof LiteralTransformFunction)) {
throw new IllegalArgumentException("JSON path argument must be a literal");
}
- String inputJsonPath = ((LiteralTransformFunction) secondArgument).getStringLiteral();
+ _jsonPathString = ((LiteralTransformFunction) secondArgument).getStringLiteral();
try {
- JsonPathCache.INSTANCE.getOrCompute(inputJsonPath);
+ JsonPathCache.INSTANCE.getOrCompute(_jsonPathString);
} catch (Exception e) {
throw new IllegalArgumentException("JSON path argument is not a valid JSON path");
}
- _jsonPathString = inputJsonPath.substring(1); // remove $ prefix
TransformFunction thirdArgument = arguments.get(2);
if (!(thirdArgument instanceof LiteralTransformFunction)) {
@@ -90,14 +89,14 @@ public void init(List arguments, Map c
}
String resultsType = ((LiteralTransformFunction) thirdArgument).getStringLiteral().toUpperCase();
boolean isSingleValue = !resultsType.endsWith("_ARRAY");
- if (isSingleValue && inputJsonPath.contains("[*]")) {
+ if (isSingleValue && _jsonPathString.contains("[*]")) {
throw new IllegalArgumentException(
"[*] syntax in json path is unsupported for singleValue field json_extract_index");
}
DataType dataType = isSingleValue ? DataType.valueOf(resultsType)
: DataType.valueOf(resultsType.substring(0, resultsType.length() - 6));
- if (arguments.size() == 4) {
+ if (arguments.size() >= 4) {
TransformFunction fourthArgument = arguments.get(3);
if (!(fourthArgument instanceof LiteralTransformFunction)) {
throw new IllegalArgumentException("Default value must be a literal");
@@ -105,8 +104,17 @@ public void init(List arguments, Map c
_defaultValue = dataType.convert(((LiteralTransformFunction) fourthArgument).getStringLiteral());
}
+ String filterJsonPath = null;
+ if (arguments.size() == 5) {
+ TransformFunction fifthArgument = arguments.get(4);
+ if (!(fifthArgument instanceof LiteralTransformFunction)) {
+ throw new IllegalArgumentException("JSON path filter argument must be a literal");
+ }
+ filterJsonPath = ((LiteralTransformFunction) fifthArgument).getStringLiteral();
+ }
+
_resultMetadata = new TransformResultMetadata(dataType, isSingleValue, false);
- _valueToMatchingDocsMap = _jsonIndexReader.getMatchingFlattenedDocsMap(_jsonPathString);
+ _valueToMatchingDocsMap = _jsonIndexReader.getMatchingFlattenedDocsMap(_jsonPathString, filterJsonPath);
if (isSingleValue) {
// For single value result type, it's more efficient to use original docIDs map
_jsonIndexReader.convertFlattenedDocIdsToDocIds(_valueToMatchingDocsMap);
diff --git a/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/JsonExtractIndexTransformFunctionTest.java b/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/JsonExtractIndexTransformFunctionTest.java
index c61084c430a..d2cd7921077 100644
--- a/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/JsonExtractIndexTransformFunctionTest.java
+++ b/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/JsonExtractIndexTransformFunctionTest.java
@@ -247,6 +247,23 @@ private void addMvTests(List
*/
public class PercentileKLLAggregationFunction
- extends BaseSingleInputAggregationFunction> {
+ extends NullableSingleInputAggregationFunction> {
protected static final int DEFAULT_K_VALUE = 200;
protected final double _percentile;
protected int _kValue;
- public PercentileKLLAggregationFunction(List arguments) {
- super(arguments.get(0));
+ public PercentileKLLAggregationFunction(List arguments, boolean nullHandlingEnabled) {
+ super(arguments.get(0), nullHandlingEnabled);
// Check that there are correct number of arguments
int numArguments = arguments.size();
@@ -107,14 +107,18 @@ public void aggregate(int length, AggregationResultHolder aggregationResultHolde
if (valueType == DataType.BYTES) {
// Assuming the column contains serialized data sketch
KllDoublesSketch[] deserializedSketches = deserializeSketches(blockValSetMap.get(_expression).getBytesValuesSV());
- for (int i = 0; i < length; i++) {
- sketch.merge(deserializedSketches[i]);
- }
+ forEachNotNull(length, valueSet, (from, to) -> {
+ for (int i = from; i < to; i++) {
+ sketch.merge(deserializedSketches[i]);
+ }
+ });
} else {
double[] values = valueSet.getDoubleValuesSV();
- for (int i = 0; i < length; i++) {
- sketch.update(values[i]);
- }
+ forEachNotNull(length, valueSet, (from, to) -> {
+ for (int i = from; i < to; i++) {
+ sketch.update(values[i]);
+ }
+ });
}
}
@@ -127,16 +131,20 @@ public void aggregateGroupBySV(int length, int[] groupKeyArray, GroupByResultHol
if (valueType == DataType.BYTES) {
// serialized sketch
KllDoublesSketch[] deserializedSketches = deserializeSketches(blockValSetMap.get(_expression).getBytesValuesSV());
- for (int i = 0; i < length; i++) {
- KllDoublesSketch sketch = getOrCreateSketch(groupByResultHolder, groupKeyArray[i]);
- sketch.merge(deserializedSketches[i]);
- }
+ forEachNotNull(length, valueSet, (from, to) -> {
+ for (int i = from; i < to; i++) {
+ KllDoublesSketch sketch = getOrCreateSketch(groupByResultHolder, groupKeyArray[i]);
+ sketch.merge(deserializedSketches[i]);
+ }
+ });
} else {
double[] values = valueSet.getDoubleValuesSV();
- for (int i = 0; i < length; i++) {
- KllDoublesSketch sketch = getOrCreateSketch(groupByResultHolder, groupKeyArray[i]);
- sketch.update(values[i]);
- }
+ forEachNotNull(length, valueSet, (from, to) -> {
+ for (int i = from; i < to; i++) {
+ KllDoublesSketch sketch = getOrCreateSketch(groupByResultHolder, groupKeyArray[i]);
+ sketch.update(values[i]);
+ }
+ });
}
}
@@ -149,20 +157,24 @@ public void aggregateGroupByMV(int length, int[][] groupKeysArray, GroupByResult
if (valueType == DataType.BYTES) {
// serialized sketch
KllDoublesSketch[] deserializedSketches = deserializeSketches(blockValSetMap.get(_expression).getBytesValuesSV());
- for (int i = 0; i < length; i++) {
- for (int groupKey : groupKeysArray[i]) {
- KllDoublesSketch sketch = getOrCreateSketch(groupByResultHolder, groupKey);
- sketch.merge(deserializedSketches[i]);
+ forEachNotNull(length, valueSet, (from, to) -> {
+ for (int i = from; i < to; i++) {
+ for (int groupKey : groupKeysArray[i]) {
+ KllDoublesSketch sketch = getOrCreateSketch(groupByResultHolder, groupKey);
+ sketch.merge(deserializedSketches[i]);
+ }
}
- }
+ });
} else {
double[] values = valueSet.getDoubleValuesSV();
- for (int i = 0; i < length; i++) {
- for (int groupKey : groupKeysArray[i]) {
- KllDoublesSketch sketch = getOrCreateSketch(groupByResultHolder, groupKey);
- sketch.update(values[i]);
+ forEachNotNull(length, valueSet, (from, to) -> {
+ for (int i = from; i < to; i++) {
+ for (int groupKey : groupKeysArray[i]) {
+ KllDoublesSketch sketch = getOrCreateSketch(groupByResultHolder, groupKey);
+ sketch.update(values[i]);
+ }
}
- }
+ });
}
}
@@ -241,6 +253,9 @@ public String getResultColumnName() {
@Override
public Comparable> extractFinalResult(KllDoublesSketch sketch) {
+ if (sketch.isEmpty() && _nullHandlingEnabled) {
+ return null;
+ }
return sketch.getQuantile(_percentile / 100);
}
}
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/PercentileKLLMVAggregationFunction.java b/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/PercentileKLLMVAggregationFunction.java
index 4653e9051d3..26af8dea447 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/PercentileKLLMVAggregationFunction.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/PercentileKLLMVAggregationFunction.java
@@ -32,7 +32,7 @@
public class PercentileKLLMVAggregationFunction extends PercentileKLLAggregationFunction {
public PercentileKLLMVAggregationFunction(List arguments) {
- super(arguments);
+ super(arguments, false);
}
@Override
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/PercentileMVAggregationFunction.java b/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/PercentileMVAggregationFunction.java
index 794a9896a7d..620763ea759 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/PercentileMVAggregationFunction.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/PercentileMVAggregationFunction.java
@@ -30,11 +30,11 @@
public class PercentileMVAggregationFunction extends PercentileAggregationFunction {
public PercentileMVAggregationFunction(ExpressionContext expression, int percentile) {
- super(expression, percentile);
+ super(expression, percentile, false);
}
public PercentileMVAggregationFunction(ExpressionContext expression, double percentile) {
- super(expression, percentile);
+ super(expression, percentile, false);
}
@Override
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/PercentileRawEstAggregationFunction.java b/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/PercentileRawEstAggregationFunction.java
index 063359ec960..04787e7d559 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/PercentileRawEstAggregationFunction.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/PercentileRawEstAggregationFunction.java
@@ -37,12 +37,14 @@ public class PercentileRawEstAggregationFunction
extends BaseSingleInputAggregationFunction {
private final PercentileEstAggregationFunction _percentileEstAggregationFunction;
- public PercentileRawEstAggregationFunction(ExpressionContext expressionContext, double percentile) {
- this(expressionContext, new PercentileEstAggregationFunction(expressionContext, percentile));
+ public PercentileRawEstAggregationFunction(ExpressionContext expressionContext, double percentile,
+ boolean nullHandlingEnabled) {
+ this(expressionContext, new PercentileEstAggregationFunction(expressionContext, percentile, nullHandlingEnabled));
}
- public PercentileRawEstAggregationFunction(ExpressionContext expressionContext, int percentile) {
- this(expressionContext, new PercentileEstAggregationFunction(expressionContext, percentile));
+ public PercentileRawEstAggregationFunction(ExpressionContext expressionContext, int percentile,
+ boolean nullHandlingEnabled) {
+ this(expressionContext, new PercentileEstAggregationFunction(expressionContext, percentile, nullHandlingEnabled));
}
protected PercentileRawEstAggregationFunction(ExpressionContext expression,
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/PercentileRawKLLAggregationFunction.java b/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/PercentileRawKLLAggregationFunction.java
index 39c2022ff02..7e88cf009d8 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/PercentileRawKLLAggregationFunction.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/PercentileRawKLLAggregationFunction.java
@@ -28,8 +28,8 @@
public class PercentileRawKLLAggregationFunction extends PercentileKLLAggregationFunction {
- public PercentileRawKLLAggregationFunction(List arguments) {
- super(arguments);
+ public PercentileRawKLLAggregationFunction(List arguments, boolean nullHandlingEnabled) {
+ super(arguments, nullHandlingEnabled);
}
@Override
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/PercentileRawTDigestAggregationFunction.java b/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/PercentileRawTDigestAggregationFunction.java
index 99a096c1306..fc618027a5f 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/PercentileRawTDigestAggregationFunction.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/PercentileRawTDigestAggregationFunction.java
@@ -37,17 +37,22 @@ public class PercentileRawTDigestAggregationFunction
extends BaseSingleInputAggregationFunction {
private final PercentileTDigestAggregationFunction _percentileTDigestAggregationFunction;
- public PercentileRawTDigestAggregationFunction(ExpressionContext expressionContext, int percentile) {
- this(expressionContext, new PercentileTDigestAggregationFunction(expressionContext, percentile));
+ public PercentileRawTDigestAggregationFunction(ExpressionContext expressionContext, int percentile,
+ boolean nullHandlingEnabled) {
+ this(expressionContext, new PercentileTDigestAggregationFunction(expressionContext, percentile,
+ nullHandlingEnabled));
}
- public PercentileRawTDigestAggregationFunction(ExpressionContext expressionContext, double percentile) {
- this(expressionContext, new PercentileTDigestAggregationFunction(expressionContext, percentile));
+ public PercentileRawTDigestAggregationFunction(ExpressionContext expressionContext, double percentile,
+ boolean nullHandlingEnabled) {
+ this(expressionContext, new PercentileTDigestAggregationFunction(expressionContext, percentile,
+ nullHandlingEnabled));
}
public PercentileRawTDigestAggregationFunction(ExpressionContext expressionContext, double percentile,
- int compressionFactor) {
- this(expressionContext, new PercentileTDigestAggregationFunction(expressionContext, percentile, compressionFactor));
+ int compressionFactor, boolean nullHandlingEnabled) {
+ this(expressionContext, new PercentileTDigestAggregationFunction(expressionContext, percentile, compressionFactor,
+ nullHandlingEnabled));
}
protected PercentileRawTDigestAggregationFunction(ExpressionContext expression,
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/PercentileSmartTDigestAggregationFunction.java b/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/PercentileSmartTDigestAggregationFunction.java
index 92cd5fa09b9..20d5372ca56 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/PercentileSmartTDigestAggregationFunction.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/PercentileSmartTDigestAggregationFunction.java
@@ -50,15 +50,15 @@
* - compression: Compression for the converted TDigest, 100 by default.
* Example of third argument: 'threshold=10000;compression=50'
*/
-public class PercentileSmartTDigestAggregationFunction extends BaseSingleInputAggregationFunction {
+public class PercentileSmartTDigestAggregationFunction extends NullableSingleInputAggregationFunction {
private static final double DEFAULT_FINAL_RESULT = Double.NEGATIVE_INFINITY;
private final double _percentile;
private final int _threshold;
private final int _compression;
- public PercentileSmartTDigestAggregationFunction(List arguments) {
- super(arguments.get(0));
+ public PercentileSmartTDigestAggregationFunction(List arguments, boolean nullHandlingEnabled) {
+ super(arguments.get(0), nullHandlingEnabled);
try {
_percentile = arguments.get(1).getLiteral().getDoubleValue();
} catch (Exception e) {
@@ -128,39 +128,53 @@ private static void validateValueType(BlockValSet blockValSet) {
blockValSet.isSingleValue() ? "" : "_MV");
}
- private static void aggregateIntoTDigest(int length, AggregationResultHolder aggregationResultHolder,
+ private void aggregateIntoTDigest(int length, AggregationResultHolder aggregationResultHolder,
BlockValSet blockValSet) {
TDigest tDigest = aggregationResultHolder.getResult();
if (blockValSet.isSingleValue()) {
double[] doubleValues = blockValSet.getDoubleValuesSV();
- for (int i = 0; i < length; i++) {
- tDigest.add(doubleValues[i]);
- }
+ forEachNotNull(length, blockValSet, (from, to) -> {
+ for (int i = from; i < to; i++) {
+ tDigest.add(doubleValues[i]);
+ }
+ });
} else {
double[][] doubleValues = blockValSet.getDoubleValuesMV();
- for (int i = 0; i < length; i++) {
- for (double value : doubleValues[i]) {
- tDigest.add(value);
+ forEachNotNull(length, blockValSet, (from, to) -> {
+ for (int i = from; i < to; i++) {
+ for (double value : doubleValues[i]) {
+ tDigest.add(value);
+ }
}
- }
+ });
}
}
- private void aggregateIntoValueList(int length, AggregationResultHolder aggregationResultHolder,
- BlockValSet blockValSet) {
+ private DoubleArrayList getOrCreateList(int length, AggregationResultHolder aggregationResultHolder) {
DoubleArrayList valueList = aggregationResultHolder.getResult();
if (valueList == null) {
valueList = new DoubleArrayList(length);
aggregationResultHolder.setValue(valueList);
}
+ return valueList;
+ }
+
+ private void aggregateIntoValueList(int length, AggregationResultHolder aggregationResultHolder,
+ BlockValSet blockValSet) {
+ DoubleArrayList valueList = getOrCreateList(length, aggregationResultHolder);
if (blockValSet.isSingleValue()) {
double[] doubleValues = blockValSet.getDoubleValuesSV();
- valueList.addElements(valueList.size(), doubleValues, 0, length);
+ forEachNotNull(length, blockValSet, (from, toEx) ->
+ valueList.addElements(valueList.size(), doubleValues, from, toEx - from)
+ );
} else {
double[][] doubleValues = blockValSet.getDoubleValuesMV();
- for (int i = 0; i < length; i++) {
- valueList.addElements(valueList.size(), doubleValues[i]);
- }
+ forEachNotNull(length, blockValSet, (from, toEx) -> {
+ for (int i = 0; i < length; i++) {
+ valueList.addElements(valueList.size(), doubleValues[i]);
+ }
+ }
+ );
}
if (valueList.size() > _threshold) {
aggregationResultHolder.setValue(convertValueListToTDigest(valueList));
@@ -183,16 +197,20 @@ public void aggregateGroupBySV(int length, int[] groupKeyArray, GroupByResultHol
validateValueType(blockValSet);
if (blockValSet.isSingleValue()) {
double[] doubleValues = blockValSet.getDoubleValuesSV();
- for (int i = 0; i < length; i++) {
- DoubleArrayList valueList = getValueList(groupByResultHolder, groupKeyArray[i]);
- valueList.add(doubleValues[i]);
- }
+ forEachNotNull(length, blockValSet, (from, to) -> {
+ for (int i = from; i < to; i++) {
+ DoubleArrayList valueList = getValueList(groupByResultHolder, groupKeyArray[i]);
+ valueList.add(doubleValues[i]);
+ }
+ });
} else {
double[][] doubleValues = blockValSet.getDoubleValuesMV();
- for (int i = 0; i < length; i++) {
- DoubleArrayList valueList = getValueList(groupByResultHolder, groupKeyArray[i]);
- valueList.addElements(valueList.size(), doubleValues[i]);
- }
+ forEachNotNull(length, blockValSet, (from, to) -> {
+ for (int i = from; i < to; i++) {
+ DoubleArrayList valueList = getValueList(groupByResultHolder, groupKeyArray[i]);
+ valueList.addElements(valueList.size(), doubleValues[i]);
+ }
+ });
}
}
@@ -212,19 +230,23 @@ public void aggregateGroupByMV(int length, int[][] groupKeysArray, GroupByResult
validateValueType(blockValSet);
if (blockValSet.isSingleValue()) {
double[] doubleValues = blockValSet.getDoubleValuesSV();
- for (int i = 0; i < length; i++) {
- for (int groupKey : groupKeysArray[i]) {
- getValueList(groupByResultHolder, groupKey).add(doubleValues[i]);
+ forEachNotNull(length, blockValSet, (from, to) -> {
+ for (int i = from; i < to; i++) {
+ for (int groupKey : groupKeysArray[i]) {
+ getValueList(groupByResultHolder, groupKey).add(doubleValues[i]);
+ }
}
- }
+ });
} else {
double[][] doubleValues = blockValSet.getDoubleValuesMV();
- for (int i = 0; i < length; i++) {
- for (int groupKey : groupKeysArray[i]) {
- DoubleArrayList valueList = getValueList(groupByResultHolder, groupKey);
- valueList.addElements(valueList.size(), doubleValues[i]);
+ forEachNotNull(length, blockValSet, (from, to) -> {
+ for (int i = from; i < to; i++) {
+ for (int groupKey : groupKeysArray[i]) {
+ DoubleArrayList valueList = getValueList(groupByResultHolder, groupKey);
+ valueList.addElements(valueList.size(), doubleValues[i]);
+ }
}
- }
+ });
}
}
@@ -285,7 +307,11 @@ public Double extractFinalResult(Object intermediateResult) {
DoubleArrayList valueList = (DoubleArrayList) intermediateResult;
int size = valueList.size();
if (size == 0) {
- return DEFAULT_FINAL_RESULT;
+ if (_nullHandlingEnabled) {
+ return null;
+ } else {
+ return DEFAULT_FINAL_RESULT;
+ }
} else {
double[] values = valueList.elements();
Arrays.sort(values, 0, size);
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/PercentileTDigestAggregationFunction.java b/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/PercentileTDigestAggregationFunction.java
index d4224739c6e..c831e52d224 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/PercentileTDigestAggregationFunction.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/PercentileTDigestAggregationFunction.java
@@ -39,7 +39,7 @@
* extra handling for two argument PERCENTILE functions to assess if v0 or v1. This can be revisited later if the
* need arises
*/
-public class PercentileTDigestAggregationFunction extends BaseSingleInputAggregationFunction {
+public class PercentileTDigestAggregationFunction extends NullableSingleInputAggregationFunction {
public static final int DEFAULT_TDIGEST_COMPRESSION = 100;
// version 0 functions specified in the of form PERCENTILETDIGEST<2-digits>(column). Uses default compression of 100
@@ -48,23 +48,25 @@ public class PercentileTDigestAggregationFunction extends BaseSingleInputAggrega
protected final double _percentile;
protected final int _compressionFactor;
- public PercentileTDigestAggregationFunction(ExpressionContext expression, int percentile) {
- super(expression);
+ public PercentileTDigestAggregationFunction(ExpressionContext expression, int percentile,
+ boolean nullHandlingEnabled) {
+ super(expression, nullHandlingEnabled);
_version = 0;
_percentile = percentile;
_compressionFactor = DEFAULT_TDIGEST_COMPRESSION;
}
- public PercentileTDigestAggregationFunction(ExpressionContext expression, double percentile) {
- super(expression);
+ public PercentileTDigestAggregationFunction(ExpressionContext expression, double percentile,
+ boolean nullHandlingEnabled) {
+ super(expression, nullHandlingEnabled);
_version = 1;
_percentile = percentile;
_compressionFactor = DEFAULT_TDIGEST_COMPRESSION;
}
public PercentileTDigestAggregationFunction(ExpressionContext expression, double percentile,
- int compressionFactor) {
- super(expression);
+ int compressionFactor, boolean nullHandlingEnabled) {
+ super(expression, nullHandlingEnabled);
_version = 1;
_percentile = percentile;
_compressionFactor = compressionFactor;
@@ -104,24 +106,28 @@ public void aggregate(int length, AggregationResultHolder aggregationResultHolde
if (blockValSet.getValueType() != DataType.BYTES) {
double[] doubleValues = blockValSet.getDoubleValuesSV();
TDigest tDigest = getDefaultTDigest(aggregationResultHolder, _compressionFactor);
- for (int i = 0; i < length; i++) {
- tDigest.add(doubleValues[i]);
- }
+ forEachNotNull(length, blockValSet, (from, to) -> {
+ for (int i = from; i < to; i++) {
+ tDigest.add(doubleValues[i]);
+ }
+ });
} else {
// Serialized TDigest
byte[][] bytesValues = blockValSet.getBytesValuesSV();
- TDigest tDigest = aggregationResultHolder.getResult();
- if (tDigest != null) {
- for (int i = 0; i < length; i++) {
- tDigest.add(ObjectSerDeUtils.TDIGEST_SER_DE.deserialize(bytesValues[i]));
- }
- } else {
- tDigest = ObjectSerDeUtils.TDIGEST_SER_DE.deserialize(bytesValues[0]);
- aggregationResultHolder.setValue(tDigest);
- for (int i = 1; i < length; i++) {
- tDigest.add(ObjectSerDeUtils.TDIGEST_SER_DE.deserialize(bytesValues[i]));
+ foldNotNull(length, blockValSet, (TDigest) aggregationResultHolder.getResult(), (tDigest, from, toEx) -> {
+ if (tDigest != null) {
+ for (int i = from; i < toEx; i++) {
+ tDigest.add(ObjectSerDeUtils.TDIGEST_SER_DE.deserialize(bytesValues[i]));
+ }
+ } else {
+ tDigest = ObjectSerDeUtils.TDIGEST_SER_DE.deserialize(bytesValues[0]);
+ aggregationResultHolder.setValue(tDigest);
+ for (int i = 1; i < length; i++) {
+ tDigest.add(ObjectSerDeUtils.TDIGEST_SER_DE.deserialize(bytesValues[i]));
+ }
}
- }
+ return tDigest;
+ });
}
}
@@ -131,22 +137,26 @@ public void aggregateGroupBySV(int length, int[] groupKeyArray, GroupByResultHol
BlockValSet blockValSet = blockValSetMap.get(_expression);
if (blockValSet.getValueType() != DataType.BYTES) {
double[] doubleValues = blockValSet.getDoubleValuesSV();
- for (int i = 0; i < length; i++) {
- getDefaultTDigest(groupByResultHolder, groupKeyArray[i], _compressionFactor).add(doubleValues[i]);
- }
+ forEachNotNull(length, blockValSet, (from, to) -> {
+ for (int i = from; i < to; i++) {
+ getDefaultTDigest(groupByResultHolder, groupKeyArray[i], _compressionFactor).add(doubleValues[i]);
+ }
+ });
} else {
// Serialized TDigest
byte[][] bytesValues = blockValSet.getBytesValuesSV();
- for (int i = 0; i < length; i++) {
- TDigest value = ObjectSerDeUtils.TDIGEST_SER_DE.deserialize(bytesValues[i]);
- int groupKey = groupKeyArray[i];
- TDigest tDigest = groupByResultHolder.getResult(groupKey);
- if (tDigest != null) {
- tDigest.add(value);
- } else {
- groupByResultHolder.setValueForKey(groupKey, value);
+ forEachNotNull(length, blockValSet, (from, to) -> {
+ for (int i = from; i < to; i++) {
+ TDigest value = ObjectSerDeUtils.TDIGEST_SER_DE.deserialize(bytesValues[i]);
+ int groupKey = groupKeyArray[i];
+ TDigest tDigest = groupByResultHolder.getResult(groupKey);
+ if (tDigest != null) {
+ tDigest.add(value);
+ } else {
+ groupByResultHolder.setValueForKey(groupKey, value);
+ }
}
- }
+ });
}
}
@@ -156,27 +166,31 @@ public void aggregateGroupByMV(int length, int[][] groupKeysArray, GroupByResult
BlockValSet blockValSet = blockValSetMap.get(_expression);
if (blockValSet.getValueType() != DataType.BYTES) {
double[] doubleValues = blockValSet.getDoubleValuesSV();
- for (int i = 0; i < length; i++) {
- double value = doubleValues[i];
- for (int groupKey : groupKeysArray[i]) {
- getDefaultTDigest(groupByResultHolder, groupKey, _compressionFactor).add(value);
+ forEachNotNull(length, blockValSet, (from, to) -> {
+ for (int i = from; i < to; i++) {
+ double value = doubleValues[i];
+ for (int groupKey : groupKeysArray[i]) {
+ getDefaultTDigest(groupByResultHolder, groupKey, _compressionFactor).add(value);
+ }
}
- }
+ });
} else {
// Serialized QuantileDigest
byte[][] bytesValues = blockValSet.getBytesValuesSV();
- for (int i = 0; i < length; i++) {
- TDigest value = ObjectSerDeUtils.TDIGEST_SER_DE.deserialize(bytesValues[i]);
- for (int groupKey : groupKeysArray[i]) {
- TDigest tDigest = groupByResultHolder.getResult(groupKey);
- if (tDigest != null) {
- tDigest.add(value);
- } else {
- // Create a new TDigest for the group
- groupByResultHolder.setValueForKey(groupKey, ObjectSerDeUtils.TDIGEST_SER_DE.deserialize(bytesValues[i]));
+ forEachNotNull(length, blockValSet, (from, to) -> {
+ for (int i = from; i < to; i++) {
+ TDigest value = ObjectSerDeUtils.TDIGEST_SER_DE.deserialize(bytesValues[i]);
+ for (int groupKey : groupKeysArray[i]) {
+ TDigest tDigest = groupByResultHolder.getResult(groupKey);
+ if (tDigest != null) {
+ tDigest.add(value);
+ } else {
+ // Create a new TDigest for the group
+ groupByResultHolder.setValueForKey(groupKey, ObjectSerDeUtils.TDIGEST_SER_DE.deserialize(bytesValues[i]));
+ }
}
}
- }
+ });
}
}
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/PercentileTDigestMVAggregationFunction.java b/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/PercentileTDigestMVAggregationFunction.java
index 571f2ae9126..a6b7884e6e8 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/PercentileTDigestMVAggregationFunction.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/PercentileTDigestMVAggregationFunction.java
@@ -30,16 +30,16 @@
public class PercentileTDigestMVAggregationFunction extends PercentileTDigestAggregationFunction {
public PercentileTDigestMVAggregationFunction(ExpressionContext expression, int percentile) {
- super(expression, percentile);
+ super(expression, percentile, false);
}
public PercentileTDigestMVAggregationFunction(ExpressionContext expression, double percentile) {
- super(expression, percentile);
+ super(expression, percentile, false);
}
public PercentileTDigestMVAggregationFunction(ExpressionContext expression, double percentile,
int compressionFactor) {
- super(expression, percentile, compressionFactor);
+ super(expression, percentile, compressionFactor, false);
}
@Override
diff --git a/pinot-core/src/test/java/org/apache/pinot/core/query/aggregation/function/AbstractPercentileAggregationFunctionTest.java b/pinot-core/src/test/java/org/apache/pinot/core/query/aggregation/function/AbstractPercentileAggregationFunctionTest.java
new file mode 100644
index 00000000000..fe9cc09f26a
--- /dev/null
+++ b/pinot-core/src/test/java/org/apache/pinot/core/query/aggregation/function/AbstractPercentileAggregationFunctionTest.java
@@ -0,0 +1,333 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.pinot.core.query.aggregation.function;
+
+import org.apache.pinot.queries.FluentQueryTest;
+import org.apache.pinot.spi.data.FieldSpec;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+
+public abstract class AbstractPercentileAggregationFunctionTest extends AbstractAggregationFunctionTest {
+
+ @DataProvider(name = "scenarios")
+ Object[] scenarios() {
+ return new Object[] {
+ new Scenario(FieldSpec.DataType.INT),
+ new Scenario(FieldSpec.DataType.LONG),
+ new Scenario(FieldSpec.DataType.FLOAT),
+ new Scenario(FieldSpec.DataType.DOUBLE),
+ };
+ }
+
+ public abstract String callStr(String column, int percent);
+
+ public String getFinalResultColumnType() {
+ return "DOUBLE";
+ }
+
+ public class Scenario {
+ private final FieldSpec.DataType _dataType;
+
+ public Scenario(FieldSpec.DataType dataType) {
+ _dataType = dataType;
+ }
+
+ public FieldSpec.DataType getDataType() {
+ return _dataType;
+ }
+
+ public FluentQueryTest.DeclaringTable getDeclaringTable(boolean nullHandlingEnabled) {
+ return givenSingleNullableFieldTable(_dataType, nullHandlingEnabled);
+ }
+
+ @Override
+ public String toString() {
+ return "Scenario{" + "dt=" + _dataType + '}';
+ }
+ }
+
+ FluentQueryTest.TableWithSegments withDefaultData(Scenario scenario, boolean nullHandlingEnabled) {
+ return scenario.getDeclaringTable(nullHandlingEnabled)
+ .onFirstInstance("myField",
+ "null",
+ "0",
+ "null",
+ "1",
+ "null",
+ "2",
+ "null",
+ "3",
+ "null",
+ "4",
+ "null"
+ ).andSegment("myField",
+ "null",
+ "5",
+ "null",
+ "6",
+ "null",
+ "7",
+ "null",
+ "8",
+ "null",
+ "9",
+ "null"
+ );
+ }
+
+ String minValue(FieldSpec.DataType dataType) {
+ switch (dataType) {
+ case INT: return "-2.147483648E9";
+ case LONG: return "-9.223372036854776E18";
+ case FLOAT: return "-Infinity";
+ case DOUBLE: return "-Infinity";
+ default:
+ throw new IllegalArgumentException("Unexpected type " + dataType);
+ }
+ }
+
+ String expectedAggrWithoutNull10(Scenario scenario) {
+ return minValue(scenario._dataType);
+ }
+
+ String expectedAggrWithoutNull15(Scenario scenario) {
+ return minValue(scenario._dataType);
+ }
+
+ String expectedAggrWithoutNull30(Scenario scenario) {
+ return minValue(scenario._dataType);
+ }
+
+ String expectedAggrWithoutNull35(Scenario scenario) {
+ return minValue(scenario._dataType);
+ }
+
+ String expectedAggrWithoutNull50(Scenario scenario) {
+ return minValue(scenario._dataType);
+ }
+
+ String expectedAggrWithoutNull55(Scenario scenario) {
+ return "0";
+ }
+
+ String expectedAggrWithoutNull70(Scenario scenario) {
+ return "3";
+ }
+
+ String expectedAggrWithoutNull75(Scenario scenario) {
+ return "4";
+ }
+
+ String expectedAggrWithoutNull90(Scenario scenario) {
+ return "7";
+ }
+
+ String expectedAggrWithoutNull100(Scenario scenario) {
+ return "9";
+ }
+
+ @Test(dataProvider = "scenarios")
+ void aggrWithoutNull(Scenario scenario) {
+
+ FluentQueryTest.TableWithSegments instance = withDefaultData(scenario, false);
+
+ instance
+ .whenQuery("select " + callStr("myField", 10) + " from testTable")
+ .thenResultIs(getFinalResultColumnType(), expectedAggrWithoutNull10(scenario));
+
+ instance
+ .whenQuery("select " + callStr("myField", 15) + " from testTable")
+ .thenResultIs(getFinalResultColumnType(), expectedAggrWithoutNull15(scenario));
+
+ instance
+ .whenQuery("select " + callStr("myField", 30) + " from testTable")
+ .thenResultIs(getFinalResultColumnType(), expectedAggrWithoutNull30(scenario));
+ instance
+ .whenQuery("select " + callStr("myField", 35) + " from testTable")
+ .thenResultIs(getFinalResultColumnType(), expectedAggrWithoutNull35(scenario));
+
+ instance
+ .whenQuery("select " + callStr("myField", 50) + " from testTable")
+ .thenResultIs(getFinalResultColumnType(), expectedAggrWithoutNull50(scenario));
+ instance
+ .whenQuery("select " + callStr("myField", 55) + " from testTable")
+ .thenResultIs(getFinalResultColumnType(), expectedAggrWithoutNull55(scenario));
+
+ instance
+ .whenQuery("select " + callStr("myField", 70) + " from testTable")
+ .thenResultIs(getFinalResultColumnType(), expectedAggrWithoutNull70(scenario));
+
+ instance
+ .whenQuery("select " + callStr("myField", 75) + " from testTable")
+ .thenResultIs(getFinalResultColumnType(), expectedAggrWithoutNull75(scenario));
+
+ instance
+ .whenQuery("select " + callStr("myField", 90) + " from testTable")
+ .thenResultIs(getFinalResultColumnType(), expectedAggrWithoutNull90(scenario));
+
+ instance
+ .whenQuery("select " + callStr("myField", 100) + " from testTable")
+ .thenResultIs(getFinalResultColumnType(), expectedAggrWithoutNull100(scenario));
+ }
+
+ String expectedAggrWithNull10(Scenario scenario) {
+ return "1";
+ }
+
+ @Test(dataProvider = "scenarios")
+ void aggrWithNull10(Scenario scenario) {
+ withDefaultData(scenario, true)
+ .whenQuery("select " + callStr("myField", 10) + " from testTable")
+ .thenResultIs(getFinalResultColumnType(), expectedAggrWithNull10(scenario));
+ }
+
+ String expectedAggrWithNull15(Scenario scenario) {
+ return "1";
+ }
+
+ @Test(dataProvider = "scenarios")
+ void aggrWithNull15(Scenario scenario) {
+ withDefaultData(scenario, true)
+ .whenQuery("select " + callStr("myField", 15) + " from testTable")
+ .thenResultIs(getFinalResultColumnType(), expectedAggrWithNull15(scenario));
+ }
+
+ String expectedAggrWithNull30(Scenario scenario) {
+ return "3";
+ }
+
+ @Test(dataProvider = "scenarios")
+ void aggrWithNull30(Scenario scenario) {
+ withDefaultData(scenario, true)
+ .whenQuery("select " + callStr("myField", 30) + " from testTable")
+ .thenResultIs(getFinalResultColumnType(), expectedAggrWithNull30(scenario));
+ }
+
+ String expectedAggrWithNull35(Scenario scenario) {
+ return "3";
+ }
+
+ @Test(dataProvider = "scenarios")
+ void aggrWithNull35(Scenario scenario) {
+ withDefaultData(scenario, true)
+ .whenQuery("select " + callStr("myField", 35) + " from testTable")
+ .thenResultIs(getFinalResultColumnType(), expectedAggrWithNull35(scenario));
+ }
+
+ String expectedAggrWithNull50(Scenario scenario) {
+ return "5";
+ }
+
+ @Test(dataProvider = "scenarios")
+ void aggrWithNull50(Scenario scenario) {
+ withDefaultData(scenario, true)
+ .whenQuery("select " + callStr("myField", 50) + " from testTable")
+ .thenResultIs(getFinalResultColumnType(), expectedAggrWithNull50(scenario));
+ }
+
+ String expectedAggrWithNull55(Scenario scenario) {
+ return "5";
+ }
+
+ @Test(dataProvider = "scenarios")
+ void aggrWithNull55(Scenario scenario) {
+ withDefaultData(scenario, true)
+ .whenQuery("select " + callStr("myField", 55) + " from testTable")
+ .thenResultIs(getFinalResultColumnType(), expectedAggrWithNull55(scenario));
+ }
+
+ String expectedAggrWithNull70(Scenario scenario) {
+ return "7";
+ }
+
+ @Test(dataProvider = "scenarios")
+ void aggrWithNull70(Scenario scenario) {
+ withDefaultData(scenario, true)
+ .whenQuery("select " + callStr("myField", 70) + " from testTable")
+ .thenResultIs(getFinalResultColumnType(), expectedAggrWithNull70(scenario));
+ }
+
+ String expectedAggrWithNull75(Scenario scenario) {
+ return "7";
+ }
+
+ @Test(dataProvider = "scenarios")
+ void aggrWithNull75(Scenario scenario) {
+ withDefaultData(scenario, true)
+ .whenQuery("select " + callStr("myField", 75) + " from testTable")
+ .thenResultIs(getFinalResultColumnType(), expectedAggrWithNull75(scenario));
+ }
+
+ String expectedAggrWithNull100(Scenario scenario) {
+ return "9";
+ }
+
+ @Test(dataProvider = "scenarios")
+ void aggrWithNull100(Scenario scenario) {
+ withDefaultData(scenario, true)
+ .whenQuery("select " + callStr("myField", 100) + " from testTable")
+ .thenResultIs(getFinalResultColumnType(), expectedAggrWithNull100(scenario));
+ }
+
+ @Test(dataProvider = "scenarios")
+ void aggrSvWithoutNull(Scenario scenario) {
+ scenario.getDeclaringTable(false)
+ .onFirstInstance("myField",
+ "null",
+ "1",
+ "null"
+ ).andSegment("myField",
+ "9"
+ ).andSegment("myField",
+ "null",
+ "null",
+ "null"
+ ).whenQuery("select $segmentName, " + callStr("myField", 50) + " from testTable "
+ + "group by $segmentName order by $segmentName")
+ .thenResultIs("STRING | " + getFinalResultColumnType(),
+ "testTable_0 | " + minValue(scenario._dataType),
+ "testTable_1 | 9",
+ "testTable_2 | " + minValue(scenario._dataType)
+ );
+ }
+
+ @Test(dataProvider = "scenarios")
+ void aggrSvWithNull(Scenario scenario) {
+ scenario.getDeclaringTable(true)
+ .onFirstInstance("myField",
+ "null",
+ "1",
+ "null"
+ ).andSegment("myField",
+ "9"
+ ).andSegment("myField",
+ "null",
+ "null",
+ "null"
+ ).whenQuery("select $segmentName, " + callStr("myField", 50) + " from testTable "
+ + "group by $segmentName order by $segmentName")
+ .thenResultIs("STRING | " + getFinalResultColumnType(),
+ "testTable_0 | 1",
+ "testTable_1 | 9",
+ "testTable_2 | null"
+ );
+ }
+}
diff --git a/pinot-core/src/test/java/org/apache/pinot/core/query/aggregation/function/PercentileAggregationFunctionTest.java b/pinot-core/src/test/java/org/apache/pinot/core/query/aggregation/function/PercentileAggregationFunctionTest.java
new file mode 100644
index 00000000000..3c2ecdde011
--- /dev/null
+++ b/pinot-core/src/test/java/org/apache/pinot/core/query/aggregation/function/PercentileAggregationFunctionTest.java
@@ -0,0 +1,27 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.pinot.core.query.aggregation.function;
+
+public class PercentileAggregationFunctionTest extends AbstractPercentileAggregationFunctionTest {
+ @Override
+ public String callStr(String column, int percent) {
+ return "PERCENTILE(" + column + ", " + percent + ")";
+ }
+}
diff --git a/pinot-core/src/test/java/org/apache/pinot/core/query/aggregation/function/PercentileEstAggregationFunctionTest.java b/pinot-core/src/test/java/org/apache/pinot/core/query/aggregation/function/PercentileEstAggregationFunctionTest.java
new file mode 100644
index 00000000000..4dda1614b7c
--- /dev/null
+++ b/pinot-core/src/test/java/org/apache/pinot/core/query/aggregation/function/PercentileEstAggregationFunctionTest.java
@@ -0,0 +1,45 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.core.query.aggregation.function;
+
+import org.apache.pinot.spi.data.FieldSpec;
+
+
+public class PercentileEstAggregationFunctionTest extends AbstractPercentileAggregationFunctionTest {
+ @Override
+ public String callStr(String column, int percent) {
+ return "PERCENTILEEST(" + column + ", " + percent + ")";
+ }
+
+ @Override
+ public String getFinalResultColumnType() {
+ return "LONG";
+ }
+
+ String minValue(FieldSpec.DataType dataType) {
+ switch (dataType) {
+ case INT: return "-2147483648";
+ case LONG: return "-9223372036854775808";
+ case FLOAT: return "-9223372036854775808";
+ case DOUBLE: return "-9223372036854775808";
+ default:
+ throw new IllegalArgumentException("Unexpected type " + dataType);
+ }
+ }
+}
diff --git a/pinot-core/src/test/java/org/apache/pinot/core/query/aggregation/function/PercentileKLLAggregationFunctionTest.java b/pinot-core/src/test/java/org/apache/pinot/core/query/aggregation/function/PercentileKLLAggregationFunctionTest.java
new file mode 100644
index 00000000000..1eb6c991c22
--- /dev/null
+++ b/pinot-core/src/test/java/org/apache/pinot/core/query/aggregation/function/PercentileKLLAggregationFunctionTest.java
@@ -0,0 +1,47 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.core.query.aggregation.function;
+
+
+public class PercentileKLLAggregationFunctionTest extends AbstractPercentileAggregationFunctionTest {
+ @Override
+ public String callStr(String column, int percent) {
+ return "PERCENTILEKLL(" + column + ", " + percent + ")";
+ }
+
+ @Override
+ String expectedAggrWithNull10(Scenario scenario) {
+ return "0";
+ }
+
+ @Override
+ String expectedAggrWithNull30(Scenario scenario) {
+ return "2";
+ }
+
+ @Override
+ String expectedAggrWithNull50(Scenario scenario) {
+ return "4";
+ }
+
+ @Override
+ String expectedAggrWithNull70(Scenario scenario) {
+ return "6";
+ }
+}
diff --git a/pinot-core/src/test/java/org/apache/pinot/core/query/aggregation/function/PercentileSmartTDigestAggregationFunctionTest.java b/pinot-core/src/test/java/org/apache/pinot/core/query/aggregation/function/PercentileSmartTDigestAggregationFunctionTest.java
new file mode 100644
index 00000000000..b1eb471c704
--- /dev/null
+++ b/pinot-core/src/test/java/org/apache/pinot/core/query/aggregation/function/PercentileSmartTDigestAggregationFunctionTest.java
@@ -0,0 +1,87 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.core.query.aggregation.function;
+
+
+public class PercentileSmartTDigestAggregationFunctionTest {
+
+ public static class WithHighThreshold extends AbstractPercentileAggregationFunctionTest {
+ @Override
+ public String callStr(String column, int percent) {
+ return "PERCENTILESMARTTDIGEST(" + column + ", " + percent + ", 'THRESHOLD=10000')";
+ }
+ }
+
+ public static class WithSmallThreshold extends AbstractPercentileAggregationFunctionTest {
+ @Override
+ public String callStr(String column, int percent) {
+ return "PERCENTILESMARTTDIGEST(" + column + ", " + percent + ", 'THRESHOLD=1')";
+ }
+
+ @Override
+ String expectedAggrWithNull10(Scenario scenario) {
+ return "0.5";
+ }
+
+ @Override
+ String expectedAggrWithNull30(Scenario scenario) {
+ return "2.5";
+ }
+
+ @Override
+ String expectedAggrWithNull50(Scenario scenario) {
+ return "4.5";
+ }
+
+ @Override
+ String expectedAggrWithNull70(Scenario scenario) {
+ return "6.5";
+ }
+
+ @Override
+ String expectedAggrWithoutNull55(Scenario scenario) {
+ switch (scenario.getDataType()) {
+ case INT:
+ return "-6.442450943999939E8";
+ case LONG:
+ return "-2.7670116110564065E18";
+ case FLOAT:
+ case DOUBLE:
+ return "-Infinity";
+ default:
+ throw new IllegalArgumentException("Unsupported datatype " + scenario.getDataType());
+ }
+ }
+
+ @Override
+ String expectedAggrWithoutNull75(Scenario scenario) {
+ return "4.0";
+ }
+
+ @Override
+ String expectedAggrWithoutNull90(Scenario scenario) {
+ return "7.100000000000001";
+ }
+
+ @Override
+ String expectedAggrWithoutNull100(Scenario scenario) {
+ return super.expectedAggrWithoutNull100(scenario);
+ }
+ }
+}
diff --git a/pinot-core/src/test/java/org/apache/pinot/queries/FluentQueryTest.java b/pinot-core/src/test/java/org/apache/pinot/queries/FluentQueryTest.java
index ba6d22c429c..8bd93cd42e3 100644
--- a/pinot-core/src/test/java/org/apache/pinot/queries/FluentQueryTest.java
+++ b/pinot-core/src/test/java/org/apache/pinot/queries/FluentQueryTest.java
@@ -112,7 +112,7 @@ public OnFirstInstance onFirstInstance(Object[]... content) {
}
}
- static class TableWithSegments {
+ public static class TableWithSegments {
protected final TableConfig _tableConfig;
protected final Schema _schema;
protected final File _indexDir;
From 62b97ef3b41f1f00dfc8272d855e1eac802c0106 Mon Sep 17 00:00:00 2001
From: Ting Chen
Date: Mon, 1 Apr 2024 10:34:11 -0700
Subject: [PATCH 21/50] Add support for phrase search with wildcard and prefix
matching for Lucene indexed tables (#12680)
* Intial commit to support phrase search with regex matching for the terms in the phrase
* Increase max clause limit for SpanOr queries.
* Fix the lint errors.
* Fix lint
* Fix based on comments.
* Fix lint.
* Fix lint
* Remove unused imports.
* Revise based on comments.
---
.../pinot/queries/TextSearchQueriesTest.java | 40 +++++++
.../RealtimeLuceneTextIndex.java | 12 ++-
.../readers/text/LuceneTextIndexReader.java | 14 ++-
.../index/text/TextIndexConfigBuilder.java | 2 +
.../local/utils/LuceneTextIndexUtils.java | 75 +++++++++++++
.../LuceneMutableTextIndexTest.java | 2 +-
.../NativeAndLuceneMutableTextIndexTest.java | 2 +-
.../store/FilePerIndexDirectoryTest.java | 4 +-
.../store/SingleFileIndexDirectoryTest.java | 4 +-
.../local/utils/LuceneTextIndexUtilsTest.java | 100 ++++++++++++++++++
.../segment/spi/index/TextIndexConfig.java | 31 +++++-
.../pinot/spi/config/table/FieldConfig.java | 2 +
12 files changed, 277 insertions(+), 11 deletions(-)
create mode 100644 pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/LuceneTextIndexUtils.java
create mode 100644 pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/LuceneTextIndexUtilsTest.java
diff --git a/pinot-core/src/test/java/org/apache/pinot/queries/TextSearchQueriesTest.java b/pinot-core/src/test/java/org/apache/pinot/queries/TextSearchQueriesTest.java
index df1b8a790fb..217e099003d 100644
--- a/pinot-core/src/test/java/org/apache/pinot/queries/TextSearchQueriesTest.java
+++ b/pinot-core/src/test/java/org/apache/pinot/queries/TextSearchQueriesTest.java
@@ -153,6 +153,7 @@ public void setUp()
props = new HashMap<>();
props.put(FieldConfig.TEXT_INDEX_STOP_WORD_INCLUDE_KEY, "coordinator");
props.put(FieldConfig.TEXT_INDEX_STOP_WORD_EXCLUDE_KEY, "it, those");
+ props.put(FieldConfig.TEXT_INDEX_ENABLE_PREFIX_SUFFIX_PHRASE_QUERIES, "true");
columnProperties.put(SKILLS_TEXT_COL_NAME, props);
props = new HashMap<>();
props.put(FieldConfig.TEXT_INDEX_STOP_WORD_EXCLUDE_KEY, "");
@@ -207,6 +208,7 @@ private void buildSegment()
addTextIndexProp(config, SKILLS_TEXT_COL_NAME, ImmutableMap.builder()
.put(FieldConfig.TEXT_INDEX_STOP_WORD_INCLUDE_KEY, "coordinator")
.put(FieldConfig.TEXT_INDEX_STOP_WORD_EXCLUDE_KEY, "it, those")
+ .put(FieldConfig.TEXT_INDEX_ENABLE_PREFIX_SUFFIX_PHRASE_QUERIES, "true")
.build());
addTextIndexProp(config, SKILLS_TEXT_COL_DICT_NAME,
Collections.singletonMap(FieldConfig.TEXT_INDEX_STOP_WORD_EXCLUDE_KEY, ""));
@@ -280,6 +282,44 @@ private List createTestData()
return rows;
}
+ @Test
+ public void testMultiTermRegexSearch()
+ throws Exception {
+ // Search in SKILLS_TEXT_COL column to look for documents that have the /.*ealtime stream system.*/ regex pattern
+ List expected = new ArrayList<>();
+ expected.add(new Object[]{1010,
+ "Distributed systems, Java, realtime streaming systems, Machine learning, spark, Kubernetes, distributed "
+ + "storage, concurrency, multi-threading"});
+ expected.add(new Object[]{1019,
+ "C++, Java, Python, realtime streaming systems, Machine learning, spark, Kubernetes, transaction processing, "
+ + "distributed storage, concurrency, multi-threading, apache airflow"});
+
+ String query =
+ "SELECT INT_COL, SKILLS_TEXT_COL FROM MyTable WHERE TEXT_MATCH(SKILLS_TEXT_COL, '*ealtime streaming system*') "
+ + "LIMIT 50000";
+ testTextSearchSelectQueryHelper(query, expected.size(), false, expected);
+
+ // Search /*java realtime stream system*, only 1 result left./
+ List expected1 = new ArrayList<>();
+ expected1.add(new Object[]{1010,
+ "Distributed systems, Java, realtime streaming systems, Machine learning, spark, Kubernetes, distributed "
+ + "storage, concurrency, multi-threading"});
+ String query1 =
+ "SELECT INT_COL, SKILLS_TEXT_COL FROM MyTable WHERE TEXT_MATCH(SKILLS_TEXT_COL, '*ava realtime streaming "
+ + "system*') LIMIT 50000";
+ testTextSearchSelectQueryHelper(query1, expected1.size(), false, expected1);
+
+ String query2 =
+ "SELECT INT_COL, SKILLS_TEXT_COL FROM MyTable WHERE TEXT_MATCH(SKILLS_TEXT_COL, '*ava realtime streaming "
+ + "system* AND *chine learn*') LIMIT 50000";
+ testTextSearchSelectQueryHelper(query2, expected1.size(), false, expected1);
+
+ String query3 =
+ "SELECT INT_COL, SKILLS_TEXT_COL FROM MyTable WHERE TEXT_MATCH(SKILLS_TEXT_COL, '*ava realtime streaming "
+ + "system* AND *chine learner*') LIMIT 50000";
+ testTextSearchSelectQueryHelper(query3, 0, false, new ArrayList<>());
+ }
+
/**
* Tests for phrase, term, regex, composite (using AND/OR) text search queries.
* Both selection and aggregation queries are used.
diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/invertedindex/RealtimeLuceneTextIndex.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/invertedindex/RealtimeLuceneTextIndex.java
index 2a35b2da60d..a71d2663ed7 100644
--- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/invertedindex/RealtimeLuceneTextIndex.java
+++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/invertedindex/RealtimeLuceneTextIndex.java
@@ -30,6 +30,7 @@
import org.apache.lucene.search.SearcherManager;
import org.apache.pinot.segment.local.indexsegment.mutable.MutableSegmentImpl;
import org.apache.pinot.segment.local.segment.creator.impl.text.LuceneTextIndexCreator;
+import org.apache.pinot.segment.local.utils.LuceneTextIndexUtils;
import org.apache.pinot.segment.spi.index.TextIndexConfig;
import org.apache.pinot.segment.spi.index.mutable.MutableTextIndex;
import org.roaringbitmap.IntIterator;
@@ -53,6 +54,7 @@ public class RealtimeLuceneTextIndex implements MutableTextIndex {
private Analyzer _analyzer;
private final String _column;
private final String _segmentName;
+ private boolean _enablePrefixSuffixMatchingInPhraseQueries = false;
/**
* Created by {@link MutableSegmentImpl}
@@ -80,6 +82,7 @@ public RealtimeLuceneTextIndex(String column, File segmentIndexDir, String segme
IndexWriter indexWriter = _indexCreator.getIndexWriter();
_searcherManager = new SearcherManager(indexWriter, false, false, null);
_analyzer = _indexCreator.getIndexWriter().getConfig().getAnalyzer();
+ _enablePrefixSuffixMatchingInPhraseQueries = config.isEnablePrefixSuffixMatchingInPhraseQueries();
} catch (Exception e) {
LOGGER.error("Failed to instantiate realtime Lucene index reader for column {}, exception {}", column,
e.getMessage());
@@ -119,7 +122,14 @@ public MutableRoaringBitmap getDocIds(String searchQuery) {
Callable searchCallable = () -> {
IndexSearcher indexSearcher = null;
try {
- Query query = new QueryParser(_column, _analyzer).parse(searchQuery);
+ QueryParser parser = new QueryParser(_column, _analyzer);
+ if (_enablePrefixSuffixMatchingInPhraseQueries) {
+ parser.setAllowLeadingWildcard(true);
+ }
+ Query query = parser.parse(searchQuery);
+ if (_enablePrefixSuffixMatchingInPhraseQueries) {
+ query = LuceneTextIndexUtils.convertToMultiTermSpanQuery(query);
+ }
indexSearcher = _searcherManager.acquire();
indexSearcher.search(query, docIDCollector);
return getPinotDocIds(indexSearcher, docIDs);
diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/text/LuceneTextIndexReader.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/text/LuceneTextIndexReader.java
index 9b971d51424..3a0efabe8c9 100644
--- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/text/LuceneTextIndexReader.java
+++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/text/LuceneTextIndexReader.java
@@ -40,6 +40,7 @@
import org.apache.pinot.segment.local.segment.index.column.PhysicalColumnIndexContainer;
import org.apache.pinot.segment.local.segment.index.text.TextIndexConfigBuilder;
import org.apache.pinot.segment.local.segment.store.TextIndexUtils;
+import org.apache.pinot.segment.local.utils.LuceneTextIndexUtils;
import org.apache.pinot.segment.spi.V1Constants;
import org.apache.pinot.segment.spi.index.TextIndexConfig;
import org.apache.pinot.segment.spi.index.reader.TextIndexReader;
@@ -66,6 +67,7 @@ public class LuceneTextIndexReader implements TextIndexReader {
private final DocIdTranslator _docIdTranslator;
private final Analyzer _analyzer;
private boolean _useANDForMultiTermQueries = false;
+ private boolean _enablePrefixSuffixMatchingInPhraseQueries = false;
public LuceneTextIndexReader(String column, File indexDir, int numDocs, TextIndexConfig config) {
_column = column;
@@ -82,6 +84,9 @@ public LuceneTextIndexReader(String column, File indexDir, int numDocs, TextInde
if (config.isUseANDForMultiTermQueries()) {
_useANDForMultiTermQueries = true;
}
+ if (config.isEnablePrefixSuffixMatchingInPhraseQueries()) {
+ _enablePrefixSuffixMatchingInPhraseQueries = true;
+ }
// TODO: consider using a threshold of num docs per segment to decide between building
// mapping file upfront on segment load v/s on-the-fly during query processing
_docIdTranslator = new DocIdTranslator(indexDir, _column, numDocs, _indexSearcher);
@@ -150,10 +155,18 @@ public MutableRoaringBitmap getDocIds(String searchQuery) {
// be instantiated per query. Analyzer on the other hand is stateless
// and can be created upfront.
QueryParser parser = new QueryParser(_column, _analyzer);
+ // Phrase search with prefix/suffix matching may have leading *. E.g., `*pache pinot` which can be stripped by
+ // the query parser. To support the feature, we need to explicitly set the config to be true.
+ if (_enablePrefixSuffixMatchingInPhraseQueries) {
+ parser.setAllowLeadingWildcard(true);
+ }
if (_useANDForMultiTermQueries) {
parser.setDefaultOperator(QueryParser.Operator.AND);
}
Query query = parser.parse(searchQuery);
+ if (_enablePrefixSuffixMatchingInPhraseQueries) {
+ query = LuceneTextIndexUtils.convertToMultiTermSpanQuery(query);
+ }
_indexSearcher.search(query, docIDCollector);
return docIds;
} catch (Exception e) {
@@ -162,7 +175,6 @@ public MutableRoaringBitmap getDocIds(String searchQuery) {
throw new RuntimeException(msg, e);
}
}
-
/**
* When we destroy the loaded ImmutableSegment, all the indexes
* (for each column) are destroyed and as part of that
diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/text/TextIndexConfigBuilder.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/text/TextIndexConfigBuilder.java
index 1c14226c0bb..5d07fb788de 100644
--- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/text/TextIndexConfigBuilder.java
+++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/text/TextIndexConfigBuilder.java
@@ -50,6 +50,8 @@ public TextIndexConfig.AbstractBuilder withProperties(@Nullable Map spanQueryLst = new ArrayList<>();
+ boolean prefixOrSuffixQueryFound = false;
+ for (BooleanClause clause : ((BooleanQuery) query).clauses()) {
+ Query q = clause.getQuery();
+ if (q instanceof WildcardQuery || q instanceof PrefixQuery) {
+ prefixOrSuffixQueryFound = true;
+ spanQueryLst.add(new SpanMultiTermQueryWrapper<>((AutomatonQuery) q));
+ } else if (q instanceof TermQuery) {
+ spanQueryLst.add(new SpanTermQuery(((TermQuery) q).getTerm()));
+ } else {
+ LOGGER.info("query can not be handled currently {} ", q);
+ return query;
+ }
+ }
+ if (!prefixOrSuffixQueryFound) {
+ return query;
+ }
+ SpanNearQuery spanNearQuery = new SpanNearQuery(spanQueryLst.toArray(new SpanQuery[0]), 0, true);
+ LOGGER.debug("The phrase query {} is re-written as {}", query, spanNearQuery);
+ return spanNearQuery;
+ }
+}
diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/realtime/impl/invertedindex/LuceneMutableTextIndexTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/realtime/impl/invertedindex/LuceneMutableTextIndexTest.java
index e8066bb9e7b..c485a3dd39b 100644
--- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/realtime/impl/invertedindex/LuceneMutableTextIndexTest.java
+++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/realtime/impl/invertedindex/LuceneMutableTextIndexTest.java
@@ -59,7 +59,7 @@ private String[][] getRepeatedData() {
public void setUp()
throws Exception {
TextIndexConfig config =
- new TextIndexConfig(false, null, null, false, false, null, null, true, 500, null);
+ new TextIndexConfig(false, null, null, false, false, null, null, true, 500, null, false);
_realtimeLuceneTextIndex =
new RealtimeLuceneTextIndex(TEXT_COLUMN_NAME, INDEX_DIR, "fooBar", config);
String[][] documents = getTextData();
diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/realtime/impl/invertedindex/NativeAndLuceneMutableTextIndexTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/realtime/impl/invertedindex/NativeAndLuceneMutableTextIndexTest.java
index 211614b5b2c..ca1c94ceb8f 100644
--- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/realtime/impl/invertedindex/NativeAndLuceneMutableTextIndexTest.java
+++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/realtime/impl/invertedindex/NativeAndLuceneMutableTextIndexTest.java
@@ -72,7 +72,7 @@ private String[][] getMVTextData() {
public void setUp()
throws Exception {
TextIndexConfig config =
- new TextIndexConfig(false, null, null, false, false, null, null, true, 500, null);
+ new TextIndexConfig(false, null, null, false, false, null, null, true, 500, null, false);
_realtimeLuceneTextIndex =
new RealtimeLuceneTextIndex(TEXT_COLUMN_NAME, INDEX_DIR, "fooBar", config);
_nativeMutableTextIndex = new NativeMutableTextIndex(TEXT_COLUMN_NAME);
diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/FilePerIndexDirectoryTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/FilePerIndexDirectoryTest.java
index 61fa9f0319e..f60de6d12d2 100644
--- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/FilePerIndexDirectoryTest.java
+++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/FilePerIndexDirectoryTest.java
@@ -202,7 +202,7 @@ public void nativeTextIndexIsDeleted()
public void testRemoveTextIndices()
throws IOException {
TextIndexConfig config =
- new TextIndexConfig(false, null, null, false, false, null, null, true, 500, null);
+ new TextIndexConfig(false, null, null, false, false, null, null, true, 500, null, false);
try (FilePerIndexDirectory fpi = new FilePerIndexDirectory(TEMP_DIR, _segmentMetadata, ReadMode.mmap);
LuceneTextIndexCreator fooCreator = new LuceneTextIndexCreator("foo", TEMP_DIR, true, config);
LuceneTextIndexCreator barCreator = new LuceneTextIndexCreator("bar", TEMP_DIR, true, config)) {
@@ -265,7 +265,7 @@ public void testRemoveTextIndices()
public void testGetColumnIndices()
throws IOException {
TextIndexConfig config =
- new TextIndexConfig(false, null, null, false, false, null, null, true, 500, null);
+ new TextIndexConfig(false, null, null, false, false, null, null, true, 500, null, false);
// Write sth to buffers and flush them to index files on disk
try (FilePerIndexDirectory fpi = new FilePerIndexDirectory(TEMP_DIR, _segmentMetadata, ReadMode.mmap);
LuceneTextIndexCreator fooCreator = new LuceneTextIndexCreator("foo", TEMP_DIR, true, config);
diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/SingleFileIndexDirectoryTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/SingleFileIndexDirectoryTest.java
index 2a03044abe6..28494666362 100644
--- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/SingleFileIndexDirectoryTest.java
+++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/SingleFileIndexDirectoryTest.java
@@ -235,7 +235,7 @@ public void testCleanupRemovedIndices()
public void testRemoveTextIndices()
throws IOException, ConfigurationException {
TextIndexConfig config =
- new TextIndexConfig(false, null, null, false, false, null, null, true, 500, null);
+ new TextIndexConfig(false, null, null, false, false, null, null, true, 500, null, false);
try (SingleFileIndexDirectory sfd = new SingleFileIndexDirectory(TEMP_DIR, _segmentMetadata, ReadMode.mmap);
LuceneTextIndexCreator fooCreator = new LuceneTextIndexCreator("foo", TEMP_DIR, true, config);
LuceneTextIndexCreator barCreator = new LuceneTextIndexCreator("bar", TEMP_DIR, true, config)) {
@@ -341,7 +341,7 @@ public void testPersistIndexMaps() {
public void testGetColumnIndices()
throws Exception {
TextIndexConfig config =
- new TextIndexConfig(false, null, null, false, false, null, null, true, 500, null);
+ new TextIndexConfig(false, null, null, false, false, null, null, true, 500, null, false);
try (SingleFileIndexDirectory sfd = new SingleFileIndexDirectory(TEMP_DIR, _segmentMetadata, ReadMode.mmap);
LuceneTextIndexCreator fooCreator = new LuceneTextIndexCreator("foo", TEMP_DIR, true, config);
LuceneTextIndexCreator barCreator = new LuceneTextIndexCreator("bar", TEMP_DIR, true, config)) {
diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/LuceneTextIndexUtilsTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/LuceneTextIndexUtilsTest.java
new file mode 100644
index 00000000000..004c3ed04c7
--- /dev/null
+++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/LuceneTextIndexUtilsTest.java
@@ -0,0 +1,100 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.utils;
+
+import org.apache.lucene.index.Term;
+import org.apache.lucene.queries.spans.SpanMultiTermQueryWrapper;
+import org.apache.lucene.queries.spans.SpanNearQuery;
+import org.apache.lucene.queries.spans.SpanQuery;
+import org.apache.lucene.queries.spans.SpanTermQuery;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.PrefixQuery;
+import org.apache.lucene.search.RegexpQuery;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.WildcardQuery;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+
+public class LuceneTextIndexUtilsTest {
+ @Test
+ public void testBooleanQueryRewrittenToSpanQuery() {
+ // Test 1: The input is a boolean query with 2 clauses: "*pache pino*"
+ BooleanQuery.Builder builder = new BooleanQuery.Builder();
+ WildcardQuery wildcardQuery = new WildcardQuery(new Term("field", "*apche"));
+ PrefixQuery prefixQuery = new PrefixQuery(new Term("field", "pino"));
+ builder.add(new BooleanClause(wildcardQuery, BooleanClause.Occur.SHOULD))
+ .add(new BooleanClause(prefixQuery, BooleanClause.Occur.SHOULD));
+
+ SpanQuery[] spanQueries1 =
+ {new SpanMultiTermQueryWrapper<>(wildcardQuery), new SpanMultiTermQueryWrapper<>(prefixQuery)};
+ SpanQuery expectedQuery = new SpanNearQuery(spanQueries1, 0, true);
+ Assert.assertEquals(expectedQuery, LuceneTextIndexUtils.convertToMultiTermSpanQuery(builder.build()));
+
+ // Test 2: The input is a boolean query with 3 clauses: "*pache real pino*"
+ builder = new BooleanQuery.Builder();
+ Term term = new Term("field", "real");
+ builder.add(new BooleanClause(wildcardQuery, BooleanClause.Occur.SHOULD))
+ .add(new BooleanClause(new TermQuery(term), BooleanClause.Occur.SHOULD))
+ .add(new BooleanClause(prefixQuery, BooleanClause.Occur.SHOULD));
+
+ SpanQuery[] spanQueries2 =
+ {new SpanMultiTermQueryWrapper<>(wildcardQuery), new SpanTermQuery(term), new SpanMultiTermQueryWrapper<>(
+ prefixQuery)};
+ expectedQuery = new SpanNearQuery(spanQueries2, 0, true);
+ Assert.assertEquals(expectedQuery, LuceneTextIndexUtils.convertToMultiTermSpanQuery(builder.build()));
+
+ // Test 3: The input is a boolean query with 3 clauses: "*pache real* pino*"
+ builder = new BooleanQuery.Builder();
+ builder.add(new BooleanClause(wildcardQuery, BooleanClause.Occur.SHOULD))
+ .add(new BooleanClause(prefixQuery, BooleanClause.Occur.SHOULD))
+ .add(new BooleanClause(prefixQuery, BooleanClause.Occur.SHOULD));
+
+ SpanQuery[] spanQueries3 = {new SpanMultiTermQueryWrapper<>(wildcardQuery), new SpanMultiTermQueryWrapper<>(
+ prefixQuery), new SpanMultiTermQueryWrapper<>(prefixQuery)};
+ expectedQuery = new SpanNearQuery(spanQueries3, 0, true);
+ Assert.assertEquals(expectedQuery, LuceneTextIndexUtils.convertToMultiTermSpanQuery(builder.build()));
+
+ // Test 4: The input is a boolean query with 1 clause: "*pino*".
+ WildcardQuery wildcardQuery1 = new WildcardQuery(new Term("field", "*pino*"));
+ builder = new BooleanQuery.Builder();
+ builder.add(new BooleanClause(wildcardQuery1, BooleanClause.Occur.SHOULD));
+ SpanQuery[] spanQueries4 = {new SpanMultiTermQueryWrapper<>(wildcardQuery1)};
+ expectedQuery = new SpanNearQuery(spanQueries4, 0, true);
+ Assert.assertEquals(expectedQuery, LuceneTextIndexUtils.convertToMultiTermSpanQuery(builder.build()));
+
+ // Test 5: Boolean queries without any wildcard/prefix subqueries are left unchanged.
+ builder = new BooleanQuery.Builder();
+ builder.add(new BooleanClause(new TermQuery(term), BooleanClause.Occur.SHOULD))
+ .add(new BooleanClause(new TermQuery(term), BooleanClause.Occur.SHOULD));
+ BooleanQuery q = builder.build();
+ Assert.assertEquals(q, LuceneTextIndexUtils.convertToMultiTermSpanQuery(q));
+ }
+
+ @Test
+ public void testQueryIsNotRewritten() {
+ // Test 1: Term query is not re-written.
+ TermQuery termQuery = new TermQuery(new Term("field", "real"));
+ Assert.assertEquals(termQuery, LuceneTextIndexUtils.convertToMultiTermSpanQuery(termQuery));
+ // Test 2: Regex query is not re-written.
+ RegexpQuery regexpQuery = new RegexpQuery(new Term("field", "\\d+"));
+ Assert.assertEquals(regexpQuery, LuceneTextIndexUtils.convertToMultiTermSpanQuery(regexpQuery));
+ }
+}
diff --git a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/TextIndexConfig.java b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/TextIndexConfig.java
index 0b31e70e1ef..afbf7eb876a 100644
--- a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/TextIndexConfig.java
+++ b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/TextIndexConfig.java
@@ -37,7 +37,8 @@ public class TextIndexConfig extends IndexConfig {
private static final boolean LUCENE_INDEX_DEFAULT_USE_COMPOUND_FILE = true;
public static final TextIndexConfig DISABLED =
new TextIndexConfig(true, null, null, false, false, Collections.emptyList(), Collections.emptyList(), false,
- LUCENE_INDEX_DEFAULT_MAX_BUFFER_SIZE_MB, null);
+ LUCENE_INDEX_DEFAULT_MAX_BUFFER_SIZE_MB, null, false);
+ private static final boolean LUCENE_INDEX_ENABLE_PREFIX_SUFFIX_MATCH_IN_PHRASE_SEARCH = false;
private final FSTType _fstType;
@Nullable
private final Object _rawValueForTextIndex;
@@ -48,6 +49,7 @@ public class TextIndexConfig extends IndexConfig {
private final boolean _luceneUseCompoundFile;
private final int _luceneMaxBufferSizeMB;
private final String _luceneAnalyzerClass;
+ private final boolean _enablePrefixSuffixMatchingInPhraseQueries;
@JsonCreator
public TextIndexConfig(@JsonProperty("disabled") Boolean disabled, @JsonProperty("fst") FSTType fstType,
@@ -58,7 +60,8 @@ public TextIndexConfig(@JsonProperty("disabled") Boolean disabled, @JsonProperty
@JsonProperty("stopWordsExclude") List stopWordsExclude,
@JsonProperty("luceneUseCompoundFile") Boolean luceneUseCompoundFile,
@JsonProperty("luceneMaxBufferSizeMB") Integer luceneMaxBufferSizeMB,
- @JsonProperty("luceneAnalyzerClass") String luceneAnalyzerClass) {
+ @JsonProperty("luceneAnalyzerClass") String luceneAnalyzerClass,
+ @JsonProperty("enablePrefixSuffixMatchingInPhraseQueries") Boolean enablePrefixSuffixMatchingInPhraseQueries) {
super(disabled);
_fstType = fstType;
_rawValueForTextIndex = rawValueForTextIndex;
@@ -72,6 +75,9 @@ public TextIndexConfig(@JsonProperty("disabled") Boolean disabled, @JsonProperty
luceneMaxBufferSizeMB == null ? LUCENE_INDEX_DEFAULT_MAX_BUFFER_SIZE_MB : luceneMaxBufferSizeMB;
_luceneAnalyzerClass = (luceneAnalyzerClass == null || luceneAnalyzerClass.isEmpty())
? FieldConfig.TEXT_INDEX_DEFAULT_LUCENE_ANALYZER_CLASS : luceneAnalyzerClass;
+ _enablePrefixSuffixMatchingInPhraseQueries =
+ enablePrefixSuffixMatchingInPhraseQueries == null ? LUCENE_INDEX_ENABLE_PREFIX_SUFFIX_MATCH_IN_PHRASE_SEARCH
+ : enablePrefixSuffixMatchingInPhraseQueries;
}
public FSTType getFstType() {
@@ -125,6 +131,16 @@ public String getLuceneAnalyzerClass() {
return _luceneAnalyzerClass;
}
+ /**
+ * Whether to enable prefix and suffix wildcard term matching (i.e., .*value for prefix and value.* for suffix
+ * term matching) in a phrase query. By default, Pinot today treats .* in a phrase query like ".*value str1 value.*"
+ * as literal. If this flag is enabled, .*value will be treated as suffix matching and value.* will be treated as
+ * prefix matching.
+ */
+ public boolean isEnablePrefixSuffixMatchingInPhraseQueries() {
+ return _enablePrefixSuffixMatchingInPhraseQueries;
+ }
+
public static abstract class AbstractBuilder {
@Nullable
protected FSTType _fstType;
@@ -137,6 +153,7 @@ public static abstract class AbstractBuilder {
protected boolean _luceneUseCompoundFile = LUCENE_INDEX_DEFAULT_USE_COMPOUND_FILE;
protected int _luceneMaxBufferSizeMB = LUCENE_INDEX_DEFAULT_MAX_BUFFER_SIZE_MB;
protected String _luceneAnalyzerClass = FieldConfig.TEXT_INDEX_DEFAULT_LUCENE_ANALYZER_CLASS;
+ protected boolean _enablePrefixSuffixMatchingInPhraseQueries = false;
public AbstractBuilder(@Nullable FSTType fstType) {
_fstType = fstType;
@@ -151,11 +168,13 @@ public AbstractBuilder(TextIndexConfig other) {
_luceneUseCompoundFile = other._luceneUseCompoundFile;
_luceneMaxBufferSizeMB = other._luceneMaxBufferSizeMB;
_luceneAnalyzerClass = other._luceneAnalyzerClass;
+ _enablePrefixSuffixMatchingInPhraseQueries = other._enablePrefixSuffixMatchingInPhraseQueries;
}
public TextIndexConfig build() {
return new TextIndexConfig(false, _fstType, _rawValueForTextIndex, _enableQueryCache, _useANDForMultiTermQueries,
- _stopWordsInclude, _stopWordsExclude, _luceneUseCompoundFile, _luceneMaxBufferSizeMB, _luceneAnalyzerClass);
+ _stopWordsInclude, _stopWordsExclude, _luceneUseCompoundFile, _luceneMaxBufferSizeMB, _luceneAnalyzerClass,
+ _enablePrefixSuffixMatchingInPhraseQueries);
}
public abstract AbstractBuilder withProperties(@Nullable Map textIndexProperties);
@@ -189,6 +208,12 @@ public AbstractBuilder withLuceneAnalyzerClass(String luceneAnalyzerClass) {
_luceneAnalyzerClass = luceneAnalyzerClass;
return this;
}
+
+ public AbstractBuilder withEnablePrefixSuffixMatchingInPhraseQueries(
+ boolean enablePrefixSuffixMatchingInPhraseQueries) {
+ _enablePrefixSuffixMatchingInPhraseQueries = enablePrefixSuffixMatchingInPhraseQueries;
+ return this;
+ }
}
@Override
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/FieldConfig.java b/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/FieldConfig.java
index 201edeb39aa..8a01646da99 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/FieldConfig.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/FieldConfig.java
@@ -61,6 +61,8 @@ public class FieldConfig extends BaseJsonConfig {
// Config to disable forward index
public static final String FORWARD_INDEX_DISABLED = "forwardIndexDisabled";
public static final String DEFAULT_FORWARD_INDEX_DISABLED = Boolean.FALSE.toString();
+ public static final String TEXT_INDEX_ENABLE_PREFIX_SUFFIX_PHRASE_QUERIES =
+ "enablePrefixSuffixMatchingInPhraseQueries";
private final String _name;
private final EncodingType _encodingType;
From e71c3dc0233f086425f5a3bcc52e84ee855d7762 Mon Sep 17 00:00:00 2001
From: sullis
Date: Mon, 1 Apr 2024 11:43:56 -0700
Subject: [PATCH 22/50] upgrade to slf4j 2.0.12 (#12761)
---
pom.xml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/pom.xml b/pom.xml
index fc4f5c7648a..35f6de5315f 100644
--- a/pom.xml
+++ b/pom.xml
@@ -156,7 +156,7 @@
1.5.5-111.8.02.23.1
- 2.0.9
+ 2.0.124.1.108.Final1.0.41.19.0
From 4f0bc11b95fa26df3fac8efe9845e4d71f7351dd Mon Sep 17 00:00:00 2001
From: "Xiaotian (Jackie) Jiang"
<17555551+Jackie-Jiang@users.noreply.github.com>
Date: Mon, 1 Apr 2024 15:05:25 -0700
Subject: [PATCH 23/50] Cleanup Javax and Jakarta dependencies (#12760)
---
pinot-broker/pom.xml | 35 ---
pinot-common/pom.xml | 37 ++-
pinot-compatibility-verifier/pom.xml | 6 -
.../pinot-flink-connector/pom.xml | 8 -
.../pinot-spark-3-connector/pom.xml | 15 -
pinot-controller/pom.xml | 45 +--
pinot-core/pom.xml | 79 +----
pinot-distribution/pom.xml | 8 -
pinot-integration-test-base/pom.xml | 4 -
pinot-integration-tests/pom.xml | 12 -
pinot-minion/pom.xml | 4 -
.../pinot-batch-ingestion-spark-2.4/pom.xml | 16 +-
.../pinot-batch-ingestion-spark-3/pom.xml | 16 +-
.../pinot-file-system/pinot-s3/pom.xml | 6 -
.../pinot-kafka-2.0/pom.xml | 4 -
.../pinot-pulsar/pom.xml | 49 +---
.../SerializedFrequentLongsSketch.java | 3 +-
.../SerializedFrequentStringsSketch.java | 3 +-
pinot-server/pom.xml | 63 ----
pinot-spi/pom.xml | 26 ++
pinot-tools/pom.xml | 12 -
pom.xml | 275 +++++++++++-------
22 files changed, 233 insertions(+), 493 deletions(-)
diff --git a/pinot-broker/pom.xml b/pinot-broker/pom.xml
index ccc20628d42..f3fd18485a3 100644
--- a/pinot-broker/pom.xml
+++ b/pinot-broker/pom.xml
@@ -48,45 +48,10 @@
pinot-query-runtime
-
-
- org.glassfish.jersey.containers
- jersey-container-grizzly2-http
-
-
- org.glassfish.jersey.inject
- jersey-hk2
-
-
- org.glassfish.jersey.media
- jersey-media-json-jackson
-
-
- org.glassfish.jersey.core
- jersey-common
-
-
- io.swagger
- swagger-jaxrs
-
-
- javax.ws.rs
- jsr311-api
-
-
-
-
- io.swagger
- swagger-jersey2-jaxrs
- com.jcabijcabi-log
-
- org.glassfish.hk2
- hk2-locator
- com.fasterxml.jackson.corejackson-databind
diff --git a/pinot-common/pom.xml b/pinot-common/pom.xml
index f9a3f64c415..396a7d676fa 100644
--- a/pinot-common/pom.xml
+++ b/pinot-common/pom.xml
@@ -217,6 +217,34 @@
org.apache.calcitecalcite-babel
+
+ org.glassfish.jersey.core
+ jersey-server
+
+
+ org.glassfish.jersey.containers
+ jersey-container-grizzly2-http
+
+
+ org.glassfish.jersey.media
+ jersey-media-multipart
+
+
+ org.glassfish.jersey.media
+ jersey-media-json-jackson
+
+
+ org.glassfish.jersey.inject
+ jersey-hk2
+
+
+ org.glassfish.hk2
+ hk2-metadata-generator
+
+
+ io.swagger
+ swagger-jersey2-jaxrs
+ org.testngtestng
@@ -312,15 +340,6 @@
org.apache.zookeeperzookeeper
-
- javax.servlet
- javax.servlet-api
- compile
-
-
- org.glassfish.jersey.core
- jersey-server
- org.reflectionsreflections
diff --git a/pinot-compatibility-verifier/pom.xml b/pinot-compatibility-verifier/pom.xml
index e93397675da..cbccbda4185 100644
--- a/pinot-compatibility-verifier/pom.xml
+++ b/pinot-compatibility-verifier/pom.xml
@@ -94,12 +94,6 @@
pinot-controller${project.version}test-jar
-
-
- jakarta.activation
- jakarta.activation-api
-
-
diff --git a/pinot-connectors/pinot-flink-connector/pom.xml b/pinot-connectors/pinot-flink-connector/pom.xml
index 5a0e15e26e3..a69cf4ad986 100644
--- a/pinot-connectors/pinot-flink-connector/pom.xml
+++ b/pinot-connectors/pinot-flink-connector/pom.xml
@@ -41,14 +41,6 @@
com.google.guavaguava
-
- org.glassfish.jersey.inject
- jersey-hk2
-
-
- org.glassfish.jersey.media
- jersey-media-json-jackson
- org.apache.flinkflink-streaming-java_${scala.compat.version}
diff --git a/pinot-connectors/pinot-spark-3-connector/pom.xml b/pinot-connectors/pinot-spark-3-connector/pom.xml
index 6e78532fc28..6e53637cb27 100644
--- a/pinot-connectors/pinot-spark-3-connector/pom.xml
+++ b/pinot-connectors/pinot-spark-3-connector/pom.xml
@@ -200,24 +200,9 @@
-
- org.apache.pinot
- pinot-common
-
-
- org.apache.pinot
- pinot-core
- org.apache.pinotpinot-spark-common
-
- test
- javax.servlet
- javax.servlet-api
- 3.0.1
-
-
diff --git a/pinot-controller/pom.xml b/pinot-controller/pom.xml
index b2b52171a06..e0c750631c8 100644
--- a/pinot-controller/pom.xml
+++ b/pinot-controller/pom.xml
@@ -55,12 +55,6 @@
org.apache.pinotpinot-servertest
-
-
- javax.servlet
- servlet-api
-
- org.apache.pinot
@@ -94,34 +88,7 @@
test-jartest
-
- org.glassfish.jersey.containers
- jersey-container-grizzly2-http
-
-
- org.glassfish.jersey.core
- jersey-server
-
-
- org.glassfish.jersey.inject
- jersey-hk2
-
-
- org.glassfish.jersey.media
- jersey-media-multipart
-
-
- org.glassfish.jersey.core
- jersey-common
-
-
- org.glassfish.jersey.media
- jersey-media-json-jackson
-
-
- io.swagger
- swagger-jersey2-jaxrs
-
+
com.fasterxml.jackson.corejackson-annotations
@@ -134,16 +101,6 @@
com.fasterxml.jackson.corejackson-core
-
- io.swagger
- swagger-jaxrs
-
-
- javax.ws.rs
- jsr311-api
-
-
- org.slf4jjcl-over-slf4j
diff --git a/pinot-core/pom.xml b/pinot-core/pom.xml
index d782c39659a..bd6217c2410 100644
--- a/pinot-core/pom.xml
+++ b/pinot-core/pom.xml
@@ -36,18 +36,6 @@
-
- com.yscope.clp
- clp-ffi
-
-
- com.uber
- h3
-
-
- org.roaringbitmap
- RoaringBitmap
- org.apache.pinotpinot-spi
@@ -64,24 +52,7 @@
org.apache.pinotpinot-common
-
- joda-time
- joda-time
-
-
- org.slf4j
- jcl-over-slf4j
-
-
+
io.nettynetty-transport-native-epoll
@@ -126,54 +97,6 @@
io.nettynetty-all
-
- org.slf4j
- slf4j-api
-
-
- com.clearspring.analytics
- stream
-
-
- org.apache.datasketches
- datasketches-java
-
-
- com.dynatrace.hash4j
- hash4j
-
-
- com.tdunning
- t-digest
-
-
- org.xerial.larray
- larray-mmap
-
-
- net.sf.jopt-simple
- jopt-simple
-
-
- com.jayway.jsonpath
- json-path
-
-
- org.locationtech.jts
- jts-core
-
-
- org.glassfish.jersey.containers
- jersey-container-grizzly2-http
-
-
- org.glassfish.grizzly
- grizzly-http-server
-
-
- org.glassfish.hk2
- hk2-locator
-
diff --git a/pinot-distribution/pom.xml b/pinot-distribution/pom.xml
index a9bb9f5ecd8..540420cc22e 100644
--- a/pinot-distribution/pom.xml
+++ b/pinot-distribution/pom.xml
@@ -114,14 +114,6 @@
-
- javax.servlet
- javax.servlet-api
-
-
- javax.activation
- activation
-
diff --git a/pinot-integration-test-base/pom.xml b/pinot-integration-test-base/pom.xml
index 6f07a8f5d4d..1aba58b3fd8 100644
--- a/pinot-integration-test-base/pom.xml
+++ b/pinot-integration-test-base/pom.xml
@@ -149,10 +149,6 @@
org.testngtestng
-
- javax.servlet
- javax.servlet-api
- com.h2databaseh2
diff --git a/pinot-integration-tests/pom.xml b/pinot-integration-tests/pom.xml
index 3280f652cab..995e423c666 100644
--- a/pinot-integration-tests/pom.xml
+++ b/pinot-integration-tests/pom.xml
@@ -202,14 +202,6 @@
com.101teczkclient
-
- org.glassfish.hk2
- hk2-locator
-
-
- org.glassfish.hk2
- hk2-metadata-generator
- org.apache.pinotpinot-server
@@ -330,10 +322,6 @@
org.testngtestng
-
- javax.servlet
- javax.servlet-api
- com.h2databaseh2
diff --git a/pinot-minion/pom.xml b/pinot-minion/pom.xml
index bdb7ed1b46a..ffd11f307b0 100644
--- a/pinot-minion/pom.xml
+++ b/pinot-minion/pom.xml
@@ -76,9 +76,5 @@
pinot-yammertest
-
- io.swagger
- swagger-jersey2-jaxrs
-
diff --git a/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-spark-2.4/pom.xml b/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-spark-2.4/pom.xml
index 07cc979ccfa..45568ae319a 100644
--- a/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-spark-2.4/pom.xml
+++ b/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-spark-2.4/pom.xml
@@ -54,8 +54,8 @@
provided
- org.scala-lang
- scala-library
+ com.zaxxer
+ HikariCP-java7com.twitter
@@ -77,18 +77,6 @@
org.slf4jslf4j-log4j12
-
- com.zaxxer
- HikariCP-java7
-
-
- org.glassfish.hk2.external
- jakarta.inject
-
-
- jakarta.ws.rs
- jakarta.ws.rs-api
-
diff --git a/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-spark-3/pom.xml b/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-spark-3/pom.xml
index f50b384ee11..fd36cd868ce 100644
--- a/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-spark-3/pom.xml
+++ b/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-spark-3/pom.xml
@@ -52,8 +52,8 @@
provided
- org.scala-lang
- scala-library
+ com.zaxxer
+ HikariCP-java7com.twitter
@@ -75,18 +75,6 @@
org.slf4jslf4j-log4j12
-
- com.zaxxer
- HikariCP-java7
-
-
- org.glassfish.hk2.external
- jakarta.inject
-
-
- jakarta.ws.rs
- jakarta.ws.rs-api
- commons-loggingcommons-logging
diff --git a/pinot-plugins/pinot-file-system/pinot-s3/pom.xml b/pinot-plugins/pinot-file-system/pinot-s3/pom.xml
index 428138fc28a..b0322f92b07 100644
--- a/pinot-plugins/pinot-file-system/pinot-s3/pom.xml
+++ b/pinot-plugins/pinot-file-system/pinot-s3/pom.xml
@@ -38,7 +38,6 @@
4.5.144.4.132.12.2
- 3.1.0package
@@ -135,11 +134,6 @@
${s3mock.version}test
-
- javax.servlet
- javax.servlet-api
- ${javax.version}
- com.fasterxml.woodstoxwoodstox-core
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kafka-2.0/pom.xml b/pinot-plugins/pinot-stream-ingestion/pinot-kafka-2.0/pom.xml
index 71567e099df..533e18f90e2 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kafka-2.0/pom.xml
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kafka-2.0/pom.xml
@@ -59,10 +59,6 @@
net.sf.jopt-simplejopt-simple
-
- org.scala-lang
- scala-library
-
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-pulsar/pom.xml b/pinot-plugins/pinot-stream-ingestion/pinot-pulsar/pom.xml
index 4ccd6b1d0c3..e9d4696e71f 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-pulsar/pom.xml
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-pulsar/pom.xml
@@ -37,16 +37,11 @@
package${basedir}/../../..
- 3.1.0
- 2.1
- 2.390.16.0
- 2.29.01.60.1
+ 1.62.22.6.21.17
- 1.2
- 1.62.2
@@ -60,10 +55,6 @@
org.apache.pulsarpulsar-client-original
-
- javax.ws.rs
- javax.ws.rs-api
- commons-configurationcommons-configuration
@@ -87,29 +78,16 @@
pulsar-client-admin-original
- javax.servlet
- javax.servlet-api
- ${javax.servlet-api.version}
-
-
- javax.ws.rs
- javax.ws.rs-api
- ${javax.ws.rs-api.version}
+ org.glassfish.jersey.core
+ jersey-serverorg.glassfish.jersey.containersjersey-container-grizzly2-http
- ${jersey-container-grizzly2-http.version}
-
-
- org.glassfish.jersey.core
- jersey-server
- ${jersey-container-grizzly2-http.version}org.glassfish.jersey.containersjersey-container-servlet-core
- ${jersey-container-grizzly2-http.version}io.netty
@@ -129,6 +107,11 @@
grpc-context${grpc-context.version}
+
+ io.grpc
+ grpc-protobuf-lite
+ ${grpc-protobuf-lite.version}
+ io.prometheussimpleclient
@@ -151,22 +134,6 @@
simpleclient_hotspot${simpleclient_common.version}
-
- io.grpc
- grpc-protobuf-lite
- ${grpc-protobuf-lite.version}
-
-
- io.grpc
- grpc-context
-
-
-
-
- javax.annotation
- javax.annotation-api
- ${javax.annotation-api.version}
- org.codehaus.mojoanimal-sniffer-annotations
diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/customobject/SerializedFrequentLongsSketch.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/customobject/SerializedFrequentLongsSketch.java
index 53124e473b3..203515c67bb 100644
--- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/customobject/SerializedFrequentLongsSketch.java
+++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/customobject/SerializedFrequentLongsSketch.java
@@ -19,7 +19,6 @@
package org.apache.pinot.segment.local.customobject;
import java.util.Base64;
-import javax.validation.constraints.NotNull;
import org.apache.datasketches.frequencies.LongsSketch;
@@ -31,7 +30,7 @@ public SerializedFrequentLongsSketch(LongsSketch sketch) {
}
@Override
- public int compareTo(@NotNull LongsSketch other) {
+ public int compareTo(LongsSketch other) {
// There is no well-defined ordering for these sketches
// numActiveItems is just a placeholder, which can be changed later
return _sketch.getNumActiveItems() - other.getNumActiveItems();
diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/customobject/SerializedFrequentStringsSketch.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/customobject/SerializedFrequentStringsSketch.java
index 40f89bc83df..040692a553f 100644
--- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/customobject/SerializedFrequentStringsSketch.java
+++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/customobject/SerializedFrequentStringsSketch.java
@@ -19,7 +19,6 @@
package org.apache.pinot.segment.local.customobject;
import java.util.Base64;
-import javax.validation.constraints.NotNull;
import org.apache.datasketches.common.ArrayOfStringsSerDe;
import org.apache.datasketches.frequencies.ItemsSketch;
@@ -31,7 +30,7 @@ public SerializedFrequentStringsSketch(ItemsSketch sketch) {
}
@Override
- public int compareTo(@NotNull ItemsSketch other) {
+ public int compareTo(ItemsSketch other) {
// There is no well-defined ordering for these sketches
// numActiveItems is just a placeholder, which can be changed later
return _sketch.getNumActiveItems() - other.getNumActiveItems();
diff --git a/pinot-server/pom.xml b/pinot-server/pom.xml
index 39c95e01a2e..b39ec13324d 100644
--- a/pinot-server/pom.xml
+++ b/pinot-server/pom.xml
@@ -74,11 +74,6 @@
testngtest
-
- javax.servlet
- javax.servlet-api
- compile
- org.apache.helixhelix-core
@@ -91,54 +86,6 @@
com.jcabijcabi-log
-
- org.glassfish.jersey.containers
- jersey-container-grizzly2-http
-
-
- org.glassfish.jersey.core
- jersey-server
-
-
- org.javassist
- javassist
-
-
-
-
- org.javassist
- javassist
-
-
- org.glassfish.jersey.core
- jersey-common
-
-
- org.glassfish.jersey.inject
- jersey-hk2
-
-
- org.glassfish.jersey.media
- jersey-media-json-jackson
-
-
- io.swagger
- swagger-jersey2-jaxrs
-
-
- javax.ws.rs
- javax.ws.rs-api
-
-
- org.glassfish.hk2.external
- javax.inject
-
-
-
-
- javax.ws.rs
- javax.ws.rs-api
- com.fasterxml.jackson.corejackson-annotations
@@ -151,16 +98,6 @@
com.fasterxml.jackson.corejackson-databind
-
- io.swagger
- swagger-jaxrs
-
-
- javax.ws.rs
- jsr311-api
-
-
- org.webjarsswagger-ui
diff --git a/pinot-spi/pom.xml b/pinot-spi/pom.xml
index 99ee1929054..d1c76c9e888 100644
--- a/pinot-spi/pom.xml
+++ b/pinot-spi/pom.xml
@@ -90,6 +90,32 @@
commons-math
+
+
+ jakarta.servlet
+ jakarta.servlet-api
+
+
+ jakarta.ws.rs
+ jakarta.ws.rs-api
+
+
+
+
+ javax.servlet
+ javax.servlet-api
+
+
+
+ javax.ws.rs
+ javax.ws.rs-api
+
+
+
+ javax.annotation
+ javax.annotation-api
+
+
org.slf4jjcl-over-slf4j
diff --git a/pinot-tools/pom.xml b/pinot-tools/pom.xml
index a47c828f7b4..115a79d6d26 100644
--- a/pinot-tools/pom.xml
+++ b/pinot-tools/pom.xml
@@ -151,10 +151,6 @@
com.google.errorproneerror_prone_annotations
-
- javax.annotation
- javax.annotation-api
- org.codehaus.mojoanimal-sniffer-annotations
@@ -179,10 +175,6 @@
io.grpcgrpc-context
-
- jakarta.activation
- jakarta.activation-api
- com.typesafe.nettynetty-reactive-streams
@@ -229,10 +221,6 @@
testngtest
-
- org.glassfish.jersey.containers
- jersey-container-grizzly2-http
- org.glassfish.tyrus.bundlestyrus-standalone-client
diff --git a/pom.xml b/pom.xml
index 35f6de5315f..bf687489111 100644
--- a/pom.xml
+++ b/pom.xml
@@ -134,13 +134,13 @@
1.11.31.13.1
+ 1.5.91.3.10.112.12.7.202210123.9.22.12.32.39
- 2.4.42.6.11.6.93.3.6
@@ -189,6 +189,25 @@
3.2.22.2
+
+ 6.0.0
+ 3.0.2
+ 2.1.1
+ 4.0.2
+ 3.1.0
+ 2.1.3
+ 3.1.1
+
+ 4.0.1
+ 2.0.1.Final
+ 1.3.2
+ 2.3.1
+ 1.0-2
+ 2.1.1
+ 1.1.1
+ 1.1.1
+ 2.2
+
26.34.01.23.0
@@ -496,11 +515,26 @@
avro${avro.version}
+
+ org.apache.avro
+ avro-mapred
+ ${avro.version}
+ org.apache.parquetparquet-avro${parquet.version}
+
+ org.apache.orc
+ orc-core
+ ${orc.version}
+
+
+ org.apache.orc
+ orc-mapreduce
+ ${orc.version}
+ org.xerial.snappysnappy-java
@@ -521,32 +555,11 @@
libthrift0.15.0
-
- javax.servlet
- javax.servlet-api
- 3.0.1
- compile
-
-
- javax.ws.rs
- javax.ws.rs-api
- 2.0.1
- org.quartz-schedulerquartz${quartz.version}
-
- javax.validation
- validation-api
- 2.0.1.Final
-
-
- javax.activation
- activation
- 1.1.1
- org.apache.helixhelix-core
@@ -743,6 +756,89 @@
${commons-math.version}
+
+
+ jakarta.servlet
+ jakarta.servlet-api
+ ${jakarta.servlet-api.version}
+
+
+ jakarta.validation
+ jakarta.validation-api
+ ${jakarta.validation-api.version}
+
+
+ jakarta.annotation
+ jakarta.annotation-api
+ ${jakarta.annotation-api.version}
+
+
+ jakarta.xml.bind
+ jakarta.xml.bind-api
+ ${jakarta.xml.bind-api.version}
+
+
+ jakarta.ws.rs
+ jakarta.ws.rs-api
+ ${jakarta.ws.rs-api.version}
+
+
+ jakarta.activation
+ jakarta.activation-api
+ ${jakarta.activation-api.version}
+
+
+ jakarta.servlet.jsp
+ jakarta.servlet.jsp-api
+ ${jakarta.servlet.jsp-api.version}
+
+
+
+ javax.servlet
+ javax.servlet-api
+ ${javax.servlet-api.version}
+
+
+ javax.validation
+ validation-api
+ ${javax.validation-api.version}
+
+
+ javax.annotation
+ javax.annotation-api
+ ${javax.annotation-api.version}
+
+
+ javax.xml.bind
+ jaxb-api
+ ${javax.jaxb-api.version}
+
+
+ javax.xml.bind
+ stax-api
+ ${javax.stax-api.version}
+
+
+ javax.ws.rs
+ javax.ws.rs-api
+ ${javax.ws.rs-api.version}
+
+
+ javax.ws.rs
+ jsr311-api
+ ${javax.jsr311-api.version}
+
+
+ javax.activation
+ activation
+ ${javax.activation.version}
+
+
+ javax.servlet.jsp
+ javax.servlet.jsp-api
+ ${javax.jsp-api.version}
+
+
com.google.cloud
@@ -956,6 +1052,22 @@
org.eclipse.jettyjetty-util
+
+ com.sun.jersey
+ jersey-core
+
+
+ com.sun.jersey
+ jersey-client
+
+
+ com.sun.jersey
+ jersey-server
+
+
+ com.sun.jersey.contribs
+ jersey-guice
+ commons-loggingcommons-logging
@@ -978,12 +1090,6 @@
reload4j1.2.25
-
-
- javax.xml.bind
- jaxb-api
- 2.3.1
- org.apache.kerby
@@ -1026,12 +1132,6 @@
org.eclipse.jettyjetty-server${eclipse.jetty.version}
-
-
- javax.servlet
- javax.servlet-api
-
- org.eclipse.jetty
@@ -1088,21 +1188,6 @@
${dropwizard-metrics.version}
-
- org.apache.orc
- orc-core
- 1.5.9
-
-
- org.apache.avro
- avro-mapred
- ${avro.version}
-
-
- org.apache.orc
- orc-mapreduce
- 1.5.9
- org.webjarsswagger-ui
@@ -1181,35 +1266,16 @@
jcabi-log0.24.1
-
- org.glassfish.jersey.containers
- jersey-container-grizzly2-http
- ${jersey.version}
-
-
- org.glassfish.grizzly
- grizzly-http-server
- ${grizzly.version}
-
-
- org.glassfish.jersey.core
- jersey-server
- ${jersey.version}
-
-
- org.javassist
- javassist
-
-
- org.javassistjavassist3.19.0-GA
+
+
org.glassfish.jersey.core
- jersey-common
+ jersey-server${jersey.version}
@@ -1218,39 +1284,19 @@
${jersey.version}
- org.glassfish.jersey.inject
- jersey-hk2
+ org.glassfish.jersey.core
+ jersey-common${jersey.version}
- org.glassfish.hk2
- hk2-locator
- ${hk2.version}
-
-
- jakarta.annotation
- jakarta.annotation-api
-
-
- jakarta.ws.rs
- jakarta.ws.rs-api
-
-
+ org.glassfish.jersey.containers
+ jersey-container-grizzly2-http
+ ${jersey.version}
- org.glassfish.hk2
- hk2-metadata-generator
- ${hk2.version}
-
-
- jakarta.annotation
- jakarta.annotation-api
-
-
- jakarta.ws.rs
- jakarta.ws.rs-api
-
-
+ org.glassfish.jersey.containers
+ jersey-container-servlet-core
+ ${jersey.version}org.glassfish.jersey.media
@@ -1263,25 +1309,26 @@
${jersey.version}
- io.swagger
- swagger-jaxrs
- ${swagger.version}
+ org.glassfish.jersey.inject
+ jersey-hk2
+ ${jersey.version}
+
+
+ org.glassfish.hk2
+ hk2-locator
+ ${hk2.version}
+
+
+ org.glassfish.hk2
+ hk2-metadata-generator
+ ${hk2.version}io.swaggerswagger-jersey2-jaxrs${swagger.version}
-
-
- javax.ws.rs
- jsr311-api
-
-
- org.glassfish.hk2.external
- javax.inject
-
-
+
org.apache.maven.surefiresurefire-testng
@@ -1708,7 +1755,11 @@
+
commons-logging:commons-logging
+
+ javax.inject:javax.inject
+ jakarta.inject:jakarta.inject-api
From 08fc2c7d62ba17d64b6cb680017e29b4480c8f22 Mon Sep 17 00:00:00 2001
From: sullis
Date: Mon, 1 Apr 2024 17:39:09 -0700
Subject: [PATCH 24/50] upgrade lmax disruptor to 4.0.0 (#12769)
---
pom.xml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/pom.xml b/pom.xml
index bf687489111..a9649153685 100644
--- a/pom.xml
+++ b/pom.xml
@@ -629,7 +629,7 @@
com.lmaxdisruptor
- 3.3.4
+ 4.0.0org.asynchttpclient
From 4abb2d18f733781539d2d72ab75e1bb03c197489 Mon Sep 17 00:00:00 2001
From: Erich <134291879+ege-st@users.noreply.github.com>
Date: Tue, 2 Apr 2024 03:37:57 -0400
Subject: [PATCH 25/50] Set column major builder to be on by default (#12770)
---
.../java/org/apache/pinot/spi/config/table/IndexingConfig.java | 2 +-
.../pinot/spi/config/table/ingestion/StreamIngestionConfig.java | 2 +-
.../org/apache/pinot/spi/utils/builder/TableConfigBuilder.java | 2 +-
3 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/IndexingConfig.java b/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/IndexingConfig.java
index a433c845cad..ce5bc79dda7 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/IndexingConfig.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/IndexingConfig.java
@@ -62,7 +62,7 @@ public class IndexingConfig extends BaseJsonConfig {
private SegmentPartitionConfig _segmentPartitionConfig;
private boolean _aggregateMetrics;
private boolean _nullHandlingEnabled;
- private boolean _columnMajorSegmentBuilderEnabled = false;
+ private boolean _columnMajorSegmentBuilderEnabled = true;
/**
* If `optimizeDictionary` enabled, dictionary is not created for the high-cardinality
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/ingestion/StreamIngestionConfig.java b/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/ingestion/StreamIngestionConfig.java
index 365911ee69f..2d832dd4b2f 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/ingestion/StreamIngestionConfig.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/ingestion/StreamIngestionConfig.java
@@ -35,7 +35,7 @@ public class StreamIngestionConfig extends BaseJsonConfig {
private final List