From df38aa6a38f1e4712778ffd96ce9582c2670ca28 Mon Sep 17 00:00:00 2001 From: Chaitanya Deepthi <45308220+deepthi912@users.noreply.github.com> Date: Fri, 12 Apr 2024 01:09:59 -0400 Subject: [PATCH 001/102] swagger-ui upgrade to 5.15.0 Fixes (#12908) --- .../main/java/org/apache/pinot/spi/utils/CommonConstants.java | 2 +- pom.xml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/utils/CommonConstants.java b/pinot-spi/src/main/java/org/apache/pinot/spi/utils/CommonConstants.java index a59948c5f985..bbf3b30342fa 100644 --- a/pinot-spi/src/main/java/org/apache/pinot/spi/utils/CommonConstants.java +++ b/pinot-spi/src/main/java/org/apache/pinot/spi/utils/CommonConstants.java @@ -51,7 +51,7 @@ private CommonConstants() { "org.apache.pinot.spi.eventlistener.query.NoOpBrokerQueryEventListener"; public static final String SWAGGER_AUTHORIZATION_KEY = "oauth"; - public static final String CONFIG_OF_SWAGGER_RESOURCES_PATH = "META-INF/resources/webjars/swagger-ui/5.13.0/"; + public static final String CONFIG_OF_SWAGGER_RESOURCES_PATH = "META-INF/resources/webjars/swagger-ui/5.15.0/"; public static final String CONFIG_OF_TIMEZONE = "pinot.timezone"; public static final String DATABASE = "database"; diff --git a/pom.xml b/pom.xml index 255f13e13911..85d282e65e08 100644 --- a/pom.xml +++ b/pom.xml @@ -144,7 +144,7 @@ 2.6.1 3.30.2-GA 1.6.14 - 5.13.0 + 5.15.0 3.3.6 2.9.0 2.5.1 From 4c514725af9a6acc1ea92bc820d5f669b074103c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 12 Apr 2024 13:53:34 -0700 Subject: [PATCH 002/102] Bump javax.servlet.jsp:javax.servlet.jsp-api from 2.2 to 2.3.3 (#12919) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 85d282e65e08..01e08307015b 100644 --- a/pom.xml +++ b/pom.xml @@ -210,7 +210,7 @@ 2.1.1 1.1.1 1.1.1 - 2.2 + 2.3.3 4.5.14 From 2c88d09466a6e9c11d5c35be202691871534dca1 Mon 
Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 12 Apr 2024 13:53:54 -0700 Subject: [PATCH 003/102] Bump org.apache.maven.plugins:maven-gpg-plugin from 3.2.2 to 3.2.3 (#12918) --- pinot-connectors/pinot-spark-2-connector/pom.xml | 2 +- pinot-connectors/pinot-spark-3-connector/pom.xml | 2 +- pinot-connectors/pinot-spark-common/pom.xml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pinot-connectors/pinot-spark-2-connector/pom.xml b/pinot-connectors/pinot-spark-2-connector/pom.xml index 14904aad2884..c2a0ea3f4f5d 100644 --- a/pinot-connectors/pinot-spark-2-connector/pom.xml +++ b/pinot-connectors/pinot-spark-2-connector/pom.xml @@ -152,7 +152,7 @@ Thus, explicitly adding this plugin to a new profile to sign the files at the end all at once. --> org.apache.maven.plugins maven-gpg-plugin - 3.2.2 + 3.2.3 diff --git a/pinot-connectors/pinot-spark-3-connector/pom.xml b/pinot-connectors/pinot-spark-3-connector/pom.xml index a39548d22019..2cf4a3fe2e0f 100644 --- a/pinot-connectors/pinot-spark-3-connector/pom.xml +++ b/pinot-connectors/pinot-spark-3-connector/pom.xml @@ -148,7 +148,7 @@ Thus, explicitly adding this plugin to a new profile to sign the files at the end all at once. --> org.apache.maven.plugins maven-gpg-plugin - 3.2.2 + 3.2.3 diff --git a/pinot-connectors/pinot-spark-common/pom.xml b/pinot-connectors/pinot-spark-common/pom.xml index a4f72ed076e4..5c8f812cf31b 100644 --- a/pinot-connectors/pinot-spark-common/pom.xml +++ b/pinot-connectors/pinot-spark-common/pom.xml @@ -163,7 +163,7 @@ Thus, explicitly adding this plugin to a new profile to sign the files at the end all at once. 
--> org.apache.maven.plugins maven-gpg-plugin - 3.2.2 + 3.2.3 From 21275580d40cc3f70eb53b2766397ef8577f3977 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 12 Apr 2024 13:54:12 -0700 Subject: [PATCH 004/102] Bump io.github.hakky54:sslcontext-kickstart-for-netty (#12917) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 01e08307015b..9abfd6067fd8 100644 --- a/pom.xml +++ b/pom.xml @@ -173,7 +173,7 @@ 3.1.12 7.10.1 6.6.2 - 8.2.0 + 8.3.4 3.14.0 From 2c22980c460bb677fb952d966acf06c713113d31 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 12 Apr 2024 13:54:30 -0700 Subject: [PATCH 005/102] Bump it.unimi.dsi:fastutil from 8.2.3 to 8.5.13 (#12916) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 9abfd6067fd8..b01a3c870668 100644 --- a/pom.xml +++ b/pom.xml @@ -492,7 +492,7 @@ it.unimi.dsi fastutil - 8.2.3 + 8.5.13 joda-time From 94b2f3f07b5dc190956a1efb4bb129eb097b9ab3 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 12 Apr 2024 13:57:36 -0700 Subject: [PATCH 006/102] Bump aws.sdk.version from 2.25.29 to 2.25.30 (#12914) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index b01a3c870668..11961af564b7 100644 --- a/pom.xml +++ b/pom.xml @@ -168,7 +168,7 @@ 0.15.0 0.4.4 4.2.2 - 2.25.29 + 2.25.30 2.12.7 3.1.12 7.10.1 From f86928d74433c48807c72f8af601f4aa19bb9449 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 12 Apr 2024 13:57:52 -0700 Subject: [PATCH 007/102] Bump com.mycila:license-maven-plugin from 4.2 to 4.3 (#12912) --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 11961af564b7..6c836c52c93a 100644 --- a/pom.xml +++ 
b/pom.xml @@ -1809,7 +1809,7 @@ com.mycila license-maven-plugin - 4.2 + 4.3 org.apache.maven.plugins @@ -2068,7 +2068,7 @@ com.mycila license-maven-plugin - 4.2 + 4.3 From 159aca6bc9ed43fef1e8fe9a06e13529e6acdaa7 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 12 Apr 2024 13:58:21 -0700 Subject: [PATCH 008/102] Bump org.codehaus.mojo:exec-maven-plugin from 3.1.0 to 3.2.0 (#12911) --- pinot-distribution/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pinot-distribution/pom.xml b/pinot-distribution/pom.xml index 540420cc22e6..5a024f142d56 100644 --- a/pinot-distribution/pom.xml +++ b/pinot-distribution/pom.xml @@ -260,7 +260,7 @@ org.codehaus.mojo exec-maven-plugin - 3.1.0 + 3.2.0 remove-build-directory From 4040a19875a33d212485720e71f1f8278857750b Mon Sep 17 00:00:00 2001 From: Jayesh Choudhary Date: Mon, 15 Apr 2024 11:27:31 +0530 Subject: [PATCH 009/102] fix(build): update node version to 16 (#12924) --- .github/workflows/pinot_compatibility_tests.yml | 4 ++-- .github/workflows/pinot_tests.yml | 4 ++-- pinot-controller/pom.xml | 4 ++-- pinot-controller/src/main/resources/.nvmrc | 1 + 4 files changed, 7 insertions(+), 6 deletions(-) create mode 100644 pinot-controller/src/main/resources/.nvmrc diff --git a/.github/workflows/pinot_compatibility_tests.yml b/.github/workflows/pinot_compatibility_tests.yml index e9d242fede91..ecbc2300e01e 100644 --- a/.github/workflows/pinot_compatibility_tests.yml +++ b/.github/workflows/pinot_compatibility_tests.yml @@ -46,12 +46,12 @@ jobs: - name: Setup node uses: actions/setup-node@v4 with: - node-version: v10.16.1 + node-version: v16.15.0 cache: 'npm' cache-dependency-path: pinot-controller/src/main/resources/package-lock.json - name: Install npm run: | - npm install -g npm@6.10.0 + npm install -g npm@8.5.5 npm --version - name: Pinot Compatibility Regression Testing if : ${{github.event_name == 'workflow_dispatch'}} diff --git 
a/.github/workflows/pinot_tests.yml b/.github/workflows/pinot_tests.yml index 1d90331719c4..2b97445d2b9a 100644 --- a/.github/workflows/pinot_tests.yml +++ b/.github/workflows/pinot_tests.yml @@ -281,12 +281,12 @@ jobs: - name: Setup node uses: actions/setup-node@v4 with: - node-version: v10.16.1 + node-version: v16.15.0 cache: 'npm' cache-dependency-path: pinot-controller/src/main/resources/package-lock.json - name: Install npm run: | - npm install -g npm@6.10.0 + npm install -g npm@8.5.5 npm --version # Step that does that actual cache save and restore - uses: actions/cache@v4 diff --git a/pinot-controller/pom.xml b/pinot-controller/pom.xml index f70dde647129..0f49f928a11f 100644 --- a/pinot-controller/pom.xml +++ b/pinot-controller/pom.xml @@ -159,8 +159,8 @@ install-node-and-npm - v10.16.1 - 6.10.0 + v16.15.0 + 8.5.5 diff --git a/pinot-controller/src/main/resources/.nvmrc b/pinot-controller/src/main/resources/.nvmrc new file mode 100644 index 000000000000..7fd023741b20 --- /dev/null +++ b/pinot-controller/src/main/resources/.nvmrc @@ -0,0 +1 @@ +v16.15.0 From 848fe9c4ac18b9e670bf947aba21db59d629e9f9 Mon Sep 17 00:00:00 2001 From: Abhishek Sharma Date: Mon, 15 Apr 2024 01:59:20 -0400 Subject: [PATCH 010/102] Added PR compatability test against release 1.1.0 (#12921) --- .github/workflows/pinot_tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pinot_tests.yml b/.github/workflows/pinot_tests.yml index 2b97445d2b9a..ce170f31900a 100644 --- a/.github/workflows/pinot_tests.yml +++ b/.github/workflows/pinot_tests.yml @@ -267,7 +267,7 @@ jobs: matrix: test_suite: [ "compatibility-verifier/sample-test-suite" ] old_commit: [ - "release-0.12.1", "release-1.0.0", "master" + "release-0.12.1", "release-1.0.0", "release-1.1.0", "master" ] name: Pinot Compatibility Regression Testing against ${{ matrix.old_commit }} on ${{ matrix.test_suite }} steps: From 2c6a84b8ec840ed679c0a98230c518522b6591dd Mon Sep 17 00:00:00 2001 From: 
Christopher Peck <27231838+itschrispeck@users.noreply.github.com> Date: Mon, 15 Apr 2024 09:34:37 -0700 Subject: [PATCH 011/102] Improved segment build time for Lucene text index realtime to offline conversion (#12744) * reuse mutable lucene index during segment conversion * realtime segment conversion only * add RealtimeSegmentConverter test for index reuse path * clarify naming * fix missed renaming * address comments, close all resources --- .../mutable/MutableSegmentImpl.java | 13 ++ .../converter/RealtimeSegmentConverter.java | 3 + .../RealtimeLuceneTextIndex.java | 13 +- .../impl/SegmentColumnarIndexCreator.java | 5 +- .../impl/SegmentIndexCreationDriverImpl.java | 45 +++++- .../impl/text/LuceneTextIndexCreator.java | 153 +++++++++++++++++- .../readers/text/LuceneTextIndexReader.java | 2 +- .../RealtimeSegmentConverterTest.java | 153 +++++++++++++++++- .../store/FilePerIndexDirectoryTest.java | 11 +- .../store/SingleFileIndexDirectoryTest.java | 8 +- .../spi/creator/IndexCreationContext.java | 48 +++++- .../segment/spi/creator/SegmentCreator.java | 3 +- .../spi/creator/SegmentGeneratorConfig.java | 9 ++ .../segment/spi/index/TextIndexConfig.java | 2 +- .../spi/index/mutable/MutableIndex.java | 8 + 15 files changed, 442 insertions(+), 34 deletions(-) diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/indexsegment/mutable/MutableSegmentImpl.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/indexsegment/mutable/MutableSegmentImpl.java index 55e0aec072ae..b336b30f2016 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/indexsegment/mutable/MutableSegmentImpl.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/indexsegment/mutable/MutableSegmentImpl.java @@ -932,6 +932,19 @@ public Object getValue(int docId, String column) { } } + /** + * Calls commit() on all mutable indexes. This is used in preparation for realtime segment conversion. 
+ * .commit() can be implemented per index to perform any required actions before using mutable segment + * artifacts to optimize immutable segment build. + */ + public void commit() { + for (IndexContainer indexContainer : _indexContainerMap.values()) { + for (MutableIndex mutableIndex : indexContainer._mutableIndexes.values()) { + mutableIndex.commit(); + } + } + } + @Override public void destroy() { _logger.info("Trying to close RealtimeSegmentImpl : {}", _segmentName); diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/converter/RealtimeSegmentConverter.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/converter/RealtimeSegmentConverter.java index ffb9bfc23f99..0bf8fe571f18 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/converter/RealtimeSegmentConverter.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/converter/RealtimeSegmentConverter.java @@ -118,6 +118,9 @@ public void build(@Nullable SegmentVersion segmentVersion, ServerMetrics serverM genConfig.setNullHandlingEnabled(_nullHandlingEnabled); genConfig.setSegmentZKPropsConfig(_segmentZKPropsConfig); + // flush any artifacts to disk to improve mutable to immutable segment conversion + _realtimeSegmentImpl.commit(); + SegmentIndexCreationDriverImpl driver = new SegmentIndexCreationDriverImpl(); try (PinotSegmentRecordReader recordReader = new PinotSegmentRecordReader()) { int[] sortedDocIds = _columnIndicesForRealtimeTable.getSortedColumn() != null diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/invertedindex/RealtimeLuceneTextIndex.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/invertedindex/RealtimeLuceneTextIndex.java index a71d2663ed74..8d2e43c8a563 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/invertedindex/RealtimeLuceneTextIndex.java 
+++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/invertedindex/RealtimeLuceneTextIndex.java @@ -78,7 +78,7 @@ public RealtimeLuceneTextIndex(String column, File segmentIndexDir, String segme // for realtime _indexCreator = new LuceneTextIndexCreator(column, new File(segmentIndexDir.getAbsolutePath() + "/" + segmentName), - false /* commitOnClose */, config); + false /* commitOnClose */, true, null, config); IndexWriter indexWriter = _indexCreator.getIndexWriter(); _searcherManager = new SearcherManager(indexWriter, false, false, null); _analyzer = _indexCreator.getIndexWriter().getConfig().getAnalyzer(); @@ -181,6 +181,17 @@ private MutableRoaringBitmap getPinotDocIds(IndexSearcher indexSearcher, Mutable return actualDocIDs; } + @Override + public void commit() { + try { + _indexCreator.getIndexWriter().commit(); + } catch (Exception e) { + LOGGER.error("Failed to commit the realtime lucene text index for column {}, exception {}", _column, + e.getMessage()); + throw new RuntimeException(e); + } + } + @Override public void close() { try { diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java index 2d7909b40729..168490635a44 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java @@ -106,7 +106,8 @@ public class SegmentColumnarIndexCreator implements SegmentCreator { @Override public void init(SegmentGeneratorConfig segmentCreationSpec, SegmentIndexCreationInfo segmentIndexCreationInfo, - TreeMap indexCreationInfoMap, Schema schema, File outDir) + TreeMap indexCreationInfoMap, Schema schema, File outDir, + @Nullable int[] 
immutableToMutableIdMap) throws Exception { _docIdCounter = 0; _config = segmentCreationSpec; @@ -158,6 +159,8 @@ public void init(SegmentGeneratorConfig segmentCreationSpec, SegmentIndexCreatio .onHeap(segmentCreationSpec.isOnHeap()) .withForwardIndexDisabled(forwardIndexDisabled) .withTextCommitOnClose(true) + .withImmutableToMutableIdMap(immutableToMutableIdMap) + .withRealtimeConversion(segmentCreationSpec.isRealtimeConversion()) .build(); //@formatter:on diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentIndexCreationDriverImpl.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentIndexCreationDriverImpl.java index e99d89c8b42c..ecfea58ca788 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentIndexCreationDriverImpl.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentIndexCreationDriverImpl.java @@ -36,6 +36,7 @@ import javax.annotation.Nullable; import org.apache.commons.collections.CollectionUtils; import org.apache.commons.io.FileUtils; +import org.apache.pinot.segment.local.realtime.converter.stats.RealtimeSegmentSegmentCreationDataSource; import org.apache.pinot.segment.local.recordtransformer.ComplexTypeTransformer; import org.apache.pinot.segment.local.recordtransformer.RecordTransformer; import org.apache.pinot.segment.local.segment.creator.RecordReaderSegmentCreationDataSource; @@ -191,6 +192,11 @@ public void init(SegmentGeneratorConfig config, SegmentCreationDataSource dataSo ((RecordReaderSegmentCreationDataSource) dataSource).setTransformPipeline(transformPipeline); } + // Optimization for realtime segment conversion + if (dataSource instanceof RealtimeSegmentSegmentCreationDataSource) { + _config.setRealtimeConversion(true); + } + // Initialize stats collection _segmentStats = dataSource.gatherStats( new 
StatsCollectorConfig(config.getTableConfig(), _dataSchema, config.getSegmentPartitionConfig())); @@ -218,6 +224,23 @@ public void init(SegmentGeneratorConfig config, SegmentCreationDataSource dataSo LOGGER.debug("tempIndexDir:{}", _tempIndexDir); } + /** + * Generate a mutable docId to immutable docId mapping from the sortedDocIds iteration order + * + * @param sortedDocIds used to map sortedDocIds[immutableId] = mutableId (based on RecordReader iteration order) + * @return int[] used to map output[mutableId] = immutableId, or null if sortedDocIds is null + */ + private int[] getImmutableToMutableIdMap(@Nullable int[] sortedDocIds) { + if (sortedDocIds == null) { + return null; + } + int[] res = new int[sortedDocIds.length]; + for (int i = 0; i < res.length; i++) { + res[sortedDocIds[i]] = i; + } + return res; + } + @Override public void build() throws Exception { @@ -229,10 +252,19 @@ public void build() int incompleteRowsFound = 0; try { + // TODO: Eventually pull the doc Id sorting logic out of Record Reader so that all row oriented logic can be + // removed from this code. + int[] immutableToMutableIdMap = null; + if (_recordReader instanceof PinotSegmentRecordReader) { + immutableToMutableIdMap = + getImmutableToMutableIdMap(((PinotSegmentRecordReader) _recordReader).getSortedDocIds()); + } + // Initialize the index creation using the per-column statistics information // TODO: _indexCreationInfoMap holds the reference to all unique values on heap (ColumnIndexCreationInfo -> // ColumnStatistics) throughout the segment creation. Find a way to release the memory early. 
- _indexCreator.init(_config, _segmentIndexCreationInfo, _indexCreationInfoMap, _dataSchema, _tempIndexDir); + _indexCreator.init(_config, _segmentIndexCreationInfo, _indexCreationInfoMap, _dataSchema, _tempIndexDir, + immutableToMutableIdMap); // Build the index _recordReader.rewind(); @@ -299,19 +331,22 @@ public void buildByColumn(IndexSegment indexSegment) LOGGER.info("Collected stats for {} documents", _totalDocs); try { + // TODO: Eventually pull the doc Id sorting logic out of Record Reader so that all row oriented logic can be + // removed from this code. + int[] sortedDocIds = ((PinotSegmentRecordReader) _recordReader).getSortedDocIds(); + int[] immutableToMutableIdMap = getImmutableToMutableIdMap(sortedDocIds); + // Initialize the index creation using the per-column statistics information // TODO: _indexCreationInfoMap holds the reference to all unique values on heap (ColumnIndexCreationInfo -> // ColumnStatistics) throughout the segment creation. Find a way to release the memory early. - _indexCreator.init(_config, _segmentIndexCreationInfo, _indexCreationInfoMap, _dataSchema, _tempIndexDir); + _indexCreator.init(_config, _segmentIndexCreationInfo, _indexCreationInfoMap, _dataSchema, _tempIndexDir, + immutableToMutableIdMap); // Build the indexes LOGGER.info("Start building Index by column"); TreeSet columns = _dataSchema.getPhysicalColumnNames(); - // TODO: Eventually pull the doc Id sorting logic out of Record Reader so that all row oriented logic can be - // removed from this code. 
- int[] sortedDocIds = ((PinotSegmentRecordReader) _recordReader).getSortedDocIds(); for (String col : columns) { _indexCreator.indexColumn(col, sortedDocIds, indexSegment); } diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/text/LuceneTextIndexCreator.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/text/LuceneTextIndexCreator.java index f14cf62bc63c..49306d9404af 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/text/LuceneTextIndexCreator.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/text/LuceneTextIndexCreator.java @@ -20,8 +20,11 @@ import java.io.File; import java.io.IOException; +import java.nio.ByteOrder; import java.util.Arrays; import java.util.HashSet; +import javax.annotation.Nullable; +import org.apache.commons.io.FileUtils; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.standard.StandardAnalyzer; @@ -29,8 +32,11 @@ import org.apache.lucene.document.Field; import org.apache.lucene.document.StoredField; import org.apache.lucene.document.TextField; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.pinot.segment.local.realtime.impl.invertedindex.RealtimeLuceneTextIndex; @@ -41,6 +47,10 @@ import org.apache.pinot.segment.spi.creator.IndexCreationContext; import org.apache.pinot.segment.spi.index.TextIndexConfig; import org.apache.pinot.segment.spi.index.creator.DictionaryBasedInvertedIndexCreator; +import org.apache.pinot.segment.spi.memory.PinotDataBuffer; +import 
org.apache.pinot.segment.spi.store.SegmentDirectoryPaths; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** @@ -49,12 +59,15 @@ * and realtime from {@link RealtimeLuceneTextIndex} */ public class LuceneTextIndexCreator extends AbstractTextIndexCreator { + private static final Logger LOGGER = LoggerFactory.getLogger(LuceneTextIndexCreator.class); public static final String LUCENE_INDEX_DOC_ID_COLUMN_NAME = "DocID"; private final String _textColumn; - private final Directory _indexDirectory; - private final IndexWriter _indexWriter; - + private final boolean _commitOnClose; + private final boolean _reuseMutableIndex; + private final File _indexFile; + private Directory _indexDirectory; + private IndexWriter _indexWriter; private int _nextDocId = 0; public static HashSet getDefaultEnglishStopWordsSet() { @@ -75,6 +88,7 @@ public static HashSet getDefaultEnglishStopWordsSet() { * @param segmentIndexDir segment index directory * @param commit true if the index should be committed (at the end after all documents have * been added), false if index should not be committed + * @param immutableToMutableIdMap immutableToMutableIdMap from segment conversion * Note on commit: * Once {@link SegmentColumnarIndexCreator} * finishes indexing all documents/rows for the segment, we need to commit and close @@ -90,14 +104,19 @@ public static HashSet getDefaultEnglishStopWordsSet() { * to offline), we close this lucene index writer to release resources but don't commit. * @param config the text index config */ - public LuceneTextIndexCreator(String column, File segmentIndexDir, boolean commit, TextIndexConfig config) { + public LuceneTextIndexCreator(String column, File segmentIndexDir, boolean commit, boolean realtimeConversion, + @Nullable int[] immutableToMutableIdMap, TextIndexConfig config) { _textColumn = column; + _commitOnClose = commit; + + // to reuse the mutable index, it must be (1) not the realtime index, i.e. 
commit is set to false + // and (2) happens during realtime segment conversion + _reuseMutableIndex = commit && realtimeConversion; String luceneAnalyzerClass = config.getLuceneAnalyzerClass(); try { // segment generation is always in V1 and later we convert (as part of post creation processing) // to V3 if segmentVersion is set to V3 in SegmentGeneratorConfig. - File indexFile = getV1TextIndexFile(segmentIndexDir); - _indexDirectory = FSDirectory.open(indexFile.toPath()); + _indexFile = getV1TextIndexFile(segmentIndexDir); Analyzer luceneAnalyzer; if (luceneAnalyzerClass.isEmpty() || luceneAnalyzerClass.equals(StandardAnalyzer.class.getName())) { @@ -111,6 +130,15 @@ public LuceneTextIndexCreator(String column, File segmentIndexDir, boolean commi indexWriterConfig.setRAMBufferSizeMB(config.getLuceneMaxBufferSizeMB()); indexWriterConfig.setCommitOnClose(commit); indexWriterConfig.setUseCompoundFile(config.isLuceneUseCompoundFile()); + + if (_reuseMutableIndex) { + LOGGER.info("Reusing the realtime lucene index for segment {} and column {}", segmentIndexDir, column); + indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); + convertMutableSegment(segmentIndexDir, immutableToMutableIdMap, indexWriterConfig); + return; + } + + _indexDirectory = FSDirectory.open(_indexFile.toPath()); _indexWriter = new IndexWriter(_indexDirectory, indexWriterConfig); } catch (ReflectiveOperationException e) { throw new RuntimeException( @@ -122,15 +150,102 @@ public LuceneTextIndexCreator(String column, File segmentIndexDir, boolean commi } public LuceneTextIndexCreator(IndexCreationContext context, TextIndexConfig indexConfig) { - this(context.getFieldSpec().getName(), context.getIndexDir(), context.isTextCommitOnClose(), indexConfig); + this(context.getFieldSpec().getName(), context.getIndexDir(), context.isTextCommitOnClose(), + context.isRealtimeConversion(), context.getImmutableToMutableIdMap(), indexConfig); } public IndexWriter getIndexWriter() { return 
_indexWriter; } + /** + * Copy the mutable lucene index files to create an immutable lucene index + * @param segmentIndexDir segment index directory + * @param immutableToMutableIdMap immutableToMutableIdMap from segment conversion + * @param indexWriterConfig indexWriterConfig + */ + private void convertMutableSegment(File segmentIndexDir, @Nullable int[] immutableToMutableIdMap, + IndexWriterConfig indexWriterConfig) { + try { + // Copy the mutable index to the v1 index location + File dest = getV1TextIndexFile(segmentIndexDir); + File mutableDir = getMutableIndexDir(segmentIndexDir); + FileUtils.copyDirectory(mutableDir, dest); + + // Remove the copied write.lock file + File writeLock = new File(dest, "write.lock"); + FileUtils.delete(writeLock); + + // Call .forceMerge(1) on the copied index as the mutable index will likely contain many Lucene segments + try (Directory destDirectory = FSDirectory.open(dest.toPath()); + IndexWriter indexWriter = new IndexWriter(destDirectory, indexWriterConfig)) { + indexWriter.forceMerge(1, true); + indexWriter.commit(); + + buildMappingFile(segmentIndexDir, _textColumn, destDirectory, immutableToMutableIdMap); + } catch (Exception e) { + throw new RuntimeException("Failed to build the mapping file during segment conversion: " + e); + } + } catch (IOException e) { + throw new RuntimeException("Failed to convert the mutable lucene index: " + e); + } + } + + /** + * Generate the mapping file from mutable Pinot docId (stored within the Lucene index) to immutable Pinot docId using + * the immutableToMutableIdMap from segment conversion + * @param segmentIndexDir segment index directory + * @param column column name + * @param directory directory of the index + * @param immutableToMutableIdMap immutableToMutableIdMap from segment conversion + */ + private void buildMappingFile(File segmentIndexDir, String column, Directory directory, + @Nullable int[] immutableToMutableIdMap) + throws IOException { + IndexReader indexReader = 
DirectoryReader.open(directory); + IndexSearcher indexSearcher = new IndexSearcher(indexReader); + + int numDocs = indexSearcher.getIndexReader().numDocs(); + int length = Integer.BYTES * numDocs; + File docIdMappingFile = new File(SegmentDirectoryPaths.findSegmentDirectory(segmentIndexDir), + column + V1Constants.Indexes.LUCENE_TEXT_INDEX_DOCID_MAPPING_FILE_EXTENSION); + String desc = "Text index docId mapping buffer: " + column; + try (PinotDataBuffer buffer = PinotDataBuffer.mapFile(docIdMappingFile, /* readOnly */ false, 0, length, + ByteOrder.LITTLE_ENDIAN, desc)) { + try { + // If immutableToMutableIdMap is null, then docIds should not change between the mutable and immutable segments. + // Therefore, the mapping file can be built without doing an additional docId conversion + if (immutableToMutableIdMap == null) { + for (int i = 0; i < numDocs; i++) { + Document document = indexSearcher.doc(i); + int pinotDocId = Integer.parseInt(document.get(LuceneTextIndexCreator.LUCENE_INDEX_DOC_ID_COLUMN_NAME)); + buffer.putInt(i * Integer.BYTES, pinotDocId); + } + return; + } + + for (int i = 0; i < numDocs; i++) { + Document document = indexSearcher.doc(i); + int mutablePinotDocId = + Integer.parseInt(document.get(LuceneTextIndexCreator.LUCENE_INDEX_DOC_ID_COLUMN_NAME)); + int immutablePinotDocId = immutableToMutableIdMap[mutablePinotDocId]; + buffer.putInt(i * Integer.BYTES, immutablePinotDocId); + } + } catch (Exception e) { + throw new RuntimeException( + "Caught exception while building mutable to immutable doc id mapping for text index column: " + column, e); + } + } finally { + indexReader.close(); + } + } + @Override public void add(String document) { + if (_reuseMutableIndex) { + return; // no-op + } + // text index on SV column Document docToIndex = new Document(); docToIndex.add(new TextField(_textColumn, document, Field.Store.NO)); @@ -145,6 +260,10 @@ public void add(String document) { @Override public void add(String[] documents, int length) { + if 
(_reuseMutableIndex) { + return; // no-op + } + Document docToIndex = new Document(); // Whenever multiple fields with the same name appear in one document, both the @@ -165,6 +284,9 @@ public void add(String[] documents, int length) { @Override public void seal() { + if (_reuseMutableIndex) { + return; // no-op + } try { // Do this one time operation of combining the multiple lucene index files (if any) // into a single index file. Based on flush threshold and size of data, Lucene @@ -190,12 +312,20 @@ public void seal() { @Override public void close() throws IOException { + if (_reuseMutableIndex) { + return; // no-op + } try { // based on the commit flag set in IndexWriterConfig, this will decide to commit or not _indexWriter.close(); _indexDirectory.close(); } catch (Exception e) { throw new RuntimeException("Caught exception while closing the Lucene index for column: " + _textColumn, e); + } finally { + // remove leftover write.lock file, as well as artifacts from .commit() being called on the realtime index + if (!_commitOnClose) { + FileUtils.deleteQuietly(_indexFile); + } } } @@ -203,4 +333,13 @@ private File getV1TextIndexFile(File indexDir) { String luceneIndexDirectory = _textColumn + V1Constants.Indexes.LUCENE_V9_TEXT_INDEX_FILE_EXTENSION; return new File(indexDir, luceneIndexDirectory); } + + private File getMutableIndexDir(File indexDir) { + // tmpSegmentName format: tmp-tableName__9__1__20240227T0254Z-1709002522086 + String tmpSegmentName = indexDir.getParentFile().getName(); + String segmentName = tmpSegmentName.substring(tmpSegmentName.indexOf("tmp-") + 4, tmpSegmentName.lastIndexOf('-')); + String mutableDir = indexDir.getParentFile().getParentFile().getParent() + "/consumers/" + segmentName + "/" + + _textColumn + V1Constants.Indexes.LUCENE_V9_TEXT_INDEX_FILE_EXTENSION; + return new File(mutableDir); + } } diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/text/LuceneTextIndexReader.java 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/text/LuceneTextIndexReader.java index 3a0efabe8c9a..07eb52f88b58 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/text/LuceneTextIndexReader.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/text/LuceneTextIndexReader.java @@ -171,7 +171,7 @@ public MutableRoaringBitmap getDocIds(String searchQuery) { return docIds; } catch (Exception e) { String msg = - "Caught excepttion while searching the text index for column:" + _column + " search query:" + searchQuery; + "Caught exception while searching the text index for column:" + _column + " search query:" + searchQuery; throw new RuntimeException(msg, e); } } diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/realtime/converter/RealtimeSegmentConverterTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/realtime/converter/RealtimeSegmentConverterTest.java index ded9e85b692c..e4ed4bb396f9 100644 --- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/realtime/converter/RealtimeSegmentConverterTest.java +++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/realtime/converter/RealtimeSegmentConverterTest.java @@ -24,6 +24,8 @@ import java.io.File; import java.io.IOException; import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; import java.util.HashMap; import java.util.LinkedList; import java.util.List; @@ -36,6 +38,7 @@ import org.apache.pinot.segment.local.io.writer.impl.DirectMemoryManager; import org.apache.pinot.segment.local.realtime.impl.RealtimeSegmentConfig; import org.apache.pinot.segment.local.realtime.impl.RealtimeSegmentStatsHistory; +import org.apache.pinot.segment.local.realtime.impl.invertedindex.RealtimeLuceneTextIndexSearcherPool; import 
org.apache.pinot.segment.local.segment.index.column.PhysicalColumnIndexContainer; import org.apache.pinot.segment.local.segment.index.loader.IndexLoadingConfig; import org.apache.pinot.segment.local.segment.store.SegmentLocalFSDirectory; @@ -44,9 +47,12 @@ import org.apache.pinot.segment.spi.creator.SegmentVersion; import org.apache.pinot.segment.spi.index.DictionaryIndexConfig; import org.apache.pinot.segment.spi.index.StandardIndexes; +import org.apache.pinot.segment.spi.index.TextIndexConfig; import org.apache.pinot.segment.spi.index.column.ColumnIndexContainer; import org.apache.pinot.segment.spi.index.metadata.SegmentMetadataImpl; +import org.apache.pinot.segment.spi.index.reader.TextIndexReader; import org.apache.pinot.segment.spi.store.SegmentDirectory; +import org.apache.pinot.spi.config.table.FieldConfig; import org.apache.pinot.spi.config.table.IndexConfig; import org.apache.pinot.spi.config.table.IndexingConfig; import org.apache.pinot.spi.config.table.SegmentZKPropsConfig; @@ -58,6 +64,8 @@ import org.apache.pinot.spi.data.readers.GenericRow; import org.apache.pinot.spi.utils.ReadMode; import org.apache.pinot.spi.utils.builder.TableConfigBuilder; +import org.roaringbitmap.buffer.ImmutableRoaringBitmap; +import org.testng.annotations.DataProvider; import org.testng.annotations.Test; import static org.testng.Assert.assertEquals; @@ -101,7 +109,7 @@ public void testNoRecordsIndexedRowMajorSegmentBuilder() throws Exception { File tmpDir = new File(TMP_DIR, "tmp_" + System.currentTimeMillis()); TableConfig tableConfig = - new TableConfigBuilder(TableType.OFFLINE).setTableName("testTable").setTimeColumnName(DATE_TIME_COLUMN) + new TableConfigBuilder(TableType.REALTIME).setTableName("testTable").setTimeColumnName(DATE_TIME_COLUMN) .setInvertedIndexColumns(Lists.newArrayList(STRING_COLUMN1)).setSortedColumn(LONG_COLUMN1) .setRangeIndexColumns(Lists.newArrayList(STRING_COLUMN2)) .setNoDictionaryColumns(Lists.newArrayList(LONG_COLUMN2)) @@ -167,7 +175,7 @@ public 
void test10RecordsIndexedRowMajorSegmentBuilder() throws Exception { File tmpDir = new File(TMP_DIR, "tmp_" + System.currentTimeMillis()); TableConfig tableConfig = - new TableConfigBuilder(TableType.OFFLINE).setTableName("testTable") + new TableConfigBuilder(TableType.REALTIME).setTableName("testTable") .setTimeColumnName(DATE_TIME_COLUMN) .setInvertedIndexColumns(Lists.newArrayList(STRING_COLUMN1, LONG_COLUMN1)) .setSortedColumn(LONG_COLUMN1) @@ -252,7 +260,7 @@ public void testNoRecordsIndexedColumnMajorSegmentBuilder() throws Exception { File tmpDir = new File(TMP_DIR, "tmp_" + System.currentTimeMillis()); TableConfig tableConfig = - new TableConfigBuilder(TableType.OFFLINE).setTableName("testTable").setTimeColumnName(DATE_TIME_COLUMN) + new TableConfigBuilder(TableType.REALTIME).setTableName("testTable").setTimeColumnName(DATE_TIME_COLUMN) .setInvertedIndexColumns(Lists.newArrayList(STRING_COLUMN1)).setSortedColumn(LONG_COLUMN1) .setRangeIndexColumns(Lists.newArrayList(STRING_COLUMN2)) .setNoDictionaryColumns(Lists.newArrayList(LONG_COLUMN2)) @@ -319,7 +327,7 @@ public void test10RecordsIndexedColumnMajorSegmentBuilder() throws Exception { File tmpDir = new File(TMP_DIR, "tmp_" + System.currentTimeMillis()); TableConfig tableConfig = - new TableConfigBuilder(TableType.OFFLINE).setTableName("testTable") + new TableConfigBuilder(TableType.REALTIME).setTableName("testTable") .setTimeColumnName(DATE_TIME_COLUMN) .setInvertedIndexColumns(Lists.newArrayList(STRING_COLUMN1, LONG_COLUMN1)) .setSortedColumn(LONG_COLUMN1) @@ -433,6 +441,130 @@ private void testSegment(List rows, File indexDir, } } + @DataProvider + public static Object[][] reuseParams() { + List enabledColumnMajorSegmentBuildParams = Arrays.asList(false, true); + String[] sortedColumnParams = new String[]{null, STRING_COLUMN1}; + + return enabledColumnMajorSegmentBuildParams.stream().flatMap( + columnMajor -> Arrays.stream(sortedColumnParams).map(sortedColumn -> new Object[]{columnMajor, + 
sortedColumn})) + .toArray(Object[][]::new); + } + + // Test the realtime segment conversion of a table with an index that reuses mutable index artifacts during conversion + @Test(dataProvider = "reuseParams") + public void testSegmentBuilderWithReuse(boolean columnMajorSegmentBuilder, String sortedColumn) + throws Exception { + File tmpDir = new File(TMP_DIR, "tmp_" + System.currentTimeMillis()); + FieldConfig textIndexFieldConfig = + new FieldConfig.Builder(STRING_COLUMN1).withEncodingType(FieldConfig.EncodingType.RAW) + .withIndexTypes(Collections.singletonList(FieldConfig.IndexType.TEXT)).build(); + List fieldConfigList = Collections.singletonList(textIndexFieldConfig); + TableConfig tableConfig = + new TableConfigBuilder(TableType.REALTIME).setTableName("testTable").setTimeColumnName(DATE_TIME_COLUMN) + .setInvertedIndexColumns(Lists.newArrayList(STRING_COLUMN1)) + .setSortedColumn(sortedColumn).setColumnMajorSegmentBuilderEnabled(columnMajorSegmentBuilder) + .setFieldConfigList(fieldConfigList).build(); + Schema schema = new Schema.SchemaBuilder().addSingleValueDimension(STRING_COLUMN1, FieldSpec.DataType.STRING) + .addDateTime(DATE_TIME_COLUMN, FieldSpec.DataType.LONG, "1:MILLISECONDS:EPOCH", "1:MILLISECONDS").build(); + + String tableNameWithType = tableConfig.getTableName(); + String segmentName = "testTable__0__0__123456"; + IndexingConfig indexingConfig = tableConfig.getIndexingConfig(); + TextIndexConfig textIndexConfig = + new TextIndexConfig(false, null, null, false, false, Collections.emptyList(), Collections.emptyList(), false, + 500, null, false); + + RealtimeSegmentConfig.Builder realtimeSegmentConfigBuilder = + new RealtimeSegmentConfig.Builder().setTableNameWithType(tableNameWithType).setSegmentName(segmentName) + .setStreamName(tableNameWithType).setSchema(schema).setTimeColumnName(DATE_TIME_COLUMN).setCapacity(1000) + .setIndex(Sets.newHashSet(STRING_COLUMN1), StandardIndexes.inverted(), IndexConfig.ENABLED) + 
.setIndex(Sets.newHashSet(STRING_COLUMN1), StandardIndexes.text(), textIndexConfig) + .setFieldConfigList(fieldConfigList).setSegmentZKMetadata(getSegmentZKMetadata(segmentName)) + .setOffHeap(true).setMemoryManager(new DirectMemoryManager(segmentName)) + .setStatsHistory(RealtimeSegmentStatsHistory.deserialzeFrom(new File(tmpDir, "stats"))) + .setConsumerDir(new File(tmpDir, "consumers").getAbsolutePath()); + + // create mutable segment impl + RealtimeLuceneTextIndexSearcherPool.init(1); + MutableSegmentImpl mutableSegmentImpl = new MutableSegmentImpl(realtimeSegmentConfigBuilder.build(), null); + List rows = generateTestDataForReusePath(); + + for (GenericRow row : rows) { + mutableSegmentImpl.index(row, null); + } + + // build converted segment + File outputDir = new File(new File(tmpDir, segmentName), "tmp-" + segmentName + "-" + System.currentTimeMillis()); + SegmentZKPropsConfig segmentZKPropsConfig = new SegmentZKPropsConfig(); + segmentZKPropsConfig.setStartOffset("1"); + segmentZKPropsConfig.setEndOffset("100"); + ColumnIndicesForRealtimeTable cdc = new ColumnIndicesForRealtimeTable(sortedColumn, + indexingConfig.getInvertedIndexColumns(), Collections.singletonList(STRING_COLUMN1), null, + indexingConfig.getNoDictionaryColumns(), indexingConfig.getVarLengthDictionaryColumns()); + RealtimeSegmentConverter converter = + new RealtimeSegmentConverter(mutableSegmentImpl, segmentZKPropsConfig, outputDir.getAbsolutePath(), schema, + tableNameWithType, tableConfig, segmentName, cdc, false); + converter.build(SegmentVersion.v3, null); + + File indexDir = new File(outputDir, segmentName); + SegmentMetadataImpl segmentMetadata = new SegmentMetadataImpl(indexDir); + assertEquals(segmentMetadata.getVersion(), SegmentVersion.v3); + assertEquals(segmentMetadata.getTotalDocs(), rows.size()); + assertEquals(segmentMetadata.getTimeColumn(), DATE_TIME_COLUMN); + assertEquals(segmentMetadata.getTimeUnit(), TimeUnit.MILLISECONDS); + + long expectedStartTime = (long) 
rows.get(0).getValue(DATE_TIME_COLUMN); + assertEquals(segmentMetadata.getStartTime(), expectedStartTime); + long expectedEndTime = (long) rows.get(rows.size() - 1).getValue(DATE_TIME_COLUMN); + assertEquals(segmentMetadata.getEndTime(), expectedEndTime); + + assertTrue(segmentMetadata.getAllColumns().containsAll(schema.getColumnNames())); + assertEquals(segmentMetadata.getStartOffset(), "1"); + assertEquals(segmentMetadata.getEndOffset(), "100"); + + // read converted segment + SegmentLocalFSDirectory segmentDir = new SegmentLocalFSDirectory(indexDir, segmentMetadata, ReadMode.mmap); + SegmentDirectory.Reader segmentReader = segmentDir.createReader(); + + Map indexContainerMap = new HashMap<>(); + Map columnMetadataMap = segmentMetadata.getColumnMetadataMap(); + IndexLoadingConfig indexLoadingConfig = new IndexLoadingConfig(null, tableConfig); + for (Map.Entry entry : columnMetadataMap.entrySet()) { + indexContainerMap.put(entry.getKey(), + new PhysicalColumnIndexContainer(segmentReader, entry.getValue(), indexLoadingConfig)); + } + ImmutableSegmentImpl segmentFile = new ImmutableSegmentImpl(segmentDir, segmentMetadata, indexContainerMap, null); + + // test forward index contents + GenericRow readRow = new GenericRow(); + int docId = 0; + for (int i = 0; i < rows.size(); i++) { + GenericRow row; + if (sortedColumn == null) { + row = rows.get(i); + } else { + row = rows.get(rows.size() - i - 1); + } + + segmentFile.getRecord(docId, readRow); + assertEquals(readRow.getValue(STRING_COLUMN1), row.getValue(STRING_COLUMN1)); + assertEquals(readRow.getValue(DATE_TIME_COLUMN), row.getValue(DATE_TIME_COLUMN)); + docId += 1; + } + + // test docId conversion + TextIndexReader textIndexReader = segmentFile.getIndex(STRING_COLUMN1, StandardIndexes.text()); + if (sortedColumn == null) { + assertEquals(textIndexReader.getDocIds("str-8"), ImmutableRoaringBitmap.bitmapOf(0)); + assertEquals(textIndexReader.getDocIds("str-4"), ImmutableRoaringBitmap.bitmapOf(4)); + } else { + 
assertEquals(textIndexReader.getDocIds("str-8"), ImmutableRoaringBitmap.bitmapOf(7)); + assertEquals(textIndexReader.getDocIds("str-4"), ImmutableRoaringBitmap.bitmapOf(3)); + } + } + private List generateTestData() { LinkedList rows = new LinkedList<>(); @@ -457,6 +589,19 @@ private List generateTestData() { return rows; } + private List generateTestDataForReusePath() { + List rows = new LinkedList<>(); + + for (int i = 0; i < 8; i++) { + GenericRow row = new GenericRow(); + row.putValue(STRING_COLUMN1, "str" + (i - 8)); + row.putValue(DATE_TIME_COLUMN, 1697814309L + i); + rows.add(row); + } + + return rows; + } + private SegmentZKMetadata getSegmentZKMetadata(String segmentName) { SegmentZKMetadata segmentZKMetadata = new SegmentZKMetadata(segmentName); segmentZKMetadata.setCreationTime(System.currentTimeMillis()); diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/FilePerIndexDirectoryTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/FilePerIndexDirectoryTest.java index f60de6d12d22..38eae8436ec8 100644 --- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/FilePerIndexDirectoryTest.java +++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/FilePerIndexDirectoryTest.java @@ -204,8 +204,8 @@ public void testRemoveTextIndices() TextIndexConfig config = new TextIndexConfig(false, null, null, false, false, null, null, true, 500, null, false); try (FilePerIndexDirectory fpi = new FilePerIndexDirectory(TEMP_DIR, _segmentMetadata, ReadMode.mmap); - LuceneTextIndexCreator fooCreator = new LuceneTextIndexCreator("foo", TEMP_DIR, true, config); - LuceneTextIndexCreator barCreator = new LuceneTextIndexCreator("bar", TEMP_DIR, true, config)) { + LuceneTextIndexCreator fooCreator = new LuceneTextIndexCreator("foo", TEMP_DIR, true, false, null, config); + LuceneTextIndexCreator barCreator = new LuceneTextIndexCreator("bar", TEMP_DIR, 
true, false, null, config)) { PinotDataBuffer buf = fpi.newBuffer("col1", StandardIndexes.forward(), 1024); buf.putInt(0, 1); @@ -236,8 +236,7 @@ public void testRemoveTextIndices() new File(TEMP_DIR, "foo" + V1Constants.Indexes.LUCENE_TEXT_INDEX_DOCID_MAPPING_FILE_EXTENSION).exists()); } assertTrue(new File(TEMP_DIR, "bar" + V1Constants.Indexes.LUCENE_V9_TEXT_INDEX_FILE_EXTENSION).exists()); - assertTrue( - new File(TEMP_DIR, "bar" + V1Constants.Indexes.LUCENE_TEXT_INDEX_DOCID_MAPPING_FILE_EXTENSION).exists()); + assertTrue(new File(TEMP_DIR, "bar" + V1Constants.Indexes.LUCENE_TEXT_INDEX_DOCID_MAPPING_FILE_EXTENSION).exists()); // Read indices back and check the content. try (FilePerIndexDirectory fpi = new FilePerIndexDirectory(TEMP_DIR, _segmentMetadata, ReadMode.mmap)) { @@ -268,8 +267,8 @@ public void testGetColumnIndices() new TextIndexConfig(false, null, null, false, false, null, null, true, 500, null, false); // Write sth to buffers and flush them to index files on disk try (FilePerIndexDirectory fpi = new FilePerIndexDirectory(TEMP_DIR, _segmentMetadata, ReadMode.mmap); - LuceneTextIndexCreator fooCreator = new LuceneTextIndexCreator("foo", TEMP_DIR, true, config); - LuceneTextIndexCreator barCreator = new LuceneTextIndexCreator("bar", TEMP_DIR, true, config)) { + LuceneTextIndexCreator fooCreator = new LuceneTextIndexCreator("foo", TEMP_DIR, true, false, null, config); + LuceneTextIndexCreator barCreator = new LuceneTextIndexCreator("bar", TEMP_DIR, true, false, null, config)) { PinotDataBuffer buf = fpi.newBuffer("col1", StandardIndexes.forward(), 1024); buf.putInt(0, 111); buf = fpi.newBuffer("col2", StandardIndexes.dictionary(), 1024); diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/SingleFileIndexDirectoryTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/SingleFileIndexDirectoryTest.java index 28494666362b..7f0dcebb05f8 100644 --- 
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/SingleFileIndexDirectoryTest.java +++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/SingleFileIndexDirectoryTest.java @@ -237,8 +237,8 @@ public void testRemoveTextIndices() TextIndexConfig config = new TextIndexConfig(false, null, null, false, false, null, null, true, 500, null, false); try (SingleFileIndexDirectory sfd = new SingleFileIndexDirectory(TEMP_DIR, _segmentMetadata, ReadMode.mmap); - LuceneTextIndexCreator fooCreator = new LuceneTextIndexCreator("foo", TEMP_DIR, true, config); - LuceneTextIndexCreator barCreator = new LuceneTextIndexCreator("bar", TEMP_DIR, true, config)) { + LuceneTextIndexCreator fooCreator = new LuceneTextIndexCreator("foo", TEMP_DIR, true, false, null, config); + LuceneTextIndexCreator barCreator = new LuceneTextIndexCreator("bar", TEMP_DIR, true, false, null, config)) { PinotDataBuffer buf = sfd.newBuffer("col1", StandardIndexes.forward(), 1024); buf.putInt(0, 1); @@ -343,8 +343,8 @@ public void testGetColumnIndices() TextIndexConfig config = new TextIndexConfig(false, null, null, false, false, null, null, true, 500, null, false); try (SingleFileIndexDirectory sfd = new SingleFileIndexDirectory(TEMP_DIR, _segmentMetadata, ReadMode.mmap); - LuceneTextIndexCreator fooCreator = new LuceneTextIndexCreator("foo", TEMP_DIR, true, config); - LuceneTextIndexCreator barCreator = new LuceneTextIndexCreator("bar", TEMP_DIR, true, config)) { + LuceneTextIndexCreator fooCreator = new LuceneTextIndexCreator("foo", TEMP_DIR, true, false, null, config); + LuceneTextIndexCreator barCreator = new LuceneTextIndexCreator("bar", TEMP_DIR, true, false, null, config)) { PinotDataBuffer buf = sfd.newBuffer("col1", StandardIndexes.forward(), 1024); buf.putInt(0, 111); buf = sfd.newBuffer("col2", StandardIndexes.dictionary(), 1024); diff --git a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/IndexCreationContext.java 
b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/IndexCreationContext.java index 52df382efa2d..3ebe041e877e 100644 --- a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/IndexCreationContext.java +++ b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/IndexCreationContext.java @@ -91,6 +91,20 @@ public interface IndexCreationContext { boolean isTextCommitOnClose(); ColumnStatistics getColumnStatistics(); + /** + * This flags whether the index creation is done during realtime segment conversion + * @return + */ + boolean isRealtimeConversion(); + + /** + * This contains immutableToMutableIdMap mapping generated in {@link SegmentIndexCreationDriver} + * + * This allows for index creation during realtime segment conversion to take advantage of mutable to immutable + * docId mapping + * @return + */ + int[] getImmutableToMutableIdMap(); final class Builder { private ColumnStatistics _columnStatistics; @@ -112,6 +126,8 @@ final class Builder { private boolean _optimizedDictionary; private boolean _fixedLength; private boolean _textCommitOnClose; + private boolean _realtimeConversion = false; + private int[] _immutableToMutableIdMap; public Builder withColumnIndexCreationInfo(ColumnIndexCreationInfo columnIndexCreationInfo) { return withLengthOfLongestEntry(columnIndexCreationInfo.getLengthOfLongestEntry()) @@ -229,11 +245,22 @@ public Builder withTextCommitOnClose(boolean textCommitOnClose) { return this; } + public Builder withRealtimeConversion(boolean realtimeConversion) { + _realtimeConversion = realtimeConversion; + return this; + } + + public Builder withImmutableToMutableIdMap(int[] immutableToMutableIdMap) { + _immutableToMutableIdMap = immutableToMutableIdMap; + return this; + } + public Common build() { return new Common(Objects.requireNonNull(_indexDir), _lengthOfLongestEntry, _maxNumberOfMultiValueElements, _maxRowLengthInBytes, _onHeap, Objects.requireNonNull(_fieldSpec), _sorted, _cardinality, 
_totalNumberOfEntries, _totalDocs, _hasDictionary, _minValue, _maxValue, _forwardIndexDisabled, - _sortedUniqueElementsArray, _optimizedDictionary, _fixedLength, _textCommitOnClose, _columnStatistics); + _sortedUniqueElementsArray, _optimizedDictionary, _fixedLength, _textCommitOnClose, _columnStatistics, + _realtimeConversion, _immutableToMutableIdMap); } public Builder withSortedUniqueElementsArray(Object sortedUniqueElementsArray) { @@ -267,13 +294,16 @@ final class Common implements IndexCreationContext { private final boolean _fixedLength; private final boolean _textCommitOnClose; private final ColumnStatistics _columnStatistics; + private final boolean _realtimeConversion; + private final int[] _immutableToMutableIdMap; public Common(File indexDir, int lengthOfLongestEntry, int maxNumberOfMultiValueElements, int maxRowLengthInBytes, boolean onHeap, FieldSpec fieldSpec, boolean sorted, int cardinality, int totalNumberOfEntries, int totalDocs, boolean hasDictionary, Comparable minValue, Comparable maxValue, - boolean forwardIndexDisabled, Object sortedUniqueElementsArray, boolean optimizeDictionary, - boolean fixedLength, boolean textCommitOnClose, ColumnStatistics columnStatistics) { + boolean forwardIndexDisabled, Object sortedUniqueElementsArray, boolean optimizeDictionary, boolean fixedLength, + boolean textCommitOnClose, ColumnStatistics columnStatistics, boolean realtimeConversion, + int[] immutableToMutableIdMap) { _indexDir = indexDir; _lengthOfLongestEntry = lengthOfLongestEntry; _maxNumberOfMultiValueElements = maxNumberOfMultiValueElements; @@ -293,6 +323,8 @@ public Common(File indexDir, int lengthOfLongestEntry, _fixedLength = fixedLength; _textCommitOnClose = textCommitOnClose; _columnStatistics = columnStatistics; + _realtimeConversion = realtimeConversion; + _immutableToMutableIdMap = immutableToMutableIdMap; } public FieldSpec getFieldSpec() { @@ -378,5 +410,15 @@ public boolean isTextCommitOnClose() { public ColumnStatistics 
getColumnStatistics() { return _columnStatistics; } + + @Override + public boolean isRealtimeConversion() { + return _realtimeConversion; + } + + @Override + public int[] getImmutableToMutableIdMap() { + return _immutableToMutableIdMap; + } } } diff --git a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/SegmentCreator.java b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/SegmentCreator.java index 9adda03b728e..dce1d5b1d40d 100644 --- a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/SegmentCreator.java +++ b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/SegmentCreator.java @@ -46,7 +46,8 @@ public interface SegmentCreator extends Closeable, Serializable { * @throws Exception */ void init(SegmentGeneratorConfig segmentCreationSpec, SegmentIndexCreationInfo segmentIndexCreationInfo, - TreeMap indexCreationInfoMap, Schema schema, File outDir) + TreeMap indexCreationInfoMap, Schema schema, File outDir, + @Nullable int[] immutableToMutableIdMap) throws Exception; /** diff --git a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/SegmentGeneratorConfig.java b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/SegmentGeneratorConfig.java index 6305dcd8522d..5381bdc43082 100644 --- a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/SegmentGeneratorConfig.java +++ b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/SegmentGeneratorConfig.java @@ -120,6 +120,7 @@ public enum TimeColumnType { private boolean _optimizeDictionary = false; private boolean _optimizeDictionaryForMetrics = false; private double _noDictionarySizeRatioThreshold = IndexingConfig.DEFAULT_NO_DICTIONARY_SIZE_RATIO_THRESHOLD; + private boolean _realtimeConversion = false; private final Map _indexConfigsByColName; // constructed from FieldConfig @@ -723,6 +724,14 @@ public double getNoDictionarySizeRatioThreshold() { return 
_noDictionarySizeRatioThreshold; } + public boolean isRealtimeConversion() { + return _realtimeConversion; + } + + public void setRealtimeConversion(boolean realtimeConversion) { + _realtimeConversion = realtimeConversion; + } + public void setNoDictionarySizeRatioThreshold(double noDictionarySizeRatioThreshold) { _noDictionarySizeRatioThreshold = noDictionarySizeRatioThreshold; } diff --git a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/TextIndexConfig.java b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/TextIndexConfig.java index afbf7eb876af..6c400a16db31 100644 --- a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/TextIndexConfig.java +++ b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/TextIndexConfig.java @@ -232,7 +232,8 @@ public boolean equals(Object o) { && _fstType == that._fstType && Objects.equals(_rawValueForTextIndex, that._rawValueForTextIndex) && Objects.equals(_stopWordsInclude, that._stopWordsInclude) && Objects.equals(_stopWordsExclude, that._stopWordsExclude) && _luceneUseCompoundFile == that._luceneUseCompoundFile - && _luceneMaxBufferSizeMB == that._luceneMaxBufferSizeMB; + && _luceneMaxBufferSizeMB == that._luceneMaxBufferSizeMB + && Objects.equals(_luceneAnalyzerClass, that._luceneAnalyzerClass); } @Override diff --git a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/mutable/MutableIndex.java b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/mutable/MutableIndex.java index 2a9c4d9a56c6..dc3bdc98691d 100644 --- a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/mutable/MutableIndex.java +++ b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/mutable/MutableIndex.java @@ -60,4 +60,12 @@ public interface MutableIndex extends IndexReader { * @param docId The document id of the given row. A non-negative value. 
*/ void add(@Nonnull Object[] values, @Nullable int[] dictIds, int docId); + + /** + * Commits the mutable index artifacts to disk. This is used in preparation for realtime segment conversion. + * commit() should be implemented to perform any required actions before using mutable segment artifacts to + * optimize realtime segment conversion. + */ + default void commit() { + } } From c823430f11bf61f3150f70ac4db9eb2581f89d5e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 15 Apr 2024 14:08:48 -0700 Subject: [PATCH 012/102] Bump io.netty:netty-bom from 4.1.108.Final to 4.1.109.Final (#12929) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 6c836c52c93a..7f1819250b9a 100644 --- a/pom.xml +++ b/pom.xml @@ -160,7 +160,7 @@ 0.20.0 2.23.1 2.0.12 - 4.1.108.Final + 4.1.109.Final 1.0.4 1.19.0 4.1.1 From af8fd4089c33db3c11e09ab74df64746fa324f2f Mon Sep 17 00:00:00 2001 From: swaminathanmanish <126024920+swaminathanmanish@users.noreply.github.com> Date: Mon, 15 Apr 2024 14:43:18 -0700 Subject: [PATCH 013/102] Using local copy of segment instead of downloading from remote (#12863) --- ...aseMultipleSegmentsConversionExecutor.java | 26 +++++++++---------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/BaseMultipleSegmentsConversionExecutor.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/BaseMultipleSegmentsConversionExecutor.java index e7ef8a4eea66..9d9db049827b 100644 --- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/BaseMultipleSegmentsConversionExecutor.java +++ 
b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/BaseMultipleSegmentsConversionExecutor.java @@ -54,6 +54,7 @@ import org.apache.pinot.segment.spi.index.metadata.SegmentMetadataImpl; import org.apache.pinot.spi.auth.AuthProvider; import org.apache.pinot.spi.config.table.TableType; +import org.apache.pinot.spi.filesystem.LocalPinotFS; import org.apache.pinot.spi.filesystem.PinotFS; import org.apache.pinot.spi.ingestion.batch.BatchConfigProperties; import org.apache.pinot.spi.ingestion.batch.spec.PinotClusterSpec; @@ -78,6 +79,7 @@ public abstract class BaseMultipleSegmentsConversionExecutor extends BaseTaskExecutor { private static final Logger LOGGER = LoggerFactory.getLogger(BaseMultipleSegmentsConversionExecutor.class); private static final String CUSTOM_SEGMENT_UPLOAD_CONTEXT_LINEAGE_ENTRY_ID = "lineageEntryId"; + private static final PinotFS LOCAL_PINOT_FS = new LocalPinotFS(); private static final int DEFUALT_PUSH_ATTEMPTS = 5; private static final int DEFAULT_PUSH_PARALLELISM = 1; @@ -285,14 +287,11 @@ public List executeTask(PinotTaskConfig pinotTaskConfig String pushMode = configs.getOrDefault(BatchConfigProperties.PUSH_MODE, BatchConfigProperties.SegmentPushType.TAR.name()); - URI outputSegmentTarURI; if (BatchConfigProperties.SegmentPushType.valueOf(pushMode.toUpperCase()) != BatchConfigProperties.SegmentPushType.TAR) { - outputSegmentTarURI = moveSegmentToOutputPinotFS(configs, convertedTarredSegmentFile); + URI outputSegmentTarURI = moveSegmentToOutputPinotFS(configs, convertedTarredSegmentFile); LOGGER.info("Moved generated segment from [{}] to location: [{}]", convertedTarredSegmentFile, outputSegmentTarURI); - } else { - outputSegmentTarURI = convertedTarredSegmentFile.toURI(); } List
httpHeaders = new ArrayList<>(); @@ -316,7 +315,7 @@ public List executeTask(PinotTaskConfig pinotTaskConfig List parameters = Arrays.asList(enableParallelPushProtectionParameter, tableNameParameter, tableTypeParameter); - pushSegment(tableNameParameter.getValue(), configs, outputSegmentTarURI, httpHeaders, parameters, + pushSegment(tableNameParameter.getValue(), configs, convertedTarredSegmentFile.toURI(), httpHeaders, parameters, segmentConversionResult); if (!FileUtils.deleteQuietly(convertedTarredSegmentFile)) { LOGGER.warn("Failed to delete tarred converted segment: {}", convertedTarredSegmentFile.getAbsolutePath()); @@ -338,12 +337,12 @@ public List executeTask(PinotTaskConfig pinotTaskConfig } } - private void pushSegment(String tableName, Map taskConfigs, URI outputSegmentTarURI, + private void pushSegment(String tableName, Map taskConfigs, URI localSegmentTarURI, List
headers, List parameters, SegmentConversionResult segmentConversionResult) throws Exception { String pushMode = taskConfigs.getOrDefault(BatchConfigProperties.PUSH_MODE, BatchConfigProperties.SegmentPushType.TAR.name()); - LOGGER.info("Trying to push Pinot segment with push mode {} from {}", pushMode, outputSegmentTarURI); + LOGGER.info("Trying to push Pinot segment with push mode {} from {}", pushMode, localSegmentTarURI); PushJobSpec pushJobSpec = new PushJobSpec(); pushJobSpec.setPushAttempts(DEFUALT_PUSH_ATTEMPTS); @@ -356,7 +355,7 @@ private void pushSegment(String tableName, Map taskConfigs, URI switch (BatchConfigProperties.SegmentPushType.valueOf(pushMode.toUpperCase())) { case TAR: - File tarFile = new File(outputSegmentTarURI); + File tarFile = new File(localSegmentTarURI); String segmentName = segmentConversionResult.getSegmentName(); String tableNameWithType = segmentConversionResult.getTableNameWithType(); String uploadURL = taskConfigs.get(MinionConstants.UPLOAD_URL_KEY); @@ -366,12 +365,11 @@ private void pushSegment(String tableName, Map taskConfigs, URI case METADATA: if (taskConfigs.containsKey(BatchConfigProperties.OUTPUT_SEGMENT_DIR_URI)) { URI outputSegmentDirURI = URI.create(taskConfigs.get(BatchConfigProperties.OUTPUT_SEGMENT_DIR_URI)); - try (PinotFS outputFileFS = MinionTaskUtils.getOutputPinotFS(taskConfigs, outputSegmentDirURI)) { - Map segmentUriToTarPathMap = - SegmentPushUtils.getSegmentUriToTarPathMap(outputSegmentDirURI, pushJobSpec, - new String[]{outputSegmentTarURI.toString()}); - SegmentPushUtils.sendSegmentUriAndMetadata(spec, outputFileFS, segmentUriToTarPathMap, headers, parameters); - } + Map segmentUriToTarPathMap = + SegmentPushUtils.getSegmentUriToTarPathMap(outputSegmentDirURI, pushJobSpec, + new String[]{localSegmentTarURI.toString()}); + // Use local FS to avoid copying segment from deep store. 
+ SegmentPushUtils.sendSegmentUriAndMetadata(spec, LOCAL_PINOT_FS, segmentUriToTarPathMap, headers, parameters); } else { throw new RuntimeException("Output dir URI missing for metadata push"); } From 2459cfce752aafd0ab6ff3d2f1eb98ac57b7efaf Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 15 Apr 2024 16:07:44 -0700 Subject: [PATCH 014/102] Bump slf4j.version from 2.0.12 to 2.0.13 (#12928) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 7f1819250b9a..22daf3bb303b 100644 --- a/pom.xml +++ b/pom.xml @@ -159,7 +159,7 @@ 1.8.0 0.20.0 2.23.1 - 2.0.12 + 2.0.13 4.1.109.Final 1.0.4 1.19.0 From 684cece35eb790b971f87952c2f2e260b0539c2e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 15 Apr 2024 16:07:59 -0700 Subject: [PATCH 015/102] Bump org.apache.maven.plugins:maven-jar-plugin from 3.3.0 to 3.4.0 (#12927) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 22daf3bb303b..7f8bbbb2ab68 100644 --- a/pom.xml +++ b/pom.xml @@ -130,7 +130,7 @@ org.apache.pinot.shaded - 3.3.0 + 3.4.0 1.11.3 1.13.1 From 013435a6a1ae883d6911ff8133a6111cec5c5b12 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 15 Apr 2024 16:08:56 -0700 Subject: [PATCH 016/102] Bump aws.sdk.version from 2.25.30 to 2.25.31 (#12926) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 7f8bbbb2ab68..0c51fca07432 100644 --- a/pom.xml +++ b/pom.xml @@ -168,7 +168,7 @@ 0.15.0 0.4.4 4.2.2 - 2.25.30 + 2.25.31 2.12.7 3.1.12 7.10.1 From edf9d535975426a106991e3f61cce29079b47539 Mon Sep 17 00:00:00 2001 From: "Xiaotian (Jackie) Jiang" <17555551+Jackie-Jiang@users.noreply.github.com> Date: Mon, 15 Apr 2024 16:58:11 -0700 Subject: [PATCH 017/102] Specify version for commons-validator (#12935) 
--- pom.xml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pom.xml b/pom.xml index 0c51fca07432..6636cdaa892c 100644 --- a/pom.xml +++ b/pom.xml @@ -187,6 +187,7 @@ 1.16.1 1.6.0 3.10.0 + 1.8.0 2.6 3.2.2 @@ -728,6 +729,11 @@ commons-net ${commons-net.version} + + commons-validator + commons-validator + ${commons-validator.version} + commons-lang From c08ba2ccd52db1643dbf9ff89a5bcfe89edb6da0 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 15 Apr 2024 17:01:30 -0700 Subject: [PATCH 018/102] Bump org.apache.maven.scm:maven-scm-provider-gitexe from 2.0.1 to 2.1.0 (#12925) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 6636cdaa892c..0b27acf9478f 100644 --- a/pom.xml +++ b/pom.xml @@ -1825,7 +1825,7 @@ org.apache.maven.scm maven-scm-provider-gitexe - 2.0.1 + 2.1.0 From ec452a49f3c885308613bc45dfa44b48a16076ba Mon Sep 17 00:00:00 2001 From: "Xiaotian (Jackie) Jiang" <17555551+Jackie-Jiang@users.noreply.github.com> Date: Mon, 15 Apr 2024 17:02:18 -0700 Subject: [PATCH 019/102] Refine PeerServerSegmentFinder (#12933) --- .../utils/fetcher/BaseSegmentFetcher.java | 9 +- .../utils/fetcher/HttpSegmentFetcher.java | 28 ++-- .../core/util/PeerServerSegmentFinder.java | 101 +++++------- .../utils/fetcher/HttpSegmentFetcherTest.java | 152 ++++++------------ .../PinotLLCRealtimeSegmentManager.java | 3 +- .../PinotLLCRealtimeSegmentManagerTest.java | 91 ++++------- .../data/manager/BaseTableDataManager.java | 9 +- .../realtime/RealtimeTableDataManager.java | 28 ++-- .../manager/BaseTableDataManagerTest.java | 4 +- .../util/PeerServerSegmentFinderTest.java | 128 +++++++-------- .../retry/ExponentialBackoffRetryPolicy.java | 6 +- 11 files changed, 220 insertions(+), 339 deletions(-) diff --git a/pinot-common/src/main/java/org/apache/pinot/common/utils/fetcher/BaseSegmentFetcher.java 
b/pinot-common/src/main/java/org/apache/pinot/common/utils/fetcher/BaseSegmentFetcher.java index d33c7ead4363..5fb82388f2b4 100644 --- a/pinot-common/src/main/java/org/apache/pinot/common/utils/fetcher/BaseSegmentFetcher.java +++ b/pinot-common/src/main/java/org/apache/pinot/common/utils/fetcher/BaseSegmentFetcher.java @@ -42,13 +42,13 @@ public abstract class BaseSegmentFetcher implements SegmentFetcher { public static final String RETRY_DELAY_SCALE_FACTOR_CONFIG_KEY = "retry.delay.scale.factor"; public static final int DEFAULT_RETRY_COUNT = 3; public static final int DEFAULT_RETRY_WAIT_MS = 100; - public static final int DEFAULT_RETRY_DELAY_SCALE_FACTOR = 5; + public static final double DEFAULT_RETRY_DELAY_SCALE_FACTOR = 5; protected final Logger _logger = LoggerFactory.getLogger(getClass().getSimpleName()); protected int _retryCount; protected int _retryWaitMs; - protected int _retryDelayScaleFactor; + protected double _retryDelayScaleFactor; protected AuthProvider _authProvider; @Override @@ -58,9 +58,8 @@ public void init(PinotConfiguration config) { _retryDelayScaleFactor = config.getProperty(RETRY_DELAY_SCALE_FACTOR_CONFIG_KEY, DEFAULT_RETRY_DELAY_SCALE_FACTOR); _authProvider = AuthProviderUtils.extractAuthProvider(config, CommonConstants.KEY_OF_AUTH); doInit(config); - _logger - .info("Initialized with retryCount: {}, retryWaitMs: {}, retryDelayScaleFactor: {}", _retryCount, _retryWaitMs, - _retryDelayScaleFactor); + _logger.info("Initialized with retryCount: {}, retryWaitMs: {}, retryDelayScaleFactor: {}", _retryCount, + _retryWaitMs, _retryDelayScaleFactor); } /** diff --git a/pinot-common/src/main/java/org/apache/pinot/common/utils/fetcher/HttpSegmentFetcher.java b/pinot-common/src/main/java/org/apache/pinot/common/utils/fetcher/HttpSegmentFetcher.java index 170327dc5b14..6872ac771464 100644 --- a/pinot-common/src/main/java/org/apache/pinot/common/utils/fetcher/HttpSegmentFetcher.java +++ 
b/pinot-common/src/main/java/org/apache/pinot/common/utils/fetcher/HttpSegmentFetcher.java @@ -44,23 +44,16 @@ public class HttpSegmentFetcher extends BaseSegmentFetcher { protected FileUploadDownloadClient _httpClient; - @Override - protected void doInit(PinotConfiguration config) { - _httpClient = new FileUploadDownloadClient(HttpClientConfig.newBuilder(config).build()); - } - - public HttpSegmentFetcher() { - } - @VisibleForTesting - protected HttpSegmentFetcher(FileUploadDownloadClient httpClient, PinotConfiguration config) { + void setHttpClient(FileUploadDownloadClient httpClient) { _httpClient = httpClient; - _retryCount = config.getProperty(RETRY_COUNT_CONFIG_KEY, DEFAULT_RETRY_COUNT); - _retryWaitMs = config.getProperty(RETRY_WAIT_MS_CONFIG_KEY, DEFAULT_RETRY_WAIT_MS); - _retryDelayScaleFactor = config.getProperty(RETRY_DELAY_SCALE_FACTOR_CONFIG_KEY, DEFAULT_RETRY_DELAY_SCALE_FACTOR); - _logger - .info("Initialized with retryCount: {}, retryWaitMs: {}, retryDelayScaleFactor: {}", _retryCount, _retryWaitMs, - _retryDelayScaleFactor); + } + + @Override + protected void doInit(PinotConfiguration config) { + if (_httpClient == null) { + _httpClient = new FileUploadDownloadClient(HttpClientConfig.newBuilder(config).build()); + } } @Override @@ -87,9 +80,8 @@ public void fetchSegmentToLocal(URI downloadURI, File dest) httpHeaders.add(new BasicHeader(HttpHeaders.HOST, hostName + ":" + port)); } int statusCode = _httpClient.downloadFile(uri, dest, _authProvider, httpHeaders); - _logger - .info("Downloaded segment from: {} to: {} of size: {}; Response status code: {}", uri, dest, dest.length(), - statusCode); + _logger.info("Downloaded segment from: {} to: {} of size: {}; Response status code: {}", uri, dest, + dest.length(), statusCode); return true; } catch (HttpErrorStatusException e) { int statusCode = e.getStatusCode(); diff --git a/pinot-common/src/main/java/org/apache/pinot/core/util/PeerServerSegmentFinder.java 
b/pinot-common/src/main/java/org/apache/pinot/core/util/PeerServerSegmentFinder.java index e2c9d509f60a..7f26d759352d 100644 --- a/pinot-common/src/main/java/org/apache/pinot/core/util/PeerServerSegmentFinder.java +++ b/pinot-common/src/main/java/org/apache/pinot/core/util/PeerServerSegmentFinder.java @@ -19,21 +19,19 @@ package org.apache.pinot.core.util; import java.net.URI; -import java.net.URISyntaxException; import java.util.ArrayList; import java.util.List; import java.util.Map; -import org.apache.commons.collections.ListUtils; import org.apache.helix.HelixAdmin; import org.apache.helix.HelixManager; import org.apache.helix.model.ExternalView; import org.apache.helix.model.InstanceConfig; -import org.apache.pinot.common.utils.LLCSegmentName; -import org.apache.pinot.common.utils.helix.HelixHelper; -import org.apache.pinot.spi.config.table.TableType; import org.apache.pinot.spi.utils.CommonConstants; +import org.apache.pinot.spi.utils.CommonConstants.Helix.Instance; +import org.apache.pinot.spi.utils.CommonConstants.Helix.StateModel.SegmentStateModel; +import org.apache.pinot.spi.utils.CommonConstants.Server; import org.apache.pinot.spi.utils.StringUtil; -import org.apache.pinot.spi.utils.builder.TableNameBuilder; +import org.apache.pinot.spi.utils.retry.AttemptsExceededException; import org.apache.pinot.spi.utils.retry.RetryPolicies; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -47,93 +45,74 @@ public class PeerServerSegmentFinder { private PeerServerSegmentFinder() { } - private static final Logger _logger = LoggerFactory.getLogger(PeerServerSegmentFinder.class); + private static final Logger LOGGER = LoggerFactory.getLogger(PeerServerSegmentFinder.class); private static final int MAX_NUM_ATTEMPTS = 5; private static final int INITIAL_DELAY_MS = 500; private static final double DELAY_SCALE_FACTOR = 2; /** - * - * @param segmentName - * @param downloadScheme Can be either http or https. 
- * @param helixManager - * @return a list of uri strings of the form http(s)://hostname:port/segments/tablenameWithType/segmentName - * for the servers hosting ONLINE segments; empty list if no such server found. + * Returns a list of URIs of the form 'http(s)://hostname:port/segments/tableNameWithType/segmentName' for the servers + * hosting ONLINE segments; empty list if no such server found. The download scheme can be either 'http' or 'https'. */ - public static List getPeerServerURIs(String segmentName, String downloadScheme, HelixManager helixManager) { - LLCSegmentName llcSegmentName = new LLCSegmentName(segmentName); - String tableNameWithType = - TableNameBuilder.forType(TableType.REALTIME).tableNameWithType(llcSegmentName.getTableName()); - return getPeerServerURIs(segmentName, downloadScheme, helixManager, tableNameWithType); - } - - public static List getPeerServerURIs(String segmentName, String downloadScheme, - HelixManager helixManager, String tableNameWithType) { + public static List getPeerServerURIs(HelixManager helixManager, String tableNameWithType, String segmentName, + String downloadScheme) { HelixAdmin helixAdmin = helixManager.getClusterManagmentTool(); String clusterName = helixManager.getClusterName(); - if (clusterName == null) { - _logger.error("ClusterName not found"); - return ListUtils.EMPTY_LIST; - } - final List onlineServerURIs = new ArrayList<>(); + List onlineServerURIs = new ArrayList<>(); try { RetryPolicies.exponentialBackoffRetryPolicy(MAX_NUM_ATTEMPTS, INITIAL_DELAY_MS, DELAY_SCALE_FACTOR) .attempt(() -> { - getOnlineServersFromExternalView(segmentName, downloadScheme, tableNameWithType, helixAdmin, clusterName, + getOnlineServersFromExternalView(helixAdmin, clusterName, tableNameWithType, segmentName, downloadScheme, onlineServerURIs); return !onlineServerURIs.isEmpty(); }); + } catch (AttemptsExceededException e) { + LOGGER.error("Failed to find ONLINE servers for segment: {} in table: {} after {} attempts", segmentName, 
+ tableNameWithType, MAX_NUM_ATTEMPTS); } catch (Exception e) { - _logger.error("Failure in getting online servers for segment {}", segmentName, e); + LOGGER.error("Caught exception while getting peer server URIs for segment: {} in table: {}", segmentName, + tableNameWithType, e); } return onlineServerURIs; } - private static void getOnlineServersFromExternalView(String segmentName, String downloadScheme, - String tableNameWithType, HelixAdmin helixAdmin, String clusterName, List onlineServerURIs) { - ExternalView externalViewForResource = - HelixHelper.getExternalViewForResource(helixAdmin, clusterName, tableNameWithType); - if (externalViewForResource == null) { - _logger.warn("External View not found for table {}", tableNameWithType); + private static void getOnlineServersFromExternalView(HelixAdmin helixAdmin, String clusterName, + String tableNameWithType, String segmentName, String downloadScheme, List onlineServerURIs) + throws Exception { + ExternalView externalView = helixAdmin.getResourceExternalView(clusterName, tableNameWithType); + if (externalView == null) { + LOGGER.warn("Failed to find external view for table: {}", tableNameWithType); return; } // Find out the ONLINE servers serving the segment. 
- Map instanceToStateMap = externalViewForResource.getStateMap(segmentName); - for (Map.Entry instanceState : instanceToStateMap.entrySet()) { - if ("ONLINE".equals(instanceState.getValue())) { + Map instanceStateMap = externalView.getStateMap(segmentName); + if (instanceStateMap == null) { + LOGGER.warn("Failed to find segment: {} in table: {}", segmentName, tableNameWithType); + return; + } + for (Map.Entry instanceState : instanceStateMap.entrySet()) { + if (SegmentStateModel.ONLINE.equals(instanceState.getValue())) { String instanceId = instanceState.getKey(); - _logger.info("Found ONLINE server {} for segment {}.", instanceId, segmentName); + LOGGER.info("Found ONLINE server: {} for segment: {} in table: {}", instanceId, segmentName, tableNameWithType); InstanceConfig instanceConfig = helixAdmin.getInstanceConfig(clusterName, instanceId); String hostName = instanceConfig.getHostName(); - int port = getServerAdminPort(helixAdmin, clusterName, instanceId, downloadScheme); - try { - onlineServerURIs.add(new URI(StringUtil - .join("/", downloadScheme + "://" + hostName + ":" + port, "segments", tableNameWithType, segmentName))); - } catch (URISyntaxException e) { - _logger.warn("Error in uri syntax: ", e); - } + String adminPortKey = getAdminPortKey(downloadScheme); + int port = instanceConfig.getRecord().getIntField(adminPortKey, Server.DEFAULT_ADMIN_API_PORT); + onlineServerURIs.add(new URI( + StringUtil.join("/", downloadScheme + "://" + hostName + ":" + port, "segments", tableNameWithType, + segmentName))); } } } - private static int getServerAdminPort(HelixAdmin helixAdmin, String clusterName, String instanceId, - String downloadScheme) { - try { - return Integer.parseInt(HelixHelper.getInstanceConfigsMapFor(instanceId, clusterName, helixAdmin) - .get(getServerAdminPortKey(downloadScheme))); - } catch (Exception e) { - _logger.warn("Failed to retrieve ADMIN PORT for instanceId {} in the cluster {} ", instanceId, clusterName, e); - return 
CommonConstants.Helix.DEFAULT_SERVER_NETTY_PORT; - } - } - - private static String getServerAdminPortKey(String downloadScheme) { + private static String getAdminPortKey(String downloadScheme) { switch (downloadScheme) { - case CommonConstants.HTTPS_PROTOCOL: - return CommonConstants.Helix.Instance.ADMIN_HTTPS_PORT_KEY; case CommonConstants.HTTP_PROTOCOL: + return Instance.ADMIN_PORT_KEY; + case CommonConstants.HTTPS_PROTOCOL: + return Instance.ADMIN_HTTPS_PORT_KEY; default: - return CommonConstants.Helix.Instance.ADMIN_PORT_KEY; + throw new IllegalArgumentException("Unsupported download scheme: " + downloadScheme); } } } diff --git a/pinot-common/src/test/java/org/apache/pinot/common/utils/fetcher/HttpSegmentFetcherTest.java b/pinot-common/src/test/java/org/apache/pinot/common/utils/fetcher/HttpSegmentFetcherTest.java index 3159168dab95..1a567901b947 100644 --- a/pinot-common/src/test/java/org/apache/pinot/common/utils/fetcher/HttpSegmentFetcherTest.java +++ b/pinot-common/src/test/java/org/apache/pinot/common/utils/fetcher/HttpSegmentFetcherTest.java @@ -19,153 +19,97 @@ package org.apache.pinot.common.utils.fetcher; import java.io.File; -import java.io.IOException; import java.net.URI; -import java.net.URISyntaxException; -import java.util.ArrayList; import java.util.List; -import org.apache.helix.HelixManager; -import org.apache.pinot.common.exception.HttpErrorStatusException; +import java.util.function.Supplier; +import org.apache.commons.io.FileUtils; import org.apache.pinot.common.utils.FileUploadDownloadClient; -import org.apache.pinot.core.util.PeerServerSegmentFinder; import org.apache.pinot.spi.env.PinotConfiguration; import org.apache.pinot.spi.utils.retry.AttemptsExceededException; -import org.mockito.MockedStatic; -import org.testng.Assert; -import org.testng.annotations.BeforeSuite; +import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; import static org.mockito.ArgumentMatchers.any; import static org.mockito.Mockito.mock; 
-import static org.mockito.Mockito.mockStatic; import static org.mockito.Mockito.when; public class HttpSegmentFetcherTest { - private MockedStatic _peerServerSegmentFinder = mockStatic(PeerServerSegmentFinder.class); + private static final String SEGMENT_NAME = "testSegment"; + private static final File SEGMENT_FILE = new File(FileUtils.getTempDirectory(), SEGMENT_NAME); + private PinotConfiguration _fetcherConfig; - @BeforeSuite - public void initTest() { + @BeforeClass + public void setUp() { _fetcherConfig = new PinotConfiguration(); _fetcherConfig.setProperty(BaseSegmentFetcher.RETRY_COUNT_CONFIG_KEY, 3); + _fetcherConfig.setProperty(BaseSegmentFetcher.RETRY_WAIT_MS_CONFIG_KEY, 10); + _fetcherConfig.setProperty(BaseSegmentFetcher.RETRY_DELAY_SCALE_FACTOR_CONFIG_KEY, 1.1); + } + + private HttpSegmentFetcher getSegmentFetcher(FileUploadDownloadClient client) { + HttpSegmentFetcher segmentFetcher = new HttpSegmentFetcher(); + segmentFetcher.setHttpClient(client); + segmentFetcher.init(_fetcherConfig); + return segmentFetcher; } @Test public void testFetchSegmentToLocalSucceedAtFirstAttempt() - throws URISyntaxException, IOException, HttpErrorStatusException { + throws Exception { FileUploadDownloadClient client = mock(FileUploadDownloadClient.class); when(client.downloadFile(any(), any(), any())).thenReturn(200); - HttpSegmentFetcher httpSegmentFetcher = new HttpSegmentFetcher(client, _fetcherConfig); - HelixManager helixManager = mock(HelixManager.class); - - List uris = new ArrayList<>(); - uris.add(new URI("http://h1:8080")); - uris.add(new URI("http://h2:8080")); - _peerServerSegmentFinder.when(() -> PeerServerSegmentFinder.getPeerServerURIs(any(), any(), any())) - .thenReturn(uris); - try { - httpSegmentFetcher.fetchSegmentToLocal("seg", - () -> PeerServerSegmentFinder.getPeerServerURIs("seg", "http", helixManager), new File("/file")); - } catch (Exception e) { - // If we reach here, the download fails. 
- Assert.assertTrue(false, "Download segment failed"); - Assert.assertTrue(e instanceof AttemptsExceededException); - } - _peerServerSegmentFinder.reset(); + HttpSegmentFetcher segmentFetcher = getSegmentFetcher(client); + List uris = List.of(new URI("http://h1:8080"), new URI("http://h2:8080")); + segmentFetcher.fetchSegmentToLocal(SEGMENT_NAME, () -> uris, SEGMENT_FILE); } - @Test + @Test(expectedExceptions = AttemptsExceededException.class) public void testFetchSegmentToLocalAllDownloadAttemptsFailed() - throws URISyntaxException, IOException, HttpErrorStatusException { + throws Exception { FileUploadDownloadClient client = mock(FileUploadDownloadClient.class); - // All three attempts fails. - when(client.downloadFile(any(), any(), any())).thenReturn(300).thenReturn(300).thenReturn(300); - HttpSegmentFetcher httpSegmentFetcher = new HttpSegmentFetcher(client, _fetcherConfig); - HelixManager helixManager = mock(HelixManager.class); - List uris = new ArrayList<>(); - uris.add(new URI("http://h1:8080")); - uris.add(new URI("http://h2:8080")); - - _peerServerSegmentFinder.when(() -> PeerServerSegmentFinder.getPeerServerURIs(any(), any(), any())) - .thenReturn(uris); - try { - httpSegmentFetcher.fetchSegmentToLocal("seg", - () -> PeerServerSegmentFinder.getPeerServerURIs("seg", "http", helixManager), new File("/file")); - // The test should not reach here because the fetch will throw exception. - Assert.assertTrue(false, "Download segment failed"); - } catch (Exception e) { - // If we reach here, the download fails. 
- Assert.assertTrue(true, "Download segment failed"); - } + // All attempts failed + when(client.downloadFile(any(), any(), any())).thenReturn(300); + HttpSegmentFetcher segmentFetcher = getSegmentFetcher(client); + List uris = List.of(new URI("http://h1:8080"), new URI("http://h2:8080")); + segmentFetcher.fetchSegmentToLocal(SEGMENT_NAME, () -> uris, SEGMENT_FILE); } @Test public void testFetchSegmentToLocalSuccessAfterRetry() - throws URISyntaxException, IOException, HttpErrorStatusException { + throws Exception { FileUploadDownloadClient client = mock(FileUploadDownloadClient.class); - // the first two attempts failed until the last attempt succeeds + // The first two attempts failed and the last attempt succeeded when(client.downloadFile(any(), any(), any())).thenReturn(300).thenReturn(300).thenReturn(200); - HttpSegmentFetcher httpSegmentFetcher = new HttpSegmentFetcher(client, _fetcherConfig); - HelixManager helixManager = mock(HelixManager.class); - List uris = new ArrayList<>(); - uris.add(new URI("http://h1:8080")); - uris.add(new URI("http://h2:8080")); - - _peerServerSegmentFinder.when(() -> PeerServerSegmentFinder.getPeerServerURIs(any(), any(), any())) - .thenReturn(uris); - try { - httpSegmentFetcher.fetchSegmentToLocal("seg", - () -> PeerServerSegmentFinder.getPeerServerURIs("seg", "http", helixManager), new File("/file")); - } catch (Exception e) { - // If we reach here, the download fails. - Assert.assertTrue(false, "Download segment failed"); - } + HttpSegmentFetcher segmentFetcher = getSegmentFetcher(client); + List uris = List.of(new URI("http://h1:8080"), new URI("http://h2:8080")); + segmentFetcher.fetchSegmentToLocal(SEGMENT_NAME, () -> uris, SEGMENT_FILE); } @Test public void testFetchSegmentToLocalSuccessAfterFirstTwoAttemptsFoundNoPeerServers() - throws URISyntaxException, IOException, HttpErrorStatusException { + throws Exception { FileUploadDownloadClient client = mock(FileUploadDownloadClient.class); - // The download always succeeds. 
+ // The download always succeeds when(client.downloadFile(any(), any(), any())).thenReturn(200); - HttpSegmentFetcher httpSegmentFetcher = new HttpSegmentFetcher(client, _fetcherConfig); - HelixManager helixManager = mock(HelixManager.class); - List uris = new ArrayList<>(); - uris.add(new URI("http://h1:8080")); - uris.add(new URI("http://h2:8080")); - - // The first two attempts find NO peers hosting the segment but the last one found two servers. - _peerServerSegmentFinder.when(() -> PeerServerSegmentFinder.getPeerServerURIs(any(), any(), any())) - .thenReturn(List.of()).thenReturn(List.of()).thenReturn(uris); - try { - httpSegmentFetcher.fetchSegmentToLocal("seg", - () -> PeerServerSegmentFinder.getPeerServerURIs("seg", "http", helixManager), new File("/file")); - } catch (Exception e) { - // If we reach here, the download fails. - Assert.assertTrue(false, "Download segment failed"); - } + HttpSegmentFetcher segmentFetcher = getSegmentFetcher(client); + List uris = List.of(new URI("http://h1:8080"), new URI("http://h2:8080")); + // The first two attempts found NO peers hosting the segment, and the last one found two servers + //noinspection unchecked + Supplier> uriSupplier = mock(Supplier.class); + when(uriSupplier.get()).thenReturn(List.of()).thenReturn(List.of()).thenReturn(uris); + segmentFetcher.fetchSegmentToLocal(SEGMENT_NAME, uriSupplier, SEGMENT_FILE); } - @Test + @Test(expectedExceptions = AttemptsExceededException.class) public void testFetchSegmentToLocalFailureWithNoPeerServers() - throws IOException, HttpErrorStatusException { + throws Exception { FileUploadDownloadClient client = mock(FileUploadDownloadClient.class); - // the download always succeeds. 
+ // The download always succeeds when(client.downloadFile(any(), any(), any())).thenReturn(200); - HttpSegmentFetcher httpSegmentFetcher = new HttpSegmentFetcher(client, _fetcherConfig); - HelixManager helixManager = mock(HelixManager.class); - - _peerServerSegmentFinder.when(() -> PeerServerSegmentFinder.getPeerServerURIs(any(), any(), any())) - .thenReturn(List.of()).thenReturn(List.of()).thenReturn(List.of()); - try { - httpSegmentFetcher.fetchSegmentToLocal("seg", - () -> PeerServerSegmentFinder.getPeerServerURIs("seg", "http", helixManager), new File("/file")); - // The test should not reach here because the fetch will throw exception. - Assert.assertTrue(false, "Download segment failed"); - } catch (Exception e) { - Assert.assertTrue(true, "Download segment failed"); - Assert.assertTrue(e instanceof AttemptsExceededException); - } + HttpSegmentFetcher segmentFetcher = getSegmentFetcher(client); + List uris = List.of(); + segmentFetcher.fetchSegmentToLocal(SEGMENT_NAME, () -> uris, SEGMENT_FILE); } } diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java index 25e40084ab39..838a03a268a4 100644 --- a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java +++ b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java @@ -1483,7 +1483,8 @@ public void uploadToDeepStoreIfMissing(TableConfig tableConfig, List peerSegmentURIs = - PeerServerSegmentFinder.getPeerServerURIs(segmentName, CommonConstants.HTTP_PROTOCOL, _helixManager); + PeerServerSegmentFinder.getPeerServerURIs(_helixManager, realtimeTableName, segmentName, + CommonConstants.HTTP_PROTOCOL); if (peerSegmentURIs.isEmpty()) { throw new IllegalStateException( String.format("Failed to upload segment %s to 
deep store because no online replica is found", diff --git a/pinot-controller/src/test/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManagerTest.java b/pinot-controller/src/test/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManagerTest.java index 60b83ba24ae1..f0496a8ee7e2 100644 --- a/pinot-controller/src/test/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManagerTest.java +++ b/pinot-controller/src/test/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManagerTest.java @@ -43,7 +43,6 @@ import org.apache.helix.HelixAdmin; import org.apache.helix.HelixManager; import org.apache.helix.model.ExternalView; -import org.apache.helix.model.HelixConfigScope; import org.apache.helix.model.IdealState; import org.apache.helix.model.InstanceConfig; import org.apache.helix.store.zk.ZkHelixPropertyStore; @@ -75,10 +74,10 @@ import org.apache.pinot.spi.stream.StreamConfig; import org.apache.pinot.spi.utils.CommonConstants; import org.apache.pinot.spi.utils.CommonConstants.Helix; +import org.apache.pinot.spi.utils.CommonConstants.Helix.Instance; import org.apache.pinot.spi.utils.CommonConstants.Helix.StateModel.SegmentStateModel; import org.apache.pinot.spi.utils.CommonConstants.Segment.Realtime.Status; import org.apache.pinot.spi.utils.IngestionConfigUtils; -import org.apache.pinot.spi.utils.StringUtil; import org.apache.pinot.spi.utils.builder.TableConfigBuilder; import org.apache.pinot.spi.utils.builder.TableNameBuilder; import org.apache.pinot.util.TestUtils; @@ -91,8 +90,6 @@ import static org.apache.pinot.controller.ControllerConf.ControllerPeriodicTasksConf.ENABLE_TMP_SEGMENT_ASYNC_DELETION; import static org.apache.pinot.controller.ControllerConf.ControllerPeriodicTasksConf.TMP_SEGMENT_RETENTION_IN_SECONDS; import static org.apache.pinot.spi.utils.CommonConstants.Segment.METADATA_URI_FOR_PEER_DOWNLOAD; -import static org.mockito.ArgumentMatchers.any; -import 
static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; import static org.testng.Assert.*; @@ -101,6 +98,7 @@ public class PinotLLCRealtimeSegmentManagerTest { private static final File TEMP_DIR = new File(FileUtils.getTempDirectory(), "PinotLLCRealtimeSegmentManagerTest"); private static final String SCHEME = "file:"; + private static final String CLUSTER_NAME = "testCluster"; private static final String RAW_TABLE_NAME = "testTable"; private static final String REALTIME_TABLE_NAME = TableNameBuilder.REALTIME.tableNameWithType(RAW_TABLE_NAME); @@ -927,13 +925,13 @@ public void testUploadToSegmentStore() (ZkHelixPropertyStore) mock(ZkHelixPropertyStore.class); when(pinotHelixResourceManager.getHelixZkManager()).thenReturn(helixManager); when(helixManager.getClusterManagmentTool()).thenReturn(helixAdmin); - when(helixManager.getClusterName()).thenReturn("cluster_name"); + when(helixManager.getClusterName()).thenReturn(CLUSTER_NAME); when(pinotHelixResourceManager.getPropertyStore()).thenReturn(zkHelixPropertyStore); // init fake PinotLLCRealtimeSegmentManager ControllerConf controllerConfig = new ControllerConf(); - controllerConfig.setProperty( - ControllerConf.ControllerPeriodicTasksConf.ENABLE_DEEP_STORE_RETRY_UPLOAD_LLC_SEGMENT, true); + controllerConfig.setProperty(ControllerConf.ControllerPeriodicTasksConf.ENABLE_DEEP_STORE_RETRY_UPLOAD_LLC_SEGMENT, + true); controllerConfig.setDataDir(TEMP_DIR.toString()); FakePinotLLCRealtimeSegmentManager segmentManager = new FakePinotLLCRealtimeSegmentManager(pinotHelixResourceManager, controllerConfig); @@ -946,19 +944,12 @@ public void testUploadToSegmentStore() segmentsValidationAndRetentionConfig.setRetentionTimeUnit(TimeUnit.DAYS.toString()); segmentsValidationAndRetentionConfig.setRetentionTimeValue("3"); segmentManager._tableConfig.setValidationConfig(segmentsValidationAndRetentionConfig); - List segmentsZKMetadata = - new 
ArrayList<>(segmentManager._segmentZKMetadataMap.values()); + List segmentsZKMetadata = new ArrayList<>(segmentManager._segmentZKMetadataMap.values()); Assert.assertEquals(segmentsZKMetadata.size(), 5); // Set up external view for this table ExternalView externalView = new ExternalView(REALTIME_TABLE_NAME); - when(helixAdmin.getResourceExternalView("cluster_name", REALTIME_TABLE_NAME)) - .thenReturn(externalView); - when(helixAdmin.getConfigKeys(any(HelixConfigScope.class))).thenReturn(new ArrayList<>()); - String adminPort = "2077"; - Map instanceConfigMap = new HashMap<>(); - instanceConfigMap.put(CommonConstants.Helix.Instance.ADMIN_PORT_KEY, adminPort); - when(helixAdmin.getConfig(any(HelixConfigScope.class), any(List.class))).thenReturn(instanceConfigMap); + when(helixAdmin.getResourceExternalView(CLUSTER_NAME, REALTIME_TABLE_NAME)).thenReturn(externalView); // Change 1st segment status to be DONE, but with default peer download url. // Verify later the download url is fixed after upload success. 
@@ -966,28 +957,26 @@ public void testUploadToSegmentStore() segmentsZKMetadata.get(0).setDownloadUrl(METADATA_URI_FOR_PEER_DOWNLOAD); // set up the external view for 1st segment String instance0 = "instance0"; + int adminPort = 2077; externalView.setState(segmentsZKMetadata.get(0).getSegmentName(), instance0, "ONLINE"); InstanceConfig instanceConfig0 = new InstanceConfig(instance0); instanceConfig0.setHostName(instance0); - when(helixAdmin.getInstanceConfig(any(String.class), eq(instance0))).thenReturn(instanceConfig0); + instanceConfig0.getRecord().setIntField(Instance.ADMIN_PORT_KEY, adminPort); + when(helixAdmin.getInstanceConfig(CLUSTER_NAME, instance0)).thenReturn(instanceConfig0); // mock the request/response for 1st segment upload - String serverUploadRequestUrl0 = StringUtil - .join("/", - CommonConstants.HTTP_PROTOCOL + "://" + instance0 + ":" + adminPort, - "segments", - REALTIME_TABLE_NAME, - segmentsZKMetadata.get(0).getSegmentName(), - "upload") + "?uploadTimeoutMs=-1"; + String serverUploadRequestUrl0 = + String.format("http://%s:%d/segments/%s/%s/upload?uploadTimeoutMs=-1", instance0, adminPort, + REALTIME_TABLE_NAME, segmentsZKMetadata.get(0).getSegmentName()); // tempSegmentFileLocation is the location where the segment uploader will upload the segment. This usually ends // with a random UUID File tempSegmentFileLocation = new File(TEMP_DIR, segmentsZKMetadata.get(0).getSegmentName() + UUID.randomUUID()); FileUtils.write(tempSegmentFileLocation, "test"); // After the deep-store retry task gets the segment location returned by Pinot server, it will move the segment to // its final location. This is the expected segment location. 
- String expectedSegmentLocation = segmentManager.createSegmentPath(RAW_TABLE_NAME, - segmentsZKMetadata.get(0).getSegmentName()).toString(); - when(segmentManager._mockedFileUploadDownloadClient - .uploadToSegmentStore(serverUploadRequestUrl0)).thenReturn(tempSegmentFileLocation.getPath()); + String expectedSegmentLocation = + segmentManager.createSegmentPath(RAW_TABLE_NAME, segmentsZKMetadata.get(0).getSegmentName()).toString(); + when(segmentManager._mockedFileUploadDownloadClient.uploadToSegmentStore(serverUploadRequestUrl0)).thenReturn( + tempSegmentFileLocation.getPath()); // Change 2nd segment status to be DONE, but with default peer download url. // Verify later the download url isn't fixed after upload failure. @@ -998,25 +987,20 @@ public void testUploadToSegmentStore() externalView.setState(segmentsZKMetadata.get(1).getSegmentName(), instance1, "ONLINE"); InstanceConfig instanceConfig1 = new InstanceConfig(instance1); instanceConfig1.setHostName(instance1); - when(helixAdmin.getInstanceConfig(any(String.class), eq(instance1))).thenReturn(instanceConfig1); + instanceConfig1.getRecord().setIntField(Instance.ADMIN_PORT_KEY, adminPort); + when(helixAdmin.getInstanceConfig(CLUSTER_NAME, instance1)).thenReturn(instanceConfig1); // mock the request/response for 2nd segment upload - String serverUploadRequestUrl1 = StringUtil - .join("/", - CommonConstants.HTTP_PROTOCOL + "://" + instance1 + ":" + adminPort, - "segments", - REALTIME_TABLE_NAME, - segmentsZKMetadata.get(1).getSegmentName(), - "upload") + "?uploadTimeoutMs=-1"; - when(segmentManager._mockedFileUploadDownloadClient - .uploadToSegmentStore(serverUploadRequestUrl1)) - .thenThrow(new HttpErrorStatusException( - "failed to upload segment", Response.Status.INTERNAL_SERVER_ERROR.getStatusCode())); + String serverUploadRequestUrl1 = + String.format("http://%s:%d/segments/%s/%s/upload?uploadTimeoutMs=-1", instance1, adminPort, + REALTIME_TABLE_NAME, segmentsZKMetadata.get(1).getSegmentName()); + 
when(segmentManager._mockedFileUploadDownloadClient.uploadToSegmentStore(serverUploadRequestUrl1)).thenThrow( + new HttpErrorStatusException("failed to upload segment", + Response.Status.INTERNAL_SERVER_ERROR.getStatusCode())); // Change 3rd segment status to be DONE, but with default peer download url. // Verify later the download url isn't fixed because no ONLINE replica found in any server. segmentsZKMetadata.get(2).setStatus(Status.DONE); - segmentsZKMetadata.get(2).setDownloadUrl( - METADATA_URI_FOR_PEER_DOWNLOAD); + segmentsZKMetadata.get(2).setDownloadUrl(METADATA_URI_FOR_PEER_DOWNLOAD); // set up the external view for 3rd segment String instance2 = "instance2"; externalView.setState(segmentsZKMetadata.get(2).getSegmentName(), instance2, "OFFLINE"); @@ -1029,11 +1013,9 @@ public void testUploadToSegmentStore() // Keep 5th segment status as IN_PROGRESS. - List segmentNames = segmentsZKMetadata.stream() - .map(SegmentZKMetadata::getSegmentName).collect(Collectors.toList()); - when(pinotHelixResourceManager.getTableConfig(REALTIME_TABLE_NAME)) - .thenReturn(segmentManager._tableConfig); - + List segmentNames = + segmentsZKMetadata.stream().map(SegmentZKMetadata::getSegmentName).collect(Collectors.toList()); + when(pinotHelixResourceManager.getTableConfig(REALTIME_TABLE_NAME)).thenReturn(segmentManager._tableConfig); // Verify the result segmentManager.uploadToDeepStoreIfMissing(segmentManager._tableConfig, segmentsZKMetadata); @@ -1042,23 +1024,18 @@ public void testUploadToSegmentStore() TestUtils.waitForCondition(aVoid -> segmentManager.deepStoreUploadExecutorPendingSegmentsIsEmpty(), 30_000L, "Timed out waiting for upload retry tasks to finish"); - assertEquals( - segmentManager.getSegmentZKMetadata(REALTIME_TABLE_NAME, segmentNames.get(0), null).getDownloadUrl(), + assertEquals(segmentManager.getSegmentZKMetadata(REALTIME_TABLE_NAME, segmentNames.get(0), null).getDownloadUrl(), expectedSegmentLocation); assertFalse(tempSegmentFileLocation.exists(), 
"Deep-store retry task should move the file from temp location to permanent location"); - assertEquals( - segmentManager.getSegmentZKMetadata(REALTIME_TABLE_NAME, segmentNames.get(1), null).getDownloadUrl(), + assertEquals(segmentManager.getSegmentZKMetadata(REALTIME_TABLE_NAME, segmentNames.get(1), null).getDownloadUrl(), METADATA_URI_FOR_PEER_DOWNLOAD); - assertEquals( - segmentManager.getSegmentZKMetadata(REALTIME_TABLE_NAME, segmentNames.get(2), null).getDownloadUrl(), + assertEquals(segmentManager.getSegmentZKMetadata(REALTIME_TABLE_NAME, segmentNames.get(2), null).getDownloadUrl(), METADATA_URI_FOR_PEER_DOWNLOAD); - assertEquals( - segmentManager.getSegmentZKMetadata(REALTIME_TABLE_NAME, segmentNames.get(3), null).getDownloadUrl(), + assertEquals(segmentManager.getSegmentZKMetadata(REALTIME_TABLE_NAME, segmentNames.get(3), null).getDownloadUrl(), defaultDownloadUrl); - assertNull( - segmentManager.getSegmentZKMetadata(REALTIME_TABLE_NAME, segmentNames.get(4), null).getDownloadUrl()); + assertNull(segmentManager.getSegmentZKMetadata(REALTIME_TABLE_NAME, segmentNames.get(4), null).getDownloadUrl()); } @Test diff --git a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/BaseTableDataManager.java b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/BaseTableDataManager.java index c46a85690dc5..1237db547a08 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/BaseTableDataManager.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/BaseTableDataManager.java @@ -153,6 +153,13 @@ public void init(InstanceDataManagerConfig instanceDataManagerConfig, TableConfi if (_peerDownloadScheme == null) { _peerDownloadScheme = instanceDataManagerConfig.getSegmentPeerDownloadScheme(); } + if (_peerDownloadScheme != null) { + _peerDownloadScheme = _peerDownloadScheme.toLowerCase(); + Preconditions.checkState( + CommonConstants.HTTP_PROTOCOL.equals(_peerDownloadScheme) || CommonConstants.HTTPS_PROTOCOL.equals( + 
_peerDownloadScheme), "Unsupported peer download scheme: %s for table: %s", _peerDownloadScheme, + _tableNameWithType); + } _streamSegmentDownloadUntarRateLimitBytesPerSec = instanceDataManagerConfig.getStreamSegmentDownloadUntarRateLimit(); @@ -691,7 +698,7 @@ protected void downloadFromPeersWithoutStreaming(String segmentName, SegmentZKMe throws Exception { Preconditions.checkState(_peerDownloadScheme != null, "Download peers require non null peer download scheme"); List peerSegmentURIs = - PeerServerSegmentFinder.getPeerServerURIs(segmentName, _peerDownloadScheme, _helixManager, _tableNameWithType); + PeerServerSegmentFinder.getPeerServerURIs(_helixManager, _tableNameWithType, segmentName, _peerDownloadScheme); if (peerSegmentURIs.isEmpty()) { String msg = String.format("segment %s doesn't have any peers", segmentName); LOGGER.warn(msg); diff --git a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/RealtimeTableDataManager.java b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/RealtimeTableDataManager.java index 8e50049028e9..b120867d6bc3 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/RealtimeTableDataManager.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/RealtimeTableDataManager.java @@ -635,17 +635,15 @@ void downloadAndReplaceSegment(String segmentName, SegmentZKMetadata segmentZKMe } catch (Exception e) { _logger.warn("Download segment {} from deepstore uri {} failed.", segmentName, uri, e); // Download from deep store failed; try to download from peer if peer download is setup for the table. 
- if (isPeerSegmentDownloadEnabled(tableConfig)) { - downloadSegmentFromPeer(segmentName, tableConfig.getValidationConfig().getPeerSegmentDownloadScheme(), - indexLoadingConfig); + if (_peerDownloadScheme != null) { + downloadSegmentFromPeer(segmentName, indexLoadingConfig); } else { throw e; } } } else { - if (isPeerSegmentDownloadEnabled(tableConfig)) { - downloadSegmentFromPeer(segmentName, tableConfig.getValidationConfig().getPeerSegmentDownloadScheme(), - indexLoadingConfig); + if (_peerDownloadScheme != null) { + downloadSegmentFromPeer(segmentName, indexLoadingConfig); } else { throw new RuntimeException("Peer segment download not enabled for segment " + segmentName); } @@ -687,23 +685,16 @@ private void untarAndMoveSegment(String segmentName, IndexLoadingConfig indexLoa replaceLLSegment(segmentName, indexLoadingConfig); } - private boolean isPeerSegmentDownloadEnabled(TableConfig tableConfig) { - return - CommonConstants.HTTP_PROTOCOL.equalsIgnoreCase(tableConfig.getValidationConfig().getPeerSegmentDownloadScheme()) - || CommonConstants.HTTPS_PROTOCOL.equalsIgnoreCase( - tableConfig.getValidationConfig().getPeerSegmentDownloadScheme()); - } - - private void downloadSegmentFromPeer(String segmentName, String downloadScheme, - IndexLoadingConfig indexLoadingConfig) { + private void downloadSegmentFromPeer(String segmentName, IndexLoadingConfig indexLoadingConfig) { File tempRootDir = null; try { tempRootDir = getTmpSegmentDataDir("tmp-" + segmentName + "." + System.currentTimeMillis()); File segmentTarFile = new File(tempRootDir, segmentName + TarGzCompressionUtils.TAR_GZ_FILE_EXTENSION); // Next download the segment from a randomly chosen server using configured download scheme (http or https). 
- SegmentFetcherFactory.getSegmentFetcher(downloadScheme).fetchSegmentToLocal(segmentName, () -> { + SegmentFetcherFactory.getSegmentFetcher(_peerDownloadScheme).fetchSegmentToLocal(segmentName, () -> { List peerServerURIs = - PeerServerSegmentFinder.getPeerServerURIs(segmentName, downloadScheme, _helixManager); + PeerServerSegmentFinder.getPeerServerURIs(_helixManager, _tableNameWithType, segmentName, + _peerDownloadScheme); Collections.shuffle(peerServerURIs); return peerServerURIs; }, segmentTarFile); @@ -711,7 +702,8 @@ private void downloadSegmentFromPeer(String segmentName, String downloadScheme, segmentTarFile.length()); untarAndMoveSegment(segmentName, indexLoadingConfig, segmentTarFile, tempRootDir); } catch (Exception e) { - _logger.warn("Download and move segment {} from peer with scheme {} failed.", segmentName, downloadScheme, e); + _logger.warn("Download and move segment {} from peer with scheme {} failed.", segmentName, _peerDownloadScheme, + e); throw new RuntimeException(e); } finally { FileUtils.deleteQuietly(tempRootDir); diff --git a/pinot-core/src/test/java/org/apache/pinot/core/data/manager/BaseTableDataManagerTest.java b/pinot-core/src/test/java/org/apache/pinot/core/data/manager/BaseTableDataManagerTest.java index d4c5f4fc298c..261fe0f23885 100644 --- a/pinot-core/src/test/java/org/apache/pinot/core/data/manager/BaseTableDataManagerTest.java +++ b/pinot-core/src/test/java/org/apache/pinot/core/data/manager/BaseTableDataManagerTest.java @@ -660,8 +660,8 @@ public void testDownloadFromPeersWithoutStreaming() File destFile = new File(tempRootDir, "seg01" + TarGzCompressionUtils.TAR_GZ_FILE_EXTENSION); try (MockedStatic mockPeerSegFinder = mockStatic(PeerServerSegmentFinder.class)) { mockPeerSegFinder.when( - () -> PeerServerSegmentFinder.getPeerServerURIs("seg01", "http", helixManager, TABLE_NAME_WITH_TYPE)) - .thenReturn(Collections.singletonList(uri)); + () -> PeerServerSegmentFinder.getPeerServerURIs(helixManager, TABLE_NAME_WITH_TYPE, 
"seg01", + CommonConstants.HTTP_PROTOCOL)).thenReturn(List.of(uri)); tmgr.downloadFromPeersWithoutStreaming("seg01", mock(SegmentZKMetadata.class), destFile); } assertEquals(FileUtils.readFileToString(destFile), "this is from somewhere remote"); diff --git a/pinot-core/src/test/java/org/apache/pinot/core/util/PeerServerSegmentFinderTest.java b/pinot-core/src/test/java/org/apache/pinot/core/util/PeerServerSegmentFinderTest.java index 4b6c6fb910c3..2af972695ffa 100644 --- a/pinot-core/src/test/java/org/apache/pinot/core/util/PeerServerSegmentFinderTest.java +++ b/pinot-core/src/test/java/org/apache/pinot/core/util/PeerServerSegmentFinderTest.java @@ -19,103 +19,93 @@ package org.apache.pinot.core.util; import java.net.URI; -import java.util.ArrayList; -import java.util.HashMap; import java.util.List; -import java.util.Map; import org.apache.helix.HelixAdmin; import org.apache.helix.HelixManager; import org.apache.helix.model.ExternalView; -import org.apache.helix.model.HelixConfigScope; import org.apache.helix.model.InstanceConfig; import org.apache.pinot.spi.utils.CommonConstants; -import org.apache.pinot.spi.utils.StringUtil; -import org.testng.Assert; +import org.apache.pinot.spi.utils.CommonConstants.Helix.Instance; import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; -import static org.mockito.ArgumentMatchers.any; -import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertTrue; public class PeerServerSegmentFinderTest { - private static final String TABLE_NAME_WITH_TYPE = "testTable_REALTIME"; - private static final String SEGMENT_1 = "testTable__0__0__t11"; - private static final String SEGMENT_2 = "testTable__0__1__t11"; - private static final String CLUSTER_NAME = "dummyCluster"; - private static final String INSTANCE_ID1 = "Server_localhost_1000"; - private static final String 
INSTANCE_ID2 = "Server_localhost_1001"; - private static final String INSTANCE_ID3 = "Server_localhost_1003"; - public static final String ADMIN_PORT = "1008"; - public static final String HOST_1_NAME = "s1"; - public static final String HOST_2_NAME = "s2"; - public static final String HOST_3_NAME = "s3"; + private static final String CLUSTER_NAME = "testCluster"; + private static final String REALTIME_TABLE_NAME = "testTable_REALTIME"; + private static final String SEGMENT_1 = "testSegment1"; + private static final String SEGMENT_2 = "testSegment2"; + private static final String INSTANCE_ID_1 = "Server_s1_1007"; + private static final String INSTANCE_ID_2 = "Server_s2_1007"; + private static final String INSTANCE_ID_3 = "Server_s3_1007"; + private static final String HOSTNAME_1 = "s1"; + private static final String HOSTNAME_2 = "s2"; + private static final String HOSTNAME_3 = "s3"; + private static final int HELIX_PORT = 1007; + private static final int HTTP_ADMIN_PORT = 1008; + private static final int HTTPS_ADMIN_PORT = 1009; + private HelixManager _helixManager; @BeforeClass - public void initSegmentFetcherFactoryWithPeerServerSegmentFetcher() - throws Exception { - HelixAdmin helixAdmin; - { - ExternalView ev = new ExternalView(TABLE_NAME_WITH_TYPE); - ev.setState(SEGMENT_1, INSTANCE_ID1, "ONLINE"); - ev.setState(SEGMENT_1, INSTANCE_ID2, "OFFLINE"); - ev.setState(SEGMENT_1, INSTANCE_ID3, "ONLINE"); - ev.setState(SEGMENT_2, INSTANCE_ID1, "OFFLINE"); - ev.setState(SEGMENT_2, INSTANCE_ID2, "OFFLINE"); - _helixManager = mock(HelixManager.class); - helixAdmin = mock(HelixAdmin.class); - when(_helixManager.getClusterManagmentTool()).thenReturn(helixAdmin); - when(_helixManager.getClusterName()).thenReturn(CLUSTER_NAME); - when(helixAdmin.getResourceExternalView(CLUSTER_NAME, TABLE_NAME_WITH_TYPE)).thenReturn(ev); - when(helixAdmin.getConfigKeys(any(HelixConfigScope.class))).thenReturn(new ArrayList<>()); - Map instanceConfigMap = new HashMap<>(); - 
instanceConfigMap.put(CommonConstants.Helix.Instance.ADMIN_PORT_KEY, ADMIN_PORT); - when(helixAdmin.getConfig(any(HelixConfigScope.class), any(List.class))).thenReturn(instanceConfigMap); - InstanceConfig instanceConfig1 = new InstanceConfig(INSTANCE_ID1); - instanceConfig1.setHostName(HOST_1_NAME); - instanceConfig1.setPort("1000"); - when(helixAdmin.getInstanceConfig(any(String.class), eq(INSTANCE_ID1))).thenReturn(instanceConfig1); + public void initSegmentFetcherFactoryWithPeerServerSegmentFetcher() { + ExternalView externalView = new ExternalView(REALTIME_TABLE_NAME); + externalView.setState(SEGMENT_1, INSTANCE_ID_1, "ONLINE"); + externalView.setState(SEGMENT_1, INSTANCE_ID_2, "OFFLINE"); + externalView.setState(SEGMENT_1, INSTANCE_ID_3, "ONLINE"); + externalView.setState(SEGMENT_2, INSTANCE_ID_1, "OFFLINE"); + externalView.setState(SEGMENT_2, INSTANCE_ID_2, "OFFLINE"); - InstanceConfig instanceConfig2 = new InstanceConfig(INSTANCE_ID2); - instanceConfig2.setHostName(HOST_2_NAME); - instanceConfig2.setPort("1000"); - when(helixAdmin.getInstanceConfig(any(String.class), eq(INSTANCE_ID2))).thenReturn(instanceConfig2); + _helixManager = mock(HelixManager.class); + HelixAdmin helixAdmin = mock(HelixAdmin.class); + when(_helixManager.getClusterManagmentTool()).thenReturn(helixAdmin); + when(_helixManager.getClusterName()).thenReturn(CLUSTER_NAME); + when(helixAdmin.getResourceExternalView(CLUSTER_NAME, REALTIME_TABLE_NAME)).thenReturn(externalView); + when(helixAdmin.getInstanceConfig(CLUSTER_NAME, INSTANCE_ID_1)).thenReturn( + getInstanceConfig(INSTANCE_ID_1, HOSTNAME_1)); + when(helixAdmin.getInstanceConfig(CLUSTER_NAME, INSTANCE_ID_2)).thenReturn( + getInstanceConfig(INSTANCE_ID_2, HOSTNAME_2)); + when(helixAdmin.getInstanceConfig(CLUSTER_NAME, INSTANCE_ID_3)).thenReturn( + getInstanceConfig(INSTANCE_ID_3, HOSTNAME_3)); + } - InstanceConfig instanceConfig3 = new InstanceConfig(INSTANCE_ID3); - instanceConfig3.setHostName(HOST_3_NAME); - 
instanceConfig3.setPort("1000"); - when(helixAdmin.getInstanceConfig(any(String.class), eq(INSTANCE_ID3))).thenReturn(instanceConfig3); - } + private static InstanceConfig getInstanceConfig(String instanceId, String hostName) { + InstanceConfig instanceConfig = new InstanceConfig(instanceId); + instanceConfig.setHostName(hostName); + instanceConfig.setPort(Integer.toString(HELIX_PORT)); + instanceConfig.getRecord().setIntField(Instance.ADMIN_PORT_KEY, HTTP_ADMIN_PORT); + instanceConfig.getRecord().setIntField(Instance.ADMIN_HTTPS_PORT_KEY, HTTPS_ADMIN_PORT); + return instanceConfig; } @Test public void testSegmentFoundSuccessfully() throws Exception { // SEGMENT_1 has only 2 online replicas. - List httpServerURIs = - PeerServerSegmentFinder.getPeerServerURIs(SEGMENT_1, CommonConstants.HTTP_PROTOCOL, _helixManager); - assertEquals(2, httpServerURIs.size()); - httpServerURIs.contains(new URI( - StringUtil.join("/", "http://" + HOST_1_NAME + ":" + ADMIN_PORT, "segments", TABLE_NAME_WITH_TYPE, SEGMENT_1))); - httpServerURIs.contains(new URI( - StringUtil.join("/", "http://" + HOST_3_NAME + ":" + ADMIN_PORT, "segments", TABLE_NAME_WITH_TYPE, SEGMENT_1))); - List httpsServerURIs = - PeerServerSegmentFinder.getPeerServerURIs(SEGMENT_1, CommonConstants.HTTPS_PROTOCOL, _helixManager); - assertEquals(2, httpsServerURIs.size()); - httpServerURIs.contains(new URI(StringUtil - .join("/", "https://" + HOST_1_NAME + ":" + ADMIN_PORT, "segments", TABLE_NAME_WITH_TYPE, SEGMENT_1))); - httpServerURIs.contains(new URI(StringUtil - .join("/", "https://" + HOST_3_NAME + ":" + ADMIN_PORT, "segments", TABLE_NAME_WITH_TYPE, SEGMENT_1))); + List httpServerURIs = PeerServerSegmentFinder.getPeerServerURIs(_helixManager, REALTIME_TABLE_NAME, SEGMENT_1, + CommonConstants.HTTP_PROTOCOL); + assertEquals(httpServerURIs.size(), 2); + assertTrue(httpServerURIs.contains(new URI( + String.format("http://%s:%d/segments/%s/%s", HOSTNAME_1, HTTP_ADMIN_PORT, REALTIME_TABLE_NAME, SEGMENT_1)))); + 
assertTrue(httpServerURIs.contains(new URI( + String.format("http://%s:%d/segments/%s/%s", HOSTNAME_3, HTTP_ADMIN_PORT, REALTIME_TABLE_NAME, SEGMENT_1)))); + List httpsServerURIs = PeerServerSegmentFinder.getPeerServerURIs(_helixManager, REALTIME_TABLE_NAME, SEGMENT_1, + CommonConstants.HTTPS_PROTOCOL); + assertEquals(httpsServerURIs.size(), 2); + assertTrue(httpsServerURIs.contains(new URI( + String.format("https://%s:%d/segments/%s/%s", HOSTNAME_1, HTTPS_ADMIN_PORT, REALTIME_TABLE_NAME, SEGMENT_1)))); + assertTrue(httpsServerURIs.contains(new URI( + String.format("https://%s:%d/segments/%s/%s", HOSTNAME_3, HTTPS_ADMIN_PORT, REALTIME_TABLE_NAME, SEGMENT_1)))); } @Test - public void testSegmentNotFound() - throws Exception { - Assert.assertEquals(0, - PeerServerSegmentFinder.getPeerServerURIs(SEGMENT_2, CommonConstants.HTTP_PROTOCOL, _helixManager).size()); + public void testSegmentNotFound() { + assertTrue(PeerServerSegmentFinder.getPeerServerURIs(_helixManager, REALTIME_TABLE_NAME, SEGMENT_2, + CommonConstants.HTTP_PROTOCOL).isEmpty()); } } diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/utils/retry/ExponentialBackoffRetryPolicy.java b/pinot-spi/src/main/java/org/apache/pinot/spi/utils/retry/ExponentialBackoffRetryPolicy.java index 6151aab06f96..e5b9b7dc1a52 100644 --- a/pinot-spi/src/main/java/org/apache/pinot/spi/utils/retry/ExponentialBackoffRetryPolicy.java +++ b/pinot-spi/src/main/java/org/apache/pinot/spi/utils/retry/ExponentialBackoffRetryPolicy.java @@ -39,8 +39,8 @@ public ExponentialBackoffRetryPolicy(int maxNumAttempts, long initialDelayMs, do @Override protected long getDelayMs(int currentAttempt) { - double minDelayMs = _initialDelayMs * Math.pow(_delayScaleFactor, currentAttempt); - double maxDelayMs = minDelayMs * _delayScaleFactor; - return _random.nextLong((long) minDelayMs, (long) maxDelayMs); + long minDelayMs = (long) (_initialDelayMs * Math.pow(_delayScaleFactor, currentAttempt)); + long maxDelayMs = (long) (minDelayMs * 
_delayScaleFactor); + return minDelayMs < maxDelayMs ? _random.nextLong(minDelayMs, maxDelayMs) : minDelayMs; } } From 3c45469c3962f726b3c12cc8c9bbd2bd15838798 Mon Sep 17 00:00:00 2001 From: Xiang Fu Date: Tue, 16 Apr 2024 12:47:19 +0800 Subject: [PATCH 020/102] Update superset docker build script (#12385) --- .../.superset_docker_image_build_and_push.sh | 2 +- ..._multi_arch_docker_image_build_and_push.sh | 59 +++++++++++++++++++ 2 files changed, 60 insertions(+), 1 deletion(-) create mode 100755 .github/workflows/scripts/docker/.superset_multi_arch_docker_image_build_and_push.sh diff --git a/.github/workflows/scripts/docker/.superset_docker_image_build_and_push.sh b/.github/workflows/scripts/docker/.superset_docker_image_build_and_push.sh index 700b8e01a1be..a82997e2d842 100755 --- a/.github/workflows/scripts/docker/.superset_docker_image_build_and_push.sh +++ b/.github/workflows/scripts/docker/.superset_docker_image_build_and_push.sh @@ -49,7 +49,7 @@ done cd ${DOCKER_FILE_BASE_DIR} -docker buildx build \ +docker build \ --no-cache \ --platform=${BUILD_PLATFORM} \ --file Dockerfile \ diff --git a/.github/workflows/scripts/docker/.superset_multi_arch_docker_image_build_and_push.sh b/.github/workflows/scripts/docker/.superset_multi_arch_docker_image_build_and_push.sh new file mode 100755 index 000000000000..a82997e2d842 --- /dev/null +++ b/.github/workflows/scripts/docker/.superset_multi_arch_docker_image_build_and_push.sh @@ -0,0 +1,59 @@ +#!/bin/bash -x +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +if [ -z "${DOCKER_IMAGE_NAME}" ]; then + DOCKER_IMAGE_NAME="apachepinot/pinot-superset" +fi +if [ -z "${SUPERSET_IMAGE_TAG}" ]; then + SUPERSET_IMAGE_TAG="latest" +fi +if [ -z "${BUILD_PLATFORM}" ]; then + BUILD_PLATFORM="linux/amd64" +fi + +DATE=`date +%Y%m%d` +docker pull apache/superset:${SUPERSET_IMAGE_TAG} +COMMIT_ID=`docker images apache/superset:${SUPERSET_IMAGE_TAG} --format "{{.ID}}"` + +tags=() +if [ -z "${TAGS}" ]; then + tags=("${COMMIT_ID}-${DATE}") + tags+=("latest") +else + declare -a tags=($(echo ${TAGS} | tr "," " ")) +fi + +DOCKER_BUILD_TAGS="" +for tag in "${tags[@]}" +do + echo "Plan to build and push docker images for: ${DOCKER_IMAGE_NAME}:${tag}" + DOCKER_BUILD_TAGS+=" --tag ${DOCKER_IMAGE_NAME}:${tag} " +done + +cd ${DOCKER_FILE_BASE_DIR} + +docker build \ + --no-cache \ + --platform=${BUILD_PLATFORM} \ + --file Dockerfile \ + --build-arg SUPERSET_IMAGE_TAG=${SUPERSET_IMAGE_TAG} \ + ${DOCKER_BUILD_TAGS} \ + --push \ + . 
From 1393750622462b8a80dfc0550de3d3723f2bba26 Mon Sep 17 00:00:00 2001 From: Chaitanya Deepthi <45308220+deepthi912@users.noreply.github.com> Date: Tue, 16 Apr 2024 02:39:15 -0400 Subject: [PATCH 021/102] Allow Server throttling just before executing queries on server to allow max CPU and disk utilization (#12930) Co-authored-by: deepthi912 --- .../pinot/server/starter/helix/BaseServerStarter.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/BaseServerStarter.java b/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/BaseServerStarter.java index f16b32659165..02c7b81ea5eb 100644 --- a/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/BaseServerStarter.java +++ b/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/BaseServerStarter.java @@ -578,9 +578,6 @@ public void start() _serverInstance = new ServerInstance(serverConf, _helixManager, accessControlFactory); ServerMetrics serverMetrics = _serverInstance.getServerMetrics(); - // Enable Server level realtime ingestion rate limier - RealtimeConsumptionRateManager.getInstance().createServerRateLimiter(_serverConf, serverMetrics); - InstanceDataManager instanceDataManager = _serverInstance.getInstanceDataManager(); instanceDataManager.setSupplierOfIsServerReadyToServeQueries(() -> _isServerReadyToServeQueries); // initialize the thread accountant for query killing @@ -639,6 +636,9 @@ public void start() preServeQueries(); + // Enable Server level realtime ingestion rate limier + RealtimeConsumptionRateManager.getInstance().createServerRateLimiter(_serverConf, serverMetrics); + // Start the query server after finishing the service status check. If the query server is started before all the // segments are loaded, broker might not have finished processing the callback of routing table update, and start // querying the server pre-maturely. 
From 9b4ec33d685ef7d357b57095505b76df21e65293 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 16 Apr 2024 10:06:16 -0700 Subject: [PATCH 022/102] Bump org.scala-lang.modules:scala-xml_2.12 from 1.3.0 to 2.3.0 (#12939) --- pinot-connectors/pinot-spark-2-connector/pom.xml | 2 +- pinot-connectors/pinot-spark-common/pom.xml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pinot-connectors/pinot-spark-2-connector/pom.xml b/pinot-connectors/pinot-spark-2-connector/pom.xml index c2a0ea3f4f5d..58e307221b71 100644 --- a/pinot-connectors/pinot-spark-2-connector/pom.xml +++ b/pinot-connectors/pinot-spark-2-connector/pom.xml @@ -35,7 +35,7 @@ ${basedir}/../.. 2.4.8 2.8 - 1.3.0 + 2.3.0 3.2.18 org.apache.pinot.\$internal diff --git a/pinot-connectors/pinot-spark-common/pom.xml b/pinot-connectors/pinot-spark-common/pom.xml index 5c8f812cf31b..ec708e5d5c5e 100644 --- a/pinot-connectors/pinot-spark-common/pom.xml +++ b/pinot-connectors/pinot-spark-common/pom.xml @@ -35,7 +35,7 @@ ${basedir}/../.. 
0.14.6 2.8 - 1.3.0 + 2.3.0 3.2.18 From b264512c0ca335ae83afe15d004c48010264cfda Mon Sep 17 00:00:00 2001 From: Xiang Fu Date: Wed, 17 Apr 2024 01:06:39 +0800 Subject: [PATCH 023/102] Bump org.codehaus.mojo:buildnumber-maven-plugin from 1.3 to 3.2.0 (#12937) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 0b27acf9478f..bcf5614a39f0 100644 --- a/pom.xml +++ b/pom.xml @@ -2282,7 +2282,7 @@ org.codehaus.mojo buildnumber-maven-plugin - 1.3 + 3.2.0 validate From d4cb93dbda1e268e0c064b1b3c62e8b81599db32 Mon Sep 17 00:00:00 2001 From: Shounak kulkarni Date: Tue, 16 Apr 2024 23:27:40 +0500 Subject: [PATCH 024/102] Fix metric rule pattern regex (#12856) --- .../configs/broker.yml | 68 +++--- .../configs/controller.yml | 80 +++---- .../configs/minion.yml | 10 +- .../configs/pinot.yml | 220 +++++++++--------- .../configs/server.yml | 60 ++--- 5 files changed, 219 insertions(+), 219 deletions(-) diff --git a/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/broker.yml b/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/broker.yml index ce5e5df7b9b0..5c7a6cc8a900 100644 --- a/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/broker.yml +++ b/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/broker.yml @@ -1,173 +1,173 @@ rules: -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_broker_authorization_$4" cache: true labels: database: "$2" table: "$1$3" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_broker_documentsScanned_$4" cache: true labels: database: "$2" table: "$1$3" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_broker_entriesScannedInFilter_$4" cache: true labels: database: "$2" table: "$1$3" -- pattern: 
"\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_broker_entriesScannedPostFilter_$4" cache: true labels: database: "$2" table: "$1$3" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_broker_freshnessLagMs_$4" cache: true labels: database: "$2" table: "$1$3" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_broker_queries_$4" cache: true labels: database: "$2" table: "$1$3" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_broker_queryExecution_$4" cache: true labels: database: "$2" table: "$1$3" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_broker_queryRouting_$4" cache: true labels: database: "$2" table: "$1$3" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_broker_reduce_$4" cache: true labels: database: "$2" table: "$1$3" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_broker_requestCompilation_$4" cache: true labels: database: "$2" table: "$1$3" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_broker_scatterGather_$4" cache: true labels: database: "$2" table: "$1$3" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_broker_totalServerResponseSize_$4" cache: true labels: database: "$2" table: "$1$3" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: 
"\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_broker_groupBySize_$5" cache: true labels: database: "$2" table: "$1$3" tableType: "$4" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_broker_noServingHostForSegment_$5" cache: true labels: database: "$2" table: "$1$3" tableType: "$4" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_broker_healthcheck_$1_$2" cache: true -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_broker_helix_$1_$2" cache: true -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_broker_helix_zookeeper_$1_$2" cache: true -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_broker_nettyConnection_$1_$2" cache: true -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_broker_unhealthyServers_$1" cache: true -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_broker_clusterChangeCheck_$1" cache: true -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_broker_proactiveClusterChangeCheck_$1" cache: true -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_broker_exceptions_$1_$2" cache: true -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_broker_routingTableUpdateTime_$1" cache: true -- 
pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_broker_brokerResponsesWithPartialServersResponded_$4" cache: true labels: database: "$2" table: "$1$3" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_broker_brokerResponsesWithTimeouts_$4" cache: true labels: database: "$2" table: "$1$3" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_broker_noServerFoundExceptions_$4" cache: true labels: database: "$2" table: "$1$3" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_broker_brokerResponsesWithProcessingExceptions_$4" cache: true labels: database: "$2" table: "$1$3" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_broker_brokerResponsesWithNumGroupsLimitReached_$4" cache: true labels: database: "$2" table: "$1$3" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_broker_queryQuotaExceeded_$4" cache: true labels: database: "$2" table: "$1$3" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_broker_queryTotalTimeMs_$4" cache: true labels: database: "$2" table: "$1$3" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_broker_serverMissingForRouting_$5" cache: true labels: database: "$2" table: "$1$3" tableType: "$4" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_broker_deserialization_$4" cache: 
true labels: database: "$2" table: "$1$3" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_broker_requestConnectionWait_$4" cache: true labels: @@ -207,7 +207,7 @@ rules: table: "$3$5" tableType: "$6" #This is a catch-all pattern for pinot table metrics with offline/realtime suffix that also contain kafka topic -- pattern: "\"?org\\.apache\\.pinot\\.common\\.metrics\"?<>(\\w+)" +- pattern: "\"?org\\.apache\\.pinot\\.common\\.metrics\"?<>(\\w+)" name: "pinot_$1_$2_$9" cache: true labels: diff --git a/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/controller.yml b/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/controller.yml index 44b07d1718ef..a441f714e4a6 100644 --- a/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/controller.yml +++ b/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/controller.yml @@ -1,138 +1,138 @@ rules: -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_$1_$2" cache: true -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_helix_$1_$2" cache: true -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_helix_ZookeeperReconnects_$1" cache: true -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_idealstateZnodeSize_$5" cache: true labels: database: "$2" table: "$1$3" tableType: "$4" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_idealstateZnodeByteSize_$5" cache: true labels: database: "$2" table: "$1$3" tableType: "$4" -- pattern: 
"\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_replicationFromConfig_$5" cache: true labels: database: "$2" table: "$1$3" tableType: "$4" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_numberOfReplicas_$5" cache: true labels: database: "$2" table: "$1$3" tableType: "$4" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_percentOfReplicas_$5" cache: true labels: database: "$2" table: "$1$3" tableType: "$4" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_percentSegmentsAvailable_$5" cache: true labels: database: "$2" table: "$1$3" tableType: "$4" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_segmentCount_$5" cache: true labels: database: "$2" table: "$1$3" tableType: "$4" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_segmentsInErrorState_$5" cache: true labels: database: "$2" table: "$1$3" tableType: "$4" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_numberSegmentUploadTimeoutExceeded_$1" cache: true -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_numberTimesScheduleTasksCalled_$1" cache: true -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_periodicTaskNumTablesProcessed_$1_$2" cache: true 
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_pinotControllerLeader_$1" cache: true -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_partitionLeader_$1_$2" cache: true -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_realtimeTableCount_$1" cache: true -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_offlineTableCount_$1" cache: true -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_validateion_$4_$5" cache: true labels: database: "$2" table: "$1$3" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_cronSchedulerJobScheduled_$5" cache: true labels: database: "$2" table: "$1$3" taskType: "$4" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_cronSchedulerJobTriggered_$5" cache: true labels: database: "$2" table: "$1$3" taskType: "$4" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_cronSchedulerJobSkipped_$5" cache: true labels: database: "$2" table: "$1$3" taskType: "$4" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_cronSchedulerJobExecutionTimeMs_$5" cache: true labels: database: "$2" table: "$1$3" taskType: "$4" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: 
"\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_tableRebalanceExecutionTimeMs_$5" cache: true labels: database: "$2" table: "$1$3" result: "$4" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_taskStatus_$3" cache: true labels: taskType: "$1" status: "$2" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_timeMsSinceLastMinionTaskMetadataUpdate_$6" cache: true labels: @@ -140,7 +140,7 @@ rules: table: "$1$3" tableType: "$4" taskType: "$5" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_$1_$7" cache: true labels: @@ -148,12 +148,12 @@ rules: table: "$2$4" tableType: "$5" taskType: "$6" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_$1_$3" cache: true labels: taskType: "$2" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_timeMsSinceLastSuccessfulMinionTaskGeneration_$6" cache: true labels: @@ -161,7 +161,7 @@ rules: table: "$1$3" tableType: "$4" taskType: "$5" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_lastMinionTaskGenerationEncountersError_$6" cache: true labels: @@ -169,23 +169,23 @@ rules: table: "$1$3" tableType: "$4" taskType: "$5" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_pinotLeadControllerResourceEnabled_$1" cache: true -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: 
"\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_offlineTableEstimatedSize_$4" cache: true labels: database: "$2" table: "$1$3" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_tableQuota_$5" cache: true labels: database: "$2" table: "$1$3" tableType: "$4" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_periodicTaskError_$6" cache: true labels: @@ -193,33 +193,33 @@ rules: table: "$1$3" tableType: "$4" periodicTask: "$5" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_tableStorageQuotaUtilization_$5" cache: true labels: database: "$2" table: "$1$3" tableType: "$4" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_tableStorageEstMissingSegmentPercent_$5" cache: true labels: database: "$2" table: "$1$3" tableType: "$4" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_tableTotalSizeOnServer_$5" labels: database: "$2" table: "$1$3" tableType: "$4" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_tableSizePerReplicaOnServer_$5" labels: database: "$2" table: "$1$3" tableType: "$4" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_tableCompressedSize_$5" labels: database: "$2" @@ -251,7 +251,7 @@ rules: table: "$2$4" tableType: "$5" #This is a catch-all pattern for pinot table metrics with offline/realtime suffix that also contain kafka topic -- pattern: 
"\"?org\\.apache\\.pinot\\.common\\.metrics\"?<>(\\w+)" +- pattern: "\"?org\\.apache\\.pinot\\.common\\.metrics\"?<>(\\w+)" name: "pinot_$1_$2_$9" cache: true labels: diff --git a/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/minion.yml b/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/minion.yml index 3541cc8f3669..4d21107a1b9f 100644 --- a/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/minion.yml +++ b/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/minion.yml @@ -1,17 +1,17 @@ rules: -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_minion_version" cache: true labels: version: "$1" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_minion_numberOfTasks_$5" cache: true labels: database: "$2" table: "$1$3" tableType: "$4" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_minion_$6_$7" cache: true labels: @@ -19,10 +19,10 @@ rules: table: "$1$3" tableType: "$4" taskType: "$5" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_minion_$1_$2" cache: true -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_minion_$2_$3" cache: true labels: diff --git a/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/pinot.yml b/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/pinot.yml index 2739fb557d27..b530a79b363b 100644 --- a/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/pinot.yml +++ b/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/pinot.yml @@ -5,140 +5,140 @@ rules: # Pinot Controller -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: 
"\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_$1_$2" cache: true -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_helix_$1_$2" cache: true -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_helix_ZookeeperReconnects_$1" cache: true -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_idealstateZnodeSize_$5" cache: true labels: database: "$2" table: "$1$3" tableType: "$4" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_idealstateZnodeByteSize_$5" cache: true labels: database: "$2" table: "$1$3" tableType: "$4" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_replicationFromConfig_$5" cache: true labels: database: "$2" table: "$1$3" tableType: "$4" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_numberOfReplicas_$5" cache: true labels: database: "$2" table: "$1$3" tableType: "$4" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_percentOfReplicas_$5" cache: true labels: database: "$2" table: "$1$3" tableType: "$4" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_percentSegmentsAvailable_$5" cache: true labels: database: "$2" table: "$1$3" tableType: "$4" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: 
"\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_segmentCount_$5" cache: true labels: database: "$2" table: "$1$3" tableType: "$4" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_segmentsInErrorState_$5" cache: true labels: database: "$2" table: "$1$3" tableType: "$4" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_numberSegmentUploadTimeoutExceeded_$1" cache: true -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_numberTimesScheduleTasksCalled_$1" cache: true -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_periodicTaskNumTablesProcessed_$1_$2" cache: true -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_pinotControllerLeader_$1" cache: true -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_partitionLeader_$1_$2" cache: true -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_realtimeTableCount_$1" cache: true -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_offlineTableCount_$1" cache: true -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_validateion_$4_$5" cache: true labels: database: "$2" table: "$1$3" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" 
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_cronSchedulerJobScheduled_$5" cache: true labels: database: "$2" table: "$1$3" taskType: "$4" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_cronSchedulerJobTriggered_$5" cache: true labels: database: "$2" table: "$1$3" taskType: "$4" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_cronSchedulerJobSkipped_$5" cache: true labels: database: "$2" table: "$1$3" taskType: "$4" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_cronSchedulerJobExecutionTimeMs_$5" cache: true labels: database: "$2" table: "$1$3" taskType: "$4" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_tableRebalanceExecutionTimeMs_$5" cache: true labels: database: "$2" table: "$1$3" result: "$4" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_taskStatus_$3" cache: true labels: taskType: "$1" status: "$2" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_timeMsSinceLastMinionTaskMetadataUpdate_$6" cache: true labels: @@ -146,7 +146,7 @@ rules: table: "$1$3" tableType: "$4" taskType: "$5" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_$1_$7" cache: true labels: @@ -154,12 +154,12 @@ rules: table: "$2$4" tableType: "$5" taskType: "$6" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: 
"\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_$1_$3" cache: true labels: taskType: "$2" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_timeMsSinceLastSuccessfulMinionTaskGeneration_$6" cache: true labels: @@ -167,7 +167,7 @@ rules: table: "$1$3" tableType: "$4" taskType: "$5" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_lastMinionTaskGenerationEncountersError_$6" cache: true labels: @@ -175,47 +175,47 @@ rules: table: "$1$3" tableType: "$4" taskType: "$5" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_pinotLeadControllerResourceEnabled_$1" cache: true -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_offlineTableEstimatedSize_$4" cache: true labels: database: "$2" table: "$1$3" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_largestSegmentSizeOnServer_$4" cache: true labels: database: "$2" table: "$1$3" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_tableTotalSizeOnServer_$5" labels: database: "$2" table: "$1$3" tableType: "$4" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_tableSizePerReplicaOnServer_$5" labels: database: "$2" table: "$1$3" tableType: "$4" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_tableCompressedSize_$5" labels: 
database: "$2" table: "$1$3" tableType: "$4" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_tableQuota_$5" cache: true labels: database: "$2" table: "$1$3" tableType: "$4" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_periodicTaskError_$6" cache: true labels: @@ -223,14 +223,14 @@ rules: table: "$1$3" tableType: "$4" periodicTask: "$5" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_tableStorageQuotaUtilization_$5" cache: true labels: database: "$2" table: "$1$3" tableType: "$4" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_tableStorageEstMissingSegmentPercent_$5" cache: true labels: @@ -239,187 +239,187 @@ rules: tableType: "$4" # Pinot Broker -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_broker_authorization_$4" cache: true labels: database: "$2" table: "$1$3" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_broker_documentsScanned_$4" cache: true labels: database: "$2" table: "$1$3" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_broker_entriesScannedInFilter_$4" cache: true labels: database: "$2" table: "$1$3" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_broker_entriesScannedPostFilter_$4" cache: true labels: database: "$2" table: "$1$3" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: 
"\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_broker_freshnessLagMs_$4" cache: true labels: database: "$2" table: "$1$3" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_broker_queries_$4" cache: true labels: database: "$2" table: "$1$3" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_broker_queryExecution_$4" cache: true labels: database: "$2" table: "$1$3" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_broker_queryRouting_$4" cache: true labels: database: "$2" table: "$1$3" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_broker_queryTotalTimeMs_$4" cache: true labels: database: "$2" table: "$1$3" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_broker_reduce_$4" cache: true labels: database: "$2" table: "$1$3" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_broker_requestCompilation_$4" cache: true labels: database: "$2" table: "$1$3" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_broker_scatterGather_$4" cache: true labels: database: "$2" table: "$1$3" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_broker_totalServerResponseSize_$4" cache: true labels: database: "$2" table: "$1$3" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_broker_groupBySize_$5" 
cache: true labels: database: "$2" table: "$1$3" tableType: "$4" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_broker_noServingHostForSegment_$5" cache: true labels: database: "$2" table: "$1$3" tableType: "$4" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_broker_healthcheck_$1_$2" cache: true -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_broker_helix_$1_$2" cache: true -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_broker_helix_zookeeper_$1_$2" cache: true -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_broker_nettyConnection_$1_$2" cache: true -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_broker_unhealthyServers_$1" cache: true -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_broker_clusterChangeCheck_$1" cache: true -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_broker_proactiveClusterChangeCheck_$1" cache: true -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_broker_exceptions_$1_$2" cache: true -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_broker_routingTableUpdateTime_$1" cache: true -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: 
"\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_broker_brokerResponsesWithPartialServersResponded_$4" cache: true labels: database: "$2" table: "$1$3" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_broker_brokerResponsesWithProcessingExceptions_$4" cache: true labels: database: "$2" table: "$1$3" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_broker_brokerResponsesWithNumGroupsLimitReached_$4" cache: true labels: database: "$2" table: "$1$3" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_broker_queryQuotaExceeded_$4" cache: true labels: database: "$2" table: "$1$3" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_broker_serverMissingForRouting_$5" cache: true labels: database: "$2" table: "$1$3" tableType: "$4" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_broker_deserialization_$4" cache: true labels: database: "$2" table: "$1$3" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_broker_requestConnectionWait_$4" cache: true labels: database: "$2" table: "$1$3" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_broker_brokerResponsesWithTimeouts_$4" cache: true labels: database: "$2" table: "$1$3" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_broker_noServerFoundExceptions_$4" cache: true labels: database: "$2" table: "$1$3" -- pattern: 
"\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_broker_brokerResponsesWithProcessingExceptions_$4" cache: true labels: database: "$2" table: "$1$3" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_broker_queryTotalTimeMs_$4" cache: true labels: @@ -427,28 +427,28 @@ rules: table: "$1$3" # Pinot Server -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_server_documentCount_$5" cache: true labels: database: "$2" table: "$1$3" tableType: "$4" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_server_segmentCount_$5" cache: true labels: database: "$2" table: "$1$3" tableType: "$4" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_server_$5_$6" cache: true labels: database: "$2" table: "$1$3" tableType: "$4" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_server_$7_$8" cache: true labels: @@ -457,13 +457,13 @@ rules: tableType: "$4" topic: "$5" partition: "$6" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_server_helix_connected_$1" cache: true -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_server_helix_zookeeperReconnects_$1" cache: true -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_server_highestKafkaOffsetConsumed_$7" cache: true labels: @@ -472,7 +472,7 @@ rules: tableType: "$4" topic: "$5" 
partition: "$6" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_server_highestStreamOffsetConsumed_$7" cache: true labels: @@ -481,7 +481,7 @@ rules: tableType: "$4" topic: "$5" partition: "$6" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_server_lastRealtimeSegment$1Seconds_$8" cache: true labels: @@ -490,10 +490,10 @@ rules: tableType: "$5" topic: "$6" partition: "$7" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_server_llcControllerResponse_$1_$2" cache: true -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_server_llcPartitionConsuming_$7" cache: true labels: @@ -502,65 +502,65 @@ rules: tableType: "$4" topic: "$5" partition: "$6" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_server_llcSimultaneousSegmentBuilds_$1" cache: true -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_server_memory_$1_$2" cache: true -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_server_queries_$1" cache: true -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_server_realtime_consumptionExceptions_$1" cache: true -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_server_realtime_offheapMemoryUsed_$4" cache: true labels: database: "$2" table: "$1$3" -- pattern: 
"\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_server_realtime_offsetCommits_$1" cache: true -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_server_realtime_rowsConsumed_$1" cache: true -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_server_realtime_rowsErrored_$1" cache: true -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_server_realtime_exceptions_$1_$2" cache: true -- pattern: "\"org.apache.pinot.transport.netty.NettyTCPServer_(\\w+)_\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.transport\\.netty\\.NettyTCPServer_(\\w+)_\"<>(\\w+)" name: "pinot_server_netty_tcp_$2_$3" cache: true labels: id: "$1" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_server_nettyConnection_$1_$2" cache: true -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_server_realtimeSegmentNumPartitions_$4" cache: true labels: database: "$2" table: "$1$3" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_server_resizeTimeMs_$5" cache: true labels: database: "$2" table: "$1$3" tableType: "$4" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_server_numResizes_$5" cache: true labels: database: "$2" table: "$1$3" tableType: "$4" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_server_upsertPrimaryKeysCount_$6" cache: true 
labels: @@ -568,7 +568,7 @@ rules: table: "$1$3" tableType: "$4" partition: "$5" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_server_realtimeIngestionDelayMs_$6" cache: true labels: @@ -576,7 +576,7 @@ rules: table: "$1$3" tableType: "$4" partition: "$5" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_server_upsertValidDocSnapshotCount_$6" cache: true labels: @@ -584,7 +584,7 @@ rules: table: "$1$3" tableType: "$4" partition: "$5" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_server_upsertPrimaryKeysInSnapshotCount_$6" cache: true labels: @@ -593,19 +593,19 @@ rules: tableType: "$4" partition: "$5" #grpc related metrics -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_server_grpc$1_$2" cache: true # Pinot Minions -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_minion_numberOfTasks_$5" cache: true labels: database: "$2" table: "$1$3" tableType: "$4" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_minion_$6_$7" cache: true labels: @@ -613,10 +613,10 @@ rules: table: "$1$3" tableType: "$4" taskType: "$5" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_minion_$1_$2" cache: true -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_minion_$2_$3" cache: true labels: @@ -652,7 +652,7 @@ rules: tableType: "$5" # This is a catch-all pattern for pinot table metrics with 
offline/realtime suffix with topic and partition # Patterns after this line may be skipped. -- pattern: "\"?org\\.apache\\.pinot\\.common\\.metrics\"?<>(\\w+)" +- pattern: "\"?org\\.apache\\.pinot\\.common\\.metrics\"?<>(\\w+)" name: "pinot_$1_$2_$9" cache: true labels: diff --git a/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/server.yml b/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/server.yml index f59c707d68e1..1aae226de3e9 100644 --- a/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/server.yml +++ b/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/server.yml @@ -1,32 +1,32 @@ rules: -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_server_documentCount_$5" cache: true labels: database: "$2" table: "$1$3" tableType: "$4" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_server_segmentCount_$5" cache: true labels: database: "$2" table: "$1$3" tableType: "$4" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_server_$5_$6" cache: true labels: database: "$2" table: "$1$3" tableType: "$4" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_server_helix_connected_$1" cache: true -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_server_helix_zookeeperReconnects_$1" cache: true -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_server_highestKafkaOffsetConsumed_$7" cache: true labels: @@ -35,7 +35,7 @@ rules: tableType: "$4" topic: "$5" partition: "$6" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: 
"\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_server_highestStreamOffsetConsumed_$7" cache: true labels: @@ -44,7 +44,7 @@ rules: tableType: "$4" topic: "$5" partition: "$6" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_server_lastRealtimeSegment$1Seconds_$8" cache: true labels: @@ -53,10 +53,10 @@ rules: tableType: "$5" topic: "$6" partition: "$7" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_server_llcControllerResponse_$1_$2" cache: true -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_server_llcPartitionConsuming_$7" cache: true labels: @@ -65,7 +65,7 @@ rules: tableType: "$4" topic: "$5" partition: "$6" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_server_realtimeIngestionDelayMs_$6" cache: true labels: @@ -73,19 +73,19 @@ rules: table: "$1$3" tableType: "$4" partition: "$5" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_server_llcSimultaneousSegmentBuilds_$1" cache: true -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_server_memory_$1_$2" cache: true -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_server_queries_$1" cache: true -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_server_realtime_consumptionExceptions_$1" cache: true -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: 
"\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_server_$7_$8" cache: true labels: @@ -94,50 +94,50 @@ rules: tableType: "$4" topic: "$5" partition: "$6" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_server_realtime_offheapMemoryUsed_$4" cache: true labels: database: "$2" table: "$1$3" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_server_realtime_offsetCommits_$1" cache: true -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_server_realtime_rowsConsumed_$1" cache: true -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_server_realtime_exceptions_$1_$2" cache: true -- pattern: "\"org.apache.pinot.transport.netty.NettyTCPServer_(\\w+)_\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.transport\\.netty\\.NettyTCPServer_(\\w+)_\"<>(\\w+)" name: "pinot_server_netty_tcp_$2_$3" cache: true labels: id: "$1" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_server_nettyConnection_$1_$2" cache: true -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_server_realtimeSegmentNumPartitions_$4" cache: true labels: database: "$2" table: "$1$3" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_server_numResizes_$5" cache: true labels: database: "$2" table: "$1$3" tableType: "$4" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_server_resizeTimeMs_$5" cache: true labels: 
database: "$2" table: "$1$3" tableType: "$4" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_server_upsertPrimaryKeysCount_$6" cache: true labels: @@ -150,7 +150,7 @@ rules: cache: true labels: version: "$2" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_server_upsertValidDocSnapshotCount_$6" cache: true labels: @@ -158,7 +158,7 @@ rules: table: "$1$3" tableType: "$4" partition: "$5" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_server_upsertPrimaryKeysInSnapshotCount_$6" cache: true labels: @@ -167,7 +167,7 @@ rules: tableType: "$4" partition: "$5" #grpc related metrics -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_server_grpc$1_$2" cache: true @@ -192,7 +192,7 @@ rules: table: "$2$4" tableType: "$5" #when there is partition and topic in the metric -- pattern: "\"?org\\.apache\\.pinot\\.common\\.metrics\"?<>(\\w+)" +- pattern: "\"?org\\.apache\\.pinot\\.common\\.metrics\"?<>(\\w+)" name: "pinot_$1_$2_$9" cache: true labels: From 67cb52c04b5a2e81bfe26f4878e6782422f8f1c1 Mon Sep 17 00:00:00 2001 From: "Xiaotian (Jackie) Jiang" <17555551+Jackie-Jiang@users.noreply.github.com> Date: Tue, 16 Apr 2024 14:09:24 -0700 Subject: [PATCH 025/102] Refine SegmentFetcherFactory (#12936) --- .../utils/fetcher/SegmentFetcherFactory.java | 115 ++++++------------ .../data/manager/BaseTableDataManager.java | 29 ++--- .../manager/BaseTableDataManagerTest.java | 21 ---- 3 files changed, 51 insertions(+), 114 deletions(-) diff --git a/pinot-common/src/main/java/org/apache/pinot/common/utils/fetcher/SegmentFetcherFactory.java b/pinot-common/src/main/java/org/apache/pinot/common/utils/fetcher/SegmentFetcherFactory.java index 
3c3f66248cca..543db8c4031d 100644 --- a/pinot-common/src/main/java/org/apache/pinot/common/utils/fetcher/SegmentFetcherFactory.java +++ b/pinot-common/src/main/java/org/apache/pinot/common/utils/fetcher/SegmentFetcherFactory.java @@ -18,61 +18,45 @@ */ package org.apache.pinot.common.utils.fetcher; -import com.google.common.base.Preconditions; import java.io.File; import java.net.URI; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.Random; import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.Supplier; +import javax.annotation.Nullable; import org.apache.pinot.common.auth.AuthConfig; import org.apache.pinot.common.auth.AuthProviderUtils; import org.apache.pinot.spi.crypt.PinotCrypter; import org.apache.pinot.spi.crypt.PinotCrypterFactory; import org.apache.pinot.spi.env.PinotConfiguration; import org.apache.pinot.spi.utils.CommonConstants; -import org.checkerframework.checker.nullness.qual.NonNull; import org.slf4j.Logger; import org.slf4j.LoggerFactory; public class SegmentFetcherFactory { - private final static SegmentFetcherFactory INSTANCE = new SegmentFetcherFactory(); - - static final String SEGMENT_FETCHER_CLASS_KEY_SUFFIX = ".class"; - private static final String PROTOCOLS_KEY = "protocols"; - private static final String ENCODED_SUFFIX = ".enc"; - private static final String AUTH_KEY = CommonConstants.KEY_OF_AUTH; - - private static final Logger LOGGER = LoggerFactory.getLogger(SegmentFetcherFactory.class); - private static final Random RANDOM = new Random(); - - private final Map _segmentFetcherMap = new HashMap<>(); - private final SegmentFetcher _httpSegmentFetcher = new HttpSegmentFetcher(); - private final SegmentFetcher _pinotFSSegmentFetcher = new PinotFSSegmentFetcher(); - private SegmentFetcherFactory() { - // left blank } - public static SegmentFetcherFactory getInstance() { - return INSTANCE; - } + public static final String 
SEGMENT_FETCHER_CLASS_KEY_SUFFIX = ".class"; + public static final String PROTOCOLS_KEY = "protocols"; + public static final String ENCODED_SUFFIX = ".enc"; + + private static final Logger LOGGER = LoggerFactory.getLogger(SegmentFetcherFactory.class); + private static final Map SEGMENT_FETCHER_MAP = new HashMap<>(); + private static final SegmentFetcher HTTP_SEGMENT_FETCHER = new HttpSegmentFetcher(); + private static final SegmentFetcher PINOT_FS_SEGMENT_FETCHER = new PinotFSSegmentFetcher(); /** * Initializes the segment fetcher factory. This method should only be called once. */ public static void init(PinotConfiguration config) throws Exception { - getInstance().initInternal(config); - } - - private void initInternal(PinotConfiguration config) - throws Exception { - _httpSegmentFetcher.init(config); // directly, without sub-namespace - _pinotFSSegmentFetcher.init(config); // directly, without sub-namespace + HTTP_SEGMENT_FETCHER.init(config); // directly, without sub-namespace + PINOT_FS_SEGMENT_FETCHER.init(config); // directly, without sub-namespace List protocols = config.getProperty(PROTOCOLS_KEY, Collections.emptyList()); for (String protocol : protocols) { @@ -93,22 +77,22 @@ private void initInternal(PinotConfiguration config) } } else { LOGGER.info("Creating segment fetcher for protocol: {} with class: {}", protocol, segmentFetcherClassName); - segmentFetcher = (SegmentFetcher) Class.forName(segmentFetcherClassName).newInstance(); + segmentFetcher = (SegmentFetcher) Class.forName(segmentFetcherClassName).getConstructor().newInstance(); } - AuthConfig authConfig = AuthProviderUtils.extractAuthConfig(config, AUTH_KEY); - PinotConfiguration subConfig = config.subset(protocol); - AuthConfig subAuthConfig = AuthProviderUtils.extractAuthConfig(subConfig, AUTH_KEY); + Map subConfigMap = subConfig.toMap(); - Map subConfigMap = config.subset(protocol).toMap(); + // Put global auth properties into sub-config if sub-config does not have auth properties + 
AuthConfig authConfig = AuthProviderUtils.extractAuthConfig(config, CommonConstants.KEY_OF_AUTH); + AuthConfig subAuthConfig = AuthProviderUtils.extractAuthConfig(subConfig, CommonConstants.KEY_OF_AUTH); if (subAuthConfig.getProperties().isEmpty() && !authConfig.getProperties().isEmpty()) { - authConfig.getProperties().forEach((key, value) -> subConfigMap.put(AUTH_KEY + "." + key, value)); + authConfig.getProperties() + .forEach((key, value) -> subConfigMap.put(CommonConstants.KEY_OF_AUTH + "." + key, value)); } segmentFetcher.init(new PinotConfiguration(subConfigMap)); - - _segmentFetcherMap.put(protocol, segmentFetcher); + SEGMENT_FETCHER_MAP.put(protocol, segmentFetcher); } } @@ -117,11 +101,7 @@ private void initInternal(PinotConfiguration config) * ({@link HttpSegmentFetcher} for "http" and "https", {@link PinotFSSegmentFetcher} for other protocols). */ public static SegmentFetcher getSegmentFetcher(String protocol) { - return getInstance().getSegmentFetcherInternal(protocol); - } - - private SegmentFetcher getSegmentFetcherInternal(String protocol) { - SegmentFetcher segmentFetcher = _segmentFetcherMap.get(protocol); + SegmentFetcher segmentFetcher = SEGMENT_FETCHER_MAP.get(protocol); if (segmentFetcher != null) { return segmentFetcher; } else { @@ -129,9 +109,9 @@ private SegmentFetcher getSegmentFetcherInternal(String protocol) { switch (protocol) { case CommonConstants.HTTP_PROTOCOL: case CommonConstants.HTTPS_PROTOCOL: - return _httpSegmentFetcher; + return HTTP_SEGMENT_FETCHER; default: - return _pinotFSSegmentFetcher; + return PINOT_FS_SEGMENT_FETCHER; } } } @@ -141,7 +121,7 @@ private SegmentFetcher getSegmentFetcherInternal(String protocol) { */ public static void fetchSegmentToLocal(URI uri, File dest) throws Exception { - getInstance().fetchSegmentToLocalInternal(uri, dest); + getSegmentFetcher(uri.getScheme()).fetchSegmentToLocal(uri, dest); } /** @@ -149,13 +129,7 @@ public static void fetchSegmentToLocal(URI uri, File dest) */ public static void 
fetchSegmentToLocal(String uri, File dest) throws Exception { - getInstance().fetchSegmentToLocalInternal(new URI(uri), dest); - } - - private void fetchSegmentToLocalInternal(URI uri, File dest) - throws Exception { - // caller untars - getSegmentFetcher(uri.getScheme()).fetchSegmentToLocal(uri, dest); + fetchSegmentToLocal(new URI(uri), dest); } /** @@ -167,17 +141,17 @@ private void fetchSegmentToLocalInternal(URI uri, File dest) * @return the untared directory * @throws Exception */ - public static File fetchAndStreamUntarToLocal(String uri, File tempRootDir, - long maxStreamRateInByte, AtomicInteger attempts) + public static File fetchAndStreamUntarToLocal(URI uri, File tempRootDir, long maxStreamRateInByte, + AtomicInteger attempts) throws Exception { - return getInstance().fetchAndStreamUntarToLocalInternal(new URI(uri), tempRootDir, maxStreamRateInByte, attempts); + return getSegmentFetcher(uri.getScheme()).fetchUntarSegmentToLocalStreamed(uri, tempRootDir, maxStreamRateInByte, + attempts); } - private File fetchAndStreamUntarToLocalInternal(URI uri, File tempRootDir, - long maxStreamRateInByte, AtomicInteger attempts) + public static File fetchAndStreamUntarToLocal(String uri, File tempRootDir, long maxStreamRateInByte, + AtomicInteger attempts) throws Exception { - return getSegmentFetcher(uri.getScheme()).fetchUntarSegmentToLocalStreamed(uri, tempRootDir, maxStreamRateInByte, - attempts); + return fetchAndStreamUntarToLocal(new URI(uri), tempRootDir, maxStreamRateInByte, attempts); } /** @@ -185,18 +159,7 @@ private File fetchAndStreamUntarToLocalInternal(URI uri, File tempRootDir, * @param uri remote segment location * @param dest local file */ - public static void fetchAndDecryptSegmentToLocal(String uri, File dest, String crypterName) - throws Exception { - getInstance().fetchAndDecryptSegmentToLocalInternal(uri, dest, crypterName); - } - - // uris have equal weight to be selected for segment download - public static void 
fetchAndDecryptSegmentToLocal(List uris, File dest, String crypterName) - throws Exception { - getInstance().fetchAndDecryptSegmentToLocalInternal(uris, dest, crypterName); - } - - private void fetchAndDecryptSegmentToLocalInternal(String uri, File dest, String crypterName) + public static void fetchAndDecryptSegmentToLocal(String uri, File dest, @Nullable String crypterName) throws Exception { if (crypterName == null) { fetchSegmentToLocal(uri, dest); @@ -211,16 +174,16 @@ private void fetchAndDecryptSegmentToLocalInternal(String uri, File dest, String } } - private void fetchAndDecryptSegmentToLocalInternal(@NonNull List uris, File dest, String crypterName) - throws Exception { - Preconditions.checkArgument(!uris.isEmpty(), "empty uris passed into the fetchAndDecryptSegmentToLocalInternal"); - URI uri = uris.get(RANDOM.nextInt(uris.size())); + public static void fetchAndDecryptSegmentToLocal(String segmentName, String scheme, Supplier> uriSupplier, + File dest, @Nullable String crypterName) + throws Exception { + SegmentFetcher segmentFetcher = getSegmentFetcher(scheme); if (crypterName == null) { - fetchSegmentToLocal(uri, dest); + segmentFetcher.fetchSegmentToLocal(segmentName, uriSupplier, dest); } else { // download File tempDownloadedFile = new File(dest.getPath() + ENCODED_SUFFIX); - fetchSegmentToLocal(uri, tempDownloadedFile); + segmentFetcher.fetchSegmentToLocal(segmentName, uriSupplier, tempDownloadedFile); // decrypt PinotCrypter crypter = PinotCrypterFactory.create(crypterName); diff --git a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/BaseTableDataManager.java b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/BaseTableDataManager.java index 1237db547a08..73af1ed3a051 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/BaseTableDataManager.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/BaseTableDataManager.java @@ -693,27 +693,22 @@ File downloadAndDecrypt(String segmentName, 
SegmentZKMetadata zkMetadata, File t } } - // not thread safe. Caller should invoke it with safe concurrency control. protected void downloadFromPeersWithoutStreaming(String segmentName, SegmentZKMetadata zkMetadata, File destTarFile) throws Exception { - Preconditions.checkState(_peerDownloadScheme != null, "Download peers require non null peer download scheme"); - List peerSegmentURIs = - PeerServerSegmentFinder.getPeerServerURIs(_helixManager, _tableNameWithType, segmentName, _peerDownloadScheme); - if (peerSegmentURIs.isEmpty()) { - String msg = String.format("segment %s doesn't have any peers", segmentName); - LOGGER.warn(msg); - // HelixStateTransitionHandler would catch the runtime exception and mark the segment state as Error - throw new RuntimeException(msg); - } + Preconditions.checkState(_peerDownloadScheme != null, "Peer download is not enabled for table: %s", + _tableNameWithType); try { - // Next download the segment from a randomly chosen server using configured scheme. - SegmentFetcherFactory.fetchAndDecryptSegmentToLocal(peerSegmentURIs, destTarFile, zkMetadata.getCrypterName()); - LOGGER.info("Fetched segment {} from peers: {} to: {} of size: {}", segmentName, peerSegmentURIs, destTarFile, + SegmentFetcherFactory.fetchAndDecryptSegmentToLocal(segmentName, _peerDownloadScheme, () -> { + List peerServerURIs = + PeerServerSegmentFinder.getPeerServerURIs(_helixManager, _tableNameWithType, segmentName, + _peerDownloadScheme); + Collections.shuffle(peerServerURIs); + return peerServerURIs; + }, destTarFile, zkMetadata.getCrypterName()); + _logger.info("Downloaded tarred segment: {} from peers to: {}, file length: {}", segmentName, destTarFile, destTarFile.length()); - } catch (AttemptsExceededException e) { - LOGGER.error("Attempts exceeded when downloading segment: {} for table: {} from peers {} to: {}", segmentName, - _tableNameWithType, peerSegmentURIs, destTarFile); - _serverMetrics.addMeteredTableValue(_tableNameWithType, 
ServerMeter.SEGMENT_DOWNLOAD_FROM_PEERS_FAILURES, 1L); + } catch (Exception e) { + _serverMetrics.addMeteredTableValue(_tableNameWithType, ServerMeter.SEGMENT_DOWNLOAD_FROM_PEERS_FAILURES, 1); throw e; } } diff --git a/pinot-core/src/test/java/org/apache/pinot/core/data/manager/BaseTableDataManagerTest.java b/pinot-core/src/test/java/org/apache/pinot/core/data/manager/BaseTableDataManagerTest.java index 261fe0f23885..ace744c99966 100644 --- a/pinot-core/src/test/java/org/apache/pinot/core/data/manager/BaseTableDataManagerTest.java +++ b/pinot-core/src/test/java/org/apache/pinot/core/data/manager/BaseTableDataManagerTest.java @@ -38,7 +38,6 @@ import org.apache.pinot.common.utils.fetcher.BaseSegmentFetcher; import org.apache.pinot.common.utils.fetcher.SegmentFetcherFactory; import org.apache.pinot.core.data.manager.offline.OfflineTableDataManager; -import org.apache.pinot.core.util.PeerServerSegmentFinder; import org.apache.pinot.segment.local.segment.creator.impl.SegmentIndexCreationDriverImpl; import org.apache.pinot.segment.local.segment.index.loader.IndexLoadingConfig; import org.apache.pinot.segment.local.segment.readers.GenericRowRecordReader; @@ -647,26 +646,6 @@ public void testDownloadAndDecryptPeerDownload() verify(tmgr, times(1)).downloadFromPeersWithoutStreaming("seg01", zkmd, destFile); } - // happy case: download from peers - @Test - public void testDownloadFromPeersWithoutStreaming() - throws Exception { - URI uri = mockRemoteCopy(); - InstanceDataManagerConfig config = createDefaultInstanceDataManagerConfig(); - when(config.getSegmentPeerDownloadScheme()).thenReturn("http"); - HelixManager helixManager = mock(HelixManager.class); - BaseTableDataManager tmgr = createTableManager(config, helixManager); - File tempRootDir = tmgr.getTmpSegmentDataDir("test-download-peer-without-streaming"); - File destFile = new File(tempRootDir, "seg01" + TarGzCompressionUtils.TAR_GZ_FILE_EXTENSION); - try (MockedStatic mockPeerSegFinder = 
mockStatic(PeerServerSegmentFinder.class)) { - mockPeerSegFinder.when( - () -> PeerServerSegmentFinder.getPeerServerURIs(helixManager, TABLE_NAME_WITH_TYPE, "seg01", - CommonConstants.HTTP_PROTOCOL)).thenReturn(List.of(uri)); - tmgr.downloadFromPeersWithoutStreaming("seg01", mock(SegmentZKMetadata.class), destFile); - } - assertEquals(FileUtils.readFileToString(destFile), "this is from somewhere remote"); - } - @Test public void testUntarAndMoveSegment() throws IOException { From 1d807df40160ec8525b5a33847d67a61fef2c54e Mon Sep 17 00:00:00 2001 From: Jialiang Li Date: Tue, 16 Apr 2024 17:03:39 -0700 Subject: [PATCH 026/102] Add validation check for forward index disabled if it's a REALTIME table (#12838) --- .../segment/local/utils/TableConfigUtils.java | 20 ++++++++++------ .../local/utils/TableConfigUtilsTest.java | 23 +++++++++++++++++-- 2 files changed, 34 insertions(+), 9 deletions(-) diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/TableConfigUtils.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/TableConfigUtils.java index 14c4040a600e..6729f1b027c6 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/TableConfigUtils.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/TableConfigUtils.java @@ -173,7 +173,7 @@ public static void validate(TableConfig tableConfig, @Nullable Schema schema, @N } validateTierConfigList(tableConfig.getTierConfigsList()); validateIndexingConfig(tableConfig.getIndexingConfig(), schema); - validateFieldConfigList(tableConfig.getFieldConfigList(), tableConfig.getIndexingConfig(), schema); + validateFieldConfigList(tableConfig, schema); validateInstancePartitionsTypeMapConfig(tableConfig); validatePartitionedReplicaGroupInstance(tableConfig); if (!skipTypes.contains(ValidationType.UPSERT)) { @@ -1209,8 +1209,10 @@ private static void validateIndexingConfig(IndexingConfig indexingConfig, @Nulla * Additional checks 
for TEXT and FST index types * Validates index compatibility for forward index disabled columns */ - private static void validateFieldConfigList(@Nullable List fieldConfigList, - IndexingConfig indexingConfig, @Nullable Schema schema) { + private static void validateFieldConfigList(TableConfig tableConfig, @Nullable Schema schema) { + List fieldConfigList = tableConfig.getFieldConfigList(); + IndexingConfig indexingConfig = tableConfig.getIndexingConfig(); + TableType tableType = tableConfig.getTableType(); if (fieldConfigList == null) { return; } @@ -1254,7 +1256,7 @@ private static void validateFieldConfigList(@Nullable List fieldCon "Column: %s defined in field config list must be a valid column defined in the schema", columnName); // Validate the forward index disabled compatibility with other indexes if enabled for this column - validateForwardIndexDisabledIndexCompatibility(columnName, fieldConfig, indexingConfig, schema); + validateForwardIndexDisabledIndexCompatibility(columnName, fieldConfig, indexingConfig, schema, tableType); if (CollectionUtils.isNotEmpty(fieldConfig.getIndexTypes())) { for (FieldConfig.IndexType indexType : fieldConfig.getIndexTypes()) { @@ -1300,7 +1302,7 @@ private static void validateFieldConfigList(@Nullable List fieldCon * back or generate a new index for existing segments is to either refresh or back-fill the segments. */ private static void validateForwardIndexDisabledIndexCompatibility(String columnName, FieldConfig fieldConfig, - IndexingConfig indexingConfig, Schema schema) { + IndexingConfig indexingConfig, Schema schema, TableType tableType) { Map fieldConfigProperties = fieldConfig.getProperties(); if (fieldConfigProperties == null) { return; @@ -1313,16 +1315,20 @@ private static void validateForwardIndexDisabledIndexCompatibility(String column return; } + // For tables with columnMajorSegmentBuilderEnabled being true, the forward index should not be disabled. 
+ Preconditions.checkState(tableType != TableType.REALTIME, + String.format("Cannot disable forward index for column %s, as the table type is REALTIME.", columnName)); + FieldSpec fieldSpec = schema.getFieldSpecFor(columnName); // Check for the range index since the index itself relies on the existence of the forward index to work. if (indexingConfig.getRangeIndexColumns() != null && indexingConfig.getRangeIndexColumns().contains(columnName)) { Preconditions.checkState(fieldSpec.isSingleValueField(), String.format("Feature not supported for multi-value " + "columns with range index. Cannot disable forward index for column %s. Disable range index on this " - + "column to use this feature", columnName)); + + "column to use this feature.", columnName)); Preconditions.checkState(indexingConfig.getRangeIndexVersion() == BitSlicedRangeIndexCreator.VERSION, String.format("Feature not supported for single-value columns with range index version < 2. Cannot disable " + "forward index for column %s. 
Either disable range index or create range index with" - + " version >= 2 to use this feature", columnName)); + + " version >= 2 to use this feature.", columnName)); } Preconditions.checkState(!indexingConfig.isOptimizeDictionaryForMetrics() && !indexingConfig.isOptimizeDictionary(), diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/TableConfigUtilsTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/TableConfigUtilsTest.java index 8f695f2b768a..6800f895756e 100644 --- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/TableConfigUtilsTest.java +++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/TableConfigUtilsTest.java @@ -989,6 +989,7 @@ public void testTableName() { @Test public void testValidateFieldConfig() { Schema schema = new Schema.SchemaBuilder().setSchemaName(TABLE_NAME) + .addDateTime(TIME_COLUMN, FieldSpec.DataType.LONG, "1:HOURS:EPOCH", "1:HOURS") .addSingleValueDimension("myCol1", FieldSpec.DataType.STRING) .addMultiValueDimension("myCol2", FieldSpec.DataType.INT) .addSingleValueDimension("intCol", FieldSpec.DataType.INT).build(); @@ -1190,7 +1191,7 @@ public void testValidateFieldConfig() { Assert.fail("Should fail for MV myCol2 with forward index disabled but has range and inverted index"); } catch (Exception e) { Assert.assertEquals(e.getMessage(), "Feature not supported for multi-value columns with range index. " - + "Cannot disable forward index for column myCol2. Disable range index on this column to use this feature"); + + "Cannot disable forward index for column myCol2. Disable range index on this column to use this feature."); } tableConfig = new TableConfigBuilder(TableType.OFFLINE).setTableName(TABLE_NAME) @@ -1210,7 +1211,7 @@ public void testValidateFieldConfig() { } catch (Exception e) { Assert.assertEquals(e.getMessage(), "Feature not supported for single-value columns with range index version " + "< 2. 
Cannot disable forward index for column myCol1. Either disable range index or create range index " - + "with version >= 2 to use this feature"); + + "with version >= 2 to use this feature."); } tableConfig = new TableConfigBuilder(TableType.OFFLINE).setTableName(TABLE_NAME) @@ -1277,6 +1278,24 @@ public void testValidateFieldConfig() { } catch (Exception e) { Assert.fail("Range index with forward index disabled no dictionary column is allowed"); } + + // Disabling the forward index for a realtime table makes the validation fail. + Map streamConfigs = getStreamConfigs(); + tableConfig = new TableConfigBuilder(TableType.REALTIME).setTableName(TABLE_NAME).setTimeColumnName(TIME_COLUMN) + .setNoDictionaryColumns(Arrays.asList("intCol")).setStreamConfigs(streamConfigs).build(); + try { + // Enable forward index disabled flag for a column with inverted index and disable dictionary + Map fieldConfigProperties = new HashMap<>(); + fieldConfigProperties.put(FieldConfig.FORWARD_INDEX_DISABLED, Boolean.TRUE.toString()); + FieldConfig fieldConfig = + new FieldConfig("intCol", FieldConfig.EncodingType.RAW, FieldConfig.IndexType.INVERTED, null, null, null, + fieldConfigProperties); + tableConfig.setFieldConfigList(Arrays.asList(fieldConfig)); + TableConfigUtils.validate(tableConfig, schema); + } catch (Exception e) { + Assert.assertEquals(e.getMessage(), + "Cannot disable forward index for column intCol, as the table type is REALTIME."); + } } @Test From 7dbc3459299a18772baa27c53c3c36f0a6edf60b Mon Sep 17 00:00:00 2001 From: "Xiaotian (Jackie) Jiang" <17555551+Jackie-Jiang@users.noreply.github.com> Date: Tue, 16 Apr 2024 23:13:40 -0700 Subject: [PATCH 027/102] Move PinotRelExchangeType back to the original package to fix backward compatibility (#12944) --- .../calcite/rel/logical/PinotRelExchangeType.java | 8 ++++++-- .../pinot/calcite/rel/logical/PinotLogicalExchange.java | 1 + .../calcite/rel/logical/PinotLogicalSortExchange.java | 1 +
.../rel/rules/PinotJoinToDynamicBroadcastRule.java | 2 +- .../query/planner/logical/PinotLogicalQueryPlanner.java | 2 +- .../pinot/query/planner/logical/PlanFragmenter.java | 2 +- .../query/planner/logical/RelToPlanNodeConverter.java | 2 +- .../pinot/query/planner/logical/SubPlanFragmenter.java | 2 +- .../apache/pinot/query/planner/plannode/ExchangeNode.java | 2 +- .../pinot/query/planner/plannode/MailboxReceiveNode.java | 2 +- .../pinot/query/planner/plannode/MailboxSendNode.java | 2 +- .../runtime/plan/pipeline/PipelineBreakerVisitor.java | 2 +- .../plan/pipeline/PipelineBreakerExecutorTest.java | 2 +- 13 files changed, 18 insertions(+), 12 deletions(-) rename pinot-query-planner/src/main/java/org/apache/{pinot => }/calcite/rel/logical/PinotRelExchangeType.java (88%) diff --git a/pinot-query-planner/src/main/java/org/apache/pinot/calcite/rel/logical/PinotRelExchangeType.java b/pinot-query-planner/src/main/java/org/apache/calcite/rel/logical/PinotRelExchangeType.java similarity index 88% rename from pinot-query-planner/src/main/java/org/apache/pinot/calcite/rel/logical/PinotRelExchangeType.java rename to pinot-query-planner/src/main/java/org/apache/calcite/rel/logical/PinotRelExchangeType.java index 49d5cff72839..d49a318ea4eb 100644 --- a/pinot-query-planner/src/main/java/org/apache/pinot/calcite/rel/logical/PinotRelExchangeType.java +++ b/pinot-query-planner/src/main/java/org/apache/calcite/rel/logical/PinotRelExchangeType.java @@ -16,9 +16,13 @@ * specific language governing permissions and limitations * under the License. */ -package org.apache.pinot.calcite.rel.logical; +package org.apache.calcite.rel.logical; -/** Type of exchange. */ +/** + * Type of exchange. + * TODO: Move this class to org.apache.pinot.calcite.rel.logical package after releasing 1.2.0. With release 1.1.0, + * ProtoSerializationUtils cannot handle class movement.
+ */ public enum PinotRelExchangeType { /** diff --git a/pinot-query-planner/src/main/java/org/apache/pinot/calcite/rel/logical/PinotLogicalExchange.java b/pinot-query-planner/src/main/java/org/apache/pinot/calcite/rel/logical/PinotLogicalExchange.java index 375a00500b78..fe54d57e5049 100644 --- a/pinot-query-planner/src/main/java/org/apache/pinot/calcite/rel/logical/PinotLogicalExchange.java +++ b/pinot-query-planner/src/main/java/org/apache/pinot/calcite/rel/logical/PinotLogicalExchange.java @@ -27,6 +27,7 @@ import org.apache.calcite.rel.RelShuttle; import org.apache.calcite.rel.RelWriter; import org.apache.calcite.rel.core.Exchange; +import org.apache.calcite.rel.logical.PinotRelExchangeType; /** diff --git a/pinot-query-planner/src/main/java/org/apache/pinot/calcite/rel/logical/PinotLogicalSortExchange.java b/pinot-query-planner/src/main/java/org/apache/pinot/calcite/rel/logical/PinotLogicalSortExchange.java index 141b20d422f7..2d2e159f55bf 100644 --- a/pinot-query-planner/src/main/java/org/apache/pinot/calcite/rel/logical/PinotLogicalSortExchange.java +++ b/pinot-query-planner/src/main/java/org/apache/pinot/calcite/rel/logical/PinotLogicalSortExchange.java @@ -29,6 +29,7 @@ import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.RelWriter; import org.apache.calcite.rel.core.SortExchange; +import org.apache.calcite.rel.logical.PinotRelExchangeType; /** diff --git a/pinot-query-planner/src/main/java/org/apache/pinot/calcite/rel/rules/PinotJoinToDynamicBroadcastRule.java b/pinot-query-planner/src/main/java/org/apache/pinot/calcite/rel/rules/PinotJoinToDynamicBroadcastRule.java index dd1cff07cfca..5398e58b77f7 100644 --- a/pinot-query-planner/src/main/java/org/apache/pinot/calcite/rel/rules/PinotJoinToDynamicBroadcastRule.java +++ b/pinot-query-planner/src/main/java/org/apache/pinot/calcite/rel/rules/PinotJoinToDynamicBroadcastRule.java @@ -31,11 +31,11 @@ import org.apache.calcite.rel.core.JoinInfo; import org.apache.calcite.rel.core.JoinRelType; 
import org.apache.calcite.rel.logical.LogicalJoin; +import org.apache.calcite.rel.logical.PinotRelExchangeType; import org.apache.calcite.tools.RelBuilderFactory; import org.apache.pinot.calcite.rel.hint.PinotHintOptions; import org.apache.pinot.calcite.rel.hint.PinotHintStrategyTable; import org.apache.pinot.calcite.rel.logical.PinotLogicalExchange; -import org.apache.pinot.calcite.rel.logical.PinotRelExchangeType; import org.apache.zookeeper.common.StringUtils; diff --git a/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/logical/PinotLogicalQueryPlanner.java b/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/logical/PinotLogicalQueryPlanner.java index 4e03059ac1ec..486b78da94e7 100644 --- a/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/logical/PinotLogicalQueryPlanner.java +++ b/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/logical/PinotLogicalQueryPlanner.java @@ -30,7 +30,7 @@ import org.apache.calcite.rel.RelDistribution; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.RelRoot; -import org.apache.pinot.calcite.rel.logical.PinotRelExchangeType; +import org.apache.calcite.rel.logical.PinotRelExchangeType; import org.apache.pinot.query.planner.PlanFragment; import org.apache.pinot.query.planner.QueryPlanMetadata; import org.apache.pinot.query.planner.SubPlan; diff --git a/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/logical/PlanFragmenter.java b/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/logical/PlanFragmenter.java index 118673aecf1b..136e1ca73f90 100644 --- a/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/logical/PlanFragmenter.java +++ b/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/logical/PlanFragmenter.java @@ -24,7 +24,7 @@ import java.util.ArrayList; import java.util.List; import org.apache.calcite.rel.RelDistribution; -import org.apache.pinot.calcite.rel.logical.PinotRelExchangeType; 
+import org.apache.calcite.rel.logical.PinotRelExchangeType; import org.apache.pinot.query.planner.PlanFragment; import org.apache.pinot.query.planner.SubPlan; import org.apache.pinot.query.planner.plannode.AggregateNode; diff --git a/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/logical/RelToPlanNodeConverter.java b/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/logical/RelToPlanNodeConverter.java index a505ab3102a7..bf6011d37d95 100644 --- a/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/logical/RelToPlanNodeConverter.java +++ b/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/logical/RelToPlanNodeConverter.java @@ -40,13 +40,13 @@ import org.apache.calcite.rel.logical.LogicalTableScan; import org.apache.calcite.rel.logical.LogicalValues; import org.apache.calcite.rel.logical.LogicalWindow; +import org.apache.calcite.rel.logical.PinotRelExchangeType; import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeField; import org.apache.calcite.rel.type.RelRecordType; import org.apache.calcite.sql.type.SqlTypeName; import org.apache.pinot.calcite.rel.logical.PinotLogicalExchange; import org.apache.pinot.calcite.rel.logical.PinotLogicalSortExchange; -import org.apache.pinot.calcite.rel.logical.PinotRelExchangeType; import org.apache.pinot.common.utils.DataSchema; import org.apache.pinot.common.utils.DataSchema.ColumnDataType; import org.apache.pinot.common.utils.DatabaseUtils; diff --git a/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/logical/SubPlanFragmenter.java b/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/logical/SubPlanFragmenter.java index e5e17cf05e84..989b52e971c1 100644 --- a/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/logical/SubPlanFragmenter.java +++ b/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/logical/SubPlanFragmenter.java @@ -22,8 +22,8 @@ import java.util.HashMap; 
import java.util.List; import java.util.Map; +import org.apache.calcite.rel.logical.PinotRelExchangeType; import org.apache.calcite.runtime.ImmutablePairList; -import org.apache.pinot.calcite.rel.logical.PinotRelExchangeType; import org.apache.pinot.query.planner.SubPlanMetadata; import org.apache.pinot.query.planner.plannode.AggregateNode; import org.apache.pinot.query.planner.plannode.ExchangeNode; diff --git a/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/plannode/ExchangeNode.java b/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/plannode/ExchangeNode.java index c6d9024f5ecb..ea98b2fbb354 100644 --- a/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/plannode/ExchangeNode.java +++ b/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/plannode/ExchangeNode.java @@ -22,7 +22,7 @@ import java.util.Set; import org.apache.calcite.rel.RelDistribution; import org.apache.calcite.rel.RelFieldCollation; -import org.apache.pinot.calcite.rel.logical.PinotRelExchangeType; +import org.apache.calcite.rel.logical.PinotRelExchangeType; import org.apache.pinot.common.utils.DataSchema; import org.apache.pinot.query.planner.serde.ProtoProperties; diff --git a/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/plannode/MailboxReceiveNode.java b/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/plannode/MailboxReceiveNode.java index 825a32f9a4e2..e517eee8e555 100644 --- a/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/plannode/MailboxReceiveNode.java +++ b/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/plannode/MailboxReceiveNode.java @@ -27,8 +27,8 @@ import org.apache.calcite.rel.RelFieldCollation; import org.apache.calcite.rel.RelFieldCollation.Direction; import org.apache.calcite.rel.RelFieldCollation.NullDirection; +import org.apache.calcite.rel.logical.PinotRelExchangeType; import org.apache.commons.collections.CollectionUtils; -import 
org.apache.pinot.calcite.rel.logical.PinotRelExchangeType; import org.apache.pinot.common.utils.DataSchema; import org.apache.pinot.query.planner.logical.RexExpression; import org.apache.pinot.query.planner.serde.ProtoProperties; diff --git a/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/plannode/MailboxSendNode.java b/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/plannode/MailboxSendNode.java index efc4f20e852f..534e82effcbb 100644 --- a/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/plannode/MailboxSendNode.java +++ b/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/plannode/MailboxSendNode.java @@ -25,8 +25,8 @@ import javax.annotation.Nullable; import org.apache.calcite.rel.RelDistribution; import org.apache.calcite.rel.RelFieldCollation; +import org.apache.calcite.rel.logical.PinotRelExchangeType; import org.apache.commons.collections.CollectionUtils; -import org.apache.pinot.calcite.rel.logical.PinotRelExchangeType; import org.apache.pinot.common.utils.DataSchema; import org.apache.pinot.query.planner.logical.RexExpression; import org.apache.pinot.query.planner.serde.ProtoProperties; diff --git a/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/plan/pipeline/PipelineBreakerVisitor.java b/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/plan/pipeline/PipelineBreakerVisitor.java index 7c192004ae17..efa97d663818 100644 --- a/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/plan/pipeline/PipelineBreakerVisitor.java +++ b/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/plan/pipeline/PipelineBreakerVisitor.java @@ -18,7 +18,7 @@ */ package org.apache.pinot.query.runtime.plan.pipeline; -import org.apache.pinot.calcite.rel.logical.PinotRelExchangeType; +import org.apache.calcite.rel.logical.PinotRelExchangeType; import org.apache.pinot.query.planner.plannode.DefaultPostOrderTraversalVisitor; import 
org.apache.pinot.query.planner.plannode.MailboxReceiveNode; import org.apache.pinot.query.planner.plannode.PlanNode; diff --git a/pinot-query-runtime/src/test/java/org/apache/pinot/query/runtime/plan/pipeline/PipelineBreakerExecutorTest.java b/pinot-query-runtime/src/test/java/org/apache/pinot/query/runtime/plan/pipeline/PipelineBreakerExecutorTest.java index c3953bef93f2..58dcd2106ef7 100644 --- a/pinot-query-runtime/src/test/java/org/apache/pinot/query/runtime/plan/pipeline/PipelineBreakerExecutorTest.java +++ b/pinot-query-runtime/src/test/java/org/apache/pinot/query/runtime/plan/pipeline/PipelineBreakerExecutorTest.java @@ -27,7 +27,7 @@ import java.util.concurrent.Executors; import org.apache.calcite.rel.RelDistribution; import org.apache.calcite.rel.core.JoinRelType; -import org.apache.pinot.calcite.rel.logical.PinotRelExchangeType; +import org.apache.calcite.rel.logical.PinotRelExchangeType; import org.apache.pinot.common.utils.DataSchema; import org.apache.pinot.query.mailbox.MailboxService; import org.apache.pinot.query.mailbox.ReceivingMailbox; From 2f3db6e0539d5d3d514b3c0e609f2f1df7861144 Mon Sep 17 00:00:00 2001 From: Prashant Pandey <84911643+suddendust@users.noreply.github.com> Date: Wed, 17 Apr 2024 21:30:35 +0530 Subject: [PATCH 028/102] TLS Port for Minion (#12943) * Add tls port to minion * Update BaseMinionStarter.java * Update BaseMinionStarter.java --------- Co-authored-by: Xiang Fu --- .../pinot/minion/BaseMinionStarter.java | 27 +++++++++++-------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/pinot-minion/src/main/java/org/apache/pinot/minion/BaseMinionStarter.java b/pinot-minion/src/main/java/org/apache/pinot/minion/BaseMinionStarter.java index cc4e17219c50..4f8bd79bd004 100644 --- a/pinot-minion/src/main/java/org/apache/pinot/minion/BaseMinionStarter.java +++ b/pinot-minion/src/main/java/org/apache/pinot/minion/BaseMinionStarter.java @@ -84,6 +84,7 @@ public abstract class BaseMinionStarter implements ServiceStartable { 
protected MinionConf _config; protected String _hostname; protected int _port; + protected int _tlsPort; protected String _instanceId; protected HelixManager _helixManager; protected TaskExecutorFactoryRegistry _taskExecutorFactoryRegistry; @@ -116,6 +117,7 @@ public void init(PinotConfiguration config) _instanceId = CommonConstants.Helix.PREFIX_OF_MINION_INSTANCE + _hostname + "_" + _port; } _listenerConfigs = ListenerConfigUtil.buildMinionConfigs(_config); + _tlsPort = ListenerConfigUtil.findLastTlsPort(_listenerConfigs, -1); _helixManager = new ZKHelixManager(helixClusterName, _instanceId, InstanceType.PARTICIPANT, zkAddress); MinionTaskZkMetadataManager minionTaskZkMetadataManager = new MinionTaskZkMetadataManager(_helixManager); _taskExecutorFactoryRegistry = new TaskExecutorFactoryRegistry(minionTaskZkMetadataManager, _config); @@ -125,6 +127,20 @@ public void init(PinotConfiguration config) MinionEventObservers.init(_config, _executorService); } + private void updateInstanceConfigIfNeeded() { + InstanceConfig instanceConfig = HelixHelper.getInstanceConfig(_helixManager, _instanceId); + boolean updated = HelixHelper.updateHostnamePort(instanceConfig, _hostname, _port); + if (_tlsPort > 0) { + updated |= HelixHelper.updateTlsPort(instanceConfig, _tlsPort); + } + updated |= HelixHelper.addDefaultTags(instanceConfig, + () -> Collections.singletonList(CommonConstants.Helix.UNTAGGED_MINION_INSTANCE)); + updated |= HelixHelper.removeDisabledPartitions(instanceConfig); + if (updated) { + HelixHelper.updateInstanceConfig(_helixManager, instanceConfig); + } + } + private void setupHelixSystemProperties() { // NOTE: Helix will disconnect the manager and disable the instance if it detects flapping (too frequent disconnect // from ZooKeeper). Setting flapping time window to a small value can avoid this from happening. 
Helix ignores the @@ -302,17 +318,6 @@ public String getStatusDescription() { LOGGER.info("Pinot minion started"); } - private void updateInstanceConfigIfNeeded() { - InstanceConfig instanceConfig = HelixHelper.getInstanceConfig(_helixManager, _instanceId); - boolean updated = HelixHelper.updateHostnamePort(instanceConfig, _hostname, _port); - updated |= HelixHelper.addDefaultTags(instanceConfig, - () -> Collections.singletonList(CommonConstants.Helix.UNTAGGED_MINION_INSTANCE)); - updated |= HelixHelper.removeDisabledPartitions(instanceConfig); - if (updated) { - HelixHelper.updateInstanceConfig(_helixManager, instanceConfig); - } - } - /** * Stops the Pinot Minion instance. */ From 263f4f6c0122c18fbd46bcfb62b74aed5cfc186e Mon Sep 17 00:00:00 2001 From: "Xiaotian (Jackie) Jiang" <17555551+Jackie-Jiang@users.noreply.github.com> Date: Wed, 17 Apr 2024 10:55:54 -0700 Subject: [PATCH 029/102] Fix a typo when calculating query freshness (#12947) --- .../pinot/core/query/executor/ServerQueryExecutorV1Impl.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pinot-core/src/main/java/org/apache/pinot/core/query/executor/ServerQueryExecutorV1Impl.java b/pinot-core/src/main/java/org/apache/pinot/core/query/executor/ServerQueryExecutorV1Impl.java index 82664cd57ffe..3e61bebf367f 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/query/executor/ServerQueryExecutorV1Impl.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/query/executor/ServerQueryExecutorV1Impl.java @@ -327,7 +327,7 @@ private InstanceResponseBlock executeInternal(ServerQueryRequest queryRequest, E } long minConsumingFreshnessTimeMs = 0; if (minIngestionTimeMs != Long.MAX_VALUE) { - minConsumingFreshnessTimeMs = minIndexTimeMs; + minConsumingFreshnessTimeMs = minIngestionTimeMs; } else if (minIndexTimeMs != Long.MAX_VALUE) { minConsumingFreshnessTimeMs = minIndexTimeMs; } else if (maxEndTimeMs != Long.MIN_VALUE) { From af2fcb78870a33015992c8d5ff886fc0909b6ebf Mon Sep 17 
00:00:00 2001 From: "Xiaotian (Jackie) Jiang" <17555551+Jackie-Jiang@users.noreply.github.com> Date: Wed, 17 Apr 2024 11:42:30 -0700 Subject: [PATCH 030/102] Enhance ProtoSerializationUtils to handle class move (#12946) --- .../query/planner/plannode/ExchangeNode.java | 1 + .../serde/ProtoSerializationUtils.java | 114 +++++++++++------- .../serde/ProtoSerializationUtilsTest.java | 62 ++++++++++ 3 files changed, 131 insertions(+), 46 deletions(-) create mode 100644 pinot-query-planner/src/test/java/org/apache/pinot/query/planner/serde/ProtoSerializationUtilsTest.java diff --git a/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/plannode/ExchangeNode.java b/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/plannode/ExchangeNode.java index ea98b2fbb354..226ce18e2d1c 100644 --- a/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/plannode/ExchangeNode.java +++ b/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/plannode/ExchangeNode.java @@ -51,6 +51,7 @@ public class ExchangeNode extends AbstractPlanNode { @ProtoProperties private boolean _isPrePartitioned = false; + // FIXME: Ser/de doesn't work on this field. Currently it is always empty. 
@ProtoProperties private List _collations; diff --git a/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/serde/ProtoSerializationUtils.java b/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/serde/ProtoSerializationUtils.java index 683fed7ab56c..7145114b33a1 100644 --- a/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/serde/ProtoSerializationUtils.java +++ b/pinot-query-planner/src/main/java/org/apache/pinot/query/planner/serde/ProtoSerializationUtils.java @@ -19,15 +19,20 @@ package org.apache.pinot.query.planner.serde; import com.google.common.base.Preconditions; +import com.google.common.collect.Maps; import com.google.protobuf.ByteString; import java.lang.reflect.Field; +import java.lang.reflect.ParameterizedType; +import java.lang.reflect.Type; import java.util.ArrayList; -import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Set; +import javax.annotation.Nullable; import org.apache.pinot.common.proto.Plan; import org.apache.pinot.spi.utils.ByteArray; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** @@ -35,15 +40,15 @@ */ @SuppressWarnings({"rawtypes", "unchecked"}) public class ProtoSerializationUtils { - private static final String ENUM_VALUE_KEY = "ENUM_VALUE_KEY"; - private static final String NULL_OBJECT_CLASSNAME = "null"; - private static final Plan.ObjectField NULL_OBJECT_VALUE = Plan.ObjectField.newBuilder() - .setObjectClassName(NULL_OBJECT_CLASSNAME).build(); - private ProtoSerializationUtils() { - // do not instantiate. 
} + private static final Logger LOGGER = LoggerFactory.getLogger(ProtoSerializationUtils.class); + private static final String ENUM_VALUE_KEY = "ENUM_VALUE_KEY"; + private static final String NULL_OBJECT_CLASS_NAME = "null"; + private static final Plan.ObjectField NULL_OBJECT_VALUE = + Plan.ObjectField.newBuilder().setObjectClassName(NULL_OBJECT_CLASS_NAME).build(); + /** * Reflectively set object's field based on {@link Plan.ObjectField} provided. * @@ -51,20 +56,22 @@ private ProtoSerializationUtils() { * @param objectField the proto ObjectField from which the object will be set. */ public static void setObjectFieldToObject(Object object, Plan.ObjectField objectField) { + Class clazz = object.getClass(); Map memberVariablesMap = objectField.getMemberVariablesMap(); - for (Map.Entry e : memberVariablesMap.entrySet()) { + for (Map.Entry entry : memberVariablesMap.entrySet()) { + String fieldName = entry.getKey(); try { - Field declaredField = object.getClass().getDeclaredField(e.getKey()); + Field declaredField = clazz.getDeclaredField(fieldName); if (declaredField.isAnnotationPresent(ProtoProperties.class)) { - Object memberVarObject = constructMemberVariable(e.getValue()); - if (memberVarObject != null) { + Object value = constructMemberVariable(entry.getValue(), declaredField.getGenericType()); + if (value != null) { declaredField.setAccessible(true); - declaredField.set(object, memberVarObject); + declaredField.set(object, value); } } - } catch (NoSuchFieldException | IllegalAccessException ex) { - throw new IllegalStateException("Unable to set Object " + object.getClass() + " on field " + e.getKey() - + "with object of type: " + objectField.getObjectClassName(), ex); + } catch (Exception e) { + throw new IllegalStateException( + String.format("Caught exception while setting field: %s on object: %s", fieldName, clazz), e); } } } @@ -75,7 +82,7 @@ public static void setObjectFieldToObject(Object object, Plan.ObjectField object * @param object object to be 
converted. * @return the converted proto ObjectField. */ - public static Plan.ObjectField convertObjectToObjectField(Object object) { + public static Plan.ObjectField convertObjectToObjectField(@Nullable Object object) { if (object != null) { Plan.ObjectField.Builder builder = Plan.ObjectField.newBuilder(); builder.setObjectClassName(object.getClass().getName()); @@ -182,22 +189,24 @@ private static Plan.MapField serializeMapMemberVariable(Object fieldObject) { // Deserialize Utils // -------------------------------------------------------------------------- - private static Object constructMemberVariable(Plan.MemberVariableField memberVariableField) { - switch (memberVariableField.getMemberVariableFieldCase()) { + @Nullable + private static Object constructMemberVariable(Plan.MemberVariableField value, Type type) { + switch (value.getMemberVariableFieldCase()) { case LITERALFIELD: - return constructLiteral(memberVariableField.getLiteralField()); + return constructLiteral(value.getLiteralField()); case LISTFIELD: - return constructList(memberVariableField.getListField()); + return constructList(value.getListField(), type); case MAPFIELD: - return constructMap(memberVariableField.getMapField()); + return constructMap(value.getMapField(), type); case OBJECTFIELD: - return constructObject(memberVariableField.getObjectField()); + return constructObject(value.getObjectField(), type); case MEMBERVARIABLEFIELD_NOT_SET: default: return null; } } + @Nullable private static Object constructLiteral(Plan.LiteralField literalField) { switch (literalField.getLiteralFieldCase()) { case BOOLFIELD: @@ -220,39 +229,52 @@ private static Object constructLiteral(Plan.LiteralField literalField) { } } - private static List constructList(Plan.ListField listField) { - List list = new ArrayList(); - for (Plan.MemberVariableField e : listField.getContentList()) { - list.add(constructMemberVariable(e)); + private static List constructList(Plan.ListField listValue, Type type) { + 
Preconditions.checkState(type instanceof ParameterizedType, "List field must be parameterized"); + Type elementType = ((ParameterizedType) type).getActualTypeArguments()[0]; + List values = listValue.getContentList(); + List list = new ArrayList(values.size()); + for (Plan.MemberVariableField value : values) { + list.add(constructMemberVariable(value, elementType)); } return list; } - private static Map constructMap(Plan.MapField mapField) { - Map map = new HashMap(); - for (Map.Entry e : mapField.getContentMap().entrySet()) { - map.put(e.getKey(), constructMemberVariable(e.getValue())); + private static Map constructMap(Plan.MapField mapValue, Type type) { + Preconditions.checkState(type instanceof ParameterizedType, "Map field must be parameterized"); + Type valueType = ((ParameterizedType) type).getActualTypeArguments()[1]; + Map values = mapValue.getContentMap(); + Map map = Maps.newHashMapWithExpectedSize(values.size()); + for (Map.Entry entry : values.entrySet()) { + map.put(entry.getKey(), constructMemberVariable(entry.getValue(), valueType)); } return map; } - private static Object constructObject(Plan.ObjectField objectField) { - if (!NULL_OBJECT_CLASSNAME.equals(objectField.getObjectClassName())) { - try { - Class clazz = Class.forName(objectField.getObjectClassName()); - if (clazz.isEnum()) { - return Enum.valueOf((Class) clazz, - objectField.getMemberVariablesOrDefault(ENUM_VALUE_KEY, null).getLiteralField().getStringField()); - } else { - Object obj = clazz.newInstance(); - setObjectFieldToObject(obj, objectField); - return obj; - } - } catch (ClassNotFoundException | InstantiationException | IllegalAccessException e) { - throw new IllegalStateException("Unable to create Object of type: " + objectField.getObjectClassName(), e); - } - } else { + @Nullable + static Object constructObject(Plan.ObjectField value, Type type) { + String objectClassName = value.getObjectClassName(); + if (objectClassName.equals(NULL_OBJECT_CLASS_NAME)) { return null; } + 
Class clazz; + try { + clazz = Class.forName(objectClassName); + } catch (ClassNotFoundException e) { + LOGGER.debug("Failed to find class: {}, falling back to: {}", objectClassName, type); + clazz = (Class) type; + } + try { + if (clazz.isEnum()) { + return Enum.valueOf((Class) clazz, + value.getMemberVariablesOrDefault(ENUM_VALUE_KEY, null).getLiteralField().getStringField()); + } else { + Object object = clazz.getConstructor().newInstance(); + setObjectFieldToObject(object, value); + return object; + } + } catch (Exception e) { + throw new IllegalStateException("Caught exception while creating object of type: " + clazz, e); + } } } diff --git a/pinot-query-planner/src/test/java/org/apache/pinot/query/planner/serde/ProtoSerializationUtilsTest.java b/pinot-query-planner/src/test/java/org/apache/pinot/query/planner/serde/ProtoSerializationUtilsTest.java new file mode 100644 index 000000000000..e26d5482d399 --- /dev/null +++ b/pinot-query-planner/src/test/java/org/apache/pinot/query/planner/serde/ProtoSerializationUtilsTest.java @@ -0,0 +1,62 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.pinot.query.planner.serde; + +import java.util.Map; +import org.apache.pinot.common.proto.Plan; +import org.testng.annotations.Test; + +import static org.testng.Assert.assertEquals; + + +public class ProtoSerializationUtilsTest { + + @Test + public void testMoveClass() { + TestClass testClass = new TestClass(); + testClass._enum = TestEnum.VALUE1; + Plan.ObjectField objectField = ProtoSerializationUtils.convertObjectToObjectField(testClass); + + // Regular case + TestClass deserialized = new TestClass(); + ProtoSerializationUtils.setObjectFieldToObject(deserialized, objectField); + assertEquals(deserialized._enum, TestEnum.VALUE1); + + // Set wrong class name for the enum field + Plan.MemberVariableField enumField = objectField.getMemberVariablesMap().get("_enum"); + Plan.MemberVariableField enumFieldWithWrongClass = Plan.MemberVariableField.newBuilder().setObjectField( + Plan.ObjectField.newBuilder().setObjectClassName("wrongClass") + .putAllMemberVariables(enumField.getObjectField().getMemberVariablesMap())).build(); + Plan.ObjectField objectFieldWithWrongClass = + Plan.ObjectField.newBuilder().setObjectClassName(objectField.getObjectClassName()) + .putAllMemberVariables(Map.of("_enum", enumFieldWithWrongClass)).build(); + TestClass deserializedWithWrongClass = new TestClass(); + ProtoSerializationUtils.setObjectFieldToObject(deserializedWithWrongClass, objectFieldWithWrongClass); + assertEquals(deserializedWithWrongClass._enum, TestEnum.VALUE1); + } + + private static class TestClass { + @ProtoProperties + private TestEnum _enum; + } + + private enum TestEnum { + VALUE1, VALUE2 + } +} From 3b46d2c5e6913c59caea9f52da9e1daf50a9cf97 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 17 Apr 2024 14:58:01 -0700 Subject: [PATCH 031/102] Bump org.apache.commons:commons-text from 1.11.0 to 1.12.0 (#12950) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/pom.xml b/pom.xml index bcf5614a39f0..ebec1b58b5b2 100644 --- a/pom.xml +++ b/pom.xml @@ -178,7 +178,7 @@ 3.14.0 4.4 - 1.11.0 + 1.12.0 1.26.1 3.6.1 1.10.0 From d206f127f33f2843c288b483e16e22963dac6b4f Mon Sep 17 00:00:00 2001 From: Pratik Tibrewal Date: Thu, 18 Apr 2024 03:44:20 +0530 Subject: [PATCH 032/102] Allowing users to pass minionInstanceTag as a param in /tasks/schedule API (#12786) * Allowing users to pass minionInstanceTag in tasks/schedule API * add nullable annotation --- .../resources/PinotTaskRestletResource.java | 11 +++-- .../helix/core/minion/PinotTaskManager.java | 48 ++++++++++++------- .../SimpleMinionClusterIntegrationTest.java | 4 +- 3 files changed, 40 insertions(+), 23 deletions(-) diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/api/resources/PinotTaskRestletResource.java b/pinot-controller/src/main/java/org/apache/pinot/controller/api/resources/PinotTaskRestletResource.java index c51c26658737..e09bde84668a 100644 --- a/pinot-controller/src/main/java/org/apache/pinot/controller/api/resources/PinotTaskRestletResource.java +++ b/pinot-controller/src/main/java/org/apache/pinot/controller/api/resources/PinotTaskRestletResource.java @@ -618,19 +618,22 @@ public Map getCronSchedulerJobDetails( @ApiOperation("Schedule tasks and return a map from task type to task name scheduled") public Map scheduleTasks(@ApiParam(value = "Task type") @QueryParam("taskType") String taskType, @ApiParam(value = "Table name (with type suffix)") @QueryParam("tableName") String tableName, + @ApiParam(value = "Minion Instance tag to schedule the task explicitly on") + @QueryParam("minionInstanceTag") @Nullable String minionInstanceTag, @Context HttpHeaders headers) { String database = headers != null ? headers.getHeaderString(DATABASE) : DEFAULT_DATABASE; if (taskType != null) { // Schedule task for the given task type List taskNames = tableName != null - ? 
_pinotTaskManager.scheduleTask(taskType, DatabaseUtils.translateTableName(tableName, headers)) - : _pinotTaskManager.scheduleTaskForDatabase(taskType, database); + ? _pinotTaskManager.scheduleTask(taskType, + DatabaseUtils.translateTableName(tableName, headers), minionInstanceTag) + : _pinotTaskManager.scheduleTaskForDatabase(taskType, database, minionInstanceTag); return Collections.singletonMap(taskType, taskNames == null ? null : StringUtils.join(taskNames, ',')); } else { // Schedule tasks for all task types Map> allTaskNames = tableName != null - ? _pinotTaskManager.scheduleTasks(DatabaseUtils.translateTableName(tableName, headers)) - : _pinotTaskManager.scheduleTasksForDatabase(database); + ? _pinotTaskManager.scheduleTasks(DatabaseUtils.translateTableName(tableName, headers), minionInstanceTag) + : _pinotTaskManager.scheduleTasksForDatabase(database, minionInstanceTag); return allTaskNames.entrySet().stream() .collect(Collectors.toMap(Map.Entry::getKey, entry -> String.join(",", entry.getValue()))); } diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/minion/PinotTaskManager.java b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/minion/PinotTaskManager.java index 2cdbf8c1dfe2..40299441390a 100644 --- a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/minion/PinotTaskManager.java +++ b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/minion/PinotTaskManager.java @@ -485,7 +485,7 @@ public void registerTaskGenerator(PinotTaskGenerator taskGenerator) { * Returns a map from the task type to the list of tasks scheduled. */ public synchronized Map> scheduleTasks() { - return scheduleTasks(_pinotHelixResourceManager.getAllTables(), false); + return scheduleTasks(_pinotHelixResourceManager.getAllTables(), false, null); } /** @@ -493,15 +493,17 @@ public synchronized Map> scheduleTasks() { * It might be called from the non-leader controller. 
* Returns a map from the task type to the list of tasks scheduled. */ - public synchronized Map> scheduleTasksForDatabase(@Nullable String database) { - return scheduleTasks(_pinotHelixResourceManager.getAllTables(database), false); + public synchronized Map> scheduleTasksForDatabase(@Nullable String database, + @Nullable String minionInstanceTag) { + return scheduleTasks(_pinotHelixResourceManager.getAllTables(database), false, minionInstanceTag); } /** * Helper method to schedule tasks (all task types) for the given tables that have the tasks enabled. Returns a map * from the task type to the list of the tasks scheduled. */ - private synchronized Map> scheduleTasks(List tableNamesWithType, boolean isLeader) { + private synchronized Map> scheduleTasks(List tableNamesWithType, + boolean isLeader, @Nullable String minionInstanceTag) { _controllerMetrics.addMeteredGlobalValue(ControllerMeter.NUMBER_TIMES_SCHEDULE_TASKS_CALLED, 1L); // Scan all table configs to get the tables with tasks enabled @@ -525,7 +527,7 @@ private synchronized Map> scheduleTasks(List tableN if (taskGenerator != null) { _helixTaskResourceManager.ensureTaskQueueExists(taskType); addTaskTypeMetricsUpdaterIfNeeded(taskType); - tasksScheduled.put(taskType, scheduleTask(taskGenerator, enabledTableConfigs, isLeader)); + tasksScheduled.put(taskType, scheduleTask(taskGenerator, enabledTableConfigs, isLeader, minionInstanceTag)); } else { List enabledTables = new ArrayList<>(enabledTableConfigs.size()); for (TableConfig enabledTableConfig : enabledTableConfigs) { @@ -545,14 +547,15 @@ private synchronized Map> scheduleTasks(List tableN */ @Nullable private List scheduleTask(PinotTaskGenerator taskGenerator, List enabledTableConfigs, - boolean isLeader) { + boolean isLeader, @Nullable String minionInstanceTagForTask) { LOGGER.info("Trying to schedule task type: {}, isLeader: {}", taskGenerator.getTaskType(), isLeader); Map> minionInstanceTagToTaskConfigs = new HashMap<>(); String taskType = 
taskGenerator.getTaskType(); for (TableConfig tableConfig : enabledTableConfigs) { String tableName = tableConfig.getTableName(); try { - String minionInstanceTag = taskGenerator.getMinionInstanceTag(tableConfig); + String minionInstanceTag = minionInstanceTagForTask != null + ? minionInstanceTagForTask : taskGenerator.getMinionInstanceTag(tableConfig); List presentTaskConfig = minionInstanceTagToTaskConfigs.computeIfAbsent(minionInstanceTag, k -> new ArrayList<>()); taskGenerator.generateTasks(List.of(tableConfig), presentTaskConfig); @@ -624,7 +627,16 @@ private List scheduleTask(PinotTaskGenerator taskGenerator, List> scheduleTasks(String tableNameWithType) { - return scheduleTasks(Collections.singletonList(tableNameWithType), false); + return scheduleTasks(Collections.singletonList(tableNameWithType), false, null); + } + + /** + * Public API to schedule tasks (all task types) for the given table on a specific instance tag. + * It might be called from the non-leader controller. Returns a map from the task type to the list of tasks scheduled. + */ + public synchronized Map> scheduleTasks(String tableNameWithType, + @Nullable String minionInstanceTag) { + return scheduleTasks(Collections.singletonList(tableNameWithType), false, minionInstanceTag); } /** @@ -633,8 +645,8 @@ public synchronized Map> scheduleTasks(String tableNameWith * Returns the list of task names, or {@code null} if no task is scheduled. */ @Nullable - public synchronized List scheduleTask(String taskType) { - return scheduleTask(taskType, _pinotHelixResourceManager.getAllTables()); + public synchronized List scheduleTask(String taskType, @Nullable String minionInstanceTag) { + return scheduleTask(taskType, _pinotHelixResourceManager.getAllTables(), minionInstanceTag); } /** @@ -643,12 +655,13 @@ public synchronized List scheduleTask(String taskType) { * Returns the list of task name, or {@code null} if no task is scheduled. 
*/ @Nullable - public synchronized List scheduleTaskForDatabase(String taskType, @Nullable String database) { - return scheduleTask(taskType, _pinotHelixResourceManager.getAllTables(database)); + public synchronized List scheduleTaskForDatabase(String taskType, @Nullable String database, + @Nullable String minionInstanceTag) { + return scheduleTask(taskType, _pinotHelixResourceManager.getAllTables(database), minionInstanceTag); } @Nullable - private List scheduleTask(String taskType, List tables) { + private List scheduleTask(String taskType, List tables, @Nullable String minionInstanceTag) { PinotTaskGenerator taskGenerator = _taskGeneratorRegistry.getTaskGenerator(taskType); Preconditions.checkState(taskGenerator != null, "Task type: %s is not registered", taskType); @@ -664,7 +677,7 @@ private List scheduleTask(String taskType, List tables) { _helixTaskResourceManager.ensureTaskQueueExists(taskType); addTaskTypeMetricsUpdaterIfNeeded(taskType); - return scheduleTask(taskGenerator, enabledTableConfigs, false); + return scheduleTask(taskGenerator, enabledTableConfigs, false, minionInstanceTag); } /** @@ -672,7 +685,8 @@ private List scheduleTask(String taskType, List tables) { * controller. Returns the list of task names, or {@code null} if no task is scheduled. 
*/ @Nullable - public synchronized List scheduleTask(String taskType, String tableNameWithType) { + public synchronized List scheduleTask(String taskType, String tableNameWithType, + @Nullable String minionInstanceTag) { PinotTaskGenerator taskGenerator = _taskGeneratorRegistry.getTaskGenerator(taskType); Preconditions.checkState(taskGenerator != null, "Task type: %s is not registered", taskType); @@ -685,12 +699,12 @@ public synchronized List scheduleTask(String taskType, String tableNameW _helixTaskResourceManager.ensureTaskQueueExists(taskType); addTaskTypeMetricsUpdaterIfNeeded(taskType); - return scheduleTask(taskGenerator, Collections.singletonList(tableConfig), false); + return scheduleTask(taskGenerator, Collections.singletonList(tableConfig), false, minionInstanceTag); } @Override protected void processTables(List tableNamesWithType, Properties taskProperties) { - scheduleTasks(tableNamesWithType, true); + scheduleTasks(tableNamesWithType, true, null); } @Override diff --git a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/SimpleMinionClusterIntegrationTest.java b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/SimpleMinionClusterIntegrationTest.java index 1db953f00f40..241c1c0876ff 100644 --- a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/SimpleMinionClusterIntegrationTest.java +++ b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/SimpleMinionClusterIntegrationTest.java @@ -150,7 +150,7 @@ public void testStopResumeDeleteTaskQueue() { verifyTaskCount(task1.get(0), 0, 1, 1, 2); // Should generate one more task, with two sub-tasks. Both of these sub-tasks will wait // since we have one minion instance that is still running one of the sub-tasks. 
- List task2 = _taskManager.scheduleTask(TASK_TYPE); + List task2 = _taskManager.scheduleTask(TASK_TYPE, null); assertNotNull(task2); assertEquals(task2.size(), 1); assertTrue(_helixTaskResourceManager.getTasksInProgress(TASK_TYPE).contains(task2.get(0))); @@ -160,7 +160,7 @@ public void testStopResumeDeleteTaskQueue() { // Our test task generator does not generate if there are already this many sub-tasks in the // running+waiting count already. assertNull(_taskManager.scheduleTasks().get(TASK_TYPE)); - assertNull(_taskManager.scheduleTask(TASK_TYPE)); + assertNull(_taskManager.scheduleTask(TASK_TYPE, null)); // Wait at most 60 seconds for all tasks IN_PROGRESS TestUtils.waitForCondition(input -> { From 2a38d14645b32573786a9d713d33fc50fc65c521 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 17 Apr 2024 15:50:11 -0700 Subject: [PATCH 033/102] Bump org.apache:apache from 31 to 32 (#12952) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index ebec1b58b5b2..7efad7b690ca 100644 --- a/pom.xml +++ b/pom.xml @@ -26,7 +26,7 @@ org.apache apache - 31 + 32 org.apache.pinot From ece96d35382bc4334fea5ed07796cfb7c223d380 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 17 Apr 2024 15:50:30 -0700 Subject: [PATCH 034/102] Bump aws.sdk.version from 2.25.31 to 2.25.32 (#12951) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 7efad7b690ca..62166a1bca0a 100644 --- a/pom.xml +++ b/pom.xml @@ -168,7 +168,7 @@ 0.15.0 0.4.4 4.2.2 - 2.25.31 + 2.25.32 2.12.7 3.1.12 7.10.1 From 07d50fb688fd8a8e8571b26df433207a420cd6bd Mon Sep 17 00:00:00 2001 From: lnbest0707 <106711887+lnbest0707-uber@users.noreply.github.com> Date: Wed, 17 Apr 2024 18:14:15 -0700 Subject: [PATCH 035/102] Isolate bad server configs during broker startup phase (#12931) --- 
.../broker/routing/BrokerRoutingManager.java | 30 +++++++++++-------- 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/pinot-broker/src/main/java/org/apache/pinot/broker/routing/BrokerRoutingManager.java b/pinot-broker/src/main/java/org/apache/pinot/broker/routing/BrokerRoutingManager.java index 9031e87580b5..f2f65c91e800 100644 --- a/pinot-broker/src/main/java/org/apache/pinot/broker/routing/BrokerRoutingManager.java +++ b/pinot-broker/src/main/java/org/apache/pinot/broker/routing/BrokerRoutingManager.java @@ -245,21 +245,27 @@ private void processInstanceConfigChange() { Set enabledServers = new HashSet<>(); List newEnabledServers = new ArrayList<>(); for (ZNRecord instanceConfigZNRecord : instanceConfigZNRecords) { + // Put instance initialization logics into try-catch block to prevent bad server configs affecting the entire + // cluster String instanceId = instanceConfigZNRecord.getId(); - if (isEnabledServer(instanceConfigZNRecord)) { - enabledServers.add(instanceId); - - // Always refresh the server instance with the latest instance config in case it changes - InstanceConfig instanceConfig = new InstanceConfig(instanceConfigZNRecord); - ServerInstance serverInstance = new ServerInstance(instanceConfig); - if (_enabledServerInstanceMap.put(instanceId, serverInstance) == null) { - newEnabledServers.add(instanceId); - - // NOTE: Remove new enabled server from excluded servers because the server is likely being restarted - if (_excludedServers.remove(instanceId)) { - LOGGER.info("Got excluded server: {} re-enabled, including it into the routing", instanceId); + try { + if (isEnabledServer(instanceConfigZNRecord)) { + enabledServers.add(instanceId); + + // Always refresh the server instance with the latest instance config in case it changes + InstanceConfig instanceConfig = new InstanceConfig(instanceConfigZNRecord); + ServerInstance serverInstance = new ServerInstance(instanceConfig); + if (_enabledServerInstanceMap.put(instanceId, serverInstance) 
== null) { + newEnabledServers.add(instanceId); + + // NOTE: Remove new enabled server from excluded servers because the server is likely being restarted + if (_excludedServers.remove(instanceId)) { + LOGGER.info("Got excluded server: {} re-enabled, including it into the routing", instanceId); + } } } + } catch (Exception e) { + LOGGER.error("Caught exception while adding instance: {}, ignoring it", instanceId, e); } } List newDisabledServers = new ArrayList<>(); From ca0d381b5045b09e12751850b75ea3958dd82489 Mon Sep 17 00:00:00 2001 From: Jia Guo Date: Wed, 17 Apr 2024 18:23:34 -0700 Subject: [PATCH 036/102] make reflection calls compatible with 0.9.11 (#12958) --- .../org/apache/pinot/spi/utils/PinotReflectionUtils.java | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/utils/PinotReflectionUtils.java b/pinot-spi/src/main/java/org/apache/pinot/spi/utils/PinotReflectionUtils.java index 582f4ae8ab62..0192aa5cee70 100644 --- a/pinot-spi/src/main/java/org/apache/pinot/spi/utils/PinotReflectionUtils.java +++ b/pinot-spi/src/main/java/org/apache/pinot/spi/utils/PinotReflectionUtils.java @@ -52,8 +52,9 @@ public static Set> getClassesThroughReflection(String packageName, Stri Class annotation) { try { synchronized (REFLECTION_LOCK) { + // we use deprecated method include here to avoid the compatibility issue with reflections 0.9.11 -> 0.10.2 return new Reflections(new ConfigurationBuilder().setUrls(ClasspathHelper.forPackage(packageName)) - .filterInputsBy(new FilterBuilder().includePattern(regexPattern))).getTypesAnnotatedWith(annotation); + .filterInputsBy(new FilterBuilder().include(regexPattern))).getTypesAnnotatedWith(annotation); } } catch (Throwable t) { // Log an error then re-throw it because this method is usually called in a static block, where exception might @@ -72,8 +73,9 @@ public static Set> getClassesThroughReflection(List packages, S for (String packageName : packages) { 
urls.addAll(ClasspathHelper.forPackage(packageName)); } + // we use deprecated method include here to avoid the compatibility issue with reflections 0.9.11 -> 0.10.2 return new Reflections(new ConfigurationBuilder().setUrls(urls) - .filterInputsBy(new FilterBuilder().includePattern(regexPattern))).getTypesAnnotatedWith(annotation); + .filterInputsBy(new FilterBuilder().include(regexPattern))).getTypesAnnotatedWith(annotation); } } catch (Throwable t) { // Log an error then re-throw it because this method is usually called in a static block, where exception might @@ -92,8 +94,9 @@ public static Set getMethodsThroughReflection(String packageName, String Class annotation) { try { synchronized (REFLECTION_LOCK) { + // we use deprecated method include here to avoid the compatibility issue with reflections 0.9.11 -> 0.10.2 return new Reflections(new ConfigurationBuilder().setUrls(ClasspathHelper.forPackage(packageName)) - .filterInputsBy(new FilterBuilder().includePattern(regexPattern)) + .filterInputsBy(new FilterBuilder().include(regexPattern)) .setScanners(new MethodAnnotationsScanner())).getMethodsAnnotatedWith(annotation); } } catch (Throwable t) { From 02b1e3dc2624612e13ddd39e3ea877c5e9803f62 Mon Sep 17 00:00:00 2001 From: Gonzalo Ortiz Jaureguizar Date: Thu, 18 Apr 2024 09:09:18 +0200 Subject: [PATCH 037/102] add some tests on jsonPathString (#12954) --- .../common/function/JsonFunctionsTest.java | 61 +++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/pinot-common/src/test/java/org/apache/pinot/common/function/JsonFunctionsTest.java b/pinot-common/src/test/java/org/apache/pinot/common/function/JsonFunctionsTest.java index 9ca44e7fc605..a9e48053b4ce 100644 --- a/pinot-common/src/test/java/org/apache/pinot/common/function/JsonFunctionsTest.java +++ b/pinot-common/src/test/java/org/apache/pinot/common/function/JsonFunctionsTest.java @@ -21,12 +21,14 @@ import com.fasterxml.jackson.core.JsonProcessingException; import 
com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; +import com.jayway.jsonpath.InvalidJsonException; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.Map; import org.apache.pinot.common.function.scalar.JsonFunctions; import org.apache.pinot.spi.utils.JsonUtils; +import org.testng.Assert; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; @@ -87,6 +89,65 @@ public void testJsonFunction() assertTrue(Double.isNaN(JsonFunctions.jsonPathDouble(jsonString, "$.actor.aaa"))); } + @Test + public void testJsonPathStringWithDefaultValue() + throws JsonProcessingException { + String jsonString = "{\"name\": \"Pete\", \"age\": 24}"; + assertEquals(JsonFunctions.jsonPathString(jsonString, "$.name", "default"), "Pete"); + assertEquals(JsonFunctions.jsonPathString(jsonString, "$.missing", "default"), "default"); + assertNull(JsonFunctions.jsonPathString(jsonString, "$.missing", null)); + assertEquals(JsonFunctions.jsonPathString(jsonString, "$.age", "default"), "24"); + assertEquals(JsonFunctions.jsonPathString(jsonString, "$.age"), "24"); + assertEquals(JsonFunctions.jsonPathString(jsonString, "$.age", null), "24"); + } + + @Test + public void testJsonPathStringWithoutDefaultValue() + throws JsonProcessingException { + String jsonString = "{\"name\": \"Pete\", \"age\": 24}"; + assertEquals(JsonFunctions.jsonPathString(jsonString, "$.name"), "Pete"); + assertNull(JsonFunctions.jsonPathString(jsonString, "$.missing")); + assertNull(JsonFunctions.jsonPathString(jsonString, "$.missing", null)); + assertEquals(JsonFunctions.jsonPathString(jsonString, "$.age"), "24"); + } + + @Test + public void testJsonPathStringWithInvalidJson() + throws JsonProcessingException { + try { + JsonFunctions.jsonPathString("not json", "$.anything"); + Assert.fail("Should have thrown InvalidJsonException"); + } catch (InvalidJsonException e) { + // Expected + } + try { + 
JsonFunctions.jsonPathString(null, "$.anything"); + Assert.fail("Should have thrown IllegalArgumentException"); + } catch (IllegalArgumentException e) { + // Expected + } + + assertEquals(JsonFunctions.jsonPathString(null, "$.actor.aaa", "foo"), "foo"); + } + + @Test + public void testJsonPathStringWithNullValue() + throws JsonProcessingException { + String result = JsonFunctions.jsonPathString("{\"foo\": null}", "$.foo"); + + assertNull(result, "Expected null json value. Received instead " + + (result == null ? "Java null value" : result + " of type " + result.getClass())); + + assertEquals(JsonFunctions.jsonPathString("{\"foo\": null}", "$.foo", "default"), "default"); + } + + @Test + public void testJsonPathStringWithStringNull() + throws JsonProcessingException { + assertEquals(JsonFunctions.jsonPathString("{\"foo\": \"null\"}", "$.foo"), "null"); + assertEquals(JsonFunctions.jsonPathString("{\"foo\": \"null\"}", "$.foo", "default"), "null"); + } + @Test public void testJsonFunctionExtractingArray() throws JsonProcessingException { From e0571294e27a0b7243b8bfa0257f38665208bf45 Mon Sep 17 00:00:00 2001 From: swaminathanmanish <126024920+swaminathanmanish@users.noreply.github.com> Date: Thu, 18 Apr 2024 10:25:53 -0700 Subject: [PATCH 038/102] Enable complexType handling in SegmentProcessFramework (#12942) * Enable complexType handling in SegmentProcessFramework --- .../SegmentProcessorFrameworkTest.java | 48 +++++++++++++++++++ .../CompositeTransformer.java | 4 ++ 2 files changed, 52 insertions(+) diff --git a/pinot-core/src/test/java/org/apache/pinot/core/segment/processing/framework/SegmentProcessorFrameworkTest.java b/pinot-core/src/test/java/org/apache/pinot/core/segment/processing/framework/SegmentProcessorFrameworkTest.java index c2c4c51789ee..00631e778b4e 100644 --- a/pinot-core/src/test/java/org/apache/pinot/core/segment/processing/framework/SegmentProcessorFrameworkTest.java +++ 
b/pinot-core/src/test/java/org/apache/pinot/core/segment/processing/framework/SegmentProcessorFrameworkTest.java @@ -25,7 +25,9 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; +import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.stream.IntStream; import org.apache.commons.io.FileUtils; import org.apache.pinot.core.segment.processing.timehandler.TimeHandler; @@ -43,6 +45,8 @@ import org.apache.pinot.segment.spi.index.reader.NullValueVectorReader; import org.apache.pinot.spi.config.table.TableConfig; import org.apache.pinot.spi.config.table.TableType; +import org.apache.pinot.spi.config.table.ingestion.ComplexTypeConfig; +import org.apache.pinot.spi.config.table.ingestion.IngestionConfig; import org.apache.pinot.spi.data.FieldSpec.DataType; import org.apache.pinot.spi.data.Schema; import org.apache.pinot.spi.data.readers.FileFormat; @@ -52,6 +56,7 @@ import org.apache.pinot.spi.data.readers.RecordReaderFileConfig; import org.apache.pinot.spi.utils.ReadMode; import org.apache.pinot.spi.utils.builder.TableConfigBuilder; +import org.testng.Assert; import org.testng.annotations.AfterClass; import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; @@ -73,6 +78,8 @@ public class SegmentProcessorFrameworkTest { private List _singleSegment; private List _multipleSegments; private List _multiValueSegments; + private List _recordReaderWithComplexType; + private TableConfig _tableConfig; private TableConfig _tableConfigNullValueEnabled; @@ -113,6 +120,8 @@ public void setup() _schema = new Schema.SchemaBuilder().setSchemaName("mySchema").addSingleValueDimension("campaign", DataType.STRING, "") + .addSingleValueDimension("campaign.inner1", DataType.STRING) + .addSingleValueDimension("campaign.inner1.inner2", DataType.STRING) // NOTE: Intentionally put 1000 as default value to test skipping null values during rollup .addMetric("clicks", DataType.INT, 1000) .addDateTime("time", 
DataType.LONG, "1:MILLISECONDS:EPOCH", "1:MILLISECONDS").build(); @@ -127,6 +136,7 @@ public void setup() _multipleSegments = createInputSegments(new File(TEMP_DIR, "multiple_segments"), _rawData, 3, _schema); _multiValueSegments = createInputSegments(new File(TEMP_DIR, "multi_value_segment"), _rawDataMultiValue, 1, _schemaMV); + _recordReaderWithComplexType = createRecordReaderWithComplexType(); } private List createInputSegments(File inputDir, List rawData, int numSegments, Schema schema) @@ -168,6 +178,22 @@ private List createInputSegments(File inputDir, List raw return segmentRecordReaders; } + private List createRecordReaderWithComplexType() { + GenericRow genericRow = new GenericRow(); + genericRow.putValue("a", 1L); + Map map1 = new HashMap<>(); + genericRow.putValue("campaign", map1); + map1.put("inner", "innerv"); + Map innerMap1 = new HashMap<>(); + innerMap1.put("inner2", "inner2v"); + + map1.put("inner1", innerMap1); + Map map2 = new HashMap<>(); + map2.put("c", 3); + genericRow.putValue("map2", map2); + return List.of(new GenericRowRecordReader(List.of(genericRow))); + } + private GenericRow getGenericRow(Object[] rawRow) { GenericRow row = new GenericRow(); row.putValue("campaign", rawRow[0]); @@ -222,6 +248,28 @@ public void testRecordReaderFileConfigInit() throws Exception { assertEquals(recordReaderFileConfig.isRecordReaderClosedFromRecordReaderFileConfig(), true); } + @Test + public void testSegmentGenerationWithComplexType() throws Exception { + File workingDir = new File(TEMP_DIR, "single_segment_complex_type_output"); + FileUtils.forceMkdir(workingDir); + IngestionConfig ingestionConfig = new IngestionConfig(); + ingestionConfig.setComplexTypeConfig( + new ComplexTypeConfig(null, ".", null, null)); + _tableConfig.setIngestionConfig(ingestionConfig); + // Default configs + SegmentProcessorConfig config = + new SegmentProcessorConfig.Builder().setTableConfig(_tableConfig).setSchema(_schema).build(); + SegmentProcessorFramework framework = + new 
SegmentProcessorFramework(_recordReaderWithComplexType, config, workingDir); + List outputSegments = framework.process(); + ImmutableSegment segment = ImmutableSegmentLoader.load(outputSegments.get(0), ReadMode.mmap); + SegmentMetadata segmentMetadata = segment.getSegmentMetadata(); + // Pick the column created from complex type + ColumnMetadata campaignMetadata = segmentMetadata.getColumnMetadataFor("campaign.inner1.inner2"); + // Verify we see a specific value parsed from the complexType + Assert.assertEquals(campaignMetadata.getMinValue().compareTo("inner2v"), 0); + } + @Test public void testSingleSegment() throws Exception { diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/CompositeTransformer.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/CompositeTransformer.java index a1bfcba52a20..c32ef1dafa47 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/CompositeTransformer.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/CompositeTransformer.java @@ -104,6 +104,10 @@ public static CompositeTransformer getDefaultTransformer(TableConfig tableConfig public static CompositeTransformer composeAllTransformers(List customTransformers, TableConfig tableConfig, Schema schema) { List allTransformers = new ArrayList<>(customTransformers); + ComplexTypeTransformer complexTypeTransformer = ComplexTypeTransformer.getComplexTypeTransformer(tableConfig); + if (complexTypeTransformer != null) { + allTransformers.add(complexTypeTransformer); + } allTransformers.addAll(getDefaultTransformers(tableConfig, schema)); return new CompositeTransformer(allTransformers); } From 8d6bcec6d235d4040464b7ba7312d05bf0a00788 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 18 Apr 2024 11:02:33 -0700 Subject: [PATCH 039/102] Bump aws.sdk.version from 2.25.32 to 
2.25.33 (#12962) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 62166a1bca0a..865e9df5ade4 100644 --- a/pom.xml +++ b/pom.xml @@ -168,7 +168,7 @@ 0.15.0 0.4.4 4.2.2 - 2.25.32 + 2.25.33 2.12.7 3.1.12 7.10.1 From f34abb4e5f8bc094b8503997eead5f6973c22777 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 18 Apr 2024 11:02:51 -0700 Subject: [PATCH 040/102] Bump commons-cli:commons-cli from 1.6.0 to 1.7.0 (#12963) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 865e9df5ade4..3300aba43abc 100644 --- a/pom.xml +++ b/pom.xml @@ -185,7 +185,7 @@ 2.10.1 2.16.1 1.16.1 - 1.6.0 + 1.7.0 3.10.0 1.8.0 From a843ad4e3dd5a753e574ab9c90c699218255e7af Mon Sep 17 00:00:00 2001 From: Abhishek Sharma Date: Thu, 18 Apr 2024 14:38:47 -0400 Subject: [PATCH 041/102] Upgrade ORC version to 1.9.3 (#12956) --- pinot-plugins/pinot-input-format/pinot-orc/pom.xml | 12 ++++++++---- pom.xml | 8 +++++++- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/pinot-plugins/pinot-input-format/pinot-orc/pom.xml b/pinot-plugins/pinot-input-format/pinot-orc/pom.xml index 4984c17fd3e8..8f9480d960fd 100644 --- a/pinot-plugins/pinot-input-format/pinot-orc/pom.xml +++ b/pinot-plugins/pinot-input-format/pinot-orc/pom.xml @@ -38,10 +38,6 @@ package - - org.apache.orc - orc-core - org.apache.hadoop hadoop-common @@ -70,5 +66,13 @@ org.apache.hadoop.thirdparty hadoop-shaded-protobuf_3_21 + + org.apache.hive + hive-storage-api + + + org.apache.orc + orc-core + diff --git a/pom.xml b/pom.xml index 3300aba43abc..e1714b98ef90 100644 --- a/pom.xml +++ b/pom.xml @@ -134,7 +134,8 @@ 1.11.3 1.13.1 - 1.5.9 + 1.9.3 + 2.8.1 1.3.1 0.11 2.12.7.20221012 @@ -525,6 +526,11 @@ orc-mapreduce ${orc.version} + + org.apache.hive + hive-storage-api + ${hive.version} + org.xerial.snappy snappy-java From 022e0a08937d8f406bd41cfb6893d0a9b7423b08 Mon Sep 17 
00:00:00 2001 From: Pratik Tibrewal Date: Fri, 19 Apr 2024 01:44:50 +0530 Subject: [PATCH 042/102] Add ability to track filtered messages offset (#12602) --- .../realtime/RealtimeSegmentDataManager.java | 17 +++++++++++++++++ .../table/ingestion/StreamIngestionConfig.java | 11 +++++++++++ 2 files changed, 28 insertions(+) diff --git a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/RealtimeSegmentDataManager.java b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/RealtimeSegmentDataManager.java index d15e79116052..6771e038d14d 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/RealtimeSegmentDataManager.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/RealtimeSegmentDataManager.java @@ -307,6 +307,8 @@ public void deleteSegmentFile() { private final StreamPartitionMsgOffset _latestStreamOffsetAtStartupTime; private final CompletionMode _segmentCompletionMode; + private final List _filteredMessageOffsets = new ArrayList<>(); + private boolean _trackFilteredMessageOffsets = false; // TODO each time this method is called, we print reason for stop. Good to print only once. 
private boolean endCriteriaReached() { @@ -609,6 +611,9 @@ private boolean processStreamEvents(MessageBatch messageBatch, long idlePipeSlee if (reusedResult.getSkippedRowCount() > 0) { realtimeRowsDroppedMeter = _serverMetrics.addMeteredTableValue(_clientId, ServerMeter.REALTIME_ROWS_FILTERED, reusedResult.getSkippedRowCount(), realtimeRowsDroppedMeter); + if (_trackFilteredMessageOffsets) { + _filteredMessageOffsets.add(offset.toString()); + } } if (reusedResult.getIncompleteRowCount() > 0) { realtimeIncompleteRowsConsumedMeter = @@ -1421,6 +1426,12 @@ public RealtimeSegmentDataManager(SegmentZKMetadata segmentZKMetadata, TableConf .createRateLimiter(_streamConfig, _tableNameWithType, _serverMetrics, _clientId); _serverRateLimiter = RealtimeConsumptionRateManager.getInstance().getServerRateLimiter(); + if (tableConfig.getIngestionConfig() != null + && tableConfig.getIngestionConfig().getStreamIngestionConfig() != null) { + _trackFilteredMessageOffsets = + tableConfig.getIngestionConfig().getStreamIngestionConfig().isTrackFilteredMessageOffsets(); + } + List sortedColumns = indexLoadingConfig.getSortedColumns(); String sortedColumn; if (sortedColumns.isEmpty()) { @@ -1758,6 +1769,12 @@ private void updateCurrentDocumentCountMetrics() { _segmentLogger.info( "Consumed {} events from (rate:{}/s), currentOffset={}, numRowsConsumedSoFar={}, numRowsIndexedSoFar={}", rowsConsumed, consumedRate, _currentOffset, _numRowsConsumed, _numRowsIndexed); + if (_filteredMessageOffsets.size() > 0) { + if (_trackFilteredMessageOffsets) { + _segmentLogger.info("Filtered events with offsets: {}", _filteredMessageOffsets); + } + _filteredMessageOffsets.clear(); + } _lastConsumedCount = _numRowsConsumed; _lastLogTime = now; } diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/ingestion/StreamIngestionConfig.java b/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/ingestion/StreamIngestionConfig.java index 2d832dd4b2fd..5b216ca9d2e2 100644 --- 
a/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/ingestion/StreamIngestionConfig.java +++ b/pinot-spi/src/main/java/org/apache/pinot/spi/config/table/ingestion/StreamIngestionConfig.java @@ -37,6 +37,9 @@ public class StreamIngestionConfig extends BaseJsonConfig { @JsonPropertyDescription("Whether to use column major mode when creating the segment.") private boolean _columnMajorSegmentBuilderEnabled = true; + @JsonPropertyDescription("Whether to track offsets of the filtered stream messages during consumption.") + private boolean _trackFilteredMessageOffsets = false; + @JsonCreator public StreamIngestionConfig(@JsonProperty("streamConfigMaps") List> streamConfigMaps) { _streamConfigMaps = streamConfigMaps; @@ -53,4 +56,12 @@ public void setColumnMajorSegmentBuilderEnabled(boolean enableColumnMajorSegment public boolean getColumnMajorSegmentBuilderEnabled() { return _columnMajorSegmentBuilderEnabled; } + + public void setTrackFilteredMessageOffsets(boolean trackFilteredMessageOffsets) { + _trackFilteredMessageOffsets = trackFilteredMessageOffsets; + } + + public boolean isTrackFilteredMessageOffsets() { + return _trackFilteredMessageOffsets; + } } From 6a5739f076107df16f303c1e0ea0e082a94f8004 Mon Sep 17 00:00:00 2001 From: "Xiaotian (Jackie) Jiang" <17555551+Jackie-Jiang@users.noreply.github.com> Date: Thu, 18 Apr 2024 13:22:21 -0700 Subject: [PATCH 043/102] Enhance PulsarConsumerTest (#12948) --- .../stream/pulsar/PulsarConsumerTest.java | 187 ++++++++---------- 1 file changed, 80 insertions(+), 107 deletions(-) diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-pulsar/src/test/java/org/apache/pinot/plugin/stream/pulsar/PulsarConsumerTest.java b/pinot-plugins/pinot-stream-ingestion/pinot-pulsar/src/test/java/org/apache/pinot/plugin/stream/pulsar/PulsarConsumerTest.java index 01cd5cd26e0f..1baf212f170e 100644 --- a/pinot-plugins/pinot-stream-ingestion/pinot-pulsar/src/test/java/org/apache/pinot/plugin/stream/pulsar/PulsarConsumerTest.java +++ 
b/pinot-plugins/pinot-stream-ingestion/pinot-pulsar/src/test/java/org/apache/pinot/plugin/stream/pulsar/PulsarConsumerTest.java @@ -23,6 +23,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; import org.apache.pinot.spi.stream.BytesStreamMessage; import org.apache.pinot.spi.stream.PartitionGroupConsumptionStatus; @@ -31,7 +32,7 @@ import org.apache.pinot.spi.stream.StreamConsumerFactoryProvider; import org.apache.pinot.spi.stream.StreamMessageMetadata; import org.apache.pulsar.client.admin.PulsarAdmin; -import org.apache.pulsar.client.admin.PulsarAdminException; +import org.apache.pulsar.client.admin.Topics; import org.apache.pulsar.client.api.Message; import org.apache.pulsar.client.api.MessageId; import org.apache.pulsar.client.api.MessageRouter; @@ -39,7 +40,7 @@ import org.apache.pulsar.client.api.PulsarClient; import org.apache.pulsar.client.api.Schema; import org.apache.pulsar.client.api.TopicMetadata; -import org.apache.pulsar.common.policies.data.InactiveTopicPolicies; +import org.apache.pulsar.client.impl.BatchMessageIdImpl; import org.testcontainers.containers.PulsarContainer; import org.testcontainers.utility.DockerImageName; import org.testng.annotations.AfterClass; @@ -49,6 +50,7 @@ import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertNotNull; +import static org.testng.Assert.assertTrue; public class PulsarConsumerTest { @@ -62,103 +64,85 @@ public class PulsarConsumerTest { public static final int NUM_PARTITIONS = 2; public static final int NUM_RECORDS_PER_PARTITION = 1000; public static final int BATCH_SIZE = 10; - public static final int CONSUMER_FETCH_TIMEOUT_MILLIS = (int) TimeUnit.MINUTES.toMillis(1); + public static final int CONSUMER_FETCH_TIMEOUT_MILLIS = (int) TimeUnit.SECONDS.toMillis(1); private final List> _partitionToMessageIdMapping = new 
ArrayList<>(NUM_PARTITIONS); private final List> _partitionToMessageIdMappingBatch = new ArrayList<>(NUM_PARTITIONS); private PulsarContainer _pulsar; - private PulsarClient _pulsarClient; @BeforeClass public void setUp() throws Exception { _pulsar = new PulsarContainer(PULSAR_IMAGE).withStartupTimeout(Duration.ofMinutes(5)); - try { - _pulsar.start(); - _pulsarClient = PulsarClient.builder().serviceUrl(_pulsar.getPulsarBrokerUrl()).build(); - - try (PulsarAdmin admin = PulsarAdmin.builder().serviceHttpUrl(_pulsar.getHttpServiceUrl()).build()) { - createTopics(admin); - publishRecords(); - publishRecordsBatch(); - waitForMessagesToPublish(admin, TEST_TOPIC); - waitForMessagesToPublish(admin, TEST_TOPIC_BATCH); - } - } catch (Exception e) { - _pulsar.stop(); - throw new RuntimeException("Failed to setUp test environment", e); + _pulsar.start(); + try (PulsarAdmin admin = PulsarAdmin.builder().serviceHttpUrl(_pulsar.getHttpServiceUrl()).build()) { + Topics topics = admin.topics(); + topics.createPartitionedTopic(TEST_TOPIC, NUM_PARTITIONS); + topics.createPartitionedTopic(TEST_TOPIC_BATCH, NUM_PARTITIONS); } - } - - private void createTopics(PulsarAdmin admin) - throws PulsarAdminException { - InactiveTopicPolicies inactiveTopicPolicies = new InactiveTopicPolicies(); - inactiveTopicPolicies.setDeleteWhileInactive(false); - admin.namespaces().setInactiveTopicPolicies("public/default", inactiveTopicPolicies); - - admin.topics().createPartitionedTopic(TEST_TOPIC, NUM_PARTITIONS); - admin.topics().createPartitionedTopic(TEST_TOPIC_BATCH, NUM_PARTITIONS); - } - - private void waitForMessagesToPublish(PulsarAdmin admin, String topicName) - throws Exception { - long endTimeMs = System.currentTimeMillis() + TimeUnit.MINUTES.toMillis(5); - while (System.currentTimeMillis() < endTimeMs) { - if (admin.topics().getPartitionedStats(topicName, false).getMsgInCounter() - == NUM_RECORDS_PER_PARTITION * NUM_PARTITIONS) { - return; - } - Thread.sleep(1000); + try (PulsarClient client 
= PulsarClient.builder().serviceUrl(_pulsar.getPulsarBrokerUrl()).build()) { + publishRecords(client); + publishRecordsBatch(client); } - throw new RuntimeException("Failed to publish messages to topic: " + topicName); } @AfterClass public void tearDown() throws Exception { - _pulsarClient.close(); _pulsar.stop(); } - public void publishRecords() + public void publishRecords(PulsarClient client) throws Exception { for (int p = 0; p < NUM_PARTITIONS; p++) { List messageIds = new ArrayList<>(NUM_RECORDS_PER_PARTITION); _partitionToMessageIdMapping.add(messageIds); int partition = p; - try (Producer producer = _pulsarClient.newProducer(Schema.STRING).topic(TEST_TOPIC) + try (Producer producer = client.newProducer(Schema.STRING).topic(TEST_TOPIC) .messageRouter(new MessageRouter() { @Override public int choosePartition(Message msg, TopicMetadata metadata) { return partition; } - }).create()) { + }).enableBatching(false).create()) { + List> futures = new ArrayList<>(NUM_RECORDS_PER_PARTITION); for (int i = 0; i < NUM_RECORDS_PER_PARTITION; i++) { - messageIds.add(producer.send(MESSAGE_PREFIX + i)); + futures.add(producer.sendAsync(MESSAGE_PREFIX + i)); } producer.flush(); + for (int i = 0; i < NUM_RECORDS_PER_PARTITION; i++) { + MessageId messageId = futures.get(i).get(); + assertFalse(messageId instanceof BatchMessageIdImpl); + messageIds.add(messageId); + } } } } - public void publishRecordsBatch() + public void publishRecordsBatch(PulsarClient client) throws Exception { for (int p = 0; p < NUM_PARTITIONS; p++) { List messageIds = new ArrayList<>(NUM_RECORDS_PER_PARTITION); _partitionToMessageIdMappingBatch.add(messageIds); int partition = p; - try (Producer producer = _pulsarClient.newProducer(Schema.STRING).topic(TEST_TOPIC_BATCH) + try (Producer producer = client.newProducer(Schema.STRING).topic(TEST_TOPIC_BATCH) .messageRouter(new MessageRouter() { @Override public int choosePartition(Message msg, TopicMetadata metadata) { return partition; } 
}).batchingMaxMessages(BATCH_SIZE).batchingMaxPublishDelay(1, TimeUnit.SECONDS).create()) { + List> futures = new ArrayList<>(NUM_RECORDS_PER_PARTITION); for (int i = 0; i < NUM_RECORDS_PER_PARTITION; i++) { - messageIds.add(producer.send(MESSAGE_PREFIX + i)); + futures.add(producer.sendAsync(MESSAGE_PREFIX + i)); } producer.flush(); + for (int i = 0; i < NUM_RECORDS_PER_PARTITION; i++) { + MessageId messageId = futures.get(i).get(); + assertTrue(messageId instanceof BatchMessageIdImpl); + messageIds.add(messageId); + } } } } @@ -179,92 +163,81 @@ public StreamConfig getStreamConfig(String topicName) { public void testPartitionLevelConsumer() throws Exception { StreamConsumerFactory streamConsumerFactory = StreamConsumerFactoryProvider.create(getStreamConfig(TEST_TOPIC)); - int numPartitions; try (PulsarStreamMetadataProvider metadataProvider = new PulsarStreamMetadataProvider(CLIENT_ID, getStreamConfig(TEST_TOPIC))) { - numPartitions = metadataProvider.fetchPartitionCount(CONSUMER_FETCH_TIMEOUT_MILLIS); + assertEquals(metadataProvider.fetchPartitionCount(CONSUMER_FETCH_TIMEOUT_MILLIS), NUM_PARTITIONS); } - - for (int partition = 0; partition < numPartitions; partition++) { + for (int i = 0; i < NUM_PARTITIONS; i++) { + List messageIds = _partitionToMessageIdMapping.get(i); PartitionGroupConsumptionStatus partitionGroupConsumptionStatus = - new PartitionGroupConsumptionStatus(partition, 0, new MessageIdStreamOffset(MessageId.earliest), null, - "CONSUMING"); + new PartitionGroupConsumptionStatus(i, 0, new MessageIdStreamOffset(MessageId.earliest), null, "CONSUMING"); try ( PulsarPartitionLevelConsumer consumer = (PulsarPartitionLevelConsumer) streamConsumerFactory.createPartitionGroupConsumer( CLIENT_ID, partitionGroupConsumptionStatus)) { - PulsarMessageBatch messageBatch = - consumer.fetchMessages(new MessageIdStreamOffset(MessageId.earliest), CONSUMER_FETCH_TIMEOUT_MILLIS); - assertEquals(messageBatch.getMessageCount(), 1000); - 
assertFalse(messageBatch.isEndOfPartitionGroup()); - for (int i = 0; i < 1000; i++) { - verifyMessage(messageBatch.getStreamMessage(i), partition, i, false); - } - - messageBatch = - consumer.fetchMessages(new MessageIdStreamOffset(_partitionToMessageIdMapping.get(partition).get(500)), - CONSUMER_FETCH_TIMEOUT_MILLIS); - assertEquals(messageBatch.getMessageCount(), 500); - assertFalse(messageBatch.isEndOfPartitionGroup()); - for (int i = 0; i < 500; i++) { - verifyMessage(messageBatch.getStreamMessage(i), partition, 500 + i, false); - } + // Start from earliest + testConsumer(consumer, 0, messageIds); + // Start from middle + testConsumer(consumer, 500, messageIds); } } } - private void verifyMessage(BytesStreamMessage streamMessage, int partition, int index, boolean batch) { - assertEquals(new String(streamMessage.getValue()), MESSAGE_PREFIX + index); - StreamMessageMetadata messageMetadata = streamMessage.getMetadata(); - assertNotNull(messageMetadata); - MessageIdStreamOffset offset = (MessageIdStreamOffset) messageMetadata.getOffset(); - assertNotNull(offset); - MessageIdStreamOffset nextOffset = (MessageIdStreamOffset) messageMetadata.getNextOffset(); - assertNotNull(nextOffset); - List messageIds = - batch ? 
_partitionToMessageIdMappingBatch.get(partition) : _partitionToMessageIdMapping.get(partition); - assertEquals(offset.getMessageId(), messageIds.get(index)); - if (index < NUM_RECORDS_PER_PARTITION - 1) { - assertEquals(nextOffset.getMessageId(), messageIds.get(index + 1)); - } - } - @Test public void testPartitionLevelConsumerBatchMessages() throws Exception { StreamConsumerFactory streamConsumerFactory = StreamConsumerFactoryProvider.create(getStreamConfig(TEST_TOPIC_BATCH)); - int numPartitions; try (PulsarStreamMetadataProvider metadataProvider = new PulsarStreamMetadataProvider(CLIENT_ID, getStreamConfig(TEST_TOPIC_BATCH))) { - numPartitions = metadataProvider.fetchPartitionCount(CONSUMER_FETCH_TIMEOUT_MILLIS); + assertEquals(metadataProvider.fetchPartitionCount(CONSUMER_FETCH_TIMEOUT_MILLIS), NUM_PARTITIONS); } - - for (int partition = 0; partition < numPartitions; partition++) { + for (int i = 0; i < NUM_PARTITIONS; i++) { + List messageIds = _partitionToMessageIdMappingBatch.get(i); PartitionGroupConsumptionStatus partitionGroupConsumptionStatus = - new PartitionGroupConsumptionStatus(partition, 0, new MessageIdStreamOffset(MessageId.earliest), null, - "CONSUMING"); + new PartitionGroupConsumptionStatus(i, 0, new MessageIdStreamOffset(MessageId.earliest), null, "CONSUMING"); try ( PulsarPartitionLevelConsumer consumer = (PulsarPartitionLevelConsumer) streamConsumerFactory.createPartitionGroupConsumer( CLIENT_ID, partitionGroupConsumptionStatus)) { - PulsarMessageBatch messageBatch = - consumer.fetchMessages(new MessageIdStreamOffset(MessageId.earliest), CONSUMER_FETCH_TIMEOUT_MILLIS); - assertEquals(messageBatch.getMessageCount(), 1000); - assertFalse(messageBatch.isEndOfPartitionGroup()); - for (int i = 0; i < 1000; i++) { - verifyMessage(messageBatch.getStreamMessage(i), partition, i, true); - } + // Start from earliest + testConsumer(consumer, 0, messageIds); + // Start from middle + testConsumer(consumer, 500, messageIds); + } + } + } - messageBatch = - 
consumer.fetchMessages(new MessageIdStreamOffset(_partitionToMessageIdMappingBatch.get(partition).get(500)), - CONSUMER_FETCH_TIMEOUT_MILLIS); - assertEquals(messageBatch.getMessageCount(), 500); - assertFalse(messageBatch.isEndOfPartitionGroup()); - for (int i = 0; i < 500; i++) { - verifyMessage(messageBatch.getStreamMessage(i), partition, 500 + i, true); - } + private void testConsumer(PulsarPartitionLevelConsumer consumer, int startIndex, List messageIds) { + MessageId startMessageId = startIndex == 0 ? MessageId.earliest : messageIds.get(startIndex); + int numMessagesFetched = startIndex; + while (numMessagesFetched < NUM_RECORDS_PER_PARTITION) { + PulsarMessageBatch messageBatch = + consumer.fetchMessages(new MessageIdStreamOffset(startMessageId), CONSUMER_FETCH_TIMEOUT_MILLIS); + int messageCount = messageBatch.getMessageCount(); + assertFalse(messageBatch.isEndOfPartitionGroup()); + for (int i = 0; i < messageCount; i++) { + verifyMessage(messageBatch.getStreamMessage(i), numMessagesFetched + i, messageIds); + } + numMessagesFetched += messageCount; + if (numMessagesFetched < NUM_RECORDS_PER_PARTITION) { + startMessageId = messageIds.get(numMessagesFetched); } } + assertEquals(numMessagesFetched, NUM_RECORDS_PER_PARTITION); + } + + private void verifyMessage(BytesStreamMessage streamMessage, int index, List messageIds) { + assertEquals(new String(streamMessage.getValue()), MESSAGE_PREFIX + index); + StreamMessageMetadata messageMetadata = streamMessage.getMetadata(); + assertNotNull(messageMetadata); + MessageIdStreamOffset offset = (MessageIdStreamOffset) messageMetadata.getOffset(); + assertNotNull(offset); + MessageIdStreamOffset nextOffset = (MessageIdStreamOffset) messageMetadata.getNextOffset(); + assertNotNull(nextOffset); + assertEquals(offset.getMessageId(), messageIds.get(index)); + if (index < NUM_RECORDS_PER_PARTITION - 1) { + assertEquals(nextOffset.getMessageId(), messageIds.get(index + 1)); + } } } From 
7a4c0b8b256733ca7c4d9613a2cf618ecb7cca33 Mon Sep 17 00:00:00 2001 From: Xiang Fu Date: Fri, 19 Apr 2024 06:28:25 +0800 Subject: [PATCH 044/102] upgrade maven-shade-plugin version to 3.5.2 (#12712) * upgrade maven-shade-plugin to 3.5.2 * Remove build-shade jar profile --- .../scripts/pr-tests/.pinot_tests_unit.sh | 2 + pinot-clients/pinot-java-client/pom.xml | 1 + pinot-clients/pinot-jdbc-client/pom.xml | 35 +-------- pinot-common/pom.xml | 38 +--------- pinot-core/pom.xml | 35 --------- pinot-distribution/pom.xml | 59 +-------------- pinot-perf/pom.xml | 1 - .../pinot-batch-ingestion-common/pom.xml | 1 - .../pinot-batch-ingestion-hadoop/pom.xml | 2 +- .../pinot-batch-ingestion-spark-2.4/pom.xml | 2 +- .../pinot-batch-ingestion-spark-3/pom.xml | 2 +- .../pinot-batch-ingestion-standalone/pom.xml | 2 +- .../pinot-environment/pinot-azure/pom.xml | 2 +- .../pinot-file-system/pinot-adls/pom.xml | 2 +- .../pinot-file-system/pinot-gcs/pom.xml | 2 +- .../pinot-file-system/pinot-hdfs/pom.xml | 2 +- .../pinot-file-system/pinot-s3/pom.xml | 53 +------------- .../pinot-avro-base/pom.xml | 2 +- .../pinot-input-format/pinot-avro/pom.xml | 2 +- .../pinot-input-format/pinot-clp-log/pom.xml | 2 +- .../pinot-confluent-avro/pom.xml | 2 +- .../pinot-input-format/pinot-csv/pom.xml | 2 +- .../pinot-input-format/pinot-json/pom.xml | 2 +- .../pinot-input-format/pinot-orc/pom.xml | 2 +- .../pinot-input-format/pinot-parquet/pom.xml | 2 +- .../pinot-input-format/pinot-protobuf/pom.xml | 2 +- .../pinot-input-format/pinot-thrift/pom.xml | 2 +- .../pinot-metrics/pinot-dropwizard/pom.xml | 2 +- .../pinot-metrics/pinot-yammer/pom.xml | 2 +- .../pinot-minion-builtin-tasks/pom.xml | 1 - .../pinot-segment-uploader-default/pom.xml | 1 - .../pinot-segment-writer-file-based/pom.xml | 1 - .../pinot-kafka-2.0/pom.xml | 2 +- .../pinot-kafka-base/pom.xml | 2 +- .../pinot-kinesis/pom.xml | 67 +---------------- .../pinot-pulsar/pom.xml | 2 +- pinot-plugins/pom.xml | 73 ------------------- pinot-spi/pom.xml | 
35 --------- pom.xml | 51 ++++++++++++- 39 files changed, 81 insertions(+), 419 deletions(-) diff --git a/.github/workflows/scripts/pr-tests/.pinot_tests_unit.sh b/.github/workflows/scripts/pr-tests/.pinot_tests_unit.sh index fe440ff5eb1f..e50cdc93b633 100755 --- a/.github/workflows/scripts/pr-tests/.pinot_tests_unit.sh +++ b/.github/workflows/scripts/pr-tests/.pinot_tests_unit.sh @@ -33,6 +33,7 @@ if [ "$RUN_TEST_SET" == "1" ]; then -pl 'pinot-spi' \ -pl 'pinot-segment-spi' \ -pl 'pinot-common' \ + -pl ':pinot-yammer' \ -pl 'pinot-core' \ -pl 'pinot-query-planner' \ -pl 'pinot-query-runtime' \ @@ -46,5 +47,6 @@ if [ "$RUN_TEST_SET" == "2" ]; then -pl '!pinot-core' \ -pl '!pinot-query-planner' \ -pl '!pinot-query-runtime' \ + -pl '!:pinot-yammer' \ -P github-actions,no-integration-tests || exit 1 fi diff --git a/pinot-clients/pinot-java-client/pom.xml b/pinot-clients/pinot-java-client/pom.xml index 58d8add75a06..6004a9ee399b 100644 --- a/pinot-clients/pinot-java-client/pom.xml +++ b/pinot-clients/pinot-java-client/pom.xml @@ -33,6 +33,7 @@ https://pinot.apache.org/ ${basedir}/../.. + package diff --git a/pinot-clients/pinot-jdbc-client/pom.xml b/pinot-clients/pinot-jdbc-client/pom.xml index c0fd34c9f81a..08c3880a483f 100644 --- a/pinot-clients/pinot-jdbc-client/pom.xml +++ b/pinot-clients/pinot-jdbc-client/pom.xml @@ -33,6 +33,7 @@ https://pinot.apache.org/ ${basedir}/../.. + package @@ -81,38 +82,4 @@ jsr305 - - - - build-shaded-jar - - - skipShade - !true - - - - - - maven-shade-plugin - 3.2.1 - - - package - - shade - - - - - - - - - - - - - diff --git a/pinot-common/pom.xml b/pinot-common/pom.xml index a1002ca458a5..16e5b6d41bb9 100644 --- a/pinot-common/pom.xml +++ b/pinot-common/pom.xml @@ -33,6 +33,7 @@ https://pinot.apache.org/ ${basedir}/.. 
+ package @@ -355,43 +356,6 @@ - - build-shaded-jar - - true - - - - - maven-shade-plugin - - - package - - shade - - - - - com.google.common.base - ${shade.prefix}.com.google.common.base - - - com.google.common.cache - ${shade.prefix}.com.google.common.cache - - - org.apache.http - ${shade.prefix}.org.apache.http - - - - - - - - - profile-buildthrift diff --git a/pinot-core/pom.xml b/pinot-core/pom.xml index bd6217c24101..6aa29f7d8e2d 100644 --- a/pinot-core/pom.xml +++ b/pinot-core/pom.xml @@ -179,39 +179,4 @@ - - - build-shaded-jar - - false - - - - - maven-shade-plugin - - - package - - shade - - - - - com.google.common.base - ${shade.prefix}.com.google.common.base - - - org.apache.http - ${shade.prefix}.org.apache.http - - - - - - - - - - diff --git a/pinot-distribution/pom.xml b/pinot-distribution/pom.xml index 5a024f142d56..99ce14fb4254 100644 --- a/pinot-distribution/pom.xml +++ b/pinot-distribution/pom.xml @@ -34,6 +34,7 @@ ${basedir}/.. yyyy-MM-dd'T'HHmmss'Z' + package @@ -144,64 +145,6 @@ - - maven-shade-plugin - - - - shade - - - - - - - - true - - - - - - - com.google.common - ${shade.prefix}.com.google.common - - - com.fasterxml.jackson - ${shade.prefix}.com.fasterxml.jackson - - - org.apache.http - ${shade.prefix}.org.apache.http - - - software.amazon - ${shade.prefix}.software.amazon - - - org.reflections - ${shade.prefix}.org.reflections - - - io.netty - ${shade.prefix}.io.netty - - - org.apache.parquet - ${shade.prefix}.org.apache.parquet - - - - - - diff --git a/pinot-perf/pom.xml b/pinot-perf/pom.xml index 6a4f64c5f6d1..a5d06f5d4bbc 100644 --- a/pinot-perf/pom.xml +++ b/pinot-perf/pom.xml @@ -214,7 +214,6 @@ org.apache.maven.plugins maven-shade-plugin - 3.1.0 diff --git a/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-common/pom.xml b/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-common/pom.xml index e569bfdd97d7..6ac4cd73cbfd 100644 --- a/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-common/pom.xml +++ 
b/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-common/pom.xml @@ -35,6 +35,5 @@ https://pinot.apache.org/ ${basedir}/../../.. - none diff --git a/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-hadoop/pom.xml b/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-hadoop/pom.xml index 099292d9dada..d2acdedb4a2b 100644 --- a/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-hadoop/pom.xml +++ b/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-hadoop/pom.xml @@ -35,7 +35,7 @@ https://pinot.apache.org/ ${basedir}/../../.. - package + package diff --git a/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-spark-2.4/pom.xml b/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-spark-2.4/pom.xml index 45568ae319a7..748d6de20c34 100644 --- a/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-spark-2.4/pom.xml +++ b/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-spark-2.4/pom.xml @@ -35,7 +35,7 @@ https://pinot.apache.org/ ${basedir}/../../.. - package + package 2.11 2.4.6 2.11.11 diff --git a/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-spark-3/pom.xml b/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-spark-3/pom.xml index 1dfba13c999b..1e8e5efd5727 100644 --- a/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-spark-3/pom.xml +++ b/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-spark-3/pom.xml @@ -35,7 +35,7 @@ https://pinot.apache.org/ ${basedir}/../../.. - package + package 3.5.1 diff --git a/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-standalone/pom.xml b/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-standalone/pom.xml index 34b82ce0e7d4..468d0ff3b049 100644 --- a/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-standalone/pom.xml +++ b/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-standalone/pom.xml @@ -35,7 +35,7 @@ https://pinot.apache.org/ ${basedir}/../../.. 
- package + package diff --git a/pinot-plugins/pinot-environment/pinot-azure/pom.xml b/pinot-plugins/pinot-environment/pinot-azure/pom.xml index 3fa9df89acad..50d109131c11 100644 --- a/pinot-plugins/pinot-environment/pinot-azure/pom.xml +++ b/pinot-plugins/pinot-environment/pinot-azure/pom.xml @@ -34,7 +34,7 @@ https://pinot.apache.org/ ${basedir}/../../.. - package + package diff --git a/pinot-plugins/pinot-file-system/pinot-adls/pom.xml b/pinot-plugins/pinot-file-system/pinot-adls/pom.xml index 30d7cbc051be..bd5219be623e 100644 --- a/pinot-plugins/pinot-file-system/pinot-adls/pom.xml +++ b/pinot-plugins/pinot-file-system/pinot-adls/pom.xml @@ -33,7 +33,7 @@ https://pinot.apache.org/ ${basedir}/../../.. - package + package diff --git a/pinot-plugins/pinot-file-system/pinot-gcs/pom.xml b/pinot-plugins/pinot-file-system/pinot-gcs/pom.xml index 30525f47ee18..dbf8f5cc8a90 100644 --- a/pinot-plugins/pinot-file-system/pinot-gcs/pom.xml +++ b/pinot-plugins/pinot-file-system/pinot-gcs/pom.xml @@ -35,7 +35,7 @@ https://pinot.apache.org ${basedir}/../../.. - package + package diff --git a/pinot-plugins/pinot-file-system/pinot-hdfs/pom.xml b/pinot-plugins/pinot-file-system/pinot-hdfs/pom.xml index 9691e9a3c93d..904a178ff4bd 100644 --- a/pinot-plugins/pinot-file-system/pinot-hdfs/pom.xml +++ b/pinot-plugins/pinot-file-system/pinot-hdfs/pom.xml @@ -33,7 +33,7 @@ https://pinot.apache.org/ ${basedir}/../../.. - package + package diff --git a/pinot-plugins/pinot-file-system/pinot-s3/pom.xml b/pinot-plugins/pinot-file-system/pinot-s3/pom.xml index 4eec102625a2..bd650eadf229 100644 --- a/pinot-plugins/pinot-file-system/pinot-s3/pom.xml +++ b/pinot-plugins/pinot-file-system/pinot-s3/pom.xml @@ -36,7 +36,7 @@ ${basedir}/../../.. 
2.12.2 - package + package @@ -65,55 +65,4 @@ test - - - - build-shaded-jar - - - skipShade - !true - - - - - - maven-shade-plugin - 3.2.1 - - - ${phase.prop} - - shade - - - - - - - - - com.google.common - ${shade.prefix}.com.google.common - - - com.fasterxml.jackson - ${shade.prefix}.com.fasterxml.jackson - - - - - - - - - - diff --git a/pinot-plugins/pinot-input-format/pinot-avro-base/pom.xml b/pinot-plugins/pinot-input-format/pinot-avro-base/pom.xml index 71373664a043..a55b08d10033 100644 --- a/pinot-plugins/pinot-input-format/pinot-avro-base/pom.xml +++ b/pinot-plugins/pinot-input-format/pinot-avro-base/pom.xml @@ -34,7 +34,7 @@ https://pinot.apache.org/ ${basedir}/../../.. - package + package diff --git a/pinot-plugins/pinot-input-format/pinot-avro/pom.xml b/pinot-plugins/pinot-input-format/pinot-avro/pom.xml index 63540fc056f2..ba1640a48b56 100644 --- a/pinot-plugins/pinot-input-format/pinot-avro/pom.xml +++ b/pinot-plugins/pinot-input-format/pinot-avro/pom.xml @@ -34,7 +34,7 @@ https://pinot.apache.org/ ${basedir}/../../.. - package + package diff --git a/pinot-plugins/pinot-input-format/pinot-clp-log/pom.xml b/pinot-plugins/pinot-input-format/pinot-clp-log/pom.xml index 49bff523fd73..46ea0fc6bfc0 100644 --- a/pinot-plugins/pinot-input-format/pinot-clp-log/pom.xml +++ b/pinot-plugins/pinot-input-format/pinot-clp-log/pom.xml @@ -34,7 +34,7 @@ https://pinot.apache.org/ ${basedir}/../../.. - package + package diff --git a/pinot-plugins/pinot-input-format/pinot-confluent-avro/pom.xml b/pinot-plugins/pinot-input-format/pinot-confluent-avro/pom.xml index cd21edb99b39..b489d28c8937 100644 --- a/pinot-plugins/pinot-input-format/pinot-confluent-avro/pom.xml +++ b/pinot-plugins/pinot-input-format/pinot-confluent-avro/pom.xml @@ -35,7 +35,7 @@ ${basedir}/../../.. 
2.8.1 - package + package diff --git a/pinot-plugins/pinot-input-format/pinot-csv/pom.xml b/pinot-plugins/pinot-input-format/pinot-csv/pom.xml index d323171600be..8b9dc01252a2 100644 --- a/pinot-plugins/pinot-input-format/pinot-csv/pom.xml +++ b/pinot-plugins/pinot-input-format/pinot-csv/pom.xml @@ -34,7 +34,7 @@ https://pinot.apache.org/ ${basedir}/../../.. - package + package diff --git a/pinot-plugins/pinot-input-format/pinot-json/pom.xml b/pinot-plugins/pinot-input-format/pinot-json/pom.xml index 608cb65787d5..e1d57197b2bf 100644 --- a/pinot-plugins/pinot-input-format/pinot-json/pom.xml +++ b/pinot-plugins/pinot-input-format/pinot-json/pom.xml @@ -34,6 +34,6 @@ https://pinot.apache.org/ ${basedir}/../../.. - package + package diff --git a/pinot-plugins/pinot-input-format/pinot-orc/pom.xml b/pinot-plugins/pinot-input-format/pinot-orc/pom.xml index 8f9480d960fd..1a92bf7cd4d5 100644 --- a/pinot-plugins/pinot-input-format/pinot-orc/pom.xml +++ b/pinot-plugins/pinot-input-format/pinot-orc/pom.xml @@ -35,7 +35,7 @@ https://pinot.apache.org/ ${basedir}/../../.. - package + package diff --git a/pinot-plugins/pinot-input-format/pinot-parquet/pom.xml b/pinot-plugins/pinot-input-format/pinot-parquet/pom.xml index 62d70ba5ec1c..2a94606ebcaa 100644 --- a/pinot-plugins/pinot-input-format/pinot-parquet/pom.xml +++ b/pinot-plugins/pinot-input-format/pinot-parquet/pom.xml @@ -34,7 +34,7 @@ https://pinot.apache.org/ ${basedir}/../../.. - package + package diff --git a/pinot-plugins/pinot-input-format/pinot-protobuf/pom.xml b/pinot-plugins/pinot-input-format/pinot-protobuf/pom.xml index da6832e869e2..1e504deafd15 100644 --- a/pinot-plugins/pinot-input-format/pinot-protobuf/pom.xml +++ b/pinot-plugins/pinot-input-format/pinot-protobuf/pom.xml @@ -37,7 +37,7 @@ ${basedir}/../../.. 
2.8.1 1.19.7 - package + package diff --git a/pinot-plugins/pinot-input-format/pinot-thrift/pom.xml b/pinot-plugins/pinot-input-format/pinot-thrift/pom.xml index ab2b565f1bc0..1c746ef3e9b1 100644 --- a/pinot-plugins/pinot-input-format/pinot-thrift/pom.xml +++ b/pinot-plugins/pinot-input-format/pinot-thrift/pom.xml @@ -34,7 +34,7 @@ https://pinot.apache.org/ ${basedir}/../../.. - package + package diff --git a/pinot-plugins/pinot-metrics/pinot-dropwizard/pom.xml b/pinot-plugins/pinot-metrics/pinot-dropwizard/pom.xml index 1fd60542c7ef..8e67f5bde8ee 100644 --- a/pinot-plugins/pinot-metrics/pinot-dropwizard/pom.xml +++ b/pinot-plugins/pinot-metrics/pinot-dropwizard/pom.xml @@ -34,7 +34,7 @@ https://pinot.apache.org/ ${basedir}/../../.. - package + package diff --git a/pinot-plugins/pinot-metrics/pinot-yammer/pom.xml b/pinot-plugins/pinot-metrics/pinot-yammer/pom.xml index 46bebe926c5b..97e2c49ea95a 100644 --- a/pinot-plugins/pinot-metrics/pinot-yammer/pom.xml +++ b/pinot-plugins/pinot-metrics/pinot-yammer/pom.xml @@ -34,7 +34,7 @@ https://pinot.apache.org/ ${basedir}/../../.. - package + package diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/pom.xml b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/pom.xml index 287c832146c7..a8388d1df16c 100644 --- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/pom.xml +++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/pom.xml @@ -34,7 +34,6 @@ https://pinot.apache.org/ ${basedir}/../../.. - none diff --git a/pinot-plugins/pinot-segment-uploader/pinot-segment-uploader-default/pom.xml b/pinot-plugins/pinot-segment-uploader/pinot-segment-uploader-default/pom.xml index f104c5417804..4104c365ddfc 100644 --- a/pinot-plugins/pinot-segment-uploader/pinot-segment-uploader-default/pom.xml +++ b/pinot-plugins/pinot-segment-uploader/pinot-segment-uploader-default/pom.xml @@ -35,7 +35,6 @@ https://pinot.apache.org/ ${basedir}/../../.. 
- none diff --git a/pinot-plugins/pinot-segment-writer/pinot-segment-writer-file-based/pom.xml b/pinot-plugins/pinot-segment-writer/pinot-segment-writer-file-based/pom.xml index 697d952d66a7..c064b4f35ffa 100644 --- a/pinot-plugins/pinot-segment-writer/pinot-segment-writer-file-based/pom.xml +++ b/pinot-plugins/pinot-segment-writer/pinot-segment-writer-file-based/pom.xml @@ -35,7 +35,6 @@ https://pinot.apache.org/ ${basedir}/../../.. - none diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kafka-2.0/pom.xml b/pinot-plugins/pinot-stream-ingestion/pinot-kafka-2.0/pom.xml index 533e18f90e24..168001f16ff3 100644 --- a/pinot-plugins/pinot-stream-ingestion/pinot-kafka-2.0/pom.xml +++ b/pinot-plugins/pinot-stream-ingestion/pinot-kafka-2.0/pom.xml @@ -36,7 +36,7 @@ ${basedir}/../../.. 2.8.1 - package + package diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kafka-base/pom.xml b/pinot-plugins/pinot-stream-ingestion/pinot-kafka-base/pom.xml index c3d663f06400..1c34237eb658 100644 --- a/pinot-plugins/pinot-stream-ingestion/pinot-kafka-base/pom.xml +++ b/pinot-plugins/pinot-stream-ingestion/pinot-kafka-base/pom.xml @@ -35,7 +35,7 @@ https://pinot.apache.org/ ${basedir}/../../.. - package + package diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/pom.xml b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/pom.xml index 919bd1d19e29..916a617504af 100644 --- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/pom.xml +++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/pom.xml @@ -35,7 +35,7 @@ https://pinot.apache.org/ ${basedir}/../../.. 
- package + package 1.0.2 0.2.23 @@ -133,69 +133,4 @@ - - - build-shaded-jar - - - skipShade - !true - - - - - - maven-shade-plugin - 3.2.1 - - - ${phase.prop} - - shade - - - - - - - - - - - com.google.common - ${shade.prefix}.com.google.common - - - com.fasterxml.jackson - ${shade.prefix}.com.fasterxml.jackson - - - software.amazon - ${shade.prefix}.software.amazon - - - - - - - - - - diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-pulsar/pom.xml b/pinot-plugins/pinot-stream-ingestion/pinot-pulsar/pom.xml index 2037493483d2..f742f52f45a8 100644 --- a/pinot-plugins/pinot-stream-ingestion/pinot-pulsar/pom.xml +++ b/pinot-plugins/pinot-stream-ingestion/pinot-pulsar/pom.xml @@ -35,7 +35,7 @@ https://pinot.apache.org/ - package + package ${basedir}/../../.. 0.16.0 1.63.0 diff --git a/pinot-plugins/pom.xml b/pinot-plugins/pom.xml index 57e66ca0fd80..f3e9d02d9b99 100644 --- a/pinot-plugins/pom.xml +++ b/pinot-plugins/pom.xml @@ -36,7 +36,6 @@ ${basedir}/.. - none compile @@ -83,77 +82,5 @@ provided - - build-shaded-jar - - - skipShade - !true - - - - - - maven-shade-plugin - 3.2.1 - - - ${phase.prop} - - shade - - - - - - - - - com.google.common - ${shade.prefix}.com.google.common - - - com.fasterxml.jackson - ${shade.prefix}.com.fasterxml.jackson - - - org.apache.http - ${shade.prefix}.org.apache.http - - - software.amazon - ${shade.prefix}.software.amazon - - - org.reflections - ${shade.prefix}.org.reflections - - - io.netty - ${shade.prefix}.io.netty - - - org.apache.parquet - ${shade.prefix}.org.apache.parquet - - - org.apache.kafka - ${shade.prefix}.org.apache.kafka - - - - - - - - - diff --git a/pinot-spi/pom.xml b/pinot-spi/pom.xml index d1c76c9e8883..43ae753a6d8d 100644 --- a/pinot-spi/pom.xml +++ b/pinot-spi/pom.xml @@ -179,39 +179,4 @@ reflections - - - build-shaded-jar - - false - - - - - maven-shade-plugin - - - package - - shade - - - - - com.google.common.base - ${shade.prefix}.com.google.common.base - - - com.fasterxml.jackson - 
${shade.prefix}.com.fasterxml.jackson - - - - - - - - - - diff --git a/pom.xml b/pom.xml index e1714b98ef90..67c437975f57 100644 --- a/pom.xml +++ b/pom.xml @@ -131,6 +131,8 @@ org.apache.pinot.shaded 3.4.0 + 3.5.2 + none 1.11.3 1.13.1 @@ -2215,6 +2217,9 @@ **/target/** + + **/dependency-reduced-pom.xml + **/*.txt **/*.log @@ -2305,10 +2310,20 @@ maven-shade-plugin - 3.2.1 + ${maven-shade-plugin.version} + + + ${shade.phase.prop} + + shade + + + true + false + ${mainClass} @@ -2329,6 +2344,40 @@ + + + com.fasterxml.jackson + ${shade.prefix}.com.fasterxml.jackson + + + com.google.common + ${shade.prefix}.com.google.common + + + io.netty + ${shade.prefix}.io.netty + + + org.apache.http + ${shade.prefix}.org.apache.http + + + org.apache.kafka + ${shade.prefix}.org.apache.kafka + + + org.apache.parquet + ${shade.prefix}.org.apache.parquet + + + org.reflections + ${shade.prefix}.org.reflections + + + software.amazon + ${shade.prefix}.software.amazon + + From dbbf44ce153bbe72fb3161f1977d0af1b403e06e Mon Sep 17 00:00:00 2001 From: deemoliu Date: Thu, 18 Apr 2024 15:54:59 -0700 Subject: [PATCH 045/102] Add splitPartWithLimit and splitPartFromEnd UDFs (#12437) --- .../function/scalar/StringFunctions.java | 26 ++++++++-- .../function/scalar/StringFunctionsTest.java | 48 +++++++++++++++++++ 2 files changed, 71 insertions(+), 3 deletions(-) diff --git a/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/StringFunctions.java b/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/StringFunctions.java index 8ce77e8ccb6d..374917ec9939 100644 --- a/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/StringFunctions.java +++ b/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/StringFunctions.java @@ -49,7 +49,6 @@ private StringFunctions() { private final static Pattern LTRIM = Pattern.compile("^\\s+"); private final static Pattern RTRIM = Pattern.compile("\\s+$"); - /** * @see StringUtils#reverse(String) * @param input 
@@ -585,14 +584,35 @@ public static String[] split(String input, String delimiter, int limit) { * TODO: Revisit if index should be one-based (both Presto and Postgres use one-based index, which starts with 1) * @param input * @param delimiter - * @param index + * @param index we allow negative value for index which indicates the index from the end. * @return splits string on specified delimiter and returns String at specified index from the split. */ @ScalarFunction(names = {"splitPart", "split_part"}) public static String splitPart(String input, String delimiter, int index) { String[] splitString = StringUtils.splitByWholeSeparator(input, delimiter); - if (index < splitString.length) { + if (index >= 0 && index < splitString.length) { + return splitString[index]; + } else if (index < 0 && index >= -splitString.length) { + return splitString[splitString.length + index]; + } else { + return "null"; + } + } + + /** + * @param input the input String to be split into parts. + * @param delimiter the specified delimiter to split the input string. + * @param limit the max count of parts that the input string can be splitted into. + * @param index the specified index for the splitted parts to be returned. + * @return splits string on the delimiter with the limit count and returns String at specified index from the split. 
+ */ + @ScalarFunction + public static String splitPart(String input, String delimiter, int limit, int index) { + String[] splitString = StringUtils.splitByWholeSeparator(input, delimiter, limit); + if (index >= 0 && index < splitString.length) { return splitString[index]; + } else if (index < 0 && index >= -splitString.length) { + return splitString[splitString.length + index]; } else { return "null"; } diff --git a/pinot-common/src/test/java/org/apache/pinot/common/function/scalar/StringFunctionsTest.java b/pinot-common/src/test/java/org/apache/pinot/common/function/scalar/StringFunctionsTest.java index 9129ccdc3769..d75b8ada435d 100644 --- a/pinot-common/src/test/java/org/apache/pinot/common/function/scalar/StringFunctionsTest.java +++ b/pinot-common/src/test/java/org/apache/pinot/common/function/scalar/StringFunctionsTest.java @@ -26,6 +26,47 @@ public class StringFunctionsTest { + @DataProvider(name = "splitPartTestCases") + public static Object[][] splitPartTestCases() { + return new Object[][]{ + {"org.apache.pinot.common.function", ".", 0, 100, "org", "org"}, + {"org.apache.pinot.common.function", ".", 10, 100, "null", "null"}, + {"org.apache.pinot.common.function", ".", 1, 0, "apache", "apache"}, + {"org.apache.pinot.common.function", ".", 1, 1, "apache", "null"}, + {"org.apache.pinot.common.function", ".", 0, 1, "org", "org.apache.pinot.common.function"}, + {"org.apache.pinot.common.function", ".", 1, 2, "apache", "apache.pinot.common.function"}, + {"org.apache.pinot.common.function", ".", 2, 3, "pinot", "pinot.common.function"}, + {"org.apache.pinot.common.function", ".", 3, 4, "common", "common.function"}, + {"org.apache.pinot.common.function", ".", 4, 5, "function", "function"}, + {"org.apache.pinot.common.function", ".", 5, 6, "null", "null"}, + {"org.apache.pinot.common.function", ".", 3, 3, "common", "null"}, + {"+++++", "+", 0, 100, "", ""}, + {"+++++", "+", 1, 100, "null", "null"}, + // note that splitPart will split with limit first, then lookup 
by index from START or END. + {"org.apache.pinot.common.function", ".", -1, 100, "function", "function"}, + {"org.apache.pinot.common.function", ".", -10, 100, "null", "null"}, + {"org.apache.pinot.common.function", ".", -2, 0, "common", "common"}, // Case: limit=0 is not taking effect. + {"org.apache.pinot.common.function", ".", -1, 1, "function", "org.apache.pinot.common.function"}, + {"org.apache.pinot.common.function", ".", -2, 1, "common", "null"}, + {"org.apache.pinot.common.function", ".", -1, 2, "function", "apache.pinot.common.function"}, + {"org.apache.pinot.common.function", ".", -2, 2, "common", "org"}, + {"org.apache.pinot.common.function", ".", -1, 3, "function", "pinot.common.function"}, + {"org.apache.pinot.common.function", ".", -3, 3, "pinot", "org"}, + {"org.apache.pinot.common.function", ".", -4, 3, "apache", "null"}, + {"org.apache.pinot.common.function", ".", -1, 4, "function", "common.function"}, + {"org.apache.pinot.common.function", ".", -3, 4, "pinot", "apache"}, + {"org.apache.pinot.common.function", ".", -4, 4, "apache", "org"}, + {"org.apache.pinot.common.function", ".", -1, 5, "function", "function"}, + {"org.apache.pinot.common.function", ".", -5, 5, "org", "org"}, + {"org.apache.pinot.common.function", ".", -6, 5, "null", "null"}, + {"org.apache.pinot.common.function", ".", -1, 6, "function", "function"}, + {"org.apache.pinot.common.function", ".", -5, 6, "org", "org"}, + {"org.apache.pinot.common.function", ".", -6, 6, "null", "null"}, + {"+++++", "+", -1, 100, "", ""}, + {"+++++", "+", -2, 100, "null", "null"}, + }; + } + @DataProvider(name = "isJson") public static Object[][] isJsonTestCases() { return new Object[][]{ @@ -40,4 +81,11 @@ public static Object[][] isJsonTestCases() { public void testIsJson(String input, boolean expectedValue) { assertEquals(StringFunctions.isJson(input), expectedValue); } + + @Test(dataProvider = "splitPartTestCases") + public void testSplitPart(String input, String delimiter, int index, int limit, 
String expectedToken, + String expectedTokenWithLimitCounts) { + assertEquals(StringFunctions.splitPart(input, delimiter, index), expectedToken); + assertEquals(StringFunctions.splitPart(input, delimiter, limit, index), expectedTokenWithLimitCounts); + } } From ea60408debb2dbc3b82c93dee6df8d30db5bf65d Mon Sep 17 00:00:00 2001 From: Abhishek Sharma Date: Fri, 19 Apr 2024 00:28:11 -0400 Subject: [PATCH 046/102] hash4j version upgrade to 0.17.0 (#12968) --- pom.xml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 67c437975f57..24006bcea490 100644 --- a/pom.xml +++ b/pom.xml @@ -155,6 +155,7 @@ 1.36.0 9.8.0 0.10.2 + 0.17.0 4.2.25 1.1.10.5 @@ -1242,7 +1243,7 @@ com.dynatrace.hash4j hash4j - 0.13.0 + ${dynatrace.hash4j.version} com.tdunning From 5b90c6564571721f480a0f4ed63937ce3a79f255 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 18 Apr 2024 21:31:02 -0700 Subject: [PATCH 047/102] Bump moment in /pinot-controller/src/main/resources (#9030) --- .../src/main/resources/package-lock.json | 14 +++++++------- pinot-controller/src/main/resources/package.json | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/pinot-controller/src/main/resources/package-lock.json b/pinot-controller/src/main/resources/package-lock.json index d41e32c9eaa7..be7e77f7bab9 100644 --- a/pinot-controller/src/main/resources/package-lock.json +++ b/pinot-controller/src/main/resources/package-lock.json @@ -30,7 +30,7 @@ "jsonlint": "1.6.3", "jwt-decode": "^3.1.2", "lodash": "4.17.21", - "moment": "2.29.3", + "moment": "2.29.4", "prop-types": "15.8.1", "re-resizable": "6.9.9", "react": "16.13.1", @@ -7786,9 +7786,9 @@ } }, "node_modules/moment": { - "version": "2.29.3", - "resolved": "https://registry.npmjs.org/moment/-/moment-2.29.3.tgz", - "integrity": "sha512-c6YRvhEo//6T2Jz/vVtYzqBzwvPT95JBQ+smCytzf7c50oMZRsR/a4w88aD34I+/QVSfnoAnSBFPJHItlOMJVw==", + "version": "2.29.4", + 
"resolved": "https://registry.npmjs.org/moment/-/moment-2.29.4.tgz", + "integrity": "sha512-5LC9SOxjSc2HF6vO2CyuTDNivEdoz2IvyJJGj6X8DJ0eFyfszE0QiEd+iXmBvUP3WHxSjFH/vIsA0EN00cgr8w==", "engines": { "node": "*" } @@ -20324,9 +20324,9 @@ } }, "moment": { - "version": "2.29.3", - "resolved": "https://registry.npmjs.org/moment/-/moment-2.29.3.tgz", - "integrity": "sha512-c6YRvhEo//6T2Jz/vVtYzqBzwvPT95JBQ+smCytzf7c50oMZRsR/a4w88aD34I+/QVSfnoAnSBFPJHItlOMJVw==" + "version": "2.29.4", + "resolved": "https://registry.npmjs.org/moment/-/moment-2.29.4.tgz", + "integrity": "sha512-5LC9SOxjSc2HF6vO2CyuTDNivEdoz2IvyJJGj6X8DJ0eFyfszE0QiEd+iXmBvUP3WHxSjFH/vIsA0EN00cgr8w==" }, "move-concurrently": { "version": "1.0.1", diff --git a/pinot-controller/src/main/resources/package.json b/pinot-controller/src/main/resources/package.json index 08638569491e..865af39bd42f 100644 --- a/pinot-controller/src/main/resources/package.json +++ b/pinot-controller/src/main/resources/package.json @@ -81,7 +81,7 @@ "jsonlint": "1.6.3", "jwt-decode": "^3.1.2", "lodash": "4.17.21", - "moment": "2.29.3", + "moment": "2.29.4", "prop-types": "15.8.1", "re-resizable": "6.9.9", "react": "16.13.1", From 5e8428c497e1003f84c8fe2acb899b3471acfd70 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 18 Apr 2024 21:31:30 -0700 Subject: [PATCH 048/102] Bump terser from 4.8.0 to 4.8.1 in /pinot-controller/src/main/resources (#9085) --- .../src/main/resources/package-lock.json | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pinot-controller/src/main/resources/package-lock.json b/pinot-controller/src/main/resources/package-lock.json index be7e77f7bab9..551f2274b2a6 100644 --- a/pinot-controller/src/main/resources/package-lock.json +++ b/pinot-controller/src/main/resources/package-lock.json @@ -11793,9 +11793,9 @@ } }, "node_modules/terser": { - "version": "4.8.0", - "resolved": 
"https://registry.npmjs.org/terser/-/terser-4.8.0.tgz", - "integrity": "sha512-EAPipTNeWsb/3wLPeup1tVPaXfIaU68xMnVdPafIL1TV05OhASArYyIfFvnvJCNrR2NIOvDVNNTFRa+Re2MWyw==", + "version": "4.8.1", + "resolved": "https://registry.npmjs.org/terser/-/terser-4.8.1.tgz", + "integrity": "sha512-4GnLC0x667eJG0ewJTa6z/yXrbLGv80D9Ru6HIpCQmO+Q4PfEtBFi0ObSckqwL6VyQv/7ENJieXHo2ANmdQwgw==", "dev": true, "dependencies": { "commander": "^2.20.0", @@ -23531,9 +23531,9 @@ "dev": true }, "terser": { - "version": "4.8.0", - "resolved": "https://registry.npmjs.org/terser/-/terser-4.8.0.tgz", - "integrity": "sha512-EAPipTNeWsb/3wLPeup1tVPaXfIaU68xMnVdPafIL1TV05OhASArYyIfFvnvJCNrR2NIOvDVNNTFRa+Re2MWyw==", + "version": "4.8.1", + "resolved": "https://registry.npmjs.org/terser/-/terser-4.8.1.tgz", + "integrity": "sha512-4GnLC0x667eJG0ewJTa6z/yXrbLGv80D9Ru6HIpCQmO+Q4PfEtBFi0ObSckqwL6VyQv/7ENJieXHo2ANmdQwgw==", "dev": true, "requires": { "commander": "^2.20.0", From 31d2ee8de19458020d24951b7046fdc1a225a5af Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 18 Apr 2024 21:31:59 -0700 Subject: [PATCH 049/102] Bump json5 from 1.0.1 to 1.0.2 in /pinot-controller/src/main/resources (#10067) --- .../src/main/resources/package-lock.json | 60 +++++++++---------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/pinot-controller/src/main/resources/package-lock.json b/pinot-controller/src/main/resources/package-lock.json index 551f2274b2a6..f56f0aaeff8a 100644 --- a/pinot-controller/src/main/resources/package-lock.json +++ b/pinot-controller/src/main/resources/package-lock.json @@ -3902,9 +3902,9 @@ } }, "node_modules/eslint-loader/node_modules/json5": { - "version": "2.2.1", - "resolved": "https://registry.npmjs.org/json5/-/json5-2.2.1.tgz", - "integrity": "sha512-1hqLFMSrGHRHxav9q9gNjJ5EXznIxGVO09xQRrwplcS8qs28pZ8s8hupZAmqDwZUmVZ2Qb2jnyPOWcDH8m8dlA==", + "version": "2.2.3", + "resolved": 
"https://registry.npmjs.org/json5/-/json5-2.2.3.tgz", + "integrity": "sha512-XmOWe7eyHYH14cLdVPoyg+GOH3rYX++KpzrylJwSW98t3Nk+U8XOl8FWKOgwtzdb8lXGf6zYwDUzeHMWfxasyg==", "dev": true, "bin": { "json5": "lib/cli.js" @@ -4925,9 +4925,9 @@ } }, "node_modules/file-loader/node_modules/json5": { - "version": "2.2.1", - "resolved": "https://registry.npmjs.org/json5/-/json5-2.2.1.tgz", - "integrity": "sha512-1hqLFMSrGHRHxav9q9gNjJ5EXznIxGVO09xQRrwplcS8qs28pZ8s8hupZAmqDwZUmVZ2Qb2jnyPOWcDH8m8dlA==", + "version": "2.2.3", + "resolved": "https://registry.npmjs.org/json5/-/json5-2.2.3.tgz", + "integrity": "sha512-XmOWe7eyHYH14cLdVPoyg+GOH3rYX++KpzrylJwSW98t3Nk+U8XOl8FWKOgwtzdb8lXGf6zYwDUzeHMWfxasyg==", "dev": true, "bin": { "json5": "lib/cli.js" @@ -7034,9 +7034,9 @@ "dev": true }, "node_modules/json5": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/json5/-/json5-1.0.1.tgz", - "integrity": "sha512-aKS4WQjPenRxiQsC93MNfjx+nbF4PAdYzmd/1JIj8HYzqfbu86beTuNgXDzPknWk0n0uARlyewZo4s++ES36Ow==", + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/json5/-/json5-1.0.2.tgz", + "integrity": "sha512-g1MWMLBiz8FKi1e4w0UyVL3w+iJceWAFBAaBnnGKOpNa5f8TLktkbre1+s6oICydWAm+HRUGTmI+//xv2hvXYA==", "dev": true, "dependencies": { "minimist": "^1.2.0" @@ -11644,9 +11644,9 @@ } }, "node_modules/style-loader/node_modules/json5": { - "version": "2.2.1", - "resolved": "https://registry.npmjs.org/json5/-/json5-2.2.1.tgz", - "integrity": "sha512-1hqLFMSrGHRHxav9q9gNjJ5EXznIxGVO09xQRrwplcS8qs28pZ8s8hupZAmqDwZUmVZ2Qb2jnyPOWcDH8m8dlA==", + "version": "2.2.3", + "resolved": "https://registry.npmjs.org/json5/-/json5-2.2.3.tgz", + "integrity": "sha512-XmOWe7eyHYH14cLdVPoyg+GOH3rYX++KpzrylJwSW98t3Nk+U8XOl8FWKOgwtzdb8lXGf6zYwDUzeHMWfxasyg==", "dev": true, "bin": { "json5": "lib/cli.js" @@ -12373,9 +12373,9 @@ } }, "node_modules/url-loader/node_modules/json5": { - "version": "2.2.1", - "resolved": "https://registry.npmjs.org/json5/-/json5-2.2.1.tgz", - "integrity": 
"sha512-1hqLFMSrGHRHxav9q9gNjJ5EXznIxGVO09xQRrwplcS8qs28pZ8s8hupZAmqDwZUmVZ2Qb2jnyPOWcDH8m8dlA==", + "version": "2.2.3", + "resolved": "https://registry.npmjs.org/json5/-/json5-2.2.3.tgz", + "integrity": "sha512-XmOWe7eyHYH14cLdVPoyg+GOH3rYX++KpzrylJwSW98t3Nk+U8XOl8FWKOgwtzdb8lXGf6zYwDUzeHMWfxasyg==", "dev": true, "bin": { "json5": "lib/cli.js" @@ -17342,9 +17342,9 @@ } }, "json5": { - "version": "2.2.1", - "resolved": "https://registry.npmjs.org/json5/-/json5-2.2.1.tgz", - "integrity": "sha512-1hqLFMSrGHRHxav9q9gNjJ5EXznIxGVO09xQRrwplcS8qs28pZ8s8hupZAmqDwZUmVZ2Qb2jnyPOWcDH8m8dlA==", + "version": "2.2.3", + "resolved": "https://registry.npmjs.org/json5/-/json5-2.2.3.tgz", + "integrity": "sha512-XmOWe7eyHYH14cLdVPoyg+GOH3rYX++KpzrylJwSW98t3Nk+U8XOl8FWKOgwtzdb8lXGf6zYwDUzeHMWfxasyg==", "dev": true }, "loader-utils": { @@ -18070,9 +18070,9 @@ }, "dependencies": { "json5": { - "version": "2.2.1", - "resolved": "https://registry.npmjs.org/json5/-/json5-2.2.1.tgz", - "integrity": "sha512-1hqLFMSrGHRHxav9q9gNjJ5EXznIxGVO09xQRrwplcS8qs28pZ8s8hupZAmqDwZUmVZ2Qb2jnyPOWcDH8m8dlA==", + "version": "2.2.3", + "resolved": "https://registry.npmjs.org/json5/-/json5-2.2.3.tgz", + "integrity": "sha512-XmOWe7eyHYH14cLdVPoyg+GOH3rYX++KpzrylJwSW98t3Nk+U8XOl8FWKOgwtzdb8lXGf6zYwDUzeHMWfxasyg==", "dev": true }, "loader-utils": { @@ -19712,9 +19712,9 @@ "dev": true }, "json5": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/json5/-/json5-1.0.1.tgz", - "integrity": "sha512-aKS4WQjPenRxiQsC93MNfjx+nbF4PAdYzmd/1JIj8HYzqfbu86beTuNgXDzPknWk0n0uARlyewZo4s++ES36Ow==", + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/json5/-/json5-1.0.2.tgz", + "integrity": "sha512-g1MWMLBiz8FKi1e4w0UyVL3w+iJceWAFBAaBnnGKOpNa5f8TLktkbre1+s6oICydWAm+HRUGTmI+//xv2hvXYA==", "dev": true, "requires": { "minimist": "^1.2.0" @@ -23424,9 +23424,9 @@ }, "dependencies": { "json5": { - "version": "2.2.1", - "resolved": "https://registry.npmjs.org/json5/-/json5-2.2.1.tgz", - "integrity": 
"sha512-1hqLFMSrGHRHxav9q9gNjJ5EXznIxGVO09xQRrwplcS8qs28pZ8s8hupZAmqDwZUmVZ2Qb2jnyPOWcDH8m8dlA==", + "version": "2.2.3", + "resolved": "https://registry.npmjs.org/json5/-/json5-2.2.3.tgz", + "integrity": "sha512-XmOWe7eyHYH14cLdVPoyg+GOH3rYX++KpzrylJwSW98t3Nk+U8XOl8FWKOgwtzdb8lXGf6zYwDUzeHMWfxasyg==", "dev": true }, "loader-utils": { @@ -24003,9 +24003,9 @@ }, "dependencies": { "json5": { - "version": "2.2.1", - "resolved": "https://registry.npmjs.org/json5/-/json5-2.2.1.tgz", - "integrity": "sha512-1hqLFMSrGHRHxav9q9gNjJ5EXznIxGVO09xQRrwplcS8qs28pZ8s8hupZAmqDwZUmVZ2Qb2jnyPOWcDH8m8dlA==", + "version": "2.2.3", + "resolved": "https://registry.npmjs.org/json5/-/json5-2.2.3.tgz", + "integrity": "sha512-XmOWe7eyHYH14cLdVPoyg+GOH3rYX++KpzrylJwSW98t3Nk+U8XOl8FWKOgwtzdb8lXGf6zYwDUzeHMWfxasyg==", "dev": true }, "loader-utils": { From da6823600b5bf13a2b9fdbfb60f5e29525496bbc Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 18 Apr 2024 21:34:34 -0700 Subject: [PATCH 050/102] Bump net.openhft:posix from 2.23.2 to 2.25ea0 (#12828) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 24006bcea490..5c5c654befc4 100644 --- a/pom.xml +++ b/pom.xml @@ -1445,7 +1445,7 @@ net.openhft posix - 2.23.2 + 2.25ea0 net.openhft From 76eebc24fc53b2f78491b9ed2e63f85d98f3990e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 19 Apr 2024 10:10:58 -0700 Subject: [PATCH 051/102] Bump net.openhft:chronicle-core from 2.25ea13 to 2.25ea14 (#12971) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 5c5c654befc4..1aa4d37006de 100644 --- a/pom.xml +++ b/pom.xml @@ -1450,7 +1450,7 @@ net.openhft chronicle-core - 2.25ea13 + 2.25ea14 org.ow2.asm From fe63a026ab24478570f751dc62dd097ca29aa1ba Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" 
<49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 19 Apr 2024 10:11:12 -0700 Subject: [PATCH 052/102] Bump org.apache.maven.plugins:maven-gpg-plugin from 3.2.3 to 3.2.4 (#12972) --- pinot-connectors/pinot-spark-2-connector/pom.xml | 2 +- pinot-connectors/pinot-spark-3-connector/pom.xml | 2 +- pinot-connectors/pinot-spark-common/pom.xml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pinot-connectors/pinot-spark-2-connector/pom.xml b/pinot-connectors/pinot-spark-2-connector/pom.xml index 58e307221b71..ec5564a52c49 100644 --- a/pinot-connectors/pinot-spark-2-connector/pom.xml +++ b/pinot-connectors/pinot-spark-2-connector/pom.xml @@ -152,7 +152,7 @@ Thus, explicitly adding this plugin to a new profile to sign the files at the end all at once. --> org.apache.maven.plugins maven-gpg-plugin - 3.2.3 + 3.2.4 diff --git a/pinot-connectors/pinot-spark-3-connector/pom.xml b/pinot-connectors/pinot-spark-3-connector/pom.xml index 2cf4a3fe2e0f..1f43254fb4bb 100644 --- a/pinot-connectors/pinot-spark-3-connector/pom.xml +++ b/pinot-connectors/pinot-spark-3-connector/pom.xml @@ -148,7 +148,7 @@ Thus, explicitly adding this plugin to a new profile to sign the files at the end all at once. --> org.apache.maven.plugins maven-gpg-plugin - 3.2.3 + 3.2.4 diff --git a/pinot-connectors/pinot-spark-common/pom.xml b/pinot-connectors/pinot-spark-common/pom.xml index ec708e5d5c5e..4da9dd3d5571 100644 --- a/pinot-connectors/pinot-spark-common/pom.xml +++ b/pinot-connectors/pinot-spark-common/pom.xml @@ -163,7 +163,7 @@ Thus, explicitly adding this plugin to a new profile to sign the files at the end all at once. 
--> org.apache.maven.plugins maven-gpg-plugin - 3.2.3 + 3.2.4 From bebb491ddad96ea83fb977baa70e3ed2ed125109 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 19 Apr 2024 10:12:18 -0700 Subject: [PATCH 053/102] Bump aws.sdk.version from 2.25.33 to 2.25.34 (#12975) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 1aa4d37006de..932dcdf04d85 100644 --- a/pom.xml +++ b/pom.xml @@ -172,7 +172,7 @@ 0.15.0 0.4.4 4.2.2 - 2.25.33 + 2.25.34 2.12.7 3.1.12 7.10.1 From d840413432e9b3b887d239a1293a0de692934ee6 Mon Sep 17 00:00:00 2001 From: aishikbh Date: Fri, 19 Apr 2024 22:46:17 +0530 Subject: [PATCH 054/102] reduce logging for SpecialValueTransformer (#12970) --- .../local/recordtransformer/SpecialValueTransformer.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/SpecialValueTransformer.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/SpecialValueTransformer.java index d019384ed7cf..1075ff349722 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/SpecialValueTransformer.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/SpecialValueTransformer.java @@ -111,7 +111,7 @@ public GenericRow transform(GenericRow record) { } } if (_negativeZeroConversionCount > 0 || _nanConversionCount > 0) { - LOGGER.info("Converted {} -0.0s to 0.0 and {} NaNs to null", _negativeZeroConversionCount, _nanConversionCount); + LOGGER.debug("Converted {} -0.0s to 0.0 and {} NaNs to null", _negativeZeroConversionCount, _nanConversionCount); } return record; } From e1b0e5357ebfcecffcc6cce3997a3edcdac1aa2c Mon Sep 17 00:00:00 2001 From: Pratik Tibrewal Date: Sat, 20 Apr 2024 03:35:01 +0530 Subject: [PATCH 055/102] Refactor PinotTaskManager class (#12964) --- 
.../resources/PinotTaskRestletResource.java | 14 +- .../helix/core/minion/CronJobScheduleJob.java | 2 +- .../helix/core/minion/PinotTaskManager.java | 162 ++++++++---------- ...rgeRollupMinionClusterIntegrationTest.java | 140 +++++++-------- .../PurgeMinionClusterIntegrationTest.java | 40 ++--- ...eSegmentsMinionClusterIntegrationTest.java | 55 +++--- .../SimpleMinionClusterIntegrationTest.java | 45 ++--- .../integration/tests/TlsIntegrationTest.java | 2 +- .../tests/UpsertTableIntegrationTest.java | 14 +- .../tests/UrlAuthRealtimeIntegrationTest.java | 2 +- 10 files changed, 217 insertions(+), 259 deletions(-) diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/api/resources/PinotTaskRestletResource.java b/pinot-controller/src/main/java/org/apache/pinot/controller/api/resources/PinotTaskRestletResource.java index e09bde84668a..0d9d3a05c123 100644 --- a/pinot-controller/src/main/java/org/apache/pinot/controller/api/resources/PinotTaskRestletResource.java +++ b/pinot-controller/src/main/java/org/apache/pinot/controller/api/resources/PinotTaskRestletResource.java @@ -618,22 +618,20 @@ public Map getCronSchedulerJobDetails( @ApiOperation("Schedule tasks and return a map from task type to task name scheduled") public Map scheduleTasks(@ApiParam(value = "Task type") @QueryParam("taskType") String taskType, @ApiParam(value = "Table name (with type suffix)") @QueryParam("tableName") String tableName, - @ApiParam(value = "Minion Instance tag to schedule the task explicitly on") - @QueryParam("minionInstanceTag") @Nullable String minionInstanceTag, - @Context HttpHeaders headers) { + @ApiParam(value = "Minion Instance tag to schedule the task explicitly on") @QueryParam("minionInstanceTag") + @Nullable String minionInstanceTag, @Context HttpHeaders headers) { String database = headers != null ? headers.getHeaderString(DATABASE) : DEFAULT_DATABASE; if (taskType != null) { // Schedule task for the given task type - List taskNames = tableName != null - ? 
_pinotTaskManager.scheduleTask(taskType, + List taskNames = tableName != null ? _pinotTaskManager.scheduleTaskForTable(taskType, DatabaseUtils.translateTableName(tableName, headers), minionInstanceTag) : _pinotTaskManager.scheduleTaskForDatabase(taskType, database, minionInstanceTag); return Collections.singletonMap(taskType, taskNames == null ? null : StringUtils.join(taskNames, ',')); } else { // Schedule tasks for all task types - Map> allTaskNames = tableName != null - ? _pinotTaskManager.scheduleTasks(DatabaseUtils.translateTableName(tableName, headers), minionInstanceTag) - : _pinotTaskManager.scheduleTasksForDatabase(database, minionInstanceTag); + Map> allTaskNames = tableName != null ? _pinotTaskManager.scheduleAllTasksForTable( + DatabaseUtils.translateTableName(tableName, headers), minionInstanceTag) + : _pinotTaskManager.scheduleAllTasksForDatabase(database, minionInstanceTag); return allTaskNames.entrySet().stream() .collect(Collectors.toMap(Map.Entry::getKey, entry -> String.join(",", entry.getValue()))); } diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/minion/CronJobScheduleJob.java b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/minion/CronJobScheduleJob.java index 8c0433854f0c..f9b250b2bcd4 100644 --- a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/minion/CronJobScheduleJob.java +++ b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/minion/CronJobScheduleJob.java @@ -65,7 +65,7 @@ public void execute(JobExecutionContext jobExecutionContext) return; } long jobStartTime = System.currentTimeMillis(); - pinotTaskManager.scheduleTask(taskType, table); + pinotTaskManager.scheduleTaskForTable(taskType, table, null); LOGGER.info("Finished CronJob: table - {}, task - {}, next runtime is {}", table, taskType, jobExecutionContext.getNextFireTime()); pinotTaskManager.getControllerMetrics().addTimedTableValue(PinotTaskManager.getCronJobName(table, taskType), 
diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/minion/PinotTaskManager.java b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/minion/PinotTaskManager.java index 40299441390a..97417d6bea94 100644 --- a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/minion/PinotTaskManager.java +++ b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/minion/PinotTaskManager.java @@ -22,7 +22,6 @@ import java.io.PrintWriter; import java.io.StringWriter; import java.util.ArrayList; -import java.util.Collections; import java.util.Date; import java.util.HashMap; import java.util.HashSet; @@ -480,30 +479,72 @@ public void registerTaskGenerator(PinotTaskGenerator taskGenerator) { } /** - * Public API to schedule tasks (all task types) for all tables in all databases. + * Schedules tasks (all task types) for all tables. * It might be called from the non-leader controller. * Returns a map from the task type to the list of tasks scheduled. */ - public synchronized Map> scheduleTasks() { - return scheduleTasks(_pinotHelixResourceManager.getAllTables(), false, null); + public synchronized Map> scheduleAllTasksForAllTables(@Nullable String minionInstanceTag) { + return scheduleTasks(_pinotHelixResourceManager.getAllTables(), false, minionInstanceTag); } /** - * Public API to schedule tasks (all task types) for all tables in given database. + * Schedules tasks (all task types) for all tables in the given database. * It might be called from the non-leader controller. * Returns a map from the task type to the list of tasks scheduled. */ - public synchronized Map> scheduleTasksForDatabase(@Nullable String database, + public synchronized Map> scheduleAllTasksForDatabase(@Nullable String database, @Nullable String minionInstanceTag) { return scheduleTasks(_pinotHelixResourceManager.getAllTables(database), false, minionInstanceTag); } + /** + * Schedules tasks (all task types) for the given table. 
+ * It might be called from the non-leader controller. + * Returns a map from the task type to the list of tasks scheduled. + */ + public synchronized Map> scheduleAllTasksForTable(String tableNameWithType, + @Nullable String minionInstanceTag) { + return scheduleTasks(List.of(tableNameWithType), false, minionInstanceTag); + } + + /** + * Schedules task for the given task type for all tables. + * It might be called from the non-leader controller. + * Returns a list of tasks scheduled, or {@code null} if no task is scheduled. + */ + @Nullable + public synchronized List scheduleTaskForAllTables(String taskType, @Nullable String minionInstanceTag) { + return scheduleTask(taskType, _pinotHelixResourceManager.getAllTables(), minionInstanceTag); + } + + /** + * Schedules task for the given task type for all tables in the given database. + * It might be called from the non-leader controller. + * Returns a list of tasks scheduled, or {@code null} if no task is scheduled. + */ + @Nullable + public synchronized List scheduleTaskForDatabase(String taskType, @Nullable String database, + @Nullable String minionInstanceTag) { + return scheduleTask(taskType, _pinotHelixResourceManager.getAllTables(database), minionInstanceTag); + } + + /** + * Schedules task for the given task type for the give table. + * It might be called from the non-leader controller. + * Returns a list of tasks scheduled, or {@code null} if no task is scheduled. + */ + @Nullable + public synchronized List scheduleTaskForTable(String taskType, String tableNameWithType, + @Nullable String minionInstanceTag) { + return scheduleTask(taskType, List.of(tableNameWithType), minionInstanceTag); + } + /** * Helper method to schedule tasks (all task types) for the given tables that have the tasks enabled. Returns a map * from the task type to the list of the tasks scheduled. 
*/ - private synchronized Map> scheduleTasks(List tableNamesWithType, - boolean isLeader, @Nullable String minionInstanceTag) { + private synchronized Map> scheduleTasks(List tableNamesWithType, boolean isLeader, + @Nullable String minionInstanceTag) { _controllerMetrics.addMeteredGlobalValue(ControllerMeter.NUMBER_TIMES_SCHEDULE_TASKS_CALLED, 1L); // Scan all table configs to get the tables with tasks enabled @@ -541,6 +582,27 @@ private synchronized Map> scheduleTasks(List tableN return tasksScheduled; } + @Nullable + private synchronized List scheduleTask(String taskType, List tables, + @Nullable String minionInstanceTag) { + PinotTaskGenerator taskGenerator = _taskGeneratorRegistry.getTaskGenerator(taskType); + Preconditions.checkState(taskGenerator != null, "Task type: %s is not registered", taskType); + + // Scan all table configs to get the tables with task enabled + List enabledTableConfigs = new ArrayList<>(); + for (String tableNameWithType : tables) { + TableConfig tableConfig = _pinotHelixResourceManager.getTableConfig(tableNameWithType); + if (tableConfig != null && tableConfig.getTaskConfig() != null && tableConfig.getTaskConfig() + .isTaskTypeEnabled(taskType)) { + enabledTableConfigs.add(tableConfig); + } + } + + _helixTaskResourceManager.ensureTaskQueueExists(taskType); + addTaskTypeMetricsUpdaterIfNeeded(taskType); + return scheduleTask(taskGenerator, enabledTableConfigs, false, minionInstanceTag); + } + /** * Helper method to schedule task with the given task generator for the given tables that have the task enabled. * Returns the list of task names, or {@code null} if no task is scheduled. 
@@ -554,8 +616,8 @@ private List scheduleTask(PinotTaskGenerator taskGenerator, List presentTaskConfig = minionInstanceTagToTaskConfigs.computeIfAbsent(minionInstanceTag, k -> new ArrayList<>()); taskGenerator.generateTasks(List.of(tableConfig), presentTaskConfig); @@ -622,86 +684,6 @@ private List scheduleTask(PinotTaskGenerator taskGenerator, List> scheduleTasks(String tableNameWithType) { - return scheduleTasks(Collections.singletonList(tableNameWithType), false, null); - } - - /** - * Public API to schedule tasks (all task types) for the given table on a specific instance tag. - * It might be called from the non-leader controller. Returns a map from the task type to the list of tasks scheduled. - */ - public synchronized Map> scheduleTasks(String tableNameWithType, - @Nullable String minionInstanceTag) { - return scheduleTasks(Collections.singletonList(tableNameWithType), false, minionInstanceTag); - } - - /** - * Public API to schedule task for the given task type in all databases. - * It might be called from the non-leader controller. - * Returns the list of task names, or {@code null} if no task is scheduled. - */ - @Nullable - public synchronized List scheduleTask(String taskType, @Nullable String minionInstanceTag) { - return scheduleTask(taskType, _pinotHelixResourceManager.getAllTables(), minionInstanceTag); - } - - /** - * Public API to schedule task for the given task type in given database. - * It might be called from the non-leader controller. - * Returns the list of task name, or {@code null} if no task is scheduled. 
- */ - @Nullable - public synchronized List scheduleTaskForDatabase(String taskType, @Nullable String database, - @Nullable String minionInstanceTag) { - return scheduleTask(taskType, _pinotHelixResourceManager.getAllTables(database), minionInstanceTag); - } - - @Nullable - private List scheduleTask(String taskType, List tables, @Nullable String minionInstanceTag) { - PinotTaskGenerator taskGenerator = _taskGeneratorRegistry.getTaskGenerator(taskType); - Preconditions.checkState(taskGenerator != null, "Task type: %s is not registered", taskType); - - // Scan all table configs to get the tables with task enabled - List enabledTableConfigs = new ArrayList<>(); - for (String tableNameWithType : tables) { - TableConfig tableConfig = _pinotHelixResourceManager.getTableConfig(tableNameWithType); - if (tableConfig != null && tableConfig.getTaskConfig() != null && tableConfig.getTaskConfig() - .isTaskTypeEnabled(taskType)) { - enabledTableConfigs.add(tableConfig); - } - } - - _helixTaskResourceManager.ensureTaskQueueExists(taskType); - addTaskTypeMetricsUpdaterIfNeeded(taskType); - return scheduleTask(taskGenerator, enabledTableConfigs, false, minionInstanceTag); - } - - /** - * Public API to schedule task for the given task type on the given table. It might be called from the non-leader - * controller. Returns the list of task names, or {@code null} if no task is scheduled. 
- */ - @Nullable - public synchronized List scheduleTask(String taskType, String tableNameWithType, - @Nullable String minionInstanceTag) { - PinotTaskGenerator taskGenerator = _taskGeneratorRegistry.getTaskGenerator(taskType); - Preconditions.checkState(taskGenerator != null, "Task type: %s is not registered", taskType); - - TableConfig tableConfig = _pinotHelixResourceManager.getTableConfig(tableNameWithType); - Preconditions.checkState(tableConfig != null, "Failed to find table config for table: %s", tableNameWithType); - - Preconditions.checkState( - tableConfig.getTaskConfig() != null && tableConfig.getTaskConfig().isTaskTypeEnabled(taskType), - "Table: %s does not have task type: %s enabled", tableNameWithType, taskType); - - _helixTaskResourceManager.ensureTaskQueueExists(taskType); - addTaskTypeMetricsUpdaterIfNeeded(taskType); - return scheduleTask(taskGenerator, Collections.singletonList(tableConfig), false, minionInstanceTag); - } - @Override protected void processTables(List tableNamesWithType, Properties taskProperties) { scheduleTasks(tableNamesWithType, true, null); diff --git a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/MergeRollupMinionClusterIntegrationTest.java b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/MergeRollupMinionClusterIntegrationTest.java index b655416c878c..c5be600661f3 100644 --- a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/MergeRollupMinionClusterIntegrationTest.java +++ b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/MergeRollupMinionClusterIntegrationTest.java @@ -139,14 +139,14 @@ public void setUp() List avroFiles = unpackAvroData(_tempDir); // Create and upload segments - ClusterIntegrationTestUtils - .buildSegmentsFromAvro(avroFiles, singleLevelConcatTableConfig, schema, 0, _segmentDir1, _tarDir1); + ClusterIntegrationTestUtils.buildSegmentsFromAvro(avroFiles, singleLevelConcatTableConfig, schema, 0, 
_segmentDir1, + _tarDir1); buildSegmentsFromAvroWithPostfix(avroFiles, singleLevelRollupTableConfig, schema, 0, _segmentDir2, _tarDir2, "1"); buildSegmentsFromAvroWithPostfix(avroFiles, singleLevelRollupTableConfig, schema, 0, _segmentDir2, _tarDir2, "2"); - ClusterIntegrationTestUtils - .buildSegmentsFromAvro(avroFiles, multiLevelConcatTableConfig, schema, 0, _segmentDir3, _tarDir3); - ClusterIntegrationTestUtils - .buildSegmentsFromAvro(avroFiles, singleLevelConcatMetadataTableConfig, schema, 0, _segmentDir4, _tarDir4); + ClusterIntegrationTestUtils.buildSegmentsFromAvro(avroFiles, multiLevelConcatTableConfig, schema, 0, _segmentDir3, + _tarDir3); + ClusterIntegrationTestUtils.buildSegmentsFromAvro(avroFiles, singleLevelConcatMetadataTableConfig, schema, 0, + _segmentDir4, _tarDir4); uploadSegments(SINGLE_LEVEL_CONCAT_TEST_TABLE, _tarDir1); uploadSegments(SINGLE_LEVEL_ROLLUP_TEST_TABLE, _tarDir2); uploadSegments(MULTI_LEVEL_CONCAT_TEST_TABLE, _tarDir3); @@ -160,8 +160,8 @@ public void setUp() schema.setSchemaName(MULTI_LEVEL_CONCAT_PROCESS_ALL_REALTIME_TABLE); addSchema(schema); TableConfig singleLevelConcatProcessAllRealtimeTableConfig = - createRealtimeTableConfigWithProcessAllMode(avroFiles.get(0), - MULTI_LEVEL_CONCAT_PROCESS_ALL_REALTIME_TABLE, PROCESS_ALL_MODE_KAFKA_TOPIC); + createRealtimeTableConfigWithProcessAllMode(avroFiles.get(0), MULTI_LEVEL_CONCAT_PROCESS_ALL_REALTIME_TABLE, + PROCESS_ALL_MODE_KAFKA_TOPIC); addTableConfig(singleLevelConcatProcessAllRealtimeTableConfig); // Push data into Kafka @@ -172,9 +172,8 @@ PROCESS_ALL_MODE_KAFKA_TOPIC, getMaxNumKafkaMessagesPerBatch(), getKafkaMessageH ClusterIntegrationTestUtils.pushAvroIntoKafka(avroFiles.subList(0, 3), "localhost:" + getKafkaPort(), PROCESS_ALL_MODE_KAFKA_TOPIC, getMaxNumKafkaMessagesPerBatch(), getKafkaMessageHeader(), getPartitionColumn(), injectTombstones()); - ClusterIntegrationTestUtils - .buildSegmentsFromAvro(avroFiles.subList(3, 9), singleLevelConcatProcessAllRealtimeTableConfig, 
schema, 0, - _segmentDir5, _tarDir5); + ClusterIntegrationTestUtils.buildSegmentsFromAvro(avroFiles.subList(3, 9), + singleLevelConcatProcessAllRealtimeTableConfig, schema, 0, _segmentDir5, _tarDir5); // Wait for all documents loaded waitForAllDocsLoaded(600_000L); @@ -216,14 +215,14 @@ private TableConfig createOfflineTableConfig(String tableName, TableTaskConfig t private TableConfig createOfflineTableConfig(String tableName, TableTaskConfig taskConfig, @Nullable SegmentPartitionConfig partitionConfig) { - return new TableConfigBuilder(TableType.OFFLINE).setTableName(tableName) - .setTimeColumnName(getTimeColumnName()).setSortedColumn(getSortedColumn()) - .setInvertedIndexColumns(getInvertedIndexColumns()).setNoDictionaryColumns(getNoDictionaryColumns()) - .setRangeIndexColumns(getRangeIndexColumns()).setBloomFilterColumns(getBloomFilterColumns()) - .setFieldConfigList(getFieldConfigs()).setNumReplicas(getNumReplicas()).setSegmentVersion(getSegmentVersion()) - .setLoadMode(getLoadMode()).setTaskConfig(taskConfig).setBrokerTenant(getBrokerTenant()) - .setServerTenant(getServerTenant()).setIngestionConfig(getIngestionConfig()) - .setNullHandlingEnabled(getNullHandlingEnabled()).setSegmentPartitionConfig(partitionConfig).build(); + return new TableConfigBuilder(TableType.OFFLINE).setTableName(tableName).setTimeColumnName(getTimeColumnName()) + .setSortedColumn(getSortedColumn()).setInvertedIndexColumns(getInvertedIndexColumns()) + .setNoDictionaryColumns(getNoDictionaryColumns()).setRangeIndexColumns(getRangeIndexColumns()) + .setBloomFilterColumns(getBloomFilterColumns()).setFieldConfigList(getFieldConfigs()) + .setNumReplicas(getNumReplicas()).setSegmentVersion(getSegmentVersion()).setLoadMode(getLoadMode()) + .setTaskConfig(taskConfig).setBrokerTenant(getBrokerTenant()).setServerTenant(getServerTenant()) + .setIngestionConfig(getIngestionConfig()).setNullHandlingEnabled(getNullHandlingEnabled()) + .setSegmentPartitionConfig(partitionConfig).build(); } protected 
TableConfig createRealtimeTableConfigWithProcessAllMode(File sampleAvroFile, String tableName, @@ -246,12 +245,12 @@ protected TableConfig createRealtimeTableConfigWithProcessAllMode(File sampleAvr tableTaskConfigs.put("ActualElapsedTime.aggregationType", "min"); tableTaskConfigs.put("WeatherDelay.aggregationType", "sum"); tableTaskConfigs.put("mode", "processAll"); - return new TableConfigBuilder(TableType.REALTIME).setTableName(tableName) - .setTimeColumnName(getTimeColumnName()).setSortedColumn(getSortedColumn()) - .setInvertedIndexColumns(getInvertedIndexColumns()).setNoDictionaryColumns(getNoDictionaryColumns()) - .setRangeIndexColumns(getRangeIndexColumns()).setBloomFilterColumns(getBloomFilterColumns()) - .setFieldConfigList(getFieldConfigs()).setNumReplicas(getNumReplicas()).setSegmentVersion(getSegmentVersion()) - .setLoadMode(getLoadMode()).setTaskConfig( + return new TableConfigBuilder(TableType.REALTIME).setTableName(tableName).setTimeColumnName(getTimeColumnName()) + .setSortedColumn(getSortedColumn()).setInvertedIndexColumns(getInvertedIndexColumns()) + .setNoDictionaryColumns(getNoDictionaryColumns()).setRangeIndexColumns(getRangeIndexColumns()) + .setBloomFilterColumns(getBloomFilterColumns()).setFieldConfigList(getFieldConfigs()) + .setNumReplicas(getNumReplicas()).setSegmentVersion(getSegmentVersion()).setLoadMode(getLoadMode()) + .setTaskConfig( new TableTaskConfig(Collections.singletonMap(MinionConstants.MergeRollupTask.TASK_TYPE, tableTaskConfigs))) .setBrokerTenant(getBrokerTenant()).setServerTenant(getServerTenant()).setIngestionConfig(getIngestionConfig()) .setQueryConfig(getQueryConfig()).setStreamConfigs(streamConfigs) @@ -411,17 +410,16 @@ public void testOfflineTableSingleLevelConcat() int numTasks = 0; List taskList; for (String tasks = - _taskManager.scheduleTasks(offlineTableName).get(MinionConstants.MergeRollupTask.TASK_TYPE).get(0); - tasks != null; - taskList = 
_taskManager.scheduleTasks(offlineTableName).get(MinionConstants.MergeRollupTask.TASK_TYPE), - tasks = taskList != null ? taskList.get(0) : null, - numTasks++) { + _taskManager.scheduleAllTasksForTable(offlineTableName, null).get(MinionConstants.MergeRollupTask.TASK_TYPE) + .get(0); tasks != null; taskList = + _taskManager.scheduleAllTasksForTable(offlineTableName, null).get(MinionConstants.MergeRollupTask.TASK_TYPE), + tasks = taskList != null ? taskList.get(0) : null, numTasks++) { assertEquals(_helixTaskResourceManager.getSubtaskConfigs(tasks).size(), expectedNumSubTasks[numTasks]); assertTrue(_helixTaskResourceManager.getTaskQueues() .contains(PinotHelixTaskResourceManager.getHelixJobQueueName(MinionConstants.MergeRollupTask.TASK_TYPE))); // Will not schedule task if there's incomplete task - assertNull( - _taskManager.scheduleTasks(offlineTableName).get(MinionConstants.RealtimeToOfflineSegmentsTask.TASK_TYPE)); + assertNull(_taskManager.scheduleAllTasksForTable(offlineTableName, null) + .get(MinionConstants.RealtimeToOfflineSegmentsTask.TASK_TYPE)); waitForTaskToComplete(); // Check watermark @@ -527,17 +525,16 @@ public void testOfflineTableSingleLevelConcatWithMetadataPush() int numTasks = 0; List taskList; for (String tasks = - _taskManager.scheduleTasks(offlineTableName).get(MinionConstants.MergeRollupTask.TASK_TYPE).get(0); - tasks != null; - taskList = _taskManager.scheduleTasks(offlineTableName).get(MinionConstants.MergeRollupTask.TASK_TYPE), - tasks = taskList != null ? taskList.get(0) : null, - numTasks++) { + _taskManager.scheduleAllTasksForTable(offlineTableName, null).get(MinionConstants.MergeRollupTask.TASK_TYPE) + .get(0); tasks != null; taskList = + _taskManager.scheduleAllTasksForTable(offlineTableName, null).get(MinionConstants.MergeRollupTask.TASK_TYPE), + tasks = taskList != null ? 
taskList.get(0) : null, numTasks++) { assertEquals(_helixTaskResourceManager.getSubtaskConfigs(tasks).size(), expectedNumSubTasks[numTasks]); assertTrue(_helixTaskResourceManager.getTaskQueues() .contains(PinotHelixTaskResourceManager.getHelixJobQueueName(MinionConstants.MergeRollupTask.TASK_TYPE))); // Will not schedule task if there's incomplete task - assertNull( - _taskManager.scheduleTasks(offlineTableName).get(MinionConstants.RealtimeToOfflineSegmentsTask.TASK_TYPE)); + assertNull(_taskManager.scheduleAllTasksForTable(offlineTableName, null) + .get(MinionConstants.RealtimeToOfflineSegmentsTask.TASK_TYPE)); waitForTaskToComplete(); // Check watermark @@ -636,17 +633,16 @@ public void testOfflineTableSingleLevelRollup() int numTasks = 0; List taskList; for (String tasks = - _taskManager.scheduleTasks(offlineTableName).get(MinionConstants.MergeRollupTask.TASK_TYPE).get(0); - tasks != null; - taskList = _taskManager.scheduleTasks(offlineTableName).get(MinionConstants.MergeRollupTask.TASK_TYPE), - tasks = taskList != null ? taskList.get(0) : null, - numTasks++) { + _taskManager.scheduleAllTasksForTable(offlineTableName, null).get(MinionConstants.MergeRollupTask.TASK_TYPE) + .get(0); tasks != null; taskList = + _taskManager.scheduleAllTasksForTable(offlineTableName, null).get(MinionConstants.MergeRollupTask.TASK_TYPE), + tasks = taskList != null ? 
taskList.get(0) : null, numTasks++) { assertEquals(_helixTaskResourceManager.getSubtaskConfigs(tasks).size(), 1); assertTrue(_helixTaskResourceManager.getTaskQueues() .contains(PinotHelixTaskResourceManager.getHelixJobQueueName(MinionConstants.MergeRollupTask.TASK_TYPE))); // Will not schedule task if there's incomplete task - assertNull( - _taskManager.scheduleTasks(offlineTableName).get(MinionConstants.RealtimeToOfflineSegmentsTask.TASK_TYPE)); + assertNull(_taskManager.scheduleAllTasksForTable(offlineTableName, null) + .get(MinionConstants.RealtimeToOfflineSegmentsTask.TASK_TYPE)); waitForTaskToComplete(); // Check watermark @@ -788,17 +784,16 @@ public void testOfflineTableMultiLevelConcat() int numTasks = 0; List taskList; for (String tasks = - _taskManager.scheduleTasks(offlineTableName).get(MinionConstants.MergeRollupTask.TASK_TYPE).get(0); - tasks != null; - taskList = _taskManager.scheduleTasks(offlineTableName).get(MinionConstants.MergeRollupTask.TASK_TYPE), - tasks = taskList != null ? taskList.get(0) : null, - numTasks++) { + _taskManager.scheduleAllTasksForTable(offlineTableName, null).get(MinionConstants.MergeRollupTask.TASK_TYPE) + .get(0); tasks != null; taskList = + _taskManager.scheduleAllTasksForTable(offlineTableName, null).get(MinionConstants.MergeRollupTask.TASK_TYPE), + tasks = taskList != null ? 
taskList.get(0) : null, numTasks++) { assertEquals(_helixTaskResourceManager.getSubtaskConfigs(tasks).size(), expectedNumSubTasks[numTasks]); assertTrue(_helixTaskResourceManager.getTaskQueues() .contains(PinotHelixTaskResourceManager.getHelixJobQueueName(MinionConstants.MergeRollupTask.TASK_TYPE))); // Will not schedule task if there's incomplete task - assertNull( - _taskManager.scheduleTasks(offlineTableName).get(MinionConstants.RealtimeToOfflineSegmentsTask.TASK_TYPE)); + assertNull(_taskManager.scheduleAllTasksForTable(offlineTableName, null) + .get(MinionConstants.RealtimeToOfflineSegmentsTask.TASK_TYPE)); waitForTaskToComplete(); // Check watermark @@ -859,8 +854,8 @@ protected void verifyTableDelete(String tableNameWithType) { return false; } // Check if the task metadata is cleaned up - if (MinionTaskMetadataUtils - .fetchTaskMetadata(_propertyStore, MinionConstants.MergeRollupTask.TASK_TYPE, tableNameWithType) != null) { + if (MinionTaskMetadataUtils.fetchTaskMetadata(_propertyStore, MinionConstants.MergeRollupTask.TASK_TYPE, + tableNameWithType) != null) { return false; } return true; @@ -921,18 +916,17 @@ public void testRealtimeTableSingleLevelConcat() int numTasks = 0; List taskList; for (String tasks = - taskManager.scheduleTasks(realtimeTableName).get(MinionConstants.MergeRollupTask.TASK_TYPE).get(0); - tasks != null; - taskList = taskManager.scheduleTasks(realtimeTableName).get(MinionConstants.MergeRollupTask.TASK_TYPE), - tasks = taskList != null ? taskList.get(0) : null, - numTasks++) { + taskManager.scheduleAllTasksForTable(realtimeTableName, null).get(MinionConstants.MergeRollupTask.TASK_TYPE) + .get(0); tasks != null; taskList = + taskManager.scheduleAllTasksForTable(realtimeTableName, null).get(MinionConstants.MergeRollupTask.TASK_TYPE), + tasks = taskList != null ? 
taskList.get(0) : null, numTasks++) { // assertEquals(helixTaskResourceManager.getSubtaskConfigs(tasks).size(), expectedNumSubTasks[numTasks]); assertTrue(helixTaskResourceManager.getTaskQueues() .contains(PinotHelixTaskResourceManager.getHelixJobQueueName(MinionConstants.MergeRollupTask.TASK_TYPE))); // Will not schedule task if there's incomplete task - assertNull( - taskManager.scheduleTasks(realtimeTableName).get(MinionConstants.RealtimeToOfflineSegmentsTask.TASK_TYPE)); + assertNull(taskManager.scheduleAllTasksForTable(realtimeTableName, null) + .get(MinionConstants.RealtimeToOfflineSegmentsTask.TASK_TYPE)); waitForTaskToComplete(); // Check watermark @@ -1027,17 +1021,16 @@ public void testRealtimeTableProcessAllModeMultiLevelConcat() int numTasks = 0; List taskList; for (String tasks = - taskManager.scheduleTasks(realtimeTableName).get(MinionConstants.MergeRollupTask.TASK_TYPE).get(0); - tasks != null; taskList = - taskManager.scheduleTasks(realtimeTableName).get(MinionConstants.MergeRollupTask.TASK_TYPE), - tasks = taskList != null ? taskList.get(0) : null, - numTasks++) { + taskManager.scheduleAllTasksForTable(realtimeTableName, null).get(MinionConstants.MergeRollupTask.TASK_TYPE) + .get(0); tasks != null; taskList = + taskManager.scheduleAllTasksForTable(realtimeTableName, null).get(MinionConstants.MergeRollupTask.TASK_TYPE), + tasks = taskList != null ? 
taskList.get(0) : null, numTasks++) { assertTrue(helixTaskResourceManager.getTaskQueues() .contains(PinotHelixTaskResourceManager.getHelixJobQueueName(MinionConstants.MergeRollupTask.TASK_TYPE))); // Will not schedule task if there's incomplete task - assertNull( - taskManager.scheduleTasks(realtimeTableName).get(MinionConstants.RealtimeToOfflineSegmentsTask.TASK_TYPE)); + assertNull(taskManager.scheduleAllTasksForTable(realtimeTableName, null) + .get(MinionConstants.RealtimeToOfflineSegmentsTask.TASK_TYPE)); waitForTaskToComplete(); // Check not using watermarks @@ -1069,11 +1062,10 @@ public void testRealtimeTableProcessAllModeMultiLevelConcat() waitForAllDocsLoaded(600_000L); for (String tasks = - taskManager.scheduleTasks(realtimeTableName).get(MinionConstants.MergeRollupTask.TASK_TYPE).get(0); - tasks != null; taskList = - taskManager.scheduleTasks(realtimeTableName).get(MinionConstants.MergeRollupTask.TASK_TYPE), - tasks = taskList != null ? taskList.get(0) : null, - numTasks++) { + taskManager.scheduleAllTasksForTable(realtimeTableName, null).get(MinionConstants.MergeRollupTask.TASK_TYPE) + .get(0); tasks != null; taskList = + taskManager.scheduleAllTasksForTable(realtimeTableName, null).get(MinionConstants.MergeRollupTask.TASK_TYPE), + tasks = taskList != null ? 
taskList.get(0) : null, numTasks++) { waitForTaskToComplete(); // Check metrics long numBucketsToProcess = MetricValueUtils.getGaugeValue(_controllerStarter.getControllerMetrics(), diff --git a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/PurgeMinionClusterIntegrationTest.java b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/PurgeMinionClusterIntegrationTest.java index c4ba131f6de3..da4e85696c7f 100644 --- a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/PurgeMinionClusterIntegrationTest.java +++ b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/PurgeMinionClusterIntegrationTest.java @@ -18,7 +18,6 @@ */ package org.apache.pinot.integration.tests; -import com.google.common.collect.ImmutableList; import java.io.File; import java.util.ArrayList; import java.util.Arrays; @@ -63,7 +62,6 @@ public class PurgeMinionClusterIntegrationTest extends BaseClusterIntegrationTes private static final String PURGE_DELTA_NOT_PASSED_TABLE = "myTable3"; private static final String PURGE_OLD_SEGMENTS_WITH_NEW_INDICES_TABLE = "myTable4"; - protected PinotHelixTaskResourceManager _helixTaskResourceManager; protected PinotTaskManager _taskManager; protected PinotHelixResourceManager _pinotHelixResourceManager; @@ -83,12 +81,8 @@ public void setUp() startBrokers(1); startServers(1); - List allTables = ImmutableList.of( - PURGE_FIRST_RUN_TABLE, - PURGE_DELTA_PASSED_TABLE, - PURGE_DELTA_NOT_PASSED_TABLE, - PURGE_OLD_SEGMENTS_WITH_NEW_INDICES_TABLE - ); + List allTables = List.of(PURGE_FIRST_RUN_TABLE, PURGE_DELTA_PASSED_TABLE, PURGE_DELTA_NOT_PASSED_TABLE, + PURGE_OLD_SEGMENTS_WITH_NEW_INDICES_TABLE); Schema schema = null; TableConfig tableConfig = null; for (String tableName : allTables) { @@ -152,12 +146,9 @@ public void setUp() private void setRecordPurger() { MinionContext minionContext = MinionContext.getInstance(); minionContext.setRecordPurgerFactory(rawTableName -> { - List 
tableNames = Arrays.asList( - PURGE_FIRST_RUN_TABLE, - PURGE_DELTA_PASSED_TABLE, - PURGE_DELTA_NOT_PASSED_TABLE, - PURGE_OLD_SEGMENTS_WITH_NEW_INDICES_TABLE - ); + List tableNames = + Arrays.asList(PURGE_FIRST_RUN_TABLE, PURGE_DELTA_PASSED_TABLE, PURGE_DELTA_NOT_PASSED_TABLE, + PURGE_OLD_SEGMENTS_WITH_NEW_INDICES_TABLE); if (tableNames.contains(rawTableName)) { return row -> row.getValue("ArrTime").equals(1); } else { @@ -195,11 +186,12 @@ public void testFirstRunPurge() // 5. Check the purge process itself by setting an expecting number of rows String offlineTableName = TableNameBuilder.OFFLINE.tableNameWithType(PURGE_FIRST_RUN_TABLE); - assertNotNull(_taskManager.scheduleTasks(offlineTableName).get(MinionConstants.PurgeTask.TASK_TYPE)); + assertNotNull( + _taskManager.scheduleAllTasksForTable(offlineTableName, null).get(MinionConstants.PurgeTask.TASK_TYPE)); assertTrue(_helixTaskResourceManager.getTaskQueues() .contains(PinotHelixTaskResourceManager.getHelixJobQueueName(MinionConstants.PurgeTask.TASK_TYPE))); // Will not schedule task if there's incomplete task - assertNull(_taskManager.scheduleTasks(offlineTableName).get(MinionConstants.PurgeTask.TASK_TYPE)); + assertNull(_taskManager.scheduleAllTasksForTable(offlineTableName, null).get(MinionConstants.PurgeTask.TASK_TYPE)); waitForTaskToComplete(); // Check that metadata contains expected values @@ -209,7 +201,7 @@ public void testFirstRunPurge() metadata.getCustomMap().containsKey(MinionConstants.PurgeTask.TASK_TYPE + MinionConstants.TASK_TIME_SUFFIX)); } // Should not generate new purge task as the last time purge is not greater than last + 1day (default purge delay) - assertNull(_taskManager.scheduleTasks(offlineTableName).get(MinionConstants.PurgeTask.TASK_TYPE)); + assertNull(_taskManager.scheduleAllTasksForTable(offlineTableName, null).get(MinionConstants.PurgeTask.TASK_TYPE)); // 52 rows with ArrTime = 1 // 115545 totals rows @@ -239,11 +231,12 @@ public void testPassedDelayTimePurge() // 5. 
Check the purge process itself by setting an expecting number of rows String offlineTableName = TableNameBuilder.OFFLINE.tableNameWithType(PURGE_DELTA_PASSED_TABLE); - assertNotNull(_taskManager.scheduleTasks(offlineTableName).get(MinionConstants.PurgeTask.TASK_TYPE)); + assertNotNull( + _taskManager.scheduleAllTasksForTable(offlineTableName, null).get(MinionConstants.PurgeTask.TASK_TYPE)); assertTrue(_helixTaskResourceManager.getTaskQueues() .contains(PinotHelixTaskResourceManager.getHelixJobQueueName(MinionConstants.PurgeTask.TASK_TYPE))); // Will not schedule task if there's incomplete task - assertNull(_taskManager.scheduleTasks(offlineTableName).get(MinionConstants.PurgeTask.TASK_TYPE)); + assertNull(_taskManager.scheduleAllTasksForTable(offlineTableName, null).get(MinionConstants.PurgeTask.TASK_TYPE)); waitForTaskToComplete(); // Check that metadata contains expected values @@ -255,7 +248,7 @@ public void testPassedDelayTimePurge() assertTrue(System.currentTimeMillis() - Long.parseLong(purgeTime) < 86400000); } // Should not generate new purge task as the last time purge is not greater than last + 1day (default purge delay) - assertNull(_taskManager.scheduleTasks(offlineTableName).get(MinionConstants.PurgeTask.TASK_TYPE)); + assertNull(_taskManager.scheduleAllTasksForTable(offlineTableName, null).get(MinionConstants.PurgeTask.TASK_TYPE)); // 52 rows with ArrTime = 1 // 115545 totals rows @@ -287,7 +280,7 @@ public void testNotPassedDelayTimePurge() String offlineTableName = TableNameBuilder.OFFLINE.tableNameWithType(PURGE_DELTA_NOT_PASSED_TABLE); // No task should be schedule as the delay is not passed - assertNull(_taskManager.scheduleTasks(offlineTableName).get(MinionConstants.PurgeTask.TASK_TYPE)); + assertNull(_taskManager.scheduleAllTasksForTable(offlineTableName, null).get(MinionConstants.PurgeTask.TASK_TYPE)); for (SegmentZKMetadata metadata : _pinotHelixResourceManager.getSegmentsZKMetadata(offlineTableName)) { // Check purge time String purgeTime = 
@@ -338,10 +331,11 @@ public void testPurgeOnOldSegmentsWithIndicesOnNewColumns() // schedule purge tasks String offlineTableName = TableNameBuilder.OFFLINE.tableNameWithType(PURGE_OLD_SEGMENTS_WITH_NEW_INDICES_TABLE); - assertNotNull(_taskManager.scheduleTasks(offlineTableName).get(MinionConstants.PurgeTask.TASK_TYPE)); + assertNotNull( + _taskManager.scheduleAllTasksForTable(offlineTableName, null).get(MinionConstants.PurgeTask.TASK_TYPE)); assertTrue(_helixTaskResourceManager.getTaskQueues() .contains(PinotHelixTaskResourceManager.getHelixJobQueueName(MinionConstants.PurgeTask.TASK_TYPE))); - assertNull(_taskManager.scheduleTasks(offlineTableName).get(MinionConstants.PurgeTask.TASK_TYPE)); + assertNull(_taskManager.scheduleAllTasksForTable(offlineTableName, null).get(MinionConstants.PurgeTask.TASK_TYPE)); waitForTaskToComplete(); // Check that metadata contains expected values diff --git a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/RealtimeToOfflineSegmentsMinionClusterIntegrationTest.java b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/RealtimeToOfflineSegmentsMinionClusterIntegrationTest.java index 043c654ef77a..e6c8ce270030 100644 --- a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/RealtimeToOfflineSegmentsMinionClusterIntegrationTest.java +++ b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/RealtimeToOfflineSegmentsMinionClusterIntegrationTest.java @@ -134,14 +134,14 @@ public void setUp() Map taskConfigsWithMetadata = new HashMap<>(); taskConfigsWithMetadata.put(BatchConfigProperties.OVERWRITE_OUTPUT, "true"); - taskConfigsWithMetadata.put( - BatchConfigProperties.PUSH_MODE, BatchConfigProperties.SegmentPushType.METADATA.toString()); + taskConfigsWithMetadata.put(BatchConfigProperties.PUSH_MODE, + BatchConfigProperties.SegmentPushType.METADATA.toString()); String tableWithMetadataPush = "myTable2"; schema.setSchemaName(tableWithMetadataPush); 
addSchema(schema); TableConfig realtimeMetadataTableConfig = createRealtimeTableConfig(avroFiles.get(0), tableWithMetadataPush, - new TableTaskConfig(Collections.singletonMap( - MinionConstants.RealtimeToOfflineSegmentsTask.TASK_TYPE, taskConfigsWithMetadata))); + new TableTaskConfig(Collections.singletonMap(MinionConstants.RealtimeToOfflineSegmentsTask.TASK_TYPE, + taskConfigsWithMetadata))); realtimeMetadataTableConfig.setIngestionConfig(ingestionConfig); realtimeMetadataTableConfig.setFieldConfigList(Collections.singletonList(tsFieldConfig)); addTableConfig(realtimeMetadataTableConfig); @@ -151,7 +151,6 @@ public void setUp() offlineMetadataTableConfig.setFieldConfigList(Collections.singletonList(tsFieldConfig)); addTableConfig(offlineMetadataTableConfig); - // Push data into Kafka pushAvroIntoKafka(avroFiles); @@ -163,7 +162,6 @@ public void setUp() waitForDocsLoaded(600_000L, true, tableWithMetadataPush); - _taskResourceManager = _controllerStarter.getHelixTaskResourceManager(); _taskManager = _controllerStarter.getTaskManager(); _realtimeTableName = TableNameBuilder.REALTIME.tableNameWithType(getTableName()); @@ -181,8 +179,8 @@ public void setUp() } _dataSmallestTimeMs = minSegmentTimeMs; - segmentsZKMetadata = _helixResourceManager.getSegmentsZKMetadata(_realtimeMetadataTableName); - minSegmentTimeMs = Long.MAX_VALUE; + segmentsZKMetadata = _helixResourceManager.getSegmentsZKMetadata(_realtimeMetadataTableName); + minSegmentTimeMs = Long.MAX_VALUE; for (SegmentZKMetadata segmentZKMetadata : segmentsZKMetadata) { if (segmentZKMetadata.getStatus() == CommonConstants.Segment.Realtime.Status.DONE) { minSegmentTimeMs = Math.min(minSegmentTimeMs, segmentZKMetadata.getStartTimeMs()); @@ -193,29 +191,28 @@ public void setUp() private TableConfig createOfflineTableConfig(String tableName, @Nullable TableTaskConfig taskConfig, @Nullable SegmentPartitionConfig partitionConfig) { - return new TableConfigBuilder(TableType.OFFLINE).setTableName(tableName) - 
.setTimeColumnName(getTimeColumnName()).setSortedColumn(getSortedColumn()) - .setInvertedIndexColumns(getInvertedIndexColumns()).setNoDictionaryColumns(getNoDictionaryColumns()) - .setRangeIndexColumns(getRangeIndexColumns()).setBloomFilterColumns(getBloomFilterColumns()) - .setFieldConfigList(getFieldConfigs()).setNumReplicas(getNumReplicas()).setSegmentVersion(getSegmentVersion()) - .setLoadMode(getLoadMode()).setTaskConfig(taskConfig).setBrokerTenant(getBrokerTenant()) - .setServerTenant(getServerTenant()).setIngestionConfig(getIngestionConfig()) - .setNullHandlingEnabled(getNullHandlingEnabled()).setSegmentPartitionConfig(partitionConfig).build(); + return new TableConfigBuilder(TableType.OFFLINE).setTableName(tableName).setTimeColumnName(getTimeColumnName()) + .setSortedColumn(getSortedColumn()).setInvertedIndexColumns(getInvertedIndexColumns()) + .setNoDictionaryColumns(getNoDictionaryColumns()).setRangeIndexColumns(getRangeIndexColumns()) + .setBloomFilterColumns(getBloomFilterColumns()).setFieldConfigList(getFieldConfigs()) + .setNumReplicas(getNumReplicas()).setSegmentVersion(getSegmentVersion()).setLoadMode(getLoadMode()) + .setTaskConfig(taskConfig).setBrokerTenant(getBrokerTenant()).setServerTenant(getServerTenant()) + .setIngestionConfig(getIngestionConfig()).setNullHandlingEnabled(getNullHandlingEnabled()) + .setSegmentPartitionConfig(partitionConfig).build(); } protected TableConfig createRealtimeTableConfig(File sampleAvroFile, String tableName, TableTaskConfig taskConfig) { AvroFileSchemaKafkaAvroMessageDecoder._avroFile = sampleAvroFile; - return new TableConfigBuilder(TableType.REALTIME).setTableName(tableName) - .setTimeColumnName(getTimeColumnName()).setSortedColumn(getSortedColumn()) - .setInvertedIndexColumns(getInvertedIndexColumns()).setNoDictionaryColumns(getNoDictionaryColumns()) - .setRangeIndexColumns(getRangeIndexColumns()).setBloomFilterColumns(getBloomFilterColumns()) - 
.setFieldConfigList(getFieldConfigs()).setNumReplicas(getNumReplicas()).setSegmentVersion(getSegmentVersion()) - .setLoadMode(getLoadMode()).setTaskConfig(taskConfig).setBrokerTenant(getBrokerTenant()) - .setServerTenant(getServerTenant()).setIngestionConfig(getIngestionConfig()).setQueryConfig(getQueryConfig()) - .setStreamConfigs(getStreamConfigs()).setNullHandlingEnabled(getNullHandlingEnabled()).build(); + return new TableConfigBuilder(TableType.REALTIME).setTableName(tableName).setTimeColumnName(getTimeColumnName()) + .setSortedColumn(getSortedColumn()).setInvertedIndexColumns(getInvertedIndexColumns()) + .setNoDictionaryColumns(getNoDictionaryColumns()).setRangeIndexColumns(getRangeIndexColumns()) + .setBloomFilterColumns(getBloomFilterColumns()).setFieldConfigList(getFieldConfigs()) + .setNumReplicas(getNumReplicas()).setSegmentVersion(getSegmentVersion()).setLoadMode(getLoadMode()) + .setTaskConfig(taskConfig).setBrokerTenant(getBrokerTenant()).setServerTenant(getServerTenant()) + .setIngestionConfig(getIngestionConfig()).setQueryConfig(getQueryConfig()).setStreamConfigs(getStreamConfigs()) + .setNullHandlingEnabled(getNullHandlingEnabled()).build(); } - @Test public void testRealtimeToOfflineSegmentsTask() throws Exception { @@ -234,12 +231,12 @@ public void testRealtimeToOfflineSegmentsTask() long expectedWatermark = _dataSmallestTimeMs + 86400000; for (int i = 0; i < 3; i++) { // Schedule task - assertNotNull(_taskManager.scheduleTasks(_realtimeTableName) + assertNotNull(_taskManager.scheduleAllTasksForTable(_realtimeTableName, null) .get(MinionConstants.RealtimeToOfflineSegmentsTask.TASK_TYPE)); assertTrue(_taskResourceManager.getTaskQueues().contains( PinotHelixTaskResourceManager.getHelixJobQueueName(MinionConstants.RealtimeToOfflineSegmentsTask.TASK_TYPE))); // Should not generate more tasks - assertNull(_taskManager.scheduleTasks(_realtimeTableName) + assertNull(_taskManager.scheduleAllTasksForTable(_realtimeTableName, null) 
.get(MinionConstants.RealtimeToOfflineSegmentsTask.TASK_TYPE)); // Wait at most 600 seconds for all tasks COMPLETED @@ -286,12 +283,12 @@ public void testRealtimeToOfflineSegmentsMetadataPushTask() _taskManager.cleanUpTask(); for (int i = 0; i < 3; i++) { // Schedule task - assertNotNull(_taskManager.scheduleTasks(_realtimeMetadataTableName) + assertNotNull(_taskManager.scheduleAllTasksForTable(_realtimeMetadataTableName, null) .get(MinionConstants.RealtimeToOfflineSegmentsTask.TASK_TYPE)); assertTrue(_taskResourceManager.getTaskQueues().contains( PinotHelixTaskResourceManager.getHelixJobQueueName(MinionConstants.RealtimeToOfflineSegmentsTask.TASK_TYPE))); // Should not generate more tasks - assertNull(_taskManager.scheduleTasks(_realtimeMetadataTableName) + assertNull(_taskManager.scheduleAllTasksForTable(_realtimeMetadataTableName, null) .get(MinionConstants.RealtimeToOfflineSegmentsTask.TASK_TYPE)); // Wait at most 600 seconds for all tasks COMPLETED diff --git a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/SimpleMinionClusterIntegrationTest.java b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/SimpleMinionClusterIntegrationTest.java index 241c1c0876ff..78aa4d1c2470 100644 --- a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/SimpleMinionClusterIntegrationTest.java +++ b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/SimpleMinionClusterIntegrationTest.java @@ -87,8 +87,8 @@ public void setUp() properties.put(TASK_TYPE + MinionConstants.MAX_ATTEMPTS_PER_TASK_KEY_SUFFIX, "2"); helixResourceManager.getHelixAdmin().setConfig( - new HelixConfigScopeBuilder(HelixConfigScope.ConfigScopeProperty.CLUSTER) - .forCluster(helixResourceManager.getHelixClusterName()).build(), properties); + new HelixConfigScopeBuilder(HelixConfigScope.ConfigScopeProperty.CLUSTER).forCluster( + helixResourceManager.getHelixClusterName()).build(), properties); // Add 3 offline tables, where 
2 of them have TestTask enabled addDummySchema(TABLE_NAME_1); @@ -136,7 +136,7 @@ public void testStopResumeDeleteTaskQueue() { assertEquals(_helixTaskResourceManager.getTasksInProgress(TASK_TYPE).size(), 0); // Should create the task queues and generate a task in the same minion instance - List task1 = _taskManager.scheduleTasks().get(TASK_TYPE); + List task1 = _taskManager.scheduleAllTasksForAllTables(null).get(TASK_TYPE); assertNotNull(task1); assertEquals(task1.size(), 1); assertTrue(_helixTaskResourceManager.getTaskQueues() @@ -150,7 +150,7 @@ public void testStopResumeDeleteTaskQueue() { verifyTaskCount(task1.get(0), 0, 1, 1, 2); // Should generate one more task, with two sub-tasks. Both of these sub-tasks will wait // since we have one minion instance that is still running one of the sub-tasks. - List task2 = _taskManager.scheduleTask(TASK_TYPE, null); + List task2 = _taskManager.scheduleTaskForAllTables(TASK_TYPE, null); assertNotNull(task2); assertEquals(task2.size(), 1); assertTrue(_helixTaskResourceManager.getTasksInProgress(TASK_TYPE).contains(task2.get(0))); @@ -159,8 +159,8 @@ public void testStopResumeDeleteTaskQueue() { // Should not generate more tasks since SimpleMinionClusterIntegrationTests.NUM_TASKS is 2. // Our test task generator does not generate if there are already this many sub-tasks in the // running+waiting count already. - assertNull(_taskManager.scheduleTasks().get(TASK_TYPE)); - assertNull(_taskManager.scheduleTask(TASK_TYPE, null)); + assertNull(_taskManager.scheduleAllTasksForAllTables(null).get(TASK_TYPE)); + assertNull(_taskManager.scheduleTaskForAllTables(TASK_TYPE, null)); // Wait at most 60 seconds for all tasks IN_PROGRESS TestUtils.waitForCondition(input -> { @@ -183,13 +183,12 @@ public void testStopResumeDeleteTaskQueue() { String inProgressGauge = TASK_TYPE + "." + TaskState.IN_PROGRESS; String stoppedGauge = TASK_TYPE + "." + TaskState.STOPPED; String completedGauge = TASK_TYPE + "." 
+ TaskState.COMPLETED; - TestUtils.waitForCondition( - input -> MetricValueUtils.getGlobalGaugeValue(controllerMetrics, inProgressGauge, ControllerGauge.TASK_STATUS) + TestUtils.waitForCondition(input -> + MetricValueUtils.getGlobalGaugeValue(controllerMetrics, inProgressGauge, ControllerGauge.TASK_STATUS) == NUM_TASKS && MetricValueUtils.getGlobalGaugeValue(controllerMetrics, stoppedGauge, ControllerGauge.TASK_STATUS) == 0 && MetricValueUtils.getGlobalGaugeValue(controllerMetrics, completedGauge, ControllerGauge.TASK_STATUS) - == 0, - ZK_CALLBACK_TIMEOUT_MS, "Failed to update the controller gauges"); + == 0, ZK_CALLBACK_TIMEOUT_MS, "Failed to update the controller gauges"); // Stop the task queue _helixTaskResourceManager.stopTaskQueue(TASK_TYPE); @@ -211,14 +210,12 @@ public void testStopResumeDeleteTaskQueue() { }, STATE_TRANSITION_TIMEOUT_MS, "Failed to get all tasks STOPPED"); // Wait at most 30 seconds for ZK callback to update the controller gauges - TestUtils.waitForCondition( - input -> MetricValueUtils.getGlobalGaugeValue(controllerMetrics, inProgressGauge, ControllerGauge.TASK_STATUS) - == 0 + TestUtils.waitForCondition(input -> + MetricValueUtils.getGlobalGaugeValue(controllerMetrics, inProgressGauge, ControllerGauge.TASK_STATUS) == 0 && MetricValueUtils.getGlobalGaugeValue(controllerMetrics, stoppedGauge, ControllerGauge.TASK_STATUS) == NUM_TASKS && MetricValueUtils.getGlobalGaugeValue(controllerMetrics, completedGauge, ControllerGauge.TASK_STATUS) - == 0, - ZK_CALLBACK_TIMEOUT_MS, "Failed to update the controller gauges"); + == 0, ZK_CALLBACK_TIMEOUT_MS, "Failed to update the controller gauges"); // Task deletion requires the task queue to be stopped, // so deleting task1 here before resuming the task queue. 
@@ -247,13 +244,11 @@ public void testStopResumeDeleteTaskQueue() { }, STATE_TRANSITION_TIMEOUT_MS, "Failed to get all tasks COMPLETED"); // Wait at most 30 seconds for ZK callback to update the controller gauges - TestUtils.waitForCondition( - input -> MetricValueUtils.getGlobalGaugeValue(controllerMetrics, inProgressGauge, ControllerGauge.TASK_STATUS) - == 0 + TestUtils.waitForCondition(input -> + MetricValueUtils.getGlobalGaugeValue(controllerMetrics, inProgressGauge, ControllerGauge.TASK_STATUS) == 0 && MetricValueUtils.getGlobalGaugeValue(controllerMetrics, stoppedGauge, ControllerGauge.TASK_STATUS) == 0 - && MetricValueUtils.getGlobalGaugeValue(controllerMetrics, completedGauge, ControllerGauge.TASK_STATUS) - == (NUM_TASKS - 1), - ZK_CALLBACK_TIMEOUT_MS, "Failed to update the controller gauges"); + && MetricValueUtils.getGlobalGaugeValue(controllerMetrics, completedGauge, ControllerGauge.TASK_STATUS) == ( + NUM_TASKS - 1), ZK_CALLBACK_TIMEOUT_MS, "Failed to update the controller gauges"); // Delete the task queue _helixTaskResourceManager.deleteTaskQueue(TASK_TYPE, false); @@ -263,13 +258,11 @@ public void testStopResumeDeleteTaskQueue() { STATE_TRANSITION_TIMEOUT_MS, "Failed to delete the task queue"); // Wait at most 30 seconds for ZK callback to update the controller gauges - TestUtils.waitForCondition( - input -> MetricValueUtils.getGlobalGaugeValue(controllerMetrics, inProgressGauge, ControllerGauge.TASK_STATUS) - == 0 + TestUtils.waitForCondition(input -> + MetricValueUtils.getGlobalGaugeValue(controllerMetrics, inProgressGauge, ControllerGauge.TASK_STATUS) == 0 && MetricValueUtils.getGlobalGaugeValue(controllerMetrics, stoppedGauge, ControllerGauge.TASK_STATUS) == 0 && MetricValueUtils.getGlobalGaugeValue(controllerMetrics, completedGauge, ControllerGauge.TASK_STATUS) - == 0, - ZK_CALLBACK_TIMEOUT_MS, "Failed to update the controller gauges"); + == 0, ZK_CALLBACK_TIMEOUT_MS, "Failed to update the controller gauges"); } @AfterClass diff --git 
a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/TlsIntegrationTest.java b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/TlsIntegrationTest.java index d292ef4c9bd8..5058fd4b759f 100644 --- a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/TlsIntegrationTest.java +++ b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/TlsIntegrationTest.java @@ -489,7 +489,7 @@ public void testRealtimeSegmentUploadDownload() Assert.assertTrue(resultBeforeOffline.getResultSet(0).getLong(0) > 0); // schedule offline segment generation - Assert.assertNotNull(_controllerStarter.getTaskManager().scheduleTasks()); + Assert.assertNotNull(_controllerStarter.getTaskManager().scheduleAllTasksForAllTables(null)); // wait for offline segments JsonNode offlineSegments = TestUtils.waitForResult(() -> { diff --git a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/UpsertTableIntegrationTest.java b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/UpsertTableIntegrationTest.java index 238d515b54b8..19c3ac61ff9d 100644 --- a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/UpsertTableIntegrationTest.java +++ b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/UpsertTableIntegrationTest.java @@ -471,8 +471,8 @@ public void testUpsertCompaction() waitForAllDocsLoaded(tableName, 600_000L, 1000); assertEquals(getScore(tableName), 3692); waitForNumQueriedSegmentsToConverge(tableName, 10_000L, 3); - - assertNotNull(_taskManager.scheduleTasks(TableNameBuilder.forType(TableType.REALTIME).tableNameWithType(tableName)) + String realtimeTableName = TableNameBuilder.forType(TableType.REALTIME).tableNameWithType(tableName); + assertNotNull(_taskManager.scheduleAllTasksForTable(realtimeTableName, null) .get(MinionConstants.UpsertCompactionTask.TASK_TYPE)); waitForTaskToComplete(); waitForAllDocsLoaded(tableName, 600_000L, 
3); @@ -501,8 +501,8 @@ public void testUpsertCompactionDeletesSegments() waitForAllDocsLoaded(tableName, 600_000L, 2000); assertEquals(getScore(tableName), 3692); waitForNumQueriedSegmentsToConverge(tableName, 10_000L, 5); - - assertNotNull(_taskManager.scheduleTasks(TableNameBuilder.forType(TableType.REALTIME).tableNameWithType(tableName)) + String realtimeTableName = TableNameBuilder.forType(TableType.REALTIME).tableNameWithType(tableName); + assertNotNull(_taskManager.scheduleAllTasksForTable(realtimeTableName, null) .get(MinionConstants.UpsertCompactionTask.TASK_TYPE)); waitForTaskToComplete(); waitForAllDocsLoaded(tableName, 600_000L, 3); @@ -546,7 +546,8 @@ public void testUpsertCompactionWithSoftDelete() // Run segment compaction. This time, we expect that the deleting rows are still there because they are // as part of the consuming segment - assertNotNull(_taskManager.scheduleTasks(TableNameBuilder.forType(TableType.REALTIME).tableNameWithType(tableName)) + String realtimeTableName = TableNameBuilder.forType(TableType.REALTIME).tableNameWithType(tableName); + assertNotNull(_taskManager.scheduleAllTasksForTable(realtimeTableName, null) .get(MinionConstants.UpsertCompactionTask.TASK_TYPE)); waitForTaskToComplete(); waitForAllDocsLoaded(tableName, 600_000L, 3); @@ -563,7 +564,8 @@ public void testUpsertCompactionWithSoftDelete() assertEquals(getNumDeletedRows(tableName), 2); // Run segment compaction. 
This time, we expect that the deleting rows are cleaned up - assertNotNull(_taskManager.scheduleTasks(TableNameBuilder.forType(TableType.REALTIME).tableNameWithType(tableName)) + realtimeTableName = TableNameBuilder.forType(TableType.REALTIME).tableNameWithType(tableName); + assertNotNull(_taskManager.scheduleAllTasksForTable(realtimeTableName, null) .get(MinionConstants.UpsertCompactionTask.TASK_TYPE)); waitForTaskToComplete(); waitForAllDocsLoaded(tableName, 600_000L, 3); diff --git a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/UrlAuthRealtimeIntegrationTest.java b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/UrlAuthRealtimeIntegrationTest.java index e8389b377f59..08aa9aee6afc 100644 --- a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/UrlAuthRealtimeIntegrationTest.java +++ b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/UrlAuthRealtimeIntegrationTest.java @@ -203,7 +203,7 @@ public void testSegmentUploadDownload() Assert.assertTrue(resultBeforeOffline.getResultSet(0).getLong(0) > 0); // schedule offline segment generation - Assert.assertNotNull(_controllerStarter.getTaskManager().scheduleTasks()); + Assert.assertNotNull(_controllerStarter.getTaskManager().scheduleAllTasksForAllTables(null)); // wait for offline segments JsonNode offlineSegments = TestUtils.waitForResult(() -> { From f83e466c4205844f87e4b4c77e063c901d69f94f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 22 Apr 2024 09:12:13 -0700 Subject: [PATCH 056/102] Bump org.roaringbitmap:RoaringBitmap from 1.0.5 to 1.0.6 (#12985) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 932dcdf04d85..1ba3aba56844 100644 --- a/pom.xml +++ b/pom.xml @@ -457,7 +457,7 @@ org.roaringbitmap RoaringBitmap - 1.0.5 + 1.0.6 com.101tec From 7b68aa369d9b8304860fd2b7679111a41eeadb46 Mon Sep 17 00:00:00 
2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 22 Apr 2024 09:12:30 -0700 Subject: [PATCH 057/102] Bump aws.sdk.version from 2.25.34 to 2.25.35 (#12984) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 1ba3aba56844..79df63429c83 100644 --- a/pom.xml +++ b/pom.xml @@ -172,7 +172,7 @@ 0.15.0 0.4.4 4.2.2 - 2.25.34 + 2.25.35 2.12.7 3.1.12 7.10.1 From c9d513aef03774b5ea92020cf05b37f7ee7c72f2 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 22 Apr 2024 10:19:50 -0700 Subject: [PATCH 058/102] Bump org.apache.maven.plugins:maven-jar-plugin from 3.4.0 to 3.4.1 (#12983) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 79df63429c83..a4a83c8b287a 100644 --- a/pom.xml +++ b/pom.xml @@ -130,7 +130,7 @@ org.apache.pinot.shaded - 3.4.0 + 3.4.1 3.5.2 none From a852c8a42446cb2d798f67cc6bd133b603b7b99f Mon Sep 17 00:00:00 2001 From: Yash Mayya Date: Mon, 22 Apr 2024 22:51:11 +0530 Subject: [PATCH 059/102] Update ORC and Hive dependency versions in the license binary file (#12986) --- LICENSE-binary | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/LICENSE-binary b/LICENSE-binary index 9ffb5b19eb2c..5944cc2bba50 100644 --- a/LICENSE-binary +++ b/LICENSE-binary @@ -420,7 +420,7 @@ org.apache.helix:helix-core:1.3.1 org.apache.helix:metadata-store-directory-common:1.3.1 org.apache.helix:metrics-common:1.3.1 org.apache.helix:zookeeper-api:1.3.1 -org.apache.hive:hive-storage-api:2.7.1 +org.apache.hive:hive-storage-api:2.8.1 org.apache.httpcomponents:httpclient:4.5.14 org.apache.httpcomponents:httpcore:4.4.13 org.apache.httpcomponents:httpmime:4.5.13 @@ -438,8 +438,8 @@ org.apache.lucene:lucene-core:9.8.0 org.apache.lucene:lucene-queries:9.8.0 org.apache.lucene:lucene-queryparser:9.8.0 org.apache.lucene:lucene-sandbox:9.8.0 -org.apache.orc:orc-core:1.5.9 
-org.apache.orc:orc-shims:1.5.9 +org.apache.orc:orc-core:1.9.3 +org.apache.orc:orc-shims:1.9.3 org.apache.parquet:parquet-avro:1.13.1 org.apache.parquet:parquet-column:1.13.1 org.apache.parquet:parquet-common:1.13.1 From a5c728f549fe1be5560a88080caaa2063def3d87 Mon Sep 17 00:00:00 2001 From: Xiang Fu Date: Tue, 23 Apr 2024 02:58:07 +0800 Subject: [PATCH 060/102] Add back profile for shade (#12979) --- pinot-clients/pinot-jdbc-client/pom.xml | 15 ++++++++++++++- pinot-common/pom.xml | 11 ++++++++++- pinot-core/pom.xml | 11 +++++++++++ pinot-plugins/pinot-file-system/pinot-s3/pom.xml | 16 +++++++++++++++- .../pinot-stream-ingestion/pinot-kinesis/pom.xml | 16 +++++++++++++++- pinot-spi/pom.xml | 11 +++++++++++ 6 files changed, 76 insertions(+), 4 deletions(-) diff --git a/pinot-clients/pinot-jdbc-client/pom.xml b/pinot-clients/pinot-jdbc-client/pom.xml index 08c3880a483f..6ffc2fa19ae3 100644 --- a/pinot-clients/pinot-jdbc-client/pom.xml +++ b/pinot-clients/pinot-jdbc-client/pom.xml @@ -33,7 +33,6 @@ https://pinot.apache.org/ ${basedir}/../.. - package @@ -82,4 +81,18 @@ jsr305 + + + build-shaded-jar + + + skipShade + !true + + + + package + + + diff --git a/pinot-common/pom.xml b/pinot-common/pom.xml index 16e5b6d41bb9..2381f024d46b 100644 --- a/pinot-common/pom.xml +++ b/pinot-common/pom.xml @@ -33,7 +33,6 @@ https://pinot.apache.org/ ${basedir}/.. - package @@ -414,5 +413,15 @@ + + + build-shaded-jar + + true + + + package + + diff --git a/pinot-core/pom.xml b/pinot-core/pom.xml index 6aa29f7d8e2d..6d00a98d75ab 100644 --- a/pinot-core/pom.xml +++ b/pinot-core/pom.xml @@ -179,4 +179,15 @@ + + + build-shaded-jar + + false + + + package + + + diff --git a/pinot-plugins/pinot-file-system/pinot-s3/pom.xml b/pinot-plugins/pinot-file-system/pinot-s3/pom.xml index bd650eadf229..0f4d2eea78f0 100644 --- a/pinot-plugins/pinot-file-system/pinot-s3/pom.xml +++ b/pinot-plugins/pinot-file-system/pinot-s3/pom.xml @@ -36,7 +36,6 @@ ${basedir}/../../.. 
2.12.2 - package @@ -65,4 +64,19 @@ test + + + + build-shaded-jar + + + skipShade + !true + + + + package + + + diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/pom.xml b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/pom.xml index 916a617504af..d58e3313ecaa 100644 --- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/pom.xml +++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/pom.xml @@ -35,7 +35,6 @@ https://pinot.apache.org/ ${basedir}/../../.. - package 1.0.2 0.2.23 @@ -133,4 +132,19 @@ + + + + build-shaded-jar + + + skipShade + !true + + + + package + + + diff --git a/pinot-spi/pom.xml b/pinot-spi/pom.xml index 43ae753a6d8d..baa63f0edc08 100644 --- a/pinot-spi/pom.xml +++ b/pinot-spi/pom.xml @@ -179,4 +179,15 @@ reflections + + + build-shaded-jar + + false + + + package + + + From 8e103205955e8af4fe286ebd6e97b30605724be2 Mon Sep 17 00:00:00 2001 From: Xiaobing <61892277+klsince@users.noreply.github.com> Date: Mon, 22 Apr 2024 16:41:46 -0700 Subject: [PATCH 061/102] handle absent segments so that catchup checker doesn't get stuck on them (#12883) * skip missing segments while checking freshness during server startup * get new consuming segments again if current consuming segments are committed by other servers --- .../starter/helix/BaseServerStarter.java | 71 ++++++---- ...reshnessBasedConsumptionStatusChecker.java | 7 +- ...ngestionBasedConsumptionStatusChecker.java | 128 ++++++++++++------ .../OffsetBasedConsumptionStatusChecker.java | 7 +- .../ConsumptionStatusCheckerTestUtils.java | 38 ++++++ ...nessBasedConsumptionStatusCheckerTest.java | 103 ++++++++++++-- ...fsetBasedConsumptionStatusCheckerTest.java | 32 +++-- 7 files changed, 288 insertions(+), 98 deletions(-) create mode 100644 pinot-server/src/test/java/org/apache/pinot/server/starter/helix/ConsumptionStatusCheckerTestUtils.java diff --git a/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/BaseServerStarter.java 
b/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/BaseServerStarter.java index 02c7b81ea5eb..78cd1a14e77d 100644 --- a/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/BaseServerStarter.java +++ b/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/BaseServerStarter.java @@ -25,6 +25,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; +import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; @@ -153,8 +154,8 @@ public void init(PinotConfiguration serverConf) _helixClusterName = _serverConf.getProperty(CommonConstants.Helix.CONFIG_OF_CLUSTER_NAME); ServiceStartableUtils.applyClusterConfig(_serverConf, _zkAddress, _helixClusterName, ServiceRole.SERVER); - PinotInsecureMode.setPinotInInsecureMode( - Boolean.valueOf(_serverConf.getProperty(CommonConstants.CONFIG_OF_PINOT_INSECURE_MODE, + PinotInsecureMode.setPinotInInsecureMode(Boolean.parseBoolean( + _serverConf.getProperty(CommonConstants.CONFIG_OF_PINOT_INSECURE_MODE, CommonConstants.DEFAULT_PINOT_INSECURE_MODE))); setupHelixSystemProperties(); @@ -275,8 +276,7 @@ private void registerServiceStatusHandler() { // collect all resources which have this instance in the ideal state List resourcesToMonitor = new ArrayList<>(); - - Set consumingSegments = new HashSet<>(); + Map> consumingSegments = new HashMap<>(); boolean checkRealtime = realtimeConsumptionCatchupWaitMs > 0; if (isFreshnessStatusCheckerEnabled && realtimeMinFreshnessMs <= 0) { LOGGER.warn("Realtime min freshness {} must be > 0. 
Setting relatime min freshness to default {}.", @@ -289,23 +289,22 @@ private void registerServiceStatusHandler() { if (!TableNameBuilder.isTableResource(resourceName)) { continue; } - // Only monitor enabled resources IdealState idealState = _helixAdmin.getResourceIdealState(_helixClusterName, resourceName); - if (idealState.isEnabled()) { - - for (String partitionName : idealState.getPartitionSet()) { - if (idealState.getInstanceSet(partitionName).contains(_instanceId)) { - resourcesToMonitor.add(resourceName); - break; - } + if (idealState == null || !idealState.isEnabled()) { + continue; + } + for (String partitionName : idealState.getPartitionSet()) { + if (idealState.getInstanceSet(partitionName).contains(_instanceId)) { + resourcesToMonitor.add(resourceName); + break; } - if (checkRealtime && TableNameBuilder.isRealtimeTableResource(resourceName)) { - for (String partitionName : idealState.getPartitionSet()) { - if (StateModel.SegmentStateModel.CONSUMING.equals( - idealState.getInstanceStateMap(partitionName).get(_instanceId))) { - consumingSegments.add(partitionName); - } + } + if (checkRealtime && TableNameBuilder.isRealtimeTableResource(resourceName)) { + for (String partitionName : idealState.getPartitionSet()) { + if (StateModel.SegmentStateModel.CONSUMING.equals( + idealState.getInstanceStateMap(partitionName).get(_instanceId))) { + consumingSegments.computeIfAbsent(resourceName, k -> new HashSet<>()).add(partitionName); } } } @@ -332,7 +331,7 @@ private void registerServiceStatusHandler() { realtimeMinFreshnessMs, idleTimeoutMs); FreshnessBasedConsumptionStatusChecker freshnessStatusChecker = new FreshnessBasedConsumptionStatusChecker(_serverInstance.getInstanceDataManager(), consumingSegments, - realtimeMinFreshnessMs, idleTimeoutMs); + this::getConsumingSegments, realtimeMinFreshnessMs, idleTimeoutMs); Supplier getNumConsumingSegmentsNotReachedMinFreshness = freshnessStatusChecker::getNumConsumingSegmentsNotReachedIngestionCriteria; 
serviceStatusCallbackListBuilder.add( @@ -341,7 +340,8 @@ private void registerServiceStatusHandler() { } else if (isOffsetBasedConsumptionStatusCheckerEnabled) { LOGGER.info("Setting up offset based status checker"); OffsetBasedConsumptionStatusChecker consumptionStatusChecker = - new OffsetBasedConsumptionStatusChecker(_serverInstance.getInstanceDataManager(), consumingSegments); + new OffsetBasedConsumptionStatusChecker(_serverInstance.getInstanceDataManager(), consumingSegments, + this::getConsumingSegments); Supplier getNumConsumingSegmentsNotReachedTheirLatestOffset = consumptionStatusChecker::getNumConsumingSegmentsNotReachedIngestionCriteria; serviceStatusCallbackListBuilder.add( @@ -359,6 +359,22 @@ private void registerServiceStatusHandler() { new ServiceStatus.MultipleCallbackServiceStatusCallback(serviceStatusCallbackListBuilder.build())); } + @Nullable + private Set getConsumingSegments(String realtimeTableName) { + IdealState idealState = _helixAdmin.getResourceIdealState(_helixClusterName, realtimeTableName); + if (idealState == null || !idealState.isEnabled()) { + return null; + } + Set consumingSegments = new HashSet<>(); + for (String partitionName : idealState.getPartitionSet()) { + if (StateModel.SegmentStateModel.CONSUMING.equals( + idealState.getInstanceStateMap(partitionName).get(_instanceId))) { + consumingSegments.add(partitionName); + } + } + return consumingSegments; + } + private void updateInstanceConfigIfNeeded(ServerConf serverConf) { InstanceConfig instanceConfig = HelixHelper.getInstanceConfig(_helixManager, _instanceId); @@ -518,12 +534,13 @@ private void startupServiceStatusCheck(long endTimeMs) { } } - boolean exitServerOnIncompleteStartup = _serverConf.getProperty( - Server.CONFIG_OF_EXIT_ON_SERVICE_STATUS_CHECK_FAILURE, - Server.DEFAULT_EXIT_ON_SERVICE_STATUS_CHECK_FAILURE); + boolean exitServerOnIncompleteStartup = + _serverConf.getProperty(Server.CONFIG_OF_EXIT_ON_SERVICE_STATUS_CHECK_FAILURE, + 
Server.DEFAULT_EXIT_ON_SERVICE_STATUS_CHECK_FAILURE); if (exitServerOnIncompleteStartup) { - String errorMessage = String.format("Service status %s has not turned GOOD within %dms: %s. Exiting server.", - serviceStatus, System.currentTimeMillis() - startTimeMs, ServiceStatus.getStatusDescription()); + String errorMessage = + String.format("Service status %s has not turned GOOD within %dms: %s. Exiting server.", serviceStatus, + System.currentTimeMillis() - startTimeMs, ServiceStatus.getStatusDescription()); throw new IllegalStateException(errorMessage); } LOGGER.warn("Service status has not turned GOOD within {}ms: {}", System.currentTimeMillis() - startTimeMs, @@ -581,8 +598,8 @@ public void start() InstanceDataManager instanceDataManager = _serverInstance.getInstanceDataManager(); instanceDataManager.setSupplierOfIsServerReadyToServeQueries(() -> _isServerReadyToServeQueries); // initialize the thread accountant for query killing - Tracing.ThreadAccountantOps - .initializeThreadAccountant(_serverConf.subset(CommonConstants.PINOT_QUERY_SCHEDULER_PREFIX), _instanceId); + Tracing.ThreadAccountantOps.initializeThreadAccountant( + _serverConf.subset(CommonConstants.PINOT_QUERY_SCHEDULER_PREFIX), _instanceId); initSegmentFetcher(_serverConf); StateModelFactory stateModelFactory = new SegmentOnlineOfflineStateModelFactory(_instanceId, instanceDataManager); diff --git a/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/FreshnessBasedConsumptionStatusChecker.java b/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/FreshnessBasedConsumptionStatusChecker.java index 6f3610e59623..77eac3832ed5 100644 --- a/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/FreshnessBasedConsumptionStatusChecker.java +++ b/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/FreshnessBasedConsumptionStatusChecker.java @@ -19,7 +19,9 @@ package org.apache.pinot.server.starter.helix; +import java.util.Map; import java.util.Set; +import 
java.util.function.Function; import org.apache.pinot.core.data.manager.InstanceDataManager; import org.apache.pinot.core.data.manager.realtime.RealtimeSegmentDataManager; import org.apache.pinot.spi.stream.StreamPartitionMsgOffset; @@ -37,9 +39,10 @@ public class FreshnessBasedConsumptionStatusChecker extends IngestionBasedConsum private final long _minFreshnessMs; private final long _idleTimeoutMs; - public FreshnessBasedConsumptionStatusChecker(InstanceDataManager instanceDataManager, Set consumingSegments, + public FreshnessBasedConsumptionStatusChecker(InstanceDataManager instanceDataManager, + Map> consumingSegments, Function> consumingSegmentsSupplier, long minFreshnessMs, long idleTimeoutMs) { - super(instanceDataManager, consumingSegments); + super(instanceDataManager, consumingSegments, consumingSegmentsSupplier); _minFreshnessMs = minFreshnessMs; _idleTimeoutMs = idleTimeoutMs; } diff --git a/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/IngestionBasedConsumptionStatusChecker.java b/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/IngestionBasedConsumptionStatusChecker.java index 83de35a63c9d..c6fe0d16d6dc 100644 --- a/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/IngestionBasedConsumptionStatusChecker.java +++ b/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/IngestionBasedConsumptionStatusChecker.java @@ -19,15 +19,16 @@ package org.apache.pinot.server.starter.helix; +import java.util.HashMap; import java.util.HashSet; +import java.util.Iterator; +import java.util.Map; import java.util.Set; -import org.apache.pinot.common.utils.LLCSegmentName; +import java.util.function.Function; import org.apache.pinot.core.data.manager.InstanceDataManager; import org.apache.pinot.core.data.manager.realtime.RealtimeSegmentDataManager; import org.apache.pinot.segment.local.data.manager.SegmentDataManager; import org.apache.pinot.segment.local.data.manager.TableDataManager; -import 
org.apache.pinot.spi.config.table.TableType; -import org.apache.pinot.spi.utils.builder.TableNameBuilder; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -35,64 +36,103 @@ public abstract class IngestionBasedConsumptionStatusChecker { protected final Logger _logger = LoggerFactory.getLogger(getClass()); - // constructor parameters - protected final InstanceDataManager _instanceDataManager; - protected final Set _consumingSegments; - - // helper variable - private final Set _caughtUpSegments = new HashSet<>(); + private final InstanceDataManager _instanceDataManager; + private final Map> _consumingSegmentsByTable; + private final Map> _caughtUpSegmentsByTable = new HashMap<>(); + private final Function> _consumingSegmentsSupplier; + /** + * Both consumingSegmentsByTable and consumingSegmentsSupplier are provided as it can be costly to get + * consumingSegmentsByTable via the supplier, so only use it when any missing segment is detected. + */ public IngestionBasedConsumptionStatusChecker(InstanceDataManager instanceDataManager, - Set consumingSegments) { + Map> consumingSegmentsByTable, Function> consumingSegmentsSupplier) { _instanceDataManager = instanceDataManager; - _consumingSegments = consumingSegments; + _consumingSegmentsByTable = consumingSegmentsByTable; + _consumingSegmentsSupplier = consumingSegmentsSupplier; } - public int getNumConsumingSegmentsNotReachedIngestionCriteria() { - for (String segName : _consumingSegments) { - if (_caughtUpSegments.contains(segName)) { - continue; - } - TableDataManager tableDataManager = getTableDataManager(segName); + // This might be called by multiple threads, thus synchronized to be correct. 
+ public synchronized int getNumConsumingSegmentsNotReachedIngestionCriteria() { + // If the checker found any consuming segments are missing or committed for a table, it should reset the set of + // consuming segments for the table to continue to monitor the freshness, otherwise the checker might get stuck + // on deleted segments or tables, or miss new consuming segments created in the table and get ready prematurely. + Set tablesToRefresh = new HashSet<>(); + Iterator>> itr = _consumingSegmentsByTable.entrySet().iterator(); + while (itr.hasNext()) { + Map.Entry> tableSegments = itr.next(); + String tableNameWithType = tableSegments.getKey(); + TableDataManager tableDataManager = _instanceDataManager.getTableDataManager(tableNameWithType); if (tableDataManager == null) { - _logger.info("TableDataManager is not yet setup for segment {}. Will check consumption status later", segName); + _logger.info("No tableDataManager for table: {}. Refresh table's consuming segments", tableNameWithType); + tablesToRefresh.add(tableNameWithType); continue; } - SegmentDataManager segmentDataManager = null; - try { - segmentDataManager = tableDataManager.acquireSegment(segName); - if (segmentDataManager == null) { - _logger.info("SegmentDataManager is not yet setup for segment {}. Will check consumption status later", - segName); + Set consumingSegments = tableSegments.getValue(); + Set caughtUpSegments = _caughtUpSegmentsByTable.computeIfAbsent(tableNameWithType, k -> new HashSet<>()); + for (String segName : consumingSegments) { + if (caughtUpSegments.contains(segName)) { continue; } - if (!(segmentDataManager instanceof RealtimeSegmentDataManager)) { - // There's a possibility that a consuming segment has converted to a committed segment. If that's the case, - // segment data manager will not be of type RealtimeSegmentDataManager. 
- _logger.info("Segment {} is already committed and is considered caught up.", segName); - _caughtUpSegments.add(segName); + SegmentDataManager segmentDataManager = tableDataManager.acquireSegment(segName); + if (segmentDataManager == null) { + _logger.info("No segmentDataManager for segment: {} from table: {}. Refresh table's consuming segments", + segName, tableNameWithType); + tablesToRefresh.add(tableNameWithType); continue; } - - RealtimeSegmentDataManager rtSegmentDataManager = (RealtimeSegmentDataManager) segmentDataManager; - if (isSegmentCaughtUp(segName, rtSegmentDataManager)) { - _caughtUpSegments.add(segName); - } - } finally { - if (segmentDataManager != null) { + try { + if (!(segmentDataManager instanceof RealtimeSegmentDataManager)) { + // It's possible that the consuming segment has been committed by another server. In this case, we should + // get the new consuming segments for the table and continue to monitor their consumption status, until the + // current server catches up the consuming segments. + _logger.info("Segment: {} from table: {} is already committed. 
Refresh table's consuming segments.", + segName, tableNameWithType); + tablesToRefresh.add(tableNameWithType); + continue; + } + RealtimeSegmentDataManager rtSegmentDataManager = (RealtimeSegmentDataManager) segmentDataManager; + if (isSegmentCaughtUp(segName, rtSegmentDataManager)) { + caughtUpSegments.add(segName); + } + } finally { tableDataManager.releaseSegment(segmentDataManager); } } + int numLaggingSegments = consumingSegments.size() - caughtUpSegments.size(); + if (numLaggingSegments == 0) { + _logger.info("Consuming segments from table: {} have all caught up", tableNameWithType); + itr.remove(); + _caughtUpSegmentsByTable.remove(tableNameWithType); + } + } + if (!tablesToRefresh.isEmpty()) { + for (String tableNameWithType : tablesToRefresh) { + Set updatedConsumingSegments = _consumingSegmentsSupplier.apply(tableNameWithType); + if (updatedConsumingSegments == null || updatedConsumingSegments.isEmpty()) { + _consumingSegmentsByTable.remove(tableNameWithType); + _caughtUpSegmentsByTable.remove(tableNameWithType); + _logger.info("Found no consuming segments from table: {}, which is probably removed", tableNameWithType); + } else { + _consumingSegmentsByTable.put(tableNameWithType, updatedConsumingSegments); + _caughtUpSegmentsByTable.computeIfAbsent(tableNameWithType, k -> new HashSet<>()) + .retainAll(updatedConsumingSegments); + _logger.info( + "Updated consumingSegments: {} and caughtUpSegments: {} for table: {}, as consuming segments were " + + "missing or committed", updatedConsumingSegments, _caughtUpSegmentsByTable.get(tableNameWithType), + tableNameWithType); + } + } } - return _consumingSegments.size() - _caughtUpSegments.size(); + int numLaggingSegments = 0; + for (Map.Entry> tableSegments : _consumingSegmentsByTable.entrySet()) { + String tableNameWithType = tableSegments.getKey(); + Set consumingSegments = tableSegments.getValue(); + Set caughtUpSegments = _caughtUpSegmentsByTable.computeIfAbsent(tableNameWithType, k -> new HashSet<>()); + 
numLaggingSegments += consumingSegments.size() - caughtUpSegments.size(); + } + return numLaggingSegments; } protected abstract boolean isSegmentCaughtUp(String segmentName, RealtimeSegmentDataManager rtSegmentDataManager); - - private TableDataManager getTableDataManager(String segmentName) { - LLCSegmentName llcSegmentName = new LLCSegmentName(segmentName); - String tableName = llcSegmentName.getTableName(); - String tableNameWithType = TableNameBuilder.forType(TableType.REALTIME).tableNameWithType(tableName); - return _instanceDataManager.getTableDataManager(tableNameWithType); - } } diff --git a/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/OffsetBasedConsumptionStatusChecker.java b/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/OffsetBasedConsumptionStatusChecker.java index 6b597e3fa2ac..ad7d2905baa1 100644 --- a/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/OffsetBasedConsumptionStatusChecker.java +++ b/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/OffsetBasedConsumptionStatusChecker.java @@ -19,7 +19,9 @@ package org.apache.pinot.server.starter.helix; +import java.util.Map; import java.util.Set; +import java.util.function.Function; import org.apache.pinot.core.data.manager.InstanceDataManager; import org.apache.pinot.core.data.manager.realtime.RealtimeSegmentDataManager; import org.apache.pinot.spi.stream.StreamPartitionMsgOffset; @@ -34,8 +36,9 @@ */ public class OffsetBasedConsumptionStatusChecker extends IngestionBasedConsumptionStatusChecker { - public OffsetBasedConsumptionStatusChecker(InstanceDataManager instanceDataManager, Set consumingSegments) { - super(instanceDataManager, consumingSegments); + public OffsetBasedConsumptionStatusChecker(InstanceDataManager instanceDataManager, + Map> consumingSegments, Function> consumingSegmentsSupplier) { + super(instanceDataManager, consumingSegments, consumingSegmentsSupplier); } @Override diff --git 
a/pinot-server/src/test/java/org/apache/pinot/server/starter/helix/ConsumptionStatusCheckerTestUtils.java b/pinot-server/src/test/java/org/apache/pinot/server/starter/helix/ConsumptionStatusCheckerTestUtils.java new file mode 100644 index 000000000000..ccd8f6f8558e --- /dev/null +++ b/pinot-server/src/test/java/org/apache/pinot/server/starter/helix/ConsumptionStatusCheckerTestUtils.java @@ -0,0 +1,38 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.server.starter.helix; + +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import java.util.function.Function; + + +class ConsumptionStatusCheckerTestUtils { + private ConsumptionStatusCheckerTestUtils() { + } + + public static Function> getConsumingSegments(Map> consumingSegments) { + // Create a new Set instance to keep updates separated from the consumingSegments. + return (tableName) -> { + Set updated = consumingSegments.get(tableName); + return updated == null ? 
null : new HashSet<>(updated); + }; + } +} diff --git a/pinot-server/src/test/java/org/apache/pinot/server/starter/helix/FreshnessBasedConsumptionStatusCheckerTest.java b/pinot-server/src/test/java/org/apache/pinot/server/starter/helix/FreshnessBasedConsumptionStatusCheckerTest.java index 6301b54d0441..e619ba7d707a 100644 --- a/pinot-server/src/test/java/org/apache/pinot/server/starter/helix/FreshnessBasedConsumptionStatusCheckerTest.java +++ b/pinot-server/src/test/java/org/apache/pinot/server/starter/helix/FreshnessBasedConsumptionStatusCheckerTest.java @@ -20,7 +20,11 @@ package org.apache.pinot.server.starter.helix; import com.google.common.collect.ImmutableSet; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; import java.util.Set; +import java.util.function.Function; import org.apache.pinot.core.data.manager.InstanceDataManager; import org.apache.pinot.core.data.manager.offline.ImmutableSegmentDataManager; import org.apache.pinot.core.data.manager.realtime.RealtimeSegmentDataManager; @@ -42,8 +46,9 @@ private class FakeFreshnessBasedConsumptionStatusChecker extends FreshnessBasedC private final long _now; public FakeFreshnessBasedConsumptionStatusChecker(InstanceDataManager instanceDataManager, - Set consumingSegments, long minFreshnessMs, long idleTimeoutMs, long now) { - super(instanceDataManager, consumingSegments, minFreshnessMs, idleTimeoutMs); + Map> consumingSegments, Function> consumingSegmentsSupplier, + long minFreshnessMs, long idleTimeoutMs, long now) { + super(instanceDataManager, consumingSegments, consumingSegmentsSupplier, minFreshnessMs, idleTimeoutMs); _now = now; } @@ -58,10 +63,13 @@ public void regularCaseWithOffsetCatchup() { String segA0 = "tableA__0__0__123Z"; String segA1 = "tableA__1__0__123Z"; String segB0 = "tableB__0__0__123Z"; - Set consumingSegments = ImmutableSet.of(segA0, segA1, segB0); + Map> consumingSegments = new HashMap<>(); + consumingSegments.put("tableA_REALTIME", ImmutableSet.of(segA0, 
segA1)); + consumingSegments.put("tableB_REALTIME", ImmutableSet.of(segB0)); InstanceDataManager instanceDataManager = mock(InstanceDataManager.class); FreshnessBasedConsumptionStatusChecker statusChecker = - new FreshnessBasedConsumptionStatusChecker(instanceDataManager, consumingSegments, 10000L, 0L); + new FreshnessBasedConsumptionStatusChecker(instanceDataManager, consumingSegments, + ConsumptionStatusCheckerTestUtils.getConsumingSegments(consumingSegments), 10000L, 0L); // TableDataManager is not set up yet assertEquals(statusChecker.getNumConsumingSegmentsNotReachedIngestionCriteria(), 3); @@ -119,6 +127,55 @@ public void regularCaseWithOffsetCatchup() { assertEquals(statusChecker.getNumConsumingSegmentsNotReachedIngestionCriteria(), 0); } + @Test + public void testWithDroppedTableAndSegment() + throws InterruptedException { + String segA0 = "tableA__0__0__123Z"; + String segA1 = "tableA__1__0__123Z"; + String segB0 = "tableB__0__0__123Z"; + Map> consumingSegments = new HashMap<>(); + consumingSegments.computeIfAbsent("tableA_REALTIME", k -> new HashSet<>()).add(segA0); + consumingSegments.computeIfAbsent("tableA_REALTIME", k -> new HashSet<>()).add(segA1); + consumingSegments.computeIfAbsent("tableB_REALTIME", k -> new HashSet<>()).add(segB0); + InstanceDataManager instanceDataManager = mock(InstanceDataManager.class); + FreshnessBasedConsumptionStatusChecker statusChecker = + new FreshnessBasedConsumptionStatusChecker(instanceDataManager, consumingSegments, + ConsumptionStatusCheckerTestUtils.getConsumingSegments(consumingSegments), 10L, 0L); + + // TableDataManager is not set up yet + assertEquals(statusChecker.getNumConsumingSegmentsNotReachedIngestionCriteria(), 3); + + // setup TableDataMangers + TableDataManager tableDataManagerA = mock(TableDataManager.class); + when(instanceDataManager.getTableDataManager("tableA_REALTIME")).thenReturn(tableDataManagerA); + when(instanceDataManager.getTableDataManager("tableB_REALTIME")).thenReturn(null); + + // 
setup SegmentDataManagers + RealtimeSegmentDataManager segMngrA0 = mock(RealtimeSegmentDataManager.class); + when(tableDataManagerA.acquireSegment(segA0)).thenReturn(segMngrA0); + when(tableDataManagerA.acquireSegment(segA1)).thenReturn(null); + + when(segMngrA0.fetchLatestStreamOffset(5000)).thenReturn(new LongMsgOffset(20)); + when(segMngrA0.getCurrentOffset()).thenReturn(new LongMsgOffset(0)); + // ensure negative values are ignored + setupLatestIngestionTimestamp(segMngrA0, Long.MIN_VALUE); + + // current offset latest stream offset current time last ingestion time + // segA0 0 20 100 Long.MIN_VALUE + // segA1 (segment is absent) + // segB0 (table is absent) + assertEquals(statusChecker.getNumConsumingSegmentsNotReachedIngestionCriteria(), 3); + + // updatedConsumingSegments still provide 3 segments to checker but one has caught up. + when(segMngrA0.getCurrentOffset()).thenReturn(new LongMsgOffset(20)); + assertEquals(statusChecker.getNumConsumingSegmentsNotReachedIngestionCriteria(), 2); + // Remove the missing segments and check again. 
+ consumingSegments.get("tableA_REALTIME").remove(segA1); + assertEquals(statusChecker.getNumConsumingSegmentsNotReachedIngestionCriteria(), 1); + consumingSegments.remove("tableB_REALTIME"); + assertEquals(statusChecker.getNumConsumingSegmentsNotReachedIngestionCriteria(), 0); + } + private void setupLatestIngestionTimestamp(RealtimeSegmentDataManager segmentDataManager, long latestIngestionTimestamp) { MutableSegment mockSegment = mock(MutableSegment.class); @@ -133,10 +190,13 @@ public void regularCaseWithFreshnessCatchup() { String segA0 = "tableA__0__0__123Z"; String segA1 = "tableA__1__0__123Z"; String segB0 = "tableB__0__0__123Z"; - Set consumingSegments = ImmutableSet.of(segA0, segA1, segB0); + Map> consumingSegments = new HashMap<>(); + consumingSegments.put("tableA_REALTIME", ImmutableSet.of(segA0, segA1)); + consumingSegments.put("tableB_REALTIME", ImmutableSet.of(segB0)); InstanceDataManager instanceDataManager = mock(InstanceDataManager.class); FreshnessBasedConsumptionStatusChecker statusChecker = - new FakeFreshnessBasedConsumptionStatusChecker(instanceDataManager, consumingSegments, 10L, 0L, 100L); + new FakeFreshnessBasedConsumptionStatusChecker(instanceDataManager, consumingSegments, + ConsumptionStatusCheckerTestUtils.getConsumingSegments(consumingSegments), 10L, 0L, 100L); // TableDataManager is not set up yet assertEquals(statusChecker.getNumConsumingSegmentsNotReachedIngestionCriteria(), 3); @@ -195,12 +255,14 @@ public void regularCaseWithIdleTimeout() { String segA0 = "tableA__0__0__123Z"; String segA1 = "tableA__1__0__123Z"; String segB0 = "tableB__0__0__123Z"; - Set consumingSegments = ImmutableSet.of(segA0, segA1, segB0); + Map> consumingSegments = new HashMap<>(); + consumingSegments.put("tableA_REALTIME", ImmutableSet.of(segA0, segA1)); + consumingSegments.put("tableB_REALTIME", ImmutableSet.of(segB0)); InstanceDataManager instanceDataManager = mock(InstanceDataManager.class); long idleTimeoutMs = 10L; 
FreshnessBasedConsumptionStatusChecker statusChecker = - new FakeFreshnessBasedConsumptionStatusChecker(instanceDataManager, consumingSegments, 10L, idleTimeoutMs, - 100L); + new FakeFreshnessBasedConsumptionStatusChecker(instanceDataManager, consumingSegments, + ConsumptionStatusCheckerTestUtils.getConsumingSegments(consumingSegments), 10L, idleTimeoutMs, 100L); // TableDataManager is not set up yet assertEquals(statusChecker.getNumConsumingSegmentsNotReachedIngestionCriteria(), 3); @@ -270,10 +332,13 @@ public void testSegmentsNeverHealthyWhenIdleTimeoutZeroAndNoOtherCriteriaMet() { String segA0 = "tableA__0__0__123Z"; String segA1 = "tableA__1__0__123Z"; String segB0 = "tableB__0__0__123Z"; - Set consumingSegments = ImmutableSet.of(segA0, segA1, segB0); + Map> consumingSegments = new HashMap<>(); + consumingSegments.put("tableA_REALTIME", ImmutableSet.of(segA0, segA1)); + consumingSegments.put("tableB_REALTIME", ImmutableSet.of(segB0)); InstanceDataManager instanceDataManager = mock(InstanceDataManager.class); FreshnessBasedConsumptionStatusChecker statusChecker = - new FakeFreshnessBasedConsumptionStatusChecker(instanceDataManager, consumingSegments, 10L, 0L, 100L); + new FakeFreshnessBasedConsumptionStatusChecker(instanceDataManager, consumingSegments, + ConsumptionStatusCheckerTestUtils.getConsumingSegments(consumingSegments), 10L, 0L, 100L); // TableDataManager is not set up yet assertEquals(statusChecker.getNumConsumingSegmentsNotReachedIngestionCriteria(), 3); @@ -319,10 +384,13 @@ public void segmentBeingCommmitted() { String segA0 = "tableA__0__0__123Z"; String segA1 = "tableA__1__0__123Z"; String segB0 = "tableB__0__0__123Z"; - Set consumingSegments = ImmutableSet.of(segA0, segA1, segB0); + Map> consumingSegments = new HashMap<>(); + consumingSegments.put("tableA_REALTIME", ImmutableSet.of(segA0, segA1)); + consumingSegments.put("tableB_REALTIME", ImmutableSet.of(segB0)); InstanceDataManager instanceDataManager = mock(InstanceDataManager.class); 
FreshnessBasedConsumptionStatusChecker statusChecker = - new FakeFreshnessBasedConsumptionStatusChecker(instanceDataManager, consumingSegments, 10L, 0L, 100L); + new FakeFreshnessBasedConsumptionStatusChecker(instanceDataManager, consumingSegments, + ConsumptionStatusCheckerTestUtils.getConsumingSegments(consumingSegments), 10L, 0L, 100L); // TableDataManager is not set up yet assertEquals(statusChecker.getNumConsumingSegmentsNotReachedIngestionCriteria(), 3); @@ -369,6 +437,8 @@ public void segmentBeingCommmitted() { setupLatestIngestionTimestamp(segMngrA0, 90L); // Unexpected case where latest ingested is somehow after current time setupLatestIngestionTimestamp(segMngrA1, 101L); + assertEquals(statusChecker.getNumConsumingSegmentsNotReachedIngestionCriteria(), 1); + consumingSegments.get("tableB_REALTIME").remove(segB0); assertEquals(statusChecker.getNumConsumingSegmentsNotReachedIngestionCriteria(), 0); } @@ -377,10 +447,13 @@ public void testCannotGetOffsetsOrFreshness() { String segA0 = "tableA__0__0__123Z"; String segA1 = "tableA__1__0__123Z"; String segB0 = "tableB__0__0__123Z"; - Set consumingSegments = ImmutableSet.of(segA0, segA1, segB0); + Map> consumingSegments = new HashMap<>(); + consumingSegments.put("tableA_REALTIME", ImmutableSet.of(segA0, segA1)); + consumingSegments.put("tableB_REALTIME", ImmutableSet.of(segB0)); InstanceDataManager instanceDataManager = mock(InstanceDataManager.class); FreshnessBasedConsumptionStatusChecker statusChecker = - new FakeFreshnessBasedConsumptionStatusChecker(instanceDataManager, consumingSegments, 10L, 0L, 100L); + new FakeFreshnessBasedConsumptionStatusChecker(instanceDataManager, consumingSegments, + ConsumptionStatusCheckerTestUtils.getConsumingSegments(consumingSegments), 10L, 0L, 100L); // TableDataManager is not set up yet assertEquals(statusChecker.getNumConsumingSegmentsNotReachedIngestionCriteria(), 3); diff --git 
a/pinot-server/src/test/java/org/apache/pinot/server/starter/helix/OffsetBasedConsumptionStatusCheckerTest.java b/pinot-server/src/test/java/org/apache/pinot/server/starter/helix/OffsetBasedConsumptionStatusCheckerTest.java index 88b05b8ff003..2248f731d2d7 100644 --- a/pinot-server/src/test/java/org/apache/pinot/server/starter/helix/OffsetBasedConsumptionStatusCheckerTest.java +++ b/pinot-server/src/test/java/org/apache/pinot/server/starter/helix/OffsetBasedConsumptionStatusCheckerTest.java @@ -20,6 +20,8 @@ package org.apache.pinot.server.starter.helix; import com.google.common.collect.ImmutableSet; +import java.util.HashMap; +import java.util.Map; import java.util.Set; import org.apache.pinot.core.data.manager.InstanceDataManager; import org.apache.pinot.core.data.manager.offline.ImmutableSegmentDataManager; @@ -41,10 +43,13 @@ public void regularCase() { String segA0 = "tableA__0__0__123Z"; String segA1 = "tableA__1__0__123Z"; String segB0 = "tableB__0__0__123Z"; - Set consumingSegments = ImmutableSet.of(segA0, segA1, segB0); + Map> consumingSegments = new HashMap<>(); + consumingSegments.put("tableA_REALTIME", ImmutableSet.of(segA0, segA1)); + consumingSegments.put("tableB_REALTIME", ImmutableSet.of(segB0)); InstanceDataManager instanceDataManager = mock(InstanceDataManager.class); OffsetBasedConsumptionStatusChecker statusChecker = - new OffsetBasedConsumptionStatusChecker(instanceDataManager, consumingSegments); + new OffsetBasedConsumptionStatusChecker(instanceDataManager, consumingSegments, + ConsumptionStatusCheckerTestUtils.getConsumingSegments(consumingSegments)); // setup TableDataMangers TableDataManager tableDataManagerA = mock(TableDataManager.class); @@ -88,11 +93,14 @@ public void dataMangersBeingSetup() { String segA0 = "tableA__0__0__123Z"; String segA1 = "tableA__1__0__123Z"; String segB0 = "tableB__0__0__123Z"; - Set consumingSegments = ImmutableSet.of(segA0, segA1, segB0); + Map> consumingSegments = new HashMap<>(); + 
consumingSegments.put("tableA_REALTIME", ImmutableSet.of(segA0, segA1)); + consumingSegments.put("tableB_REALTIME", ImmutableSet.of(segB0)); InstanceDataManager instanceDataManager = mock(InstanceDataManager.class); OffsetBasedConsumptionStatusChecker statusChecker = - new OffsetBasedConsumptionStatusChecker(instanceDataManager, consumingSegments); + new OffsetBasedConsumptionStatusChecker(instanceDataManager, consumingSegments, + ConsumptionStatusCheckerTestUtils.getConsumingSegments(consumingSegments)); // TableDataManager is not set up yet assertEquals(statusChecker.getNumConsumingSegmentsNotReachedIngestionCriteria(), 3); @@ -149,10 +157,13 @@ public void segmentsBeingCommitted() { String segA0 = "tableA__0__0__123Z"; String segA1 = "tableA__1__0__123Z"; String segB0 = "tableB__0__0__123Z"; - Set consumingSegments = ImmutableSet.of(segA0, segA1, segB0); + Map> consumingSegments = new HashMap<>(); + consumingSegments.put("tableA_REALTIME", ImmutableSet.of(segA0, segA1)); + consumingSegments.put("tableB_REALTIME", ImmutableSet.of(segB0)); InstanceDataManager instanceDataManager = mock(InstanceDataManager.class); OffsetBasedConsumptionStatusChecker statusChecker = - new OffsetBasedConsumptionStatusChecker(instanceDataManager, consumingSegments); + new OffsetBasedConsumptionStatusChecker(instanceDataManager, consumingSegments, + ConsumptionStatusCheckerTestUtils.getConsumingSegments(consumingSegments)); // setup TableDataMangers TableDataManager tableDataManagerA = mock(TableDataManager.class); @@ -190,6 +201,8 @@ public void segmentsBeingCommitted() { // segB0 committed at 1200 1500 when(segMngrA0.getCurrentOffset()).thenReturn(new LongMsgOffset(20)); when(segMngrA1.getCurrentOffset()).thenReturn(new LongMsgOffset(200)); + assertEquals(statusChecker.getNumConsumingSegmentsNotReachedIngestionCriteria(), 1); + consumingSegments.get("tableB_REALTIME").remove(segB0); assertEquals(statusChecker.getNumConsumingSegmentsNotReachedIngestionCriteria(), 0); } @@ -199,10 
+212,13 @@ public void cannotGetLatestStreamOffset() { String segA0 = "tableA__0__0__123Z"; String segA1 = "tableA__1__0__123Z"; String segB0 = "tableB__0__0__123Z"; - Set consumingSegments = ImmutableSet.of(segA0, segA1, segB0); + Map> consumingSegments = new HashMap<>(); + consumingSegments.put("tableA_REALTIME", ImmutableSet.of(segA0, segA1)); + consumingSegments.put("tableB_REALTIME", ImmutableSet.of(segB0)); InstanceDataManager instanceDataManager = mock(InstanceDataManager.class); OffsetBasedConsumptionStatusChecker statusChecker = - new OffsetBasedConsumptionStatusChecker(instanceDataManager, consumingSegments); + new OffsetBasedConsumptionStatusChecker(instanceDataManager, consumingSegments, + ConsumptionStatusCheckerTestUtils.getConsumingSegments(consumingSegments)); // setup TableDataMangers TableDataManager tableDataManagerA = mock(TableDataManager.class); From dd4f0acabd73a752f4fa09986bd8c933c9cd00e0 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 23 Apr 2024 10:43:51 -0700 Subject: [PATCH 062/102] Bump org.jline:jline from 3.24.1 to 3.26.0 (#12991) --- pom.xml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index a4a83c8b287a..9f6838ec78d1 100644 --- a/pom.xml +++ b/pom.xml @@ -243,8 +243,7 @@ 1.9.23 3.9.0 2.0.3 - - 3.24.1 + 3.26.0 1.7.0.Final 1.5.4 9.4.54.v20240208 From 40cf5a7ba82fcc4fc10d8b8efe800d3f732c0655 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 23 Apr 2024 10:50:24 -0700 Subject: [PATCH 063/102] Bump aws.sdk.version from 2.25.35 to 2.25.36 (#12990) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 9f6838ec78d1..f9d4f7f328cc 100644 --- a/pom.xml +++ b/pom.xml @@ -172,7 +172,7 @@ 0.15.0 0.4.4 4.2.2 - 2.25.35 + 2.25.36 2.12.7 3.1.12 7.10.1 From 0caeccfc1399087885205e7e796d1ee8037f7867 Mon Sep 17 00:00:00 2001 From: 
"dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 23 Apr 2024 15:12:07 -0700 Subject: [PATCH 064/102] Bump org.webjars:swagger-ui from 5.15.0 to 5.17.0 (#12989) --- .../main/java/org/apache/pinot/spi/utils/CommonConstants.java | 2 +- pom.xml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/utils/CommonConstants.java b/pinot-spi/src/main/java/org/apache/pinot/spi/utils/CommonConstants.java index bbf3b30342fa..befd5b57633e 100644 --- a/pinot-spi/src/main/java/org/apache/pinot/spi/utils/CommonConstants.java +++ b/pinot-spi/src/main/java/org/apache/pinot/spi/utils/CommonConstants.java @@ -51,7 +51,7 @@ private CommonConstants() { "org.apache.pinot.spi.eventlistener.query.NoOpBrokerQueryEventListener"; public static final String SWAGGER_AUTHORIZATION_KEY = "oauth"; - public static final String CONFIG_OF_SWAGGER_RESOURCES_PATH = "META-INF/resources/webjars/swagger-ui/5.15.0/"; + public static final String CONFIG_OF_SWAGGER_RESOURCES_PATH = "META-INF/resources/webjars/swagger-ui/5.17.0/"; public static final String CONFIG_OF_TIMEZONE = "pinot.timezone"; public static final String DATABASE = "database"; diff --git a/pom.xml b/pom.xml index f9d4f7f328cc..b52922f01e7b 100644 --- a/pom.xml +++ b/pom.xml @@ -147,7 +147,7 @@ 2.6.1 3.30.2-GA 1.6.14 - 5.15.0 + 5.17.0 3.3.6 2.9.0 2.5.1 From 36c4b9a86fcab77e96cb1e90b1900efca0e1ce7c Mon Sep 17 00:00:00 2001 From: deemoliu Date: Tue, 23 Apr 2024 15:20:35 -0700 Subject: [PATCH 065/102] Add Prefix, Suffix and Ngram UDFs (#12392) --- .../function/scalar/StringFunctions.java | 108 ++++++++++++++++++ .../function/scalar/StringFunctionsTest.java | 50 ++++++++ 2 files changed, 158 insertions(+) diff --git a/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/StringFunctions.java b/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/StringFunctions.java index 374917ec9939..31baeb5d2d44 100644 --- 
a/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/StringFunctions.java +++ b/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/StringFunctions.java @@ -18,6 +18,8 @@ */ package org.apache.pinot.common.function.scalar; +import it.unimi.dsi.fastutil.objects.ObjectLinkedOpenHashSet; +import it.unimi.dsi.fastutil.objects.ObjectSet; import java.io.UnsupportedEncodingException; import java.net.URLDecoder; import java.net.URLEncoder; @@ -28,6 +30,7 @@ import java.util.UUID; import java.util.regex.Matcher; import java.util.regex.Pattern; +import javax.annotation.Nullable; import org.apache.commons.lang3.StringUtils; import org.apache.pinot.common.utils.RegexpPatternConverterUtils; import org.apache.pinot.spi.annotations.ScalarFunction; @@ -580,6 +583,111 @@ public static String[] split(String input, String delimiter, int limit) { return StringUtils.splitByWholeSeparator(input, delimiter, limit); } + /** + * @param input an input string for prefix strings generations. + * @param maxlength the max length of the prefix strings for the string. + * @return generate an array of prefix strings of the string that are shorter than the specified length. + */ + @ScalarFunction + public static String[] prefixes(String input, int maxlength) { + int arrLength = Math.min(maxlength, input.length()); + String[] prefixArr = new String[arrLength]; + for (int prefixIdx = 1; prefixIdx <= arrLength; prefixIdx++) { + prefixArr[prefixIdx - 1] = input.substring(0, prefixIdx); + } + return prefixArr; + } + + /** + * @param input an input string for prefix strings generations. + * @param maxlength the max length of the prefix strings for the string. + * @param prefix the prefix to be prepended to prefix strings generated. e.g. '^' for regex matching + * @return generate an array of prefix matchers of the string that are shorter than the specified length. 
+ */ + @ScalarFunction(nullableParameters = true, names = {"prefixesWithPrefix", "prefixes_with_prefix"}) + public static String[] prefixesWithPrefix(String input, int maxlength, @Nullable String prefix) { + if (prefix == null) { + return prefixes(input, maxlength); + } + int arrLength = Math.min(maxlength, input.length()); + String[] prefixArr = new String[arrLength]; + for (int prefixIdx = 1; prefixIdx <= arrLength; prefixIdx++) { + prefixArr[prefixIdx - 1] = prefix + input.substring(0, prefixIdx); + } + return prefixArr; + } + + /** + * @param input an input string for suffix strings generations. + * @param maxlength the max length of the suffix strings for the string. + * @return generate an array of suffix strings of the string that are shorter than the specified length. + */ + @ScalarFunction + public static String[] suffixes(String input, int maxlength) { + int arrLength = Math.min(maxlength, input.length()); + String[] suffixArr = new String[arrLength]; + for (int suffixIdx = 1; suffixIdx <= arrLength; suffixIdx++) { + suffixArr[suffixIdx - 1] = input.substring(input.length() - suffixIdx); + } + return suffixArr; + } + + /** + * @param input an input string for suffix strings generations. + * @param maxlength the max length of the suffix strings for the string. + * @param suffix the suffix string to be appended for suffix strings generated. e.g. '$' for regex matching. + * @return generate an array of suffix matchers of the string that are shorter than the specified length. 
+ */ + @ScalarFunction(nullableParameters = true, names = {"suffixesWithSuffix", "suffixes_with_suffix"}) + public static String[] suffixesWithSuffix(String input, int maxlength, @Nullable String suffix) { + if (suffix == null) { + return suffixes(input, maxlength); + } + int arrLength = Math.min(maxlength, input.length()); + String[] suffixArr = new String[arrLength]; + for (int suffixIdx = 1; suffixIdx <= arrLength; suffixIdx++) { + suffixArr[suffixIdx - 1] = input.substring(input.length() - suffixIdx) + suffix; + } + return suffixArr; + } + + /** + * @param input an input string for ngram generations. + * @param length the max length of the ngram for the string. + * @return generate an array of unique ngram of the string that length are exactly matching the specified length. + */ + @ScalarFunction + public static String[] uniqueNgrams(String input, int length) { + if (length == 0 || length > input.length()) { + return new String[0]; + } + ObjectSet ngramSet = new ObjectLinkedOpenHashSet<>(); + for (int i = 0; i < input.length() - length + 1; i++) { + ngramSet.add(input.substring(i, i + length)); + } + return ngramSet.toArray(new String[0]); + } + + /** + * @param input an input string for ngram generations. + * @param minGram the min length of the ngram for the string. + * @param maxGram the max length of the ngram for the string. + * @return generate an array of ngram of the string that length are within the specified range [minGram, maxGram]. 
+ */ + @ScalarFunction + public static String[] uniqueNgrams(String input, int minGram, int maxGram) { + ObjectSet ngramSet = new ObjectLinkedOpenHashSet<>(); + for (int n = minGram; n <= maxGram && n <= input.length(); n++) { + if (n == 0) { + continue; + } + for (int i = 0; i < input.length() - n + 1; i++) { + ngramSet.add(input.substring(i, i + n)); + } + } + return ngramSet.toArray(new String[0]); + } + /** * TODO: Revisit if index should be one-based (both Presto and Postgres use one-based index, which starts with 1) * @param input diff --git a/pinot-common/src/test/java/org/apache/pinot/common/function/scalar/StringFunctionsTest.java b/pinot-common/src/test/java/org/apache/pinot/common/function/scalar/StringFunctionsTest.java index d75b8ada435d..6c9fa465f54d 100644 --- a/pinot-common/src/test/java/org/apache/pinot/common/function/scalar/StringFunctionsTest.java +++ b/pinot-common/src/test/java/org/apache/pinot/common/function/scalar/StringFunctionsTest.java @@ -77,6 +77,41 @@ public static Object[][] isJsonTestCases() { }; } + @DataProvider(name = "prefixAndSuffixTestCases") + public static Object[][] prefixAndSuffixTestCases() { + return new Object[][]{ + {"abcde", 3, new String[]{"a", "ab", "abc"}, new String[]{"e", "de", "cde"}, new String[]{ + "^a", "^ab", "^abc"}, new String[]{"e$", "de$", "cde$"}}, + {"abcde", 0, new String[]{}, new String[]{}, new String[]{}, new String[]{}}, + {"abcde", 9, new String[]{"a", "ab", "abc", "abcd", "abcde"}, new String[]{"e", "de", "cde", "bcde", "abcde"}, + new String[]{"^a", "^ab", "^abc", "^abcd", "^abcde"}, new String[]{"e$", "de$", "cde$", "bcde$", "abcde$"}}, + {"a", 3, new String[]{"a"}, new String[]{"a"}, new String[]{"^a"}, new String[]{"a$"}}, + {"a", 0, new String[]{}, new String[]{}, new String[]{}, new String[]{}}, + {"a", 9, new String[]{"a"}, new String[]{"a"}, new String[]{"^a"}, new String[]{"a$"}}, + {"", 3, new String[]{}, new String[]{}, new String[]{}, new String[]{}}, + {"", 0, new String[]{}, new 
String[]{}, new String[]{}, new String[]{}}, + {"", 9, new String[]{}, new String[]{}, new String[]{}, new String[]{}} + }; + } + + @DataProvider(name = "ngramTestCases") + public static Object[][] ngramTestCases() { + return new Object[][]{ + {"abcd", 0, 3, new String[]{"abc", "bcd"}, new String[]{"a", "b", "c", "d", "ab", "bc", "cd", "abc", "bcd"}}, + {"abcd", 2, 2, new String[]{"ab", "bc", "cd"}, new String[]{"ab", "bc", "cd"}}, + {"abcd", 3, 0, new String[]{}, new String[]{}}, + {"abc", 0, 3, new String[]{"abc"}, new String[]{"a", "b", "c", "ab", "bc", "abc"}}, + {"abc", 3, 0, new String[]{}, new String[]{}}, + {"abc", 3, 3, new String[]{"abc"}, new String[]{"abc"}}, + {"a", 0, 3, new String[]{}, new String[]{"a"}}, + {"a", 2, 3, new String[]{}, new String[]{}}, + {"a", 3, 3, new String[]{}, new String[]{}}, + {"", 3, 0, new String[]{}, new String[]{}}, + {"", 3, 3, new String[]{}, new String[]{}}, + {"", 0, 3, new String[]{}, new String[]{}} + }; + } + @Test(dataProvider = "isJson") public void testIsJson(String input, boolean expectedValue) { assertEquals(StringFunctions.isJson(input), expectedValue); @@ -88,4 +123,19 @@ public void testSplitPart(String input, String delimiter, int index, int limit, assertEquals(StringFunctions.splitPart(input, delimiter, index), expectedToken); assertEquals(StringFunctions.splitPart(input, delimiter, limit, index), expectedTokenWithLimitCounts); } + + @Test(dataProvider = "prefixAndSuffixTestCases") + public void testPrefixAndSuffix(String input, int length, String[] expectedPrefix, String[] expectedSuffix, + String[] expectedPrefixWithRegexChar, String[] expectedSuffixWithRegexChar) { + assertEquals(StringFunctions.prefixes(input, length), expectedPrefix); + assertEquals(StringFunctions.suffixes(input, length), expectedSuffix); + assertEquals(StringFunctions.prefixesWithPrefix(input, length, "^"), expectedPrefixWithRegexChar); + assertEquals(StringFunctions.suffixesWithSuffix(input, length, "$"), 
expectedSuffixWithRegexChar); + } + + @Test(dataProvider = "ngramTestCases") + public void testNGram(String input, int minGram, int maxGram, String[] expectedExactNGram, String[] expectedNGram) { + assertEquals(StringFunctions.uniqueNgrams(input, maxGram), expectedExactNGram); + assertEquals(StringFunctions.uniqueNgrams(input, minGram, maxGram), expectedNGram); + } } From bc9e8ee5413c8611fe2be3ed6c3d7073e750d608 Mon Sep 17 00:00:00 2001 From: "Xiaotian (Jackie) Jiang" <17555551+Jackie-Jiang@users.noreply.github.com> Date: Tue, 23 Apr 2024 21:43:12 -0700 Subject: [PATCH 066/102] Upgrade Pulsar to 3.2.2 (#12967) --- .../pinot-pulsar/pom.xml | 134 ++---------------- .../plugin/stream/pulsar/PulsarUtils.java | 27 ++-- .../stream/pulsar/PulsarConsumerTest.java | 2 +- pinot-tools/pom.xml | 38 ----- pom.xml | 31 ++-- 5 files changed, 37 insertions(+), 195 deletions(-) diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-pulsar/pom.xml b/pinot-plugins/pinot-stream-ingestion/pinot-pulsar/pom.xml index f742f52f45a8..cb13fb9bba87 100644 --- a/pinot-plugins/pinot-stream-ingestion/pinot-pulsar/pom.xml +++ b/pinot-plugins/pinot-stream-ingestion/pinot-pulsar/pom.xml @@ -37,137 +37,33 @@ package ${basedir}/../../.. 
- 0.16.0 - 1.63.0 - 1.63.0 - 2.6.2 - 1.17 + 3.2.2 + 1.19.7 - - org.testcontainers - pulsar - 1.19.7 - test - - - org.mockito - mockito-core - test - org.apache.pulsar - pulsar-client-original - - - commons-configuration - commons-configuration - - - org.bouncycastle - bcpkix-jdk15on - - - org.bouncycastle - bcprov-ext-jdk15on - - - org.eclipse.jetty - jetty-util - - + pulsar-client + ${pulsar.version} + org.apache.pulsar - pulsar-client-admin-original - - - org.glassfish.jersey.core - jersey-server - - - org.glassfish.jersey.containers - jersey-container-grizzly2-http - - - org.glassfish.jersey.containers - jersey-container-servlet-core - - - io.netty - netty-resolver - - - io.prometheus - simpleclient_common - ${simpleclient_common.version} - - - com.google.api.grpc - proto-google-common-protos - - - io.grpc - grpc-context - ${grpc-context.version} - - - io.grpc - grpc-protobuf-lite - ${grpc-protobuf-lite.version} - - - io.prometheus - simpleclient - ${simpleclient_common.version} - - - org.eclipse.jetty - jetty-server - - - org.eclipse.jetty - jetty-servlet - - - com.squareup.okio - okio - - - io.prometheus - simpleclient_hotspot - ${simpleclient_common.version} - - - org.codehaus.mojo - animal-sniffer-annotations - ${codehaus-annotations.version} - - - com.github.ben-manes.caffeine - caffeine - ${caffeine.version} - - - io.netty - netty-codec-socks - - - org.bouncycastle - bcpkix-jdk15to18 - - - org.bouncycastle - bcprov-ext-jdk15to18 + pulsar-client-admin + ${pulsar.version} + test - org.bouncycastle - bcprov-jdk15to18 + org.testcontainers + pulsar + ${testcontainers.pulsar.version} + test - org.apache.pinot - pinot-spi + org.mockito + mockito-core + test diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-pulsar/src/main/java/org/apache/pinot/plugin/stream/pulsar/PulsarUtils.java b/pinot-plugins/pinot-stream-ingestion/pinot-pulsar/src/main/java/org/apache/pinot/plugin/stream/pulsar/PulsarUtils.java index 0ccacc304704..e1b7b50c21a4 100644 --- 
a/pinot-plugins/pinot-stream-ingestion/pinot-pulsar/src/main/java/org/apache/pinot/plugin/stream/pulsar/PulsarUtils.java +++ b/pinot-plugins/pinot-stream-ingestion/pinot-pulsar/src/main/java/org/apache/pinot/plugin/stream/pulsar/PulsarUtils.java @@ -22,6 +22,7 @@ import com.google.common.collect.Maps; import java.nio.ByteBuffer; import java.util.Base64; +import java.util.BitSet; import java.util.Map; import java.util.Set; import org.apache.commons.lang3.StringUtils; @@ -31,12 +32,11 @@ import org.apache.pinot.spi.stream.StreamMessageMetadata; import org.apache.pulsar.client.api.Message; import org.apache.pulsar.client.api.MessageId; +import org.apache.pulsar.client.api.MessageIdAdv; import org.apache.pulsar.client.api.Reader; import org.apache.pulsar.client.api.SubscriptionInitialPosition; -import org.apache.pulsar.client.impl.BatchMessageAcker; import org.apache.pulsar.client.impl.BatchMessageIdImpl; import org.apache.pulsar.client.impl.MessageIdImpl; -import org.apache.pulsar.client.internal.DefaultImplementation; public class PulsarUtils { @@ -119,22 +119,21 @@ static StreamMessageMetadata extractMessageMetadata(Message message, Pul * record in the new ledger. 
*/ public static MessageId getNextMessageId(MessageId messageId) { - MessageIdImpl messageIdImpl = MessageIdImpl.convertToMessageIdImpl(messageId); - long ledgerId = messageIdImpl.getLedgerId(); - long entryId = messageIdImpl.getEntryId(); - int partitionIndex = messageIdImpl.getPartitionIndex(); - if (messageIdImpl instanceof BatchMessageIdImpl) { - BatchMessageIdImpl batchMessageIdImpl = (BatchMessageIdImpl) messageIdImpl; - int batchIndex = batchMessageIdImpl.getBatchIndex(); - int batchSize = batchMessageIdImpl.getBatchSize(); - BatchMessageAcker acker = batchMessageIdImpl.getAcker(); + MessageIdAdv messageIdAdv = (MessageIdAdv) messageId; + long ledgerId = messageIdAdv.getLedgerId(); + long entryId = messageIdAdv.getEntryId(); + int partitionIndex = messageIdAdv.getPartitionIndex(); + int batchSize = messageIdAdv.getBatchSize(); + if (batchSize > 0) { + int batchIndex = messageIdAdv.getBatchIndex(); + BitSet ackSet = messageIdAdv.getAckSet(); if (batchIndex < batchSize - 1) { - return new BatchMessageIdImpl(ledgerId, entryId, partitionIndex, batchIndex + 1, batchSize, acker); + return new BatchMessageIdImpl(ledgerId, entryId, partitionIndex, batchIndex + 1, batchSize, ackSet); } else { - return new BatchMessageIdImpl(ledgerId, entryId + 1, partitionIndex, 0, batchSize, acker); + return new BatchMessageIdImpl(ledgerId, entryId + 1, partitionIndex, 0, batchSize, ackSet); } } else { - return DefaultImplementation.getDefaultImplementation().newMessageId(ledgerId, entryId + 1, partitionIndex); + return new MessageIdImpl(ledgerId, entryId + 1, partitionIndex); } } diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-pulsar/src/test/java/org/apache/pinot/plugin/stream/pulsar/PulsarConsumerTest.java b/pinot-plugins/pinot-stream-ingestion/pinot-pulsar/src/test/java/org/apache/pinot/plugin/stream/pulsar/PulsarConsumerTest.java index 1baf212f170e..0ee9eb062332 100644 --- 
a/pinot-plugins/pinot-stream-ingestion/pinot-pulsar/src/test/java/org/apache/pinot/plugin/stream/pulsar/PulsarConsumerTest.java +++ b/pinot-plugins/pinot-stream-ingestion/pinot-pulsar/src/test/java/org/apache/pinot/plugin/stream/pulsar/PulsarConsumerTest.java @@ -54,7 +54,7 @@ public class PulsarConsumerTest { - private static final DockerImageName PULSAR_IMAGE = DockerImageName.parse("apachepulsar/pulsar:2.11.4"); + private static final DockerImageName PULSAR_IMAGE = DockerImageName.parse("apachepulsar/pulsar:3.2.2"); public static final String TABLE_NAME_WITH_TYPE = "tableName_REALTIME"; public static final String TEST_TOPIC = "test-topic"; public static final String TEST_TOPIC_BATCH = "test-topic-batch"; diff --git a/pinot-tools/pom.xml b/pinot-tools/pom.xml index 26c466ee0c5c..2e9dcf3b87b8 100644 --- a/pinot-tools/pom.xml +++ b/pinot-tools/pom.xml @@ -146,40 +146,6 @@ pinot-pulsar ${project.version} runtime - - - com.google.errorprone - error_prone_annotations - - - org.codehaus.mojo - animal-sniffer-annotations - - - com.google.api.grpc - proto-google-common-protos - - - org.glassfish.jersey.containers - jersey-container-servlet-core - - - io.grpc - grpc-protobuf-lite - - - io.grpc - grpc-context - - - com.typesafe.netty - netty-reactive-streams - - - com.beust - jcommander - - org.apache.pinot @@ -208,10 +174,6 @@ info.picocli picocli - - io.airlift - aircompressor - org.testng testng diff --git a/pom.xml b/pom.xml index b52922f01e7b..6a75bfd73ecc 100644 --- a/pom.xml +++ b/pom.xml @@ -240,6 +240,7 @@ 1.14.6 + 1.9.23 3.9.0 2.0.3 @@ -249,8 +250,7 @@ 9.4.54.v20240208 9.37.3 1.78 - 0.26 - 2.11.4 + 0.26 @@ -1561,19 +1561,7 @@ ${sslcontext.kickstart.version} - - - org.apache.pulsar - pulsar-client-original - ${pulsar.version} - - - org.apache.pulsar - pulsar-client-admin-original - ${pulsar.version} - - - + org.bouncycastle bcpkix-jdk18on @@ -1586,27 +1574,24 @@ org.bouncycastle - bcpkix-jdk15to18 + bcutil-jdk18on ${bouncycastle.version} org.bouncycastle - 
bcprov-ext-jdk15to18 - ${bouncycastle.version} - - - org.bouncycastle - bcprov-jdk15to18 + bcprov-ext-jdk18on ${bouncycastle.version} + io.airlift aircompressor - ${airlift.version} + ${aircompressor.version} + clean install From 33b8c88d2976e5be599630873349413bd832299c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 24 Apr 2024 10:19:11 -0700 Subject: [PATCH 067/102] Bump org.apache.maven.plugins:maven-shade-plugin from 3.5.2 to 3.5.3 (#12996) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 6a75bfd73ecc..98dc157f8adc 100644 --- a/pom.xml +++ b/pom.xml @@ -131,7 +131,7 @@ org.apache.pinot.shaded 3.4.1 - 3.5.2 + 3.5.3 none 1.11.3 From cb16cd7c415afb0a26391c406a98aed5124875a1 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 24 Apr 2024 10:19:37 -0700 Subject: [PATCH 068/102] Bump com.github.luben:zstd-jni from 1.5.6-2 to 1.5.6-3 (#12999) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 98dc157f8adc..adc9d1f26eff 100644 --- a/pom.xml +++ b/pom.xml @@ -159,7 +159,7 @@ 4.2.25 1.1.10.5 - 1.5.6-2 + 1.5.6-3 1.8.0 0.20.0 2.23.1 From d602ffdad6402ad44bcd198a8aebff381b28a79b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 24 Apr 2024 10:22:37 -0700 Subject: [PATCH 069/102] Bump aws.sdk.version from 2.25.36 to 2.25.37 (#12994) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index adc9d1f26eff..21b221ef2095 100644 --- a/pom.xml +++ b/pom.xml @@ -172,7 +172,7 @@ 0.15.0 0.4.4 4.2.2 - 2.25.36 + 2.25.37 2.12.7 3.1.12 7.10.1 From 5adb02fc33aff219e9a677785fceed5733b28174 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 24 Apr 2024 10:23:03 -0700 Subject: [PATCH 070/102] Bump 
com.azure:azure-storage-file-datalake from 12.18.3 to 12.18.4 (#12995) --- pinot-plugins/pinot-file-system/pinot-adls/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pinot-plugins/pinot-file-system/pinot-adls/pom.xml b/pinot-plugins/pinot-file-system/pinot-adls/pom.xml index bd5219be623e..f2f31cb65f6a 100644 --- a/pinot-plugins/pinot-file-system/pinot-adls/pom.xml +++ b/pinot-plugins/pinot-file-system/pinot-adls/pom.xml @@ -39,7 +39,7 @@ com.azure azure-storage-file-datalake - 12.18.3 + 12.18.4 com.azure From 73f162005648142031cbfbc7843bb1f53e73d987 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 24 Apr 2024 10:23:25 -0700 Subject: [PATCH 071/102] Bump org.jline:jline from 3.26.0 to 3.26.1 (#12997) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 21b221ef2095..b3367df70bf4 100644 --- a/pom.xml +++ b/pom.xml @@ -244,7 +244,7 @@ 1.9.23 3.9.0 2.0.3 - 3.26.0 + 3.26.1 1.7.0.Final 1.5.4 9.4.54.v20240208 From 2ca6666b7e6951cb9f95eac6ec9f7bae144701f1 Mon Sep 17 00:00:00 2001 From: "Xiaotian (Jackie) Jiang" <17555551+Jackie-Jiang@users.noreply.github.com> Date: Wed, 24 Apr 2024 12:20:50 -0700 Subject: [PATCH 072/102] Pull pulsar version definitaion into root POM (#13002) --- pinot-plugins/pinot-stream-ingestion/pinot-pulsar/pom.xml | 1 - pom.xml | 8 ++++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-pulsar/pom.xml b/pinot-plugins/pinot-stream-ingestion/pinot-pulsar/pom.xml index cb13fb9bba87..aff1bd4da918 100644 --- a/pinot-plugins/pinot-stream-ingestion/pinot-pulsar/pom.xml +++ b/pinot-plugins/pinot-stream-ingestion/pinot-pulsar/pom.xml @@ -37,7 +37,6 @@ package ${basedir}/../../.. 
- 3.2.2 1.19.7 diff --git a/pom.xml b/pom.xml index b3367df70bf4..13f99490d1ce 100644 --- a/pom.xml +++ b/pom.xml @@ -179,6 +179,10 @@ 6.6.2 8.3.4 + 7.6.1 + 3.2.2 + 1.14.6 + 3.14.0 4.4 @@ -232,14 +236,10 @@ 3.25.2 1.61.1 - 7.6.1 - 2.12.18 2.12 - 1.14.6 - 1.9.23 3.9.0 From 099a86cff0ad16a4d1a798efaf1b2118cf8e0cfb Mon Sep 17 00:00:00 2001 From: Rekha Seethamraju Date: Wed, 24 Apr 2024 12:30:36 -0700 Subject: [PATCH 073/102] Add schema as input to the decoder. (#12981) --- .../realtime/RealtimeSegmentDataManager.java | 24 ++++++++- .../spi/stream/StreamDecoderProvider.java | 52 ------------------- .../spi/stream/StreamMessageDecoder.java | 21 ++++++-- 3 files changed, 40 insertions(+), 57 deletions(-) delete mode 100644 pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamDecoderProvider.java diff --git a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/RealtimeSegmentDataManager.java b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/RealtimeSegmentDataManager.java index 6771e038d14d..3290c5e4f3c0 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/RealtimeSegmentDataManager.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/RealtimeSegmentDataManager.java @@ -75,6 +75,7 @@ import org.apache.pinot.spi.data.Schema; import org.apache.pinot.spi.data.readers.GenericRow; import org.apache.pinot.spi.metrics.PinotMeter; +import org.apache.pinot.spi.plugin.PluginManager; import org.apache.pinot.spi.recordenricher.RecordEnricherPipeline; import org.apache.pinot.spi.stream.ConsumerPartitionState; import org.apache.pinot.spi.stream.LongMsgOffset; @@ -91,7 +92,6 @@ import org.apache.pinot.spi.stream.StreamDataDecoder; import org.apache.pinot.spi.stream.StreamDataDecoderImpl; import org.apache.pinot.spi.stream.StreamDataDecoderResult; -import org.apache.pinot.spi.stream.StreamDecoderProvider; import org.apache.pinot.spi.stream.StreamMessage; import 
org.apache.pinot.spi.stream.StreamMessageDecoder; import org.apache.pinot.spi.stream.StreamMessageMetadata; @@ -1505,7 +1505,7 @@ public RealtimeSegmentDataManager(SegmentZKMetadata segmentZKMetadata, TableConf // Create message decoder Set fieldsToRead = IngestionUtils.getFieldsForRecordExtractor(_tableConfig.getIngestionConfig(), _schema); try { - StreamMessageDecoder streamMessageDecoder = StreamDecoderProvider.create(_streamConfig, fieldsToRead); + StreamMessageDecoder streamMessageDecoder = createMessageDecoder(fieldsToRead); _streamDataDecoder = new StreamDataDecoderImpl(streamMessageDecoder); } catch (Exception e) { _realtimeTableDataManager.addSegmentError(_segmentNameStr, @@ -1780,6 +1780,26 @@ private void updateCurrentDocumentCountMetrics() { } } + /** + * Creates a {@link StreamMessageDecoder} using properties in {@link StreamConfig}. + * + * @param streamConfig The stream config from the table config + * @param fieldsToRead The fields to read from the source stream + * @return The initialized StreamMessageDecoder + */ + private StreamMessageDecoder createMessageDecoder(Set fieldsToRead) { + String decoderClass = _streamConfig.getDecoderClass(); + try { + Map decoderProperties = _streamConfig.getDecoderProperties(); + StreamMessageDecoder decoder = PluginManager.get().createInstance(decoderClass); + decoder.init(fieldsToRead, _streamConfig, _tableConfig, _schema); + return decoder; + } catch (Exception e) { + throw new RuntimeException( + "Caught exception while creating StreamMessageDecoder from stream config: " + _streamConfig, e); + } + } + @Override public MutableSegment getSegment() { return _realtimeSegment; diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamDecoderProvider.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamDecoderProvider.java deleted file mode 100644 index fdb97093de87..000000000000 --- a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamDecoderProvider.java +++ /dev/null @@ -1,52 +0,0 
@@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.pinot.spi.stream; - -import java.util.Map; -import java.util.Set; -import org.apache.pinot.spi.plugin.PluginManager; - - -/** - * Provider for {@link StreamMessageDecoder} - */ -public class StreamDecoderProvider { - private StreamDecoderProvider() { - } - - /** - * Creates a {@link StreamMessageDecoder} using properties in {@link StreamConfig}. 
- * - * @param streamConfig The stream config from the table config - * @param fieldsToRead The fields to read from the source stream - * @return The initialized StreamMessageDecoder - */ - public static StreamMessageDecoder create(StreamConfig streamConfig, Set fieldsToRead) { - String decoderClass = streamConfig.getDecoderClass(); - Map decoderProperties = streamConfig.getDecoderProperties(); - try { - StreamMessageDecoder decoder = PluginManager.get().createInstance(decoderClass); - decoder.init(decoderProperties, fieldsToRead, streamConfig.getTopicName()); - return decoder; - } catch (Exception e) { - throw new RuntimeException( - "Caught exception while creating StreamMessageDecoder from stream config: " + streamConfig, e); - } - } -} diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamMessageDecoder.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamMessageDecoder.java index 89312f06b613..b736e975d1b2 100644 --- a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamMessageDecoder.java +++ b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamMessageDecoder.java @@ -23,9 +23,10 @@ import javax.annotation.Nullable; import org.apache.pinot.spi.annotations.InterfaceAudience; import org.apache.pinot.spi.annotations.InterfaceStability; +import org.apache.pinot.spi.config.table.TableConfig; +import org.apache.pinot.spi.data.Schema; import org.apache.pinot.spi.data.readers.GenericRow; - /** * Interface for a decoder of messages fetched from the stream * @param @@ -46,8 +47,22 @@ public interface StreamMessageDecoder { * @param topicName Topic name of the stream * @throws Exception If an error occurs */ - void init(Map props, Set fieldsToRead, String topicName) - throws Exception; + default void init(Map props, Set fieldsToRead, String topicName) + throws Exception { + throw new UnsupportedOperationException("init method not implemented"); + } + + /** + * Initializes the decoder. 
+ * @param streamConfig Can be derived from tableConfig but is passed explicitly to avoid redundant computation + * @param tableConfig Table Config of the table + * @param schema Schema of the table + * @throws Exception + */ + default void init(Set fieldsToRead, StreamConfig streamConfig, TableConfig tableConfig, Schema schema) + throws Exception { + init(streamConfig.getDecoderProperties(), fieldsToRead, streamConfig.getTopicName()); + } /** * Decodes a row. From 99a41803305a39887805e1106f90d07e4b6af978 Mon Sep 17 00:00:00 2001 From: Xuanyi Li Date: Wed, 24 Apr 2024 17:03:25 -0700 Subject: [PATCH 074/102] avoid useless intermediate byte array allocation for VarChunkV4Reader's getStringMV (#12978) --- .../VarByteChunkForwardIndexReaderV4.java | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/forward/VarByteChunkForwardIndexReaderV4.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/forward/VarByteChunkForwardIndexReaderV4.java index f0a3658cb3bf..a7fadab8c356 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/forward/VarByteChunkForwardIndexReaderV4.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/forward/VarByteChunkForwardIndexReaderV4.java @@ -207,29 +207,29 @@ public double[] getDoubleMV(int docId, VarByteChunkForwardIndexReaderV4.ReaderCo @Override public int getStringMV(int docId, String[] valueBuffer, VarByteChunkForwardIndexReaderV4.ReaderContext context) { - ByteBuffer byteBuffer = ByteBuffer.wrap(context.getValue(docId)); + byte[] bytes = context.getValue(docId); + ByteBuffer byteBuffer = ByteBuffer.wrap(bytes); int numValues = byteBuffer.getInt(); - byteBuffer.position((numValues + 1) * Integer.BYTES); + int offset = (numValues + 1) * Integer.BYTES; for (int i = 0; i < numValues; i++) { int 
length = byteBuffer.getInt((i + 1) * Integer.BYTES); - byte[] bytes = new byte[length]; - byteBuffer.get(bytes); - valueBuffer[i] = new String(bytes, StandardCharsets.UTF_8); + valueBuffer[i] = new String(bytes, offset, length, StandardCharsets.UTF_8); + offset += length; } return numValues; } @Override public String[] getStringMV(int docId, VarByteChunkForwardIndexReaderV4.ReaderContext context) { - ByteBuffer byteBuffer = ByteBuffer.wrap(context.getValue(docId)); + byte[] bytes = context.getValue(docId); + ByteBuffer byteBuffer = ByteBuffer.wrap(bytes); int numValues = byteBuffer.getInt(); - byteBuffer.position((numValues + 1) * Integer.BYTES); + int offset = (numValues + 1) * Integer.BYTES; String[] valueBuffer = new String[numValues]; for (int i = 0; i < numValues; i++) { int length = byteBuffer.getInt((i + 1) * Integer.BYTES); - byte[] bytes = new byte[length]; - byteBuffer.get(bytes); - valueBuffer[i] = new String(bytes, StandardCharsets.UTF_8); + valueBuffer[i] = new String(bytes, offset, length, StandardCharsets.UTF_8); + offset += length; } return valueBuffer; } From 49da7985806d31d7cdf63a76b888c74cd0bc816b Mon Sep 17 00:00:00 2001 From: Abhishek Sharma Date: Thu, 25 Apr 2024 12:41:19 -0400 Subject: [PATCH 075/102] Upgrade scala maven plugin to 4.9.0 (#13007) --- pom.xml | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/pom.xml b/pom.xml index 13f99490d1ce..f353f4712a79 100644 --- a/pom.xml +++ b/pom.xml @@ -1884,7 +1884,7 @@ net.alchim31.maven scala-maven-plugin - 3.2.2 + 4.9.0 add-source @@ -1920,6 +1920,9 @@ ${scala.version} + ${jdk.version} + ${jdk.version} + ${jdk.version} -unchecked -deprecation @@ -1930,10 +1933,6 @@ -Xmx1024m - -source - ${jdk.version} - -target - ${jdk.version} -Xlint:all,-serial,-path From 3f0b748e140c1f178c9fef16de4d9bf6f64d6b74 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 25 Apr 2024 09:44:47 -0700 Subject: [PATCH 076/102] Bump 
aws.sdk.version from 2.25.37 to 2.25.38 (#13006) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index f353f4712a79..d40fe84eb0e3 100644 --- a/pom.xml +++ b/pom.xml @@ -172,7 +172,7 @@ 0.15.0 0.4.4 4.2.2 - 2.25.37 + 2.25.38 2.12.7 3.1.12 7.10.1 From 84a4c70327fcdb7078662d9e697610809d29df1c Mon Sep 17 00:00:00 2001 From: Yash Mayya Date: Fri, 26 Apr 2024 03:46:59 +0530 Subject: [PATCH 077/102] Re-enable the Spotless plugin for Java 21 (#12992) --- pinot-common/pom.xml | 38 +++++++++++++------------------------- pom.xml | 19 ++++--------------- 2 files changed, 17 insertions(+), 40 deletions(-) diff --git a/pinot-common/pom.xml b/pinot-common/pom.xml index 2381f024d46b..32cd2eb9dbce 100644 --- a/pinot-common/pom.xml +++ b/pinot-common/pom.xml @@ -62,6 +62,19 @@ protobuf-maven-plugin + + com.diffplug.spotless + spotless-maven-plugin + + + + src/main/java/org/apache/pinot/common/request/*.java + src/main/java/org/apache/pinot/common/response/ProcessingException.java + + + + + @@ -129,31 +142,6 @@ - - - - - com.diffplug.spotless - spotless-maven-plugin - - - - src/main/java/**/*.java - src/test/java/**/*.java - - - src/main/java/org/apache/pinot/common/request/*.java - src/main/java/org/apache/pinot/common/response/ProcessingException.java - - - ,\# - - - - - - - diff --git a/pom.xml b/pom.xml index d40fe84eb0e3..978320c58331 100644 --- a/pom.xml +++ b/pom.xml @@ -254,20 +254,6 @@ - - not-java-21 - - !21 - - - - - com.diffplug.spotless - spotless-maven-plugin - - - - github-actions @@ -1609,7 +1595,6 @@ 2.43.0 - verify check @@ -2063,6 +2048,10 @@ sonar-maven-plugin 2.7.1 + + com.diffplug.spotless + spotless-maven-plugin + com.mycila license-maven-plugin From fc98ce1d53710d333ca652304cd0f1c1f1fa8e1b Mon Sep 17 00:00:00 2001 From: Gonzalo Ortiz Jaureguizar Date: Fri, 26 Apr 2024 00:20:02 +0200 Subject: [PATCH 078/102] Use ArrayList instead of LinkedList in SortOperator (#12783) --- 
.../query/runtime/operator/SortOperator.java | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/SortOperator.java b/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/SortOperator.java index ce4ddf130f91..b0a1923c808b 100644 --- a/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/SortOperator.java +++ b/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/SortOperator.java @@ -21,7 +21,7 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableList; import java.util.ArrayList; -import java.util.LinkedList; +import java.util.Arrays; import java.util.List; import java.util.PriorityQueue; import javax.annotation.Nullable; @@ -129,16 +129,16 @@ private TransferableBlock produceSortedBlock() { return TransferableBlockUtils.getEndOfStreamTransferableBlock(); } } else { - LinkedList rows = new LinkedList<>(); - while (_priorityQueue.size() > _offset) { - Object[] row = _priorityQueue.poll(); - rows.addFirst(row); - } - if (rows.size() == 0) { + int resultSize = _priorityQueue.size() - _offset; + if (resultSize <= 0) { return TransferableBlockUtils.getEndOfStreamTransferableBlock(); - } else { - return new TransferableBlock(rows, _dataSchema, DataBlock.Type.ROW); } + Object[][] rowsArr = new Object[resultSize][]; + for (int i = resultSize - 1; i >= 0; i--) { + Object[] row = _priorityQueue.poll(); + rowsArr[i] = row; + } + return new TransferableBlock(Arrays.asList(rowsArr), _dataSchema, DataBlock.Type.ROW); } } From 97a2e6d95eb87c67405cd661371a51acc478adec Mon Sep 17 00:00:00 2001 From: Christopher Peck <27231838+itschrispeck@users.noreply.github.com> Date: Thu, 25 Apr 2024 16:14:26 -0700 Subject: [PATCH 079/102] fix TextMatchFilterOperator boolean grouping (#13009) --- .../filter/TextMatchFilterOptimizer.java | 2 +- 
.../query/optimizer/QueryOptimizerTest.java | 29 ++++++++++--------- 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/filter/TextMatchFilterOptimizer.java b/pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/filter/TextMatchFilterOptimizer.java index aca4e2d5ccf0..8c742cfc98a4 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/filter/TextMatchFilterOptimizer.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/filter/TextMatchFilterOptimizer.java @@ -162,7 +162,7 @@ private Expression getNewFilter(String operator, List newChildren, mergedTextMatchFilter = String.join(SPACE + operator + SPACE, literals); } Expression mergedTextMatchExpression = RequestUtils.getFunctionExpression(FilterKind.TEXT_MATCH.name()); - Expression mergedTextMatchFilterExpression = RequestUtils.getLiteralExpression(mergedTextMatchFilter); + Expression mergedTextMatchFilterExpression = RequestUtils.getLiteralExpression("(" + mergedTextMatchFilter + ")"); mergedTextMatchExpression.getFunctionCall() .setOperands(Arrays.asList(entry.getKey(), mergedTextMatchFilterExpression)); diff --git a/pinot-core/src/test/java/org/apache/pinot/core/query/optimizer/QueryOptimizerTest.java b/pinot-core/src/test/java/org/apache/pinot/core/query/optimizer/QueryOptimizerTest.java index 337075f9746c..848f458742d2 100644 --- a/pinot-core/src/test/java/org/apache/pinot/core/query/optimizer/QueryOptimizerTest.java +++ b/pinot-core/src/test/java/org/apache/pinot/core/query/optimizer/QueryOptimizerTest.java @@ -178,7 +178,7 @@ public void testMergeTextMatchFilter() { List operands = filterFunction.getOperands(); assertEquals(operands.size(), 2); assertEquals(operands.get(0), RequestUtils.getIdentifierExpression("string")); - assertEquals(operands.get(1), RequestUtils.getLiteralExpression("foo AND bar OR baz")); + assertEquals(operands.get(1), RequestUtils.getLiteralExpression("((foo AND 
bar) OR baz)")); } private static Expression getRangeFilterExpression(String column, String rangeString) { @@ -268,32 +268,35 @@ public void testQueries() { // TextMatchFilterOptimizer testQuery("SELECT * FROM testTable WHERE TEXT_MATCH(string, 'foo') AND TEXT_MATCH(string, 'bar')", - "SELECT * FROM testTable WHERE TEXT_MATCH(string, 'foo AND bar')"); + "SELECT * FROM testTable WHERE TEXT_MATCH(string, '(foo AND bar)')"); testQuery("SELECT * FROM testTable WHERE TEXT_MATCH(string, '\"foo bar\"') AND TEXT_MATCH(string, 'baz')", - "SELECT * FROM testTable WHERE TEXT_MATCH(string, '\"foo bar\" AND baz')"); + "SELECT * FROM testTable WHERE TEXT_MATCH(string, '(\"foo bar\" AND baz)')"); testQuery("SELECT * FROM testTable WHERE TEXT_MATCH(string, '\"foo bar\"') AND TEXT_MATCH(string, '/.*ooba.*/')", - "SELECT * FROM testTable WHERE TEXT_MATCH(string, '\"foo bar\" AND /.*ooba.*/')"); + "SELECT * FROM testTable WHERE TEXT_MATCH(string, '(\"foo bar\" AND /.*ooba.*/)')"); testQuery("SELECT * FROM testTable WHERE int = 1 AND TEXT_MATCH(string, 'foo') AND TEXT_MATCH(string, 'bar')", - "SELECT * FROM testTable WHERE int = 1 AND TEXT_MATCH(string, 'foo AND bar')"); + "SELECT * FROM testTable WHERE int = 1 AND TEXT_MATCH(string, '(foo AND bar)')"); testQuery("SELECT * FROM testTable WHERE int = 1 OR TEXT_MATCH(string, 'foo') AND TEXT_MATCH(string, 'bar')", - "SELECT * FROM testTable WHERE int = 1 OR TEXT_MATCH(string, 'foo AND bar')"); + "SELECT * FROM testTable WHERE int = 1 OR TEXT_MATCH(string, '(foo AND bar)')"); testQuery("SELECT * FROM testTable WHERE TEXT_MATCH(string, 'foo') AND NOT TEXT_MATCH(string, 'bar')", - "SELECT * FROM testTable WHERE TEXT_MATCH(string, 'foo AND NOT bar')"); + "SELECT * FROM testTable WHERE TEXT_MATCH(string, '(foo AND NOT bar)')"); testQuery("SELECT * FROM testTable WHERE NOT TEXT_MATCH(string, 'foo') AND TEXT_MATCH(string, 'bar')", - "SELECT * FROM testTable WHERE TEXT_MATCH(string, 'NOT foo AND bar')"); + "SELECT * FROM testTable WHERE 
TEXT_MATCH(string, '(NOT foo AND bar)')"); testQuery("SELECT * FROM testTable WHERE NOT TEXT_MATCH(string, 'foo') AND NOT TEXT_MATCH(string, 'bar')", - "SELECT * FROM testTable WHERE NOT TEXT_MATCH(string, 'foo OR bar')"); + "SELECT * FROM testTable WHERE NOT TEXT_MATCH(string, '(foo OR bar)')"); testQuery("SELECT * FROM testTable WHERE NOT TEXT_MATCH(string, 'foo') OR NOT TEXT_MATCH(string, 'bar')", - "SELECT * FROM testTable WHERE NOT TEXT_MATCH(string, 'foo AND bar')"); + "SELECT * FROM testTable WHERE NOT TEXT_MATCH(string, '(foo AND bar)')"); testQuery("SELECT * FROM testTable WHERE TEXT_MATCH(string, 'foo') AND TEXT_MATCH(string, 'bar') OR " - + "TEXT_MATCH(string, 'baz')", "SELECT * FROM testTable WHERE TEXT_MATCH(string, 'foo AND bar OR baz')"); + + "TEXT_MATCH(string, 'baz')", "SELECT * FROM testTable WHERE TEXT_MATCH(string, '((foo AND bar) OR baz)')"); + testQuery("SELECT * FROM testTable WHERE TEXT_MATCH(string, 'foo') AND (TEXT_MATCH(string, 'bar') OR " + + "TEXT_MATCH(string, 'baz'))", "SELECT * FROM testTable WHERE TEXT_MATCH(string, '(foo AND (bar OR baz))')"); testQuery("SELECT * FROM testTable WHERE TEXT_MATCH(string1, 'foo1') AND TEXT_MATCH(string1, 'bar1') OR " + "TEXT_MATCH(string1, 'baz1') AND TEXT_MATCH(string2, 'foo')", - "SELECT * FROM testTable WHERE TEXT_MATCH(string1, 'foo1 AND bar1') OR TEXT_MATCH(string1, 'baz1') AND " + "SELECT * FROM testTable WHERE TEXT_MATCH(string1, '(foo1 AND bar1)') OR TEXT_MATCH(string1, 'baz1') AND " + "TEXT_MATCH(string2, 'foo')"); testQuery("SELECT * FROM testTable WHERE TEXT_MATCH(string1, 'foo1') AND TEXT_MATCH(string1, 'bar1')" + "AND TEXT_MATCH(string2, 'foo2') AND TEXT_MATCH(string2, 'bar2')", - "SELECT * FROM testTable WHERE TEXT_MATCH(string1, 'foo1 AND bar1') AND TEXT_MATCH(string2, 'foo2 AND bar2')"); + "SELECT * FROM testTable WHERE TEXT_MATCH(string1, '(foo1 AND bar1)') AND TEXT_MATCH(string2, '(foo2 AND " + + "bar2)')"); testCannotOptimizeQuery("SELECT * FROM testTable WHERE TEXT_MATCH(string1, 
'foo') OR TEXT_MATCH(string2, 'bar')"); testCannotOptimizeQuery( "SELECT * FROM testTable WHERE int = 1 AND TEXT_MATCH(string, 'foo') OR TEXT_MATCH(string, 'bar')"); From 2fb30c0c0f74b3a7108f2a5749b1b332156b4fc3 Mon Sep 17 00:00:00 2001 From: Gonzalo Ortiz Jaureguizar Date: Fri, 26 Apr 2024 01:15:42 +0200 Subject: [PATCH 080/102] Add some multi-stage metrics (#12982) --- .../MultiStageBrokerRequestHandler.java | 6 ++++++ .../pinot/common/metrics/BrokerMeter.java | 19 +++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/pinot-broker/src/main/java/org/apache/pinot/broker/requesthandler/MultiStageBrokerRequestHandler.java b/pinot-broker/src/main/java/org/apache/pinot/broker/requesthandler/MultiStageBrokerRequestHandler.java index 01e4884d6a08..01bfe456b5b1 100644 --- a/pinot-broker/src/main/java/org/apache/pinot/broker/requesthandler/MultiStageBrokerRequestHandler.java +++ b/pinot-broker/src/main/java/org/apache/pinot/broker/requesthandler/MultiStageBrokerRequestHandler.java @@ -170,6 +170,12 @@ protected BrokerResponse handleRequest(long requestId, String query, @Nullable S DispatchableSubPlan dispatchableSubPlan = queryPlanResult.getQueryPlan(); Set tableNames = queryPlanResult.getTableNames(); + + _brokerMetrics.addMeteredGlobalValue(BrokerMeter.MULTI_STAGE_QUERIES_GLOBAL, 1); + for (String tableName : tableNames) { + _brokerMetrics.addMeteredTableValue(tableName, BrokerMeter.MULTI_STAGE_QUERIES, 1); + } + requestContext.setTableNames(List.copyOf(tableNames)); // Compilation Time. This includes the time taken for parsing, compiling, create stage plans and assigning workers. 
diff --git a/pinot-common/src/main/java/org/apache/pinot/common/metrics/BrokerMeter.java b/pinot-common/src/main/java/org/apache/pinot/common/metrics/BrokerMeter.java index bb76591ab003..006ee458ebb6 100644 --- a/pinot-common/src/main/java/org/apache/pinot/common/metrics/BrokerMeter.java +++ b/pinot-common/src/main/java/org/apache/pinot/common/metrics/BrokerMeter.java @@ -29,7 +29,26 @@ public enum BrokerMeter implements AbstractMetrics.Meter { UNCAUGHT_POST_EXCEPTIONS("exceptions", true), HEALTHCHECK_BAD_CALLS("healthcheck", true), HEALTHCHECK_OK_CALLS("healthcheck", true), + /** + * Number of queries executed. + *

+ * At this moment this counter does not include queries executed in multi-stage mode. + */ QUERIES("queries", false), + /** + * Number of multi-stage queries that have been started. + *

+ * Unlike {@link #MULTI_STAGE_QUERIES}, this metric is global and not attached to a particular table. + * That means it can be used to know how many multi-stage queries have been started in total. + */ + MULTI_STAGE_QUERIES_GLOBAL("queries", true), + /** + * Number of multi-stage queries that have been started and touched a given table. + *

+ * In case the query touches multiple tables (i.e. using joins), this metric will be incremented for each table, so + * the sum of this metric across all tables should be greater than or equal to {@link #MULTI_STAGE_QUERIES_GLOBAL}. + */ + MULTI_STAGE_QUERIES("queries", false), // These metrics track the exceptions caught during query execution in broker side. // Query rejected by Jersey thread pool executor From 571214daf14780cb4b832e142b2c65a395d86a0a Mon Sep 17 00:00:00 2001 From: Shounak kulkarni Date: Fri, 26 Apr 2024 11:44:13 +0500 Subject: [PATCH 081/102] Metric for count of tables configured with various tier backends (#12940) * Metric for count of tables using various tier backends * avoid multi tiered double counting * remove unused import * Handle metrics deletion * minor * Metric for count of tables using various tier backends * avoid multi tiered double counting * remove unused import * Handle metrics deletion * minor * reformat metric naming * Revert "Merge branch 'tier-backend-metric' of https://github.com/shounakmk219/pinot into tier-backend-metric" This reverts commit 9646c83517db2afd1a0a95805ea0c73f668b2a8e, reversing changes made to a3f3d10eb2e8f42ecef56eff9f345fd34292e61c. 
--- .../configs/controller.yml | 3 ++ .../pinot/common/metrics/AbstractMetrics.java | 4 +++ .../pinot/common/metrics/ControllerGauge.java | 1 + .../helix/SegmentStatusChecker.java | 28 +++++++++++++++++++ 4 files changed, 36 insertions(+) diff --git a/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/controller.yml b/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/controller.yml index a441f714e4a6..a036a130533e 100644 --- a/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/controller.yml +++ b/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/controller.yml @@ -85,6 +85,9 @@ rules: - pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_offlineTableCount_$1" cache: true +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" + name: "pinot_controller_tierBackendTableCount_$1_$2" + cache: true - pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_validateion_$4_$5" cache: true diff --git a/pinot-common/src/main/java/org/apache/pinot/common/metrics/AbstractMetrics.java b/pinot-common/src/main/java/org/apache/pinot/common/metrics/AbstractMetrics.java index ee13493e15db..dfdc2abb0f29 100644 --- a/pinot-common/src/main/java/org/apache/pinot/common/metrics/AbstractMetrics.java +++ b/pinot-common/src/main/java/org/apache/pinot/common/metrics/AbstractMetrics.java @@ -766,6 +766,10 @@ private String composeTableGaugeName(final String tableName, final String key, f return gauge.getGaugeName() + "." + getTableName(tableName) + "." + key; } + public String composePluginGaugeName(String pluginName, Gauge gauge) { + return gauge.getGaugeName() + "." + pluginName; + } + /** * Remove gauge from Pinot metrics. 
* @param gaugeName gauge name diff --git a/pinot-common/src/main/java/org/apache/pinot/common/metrics/ControllerGauge.java b/pinot-common/src/main/java/org/apache/pinot/common/metrics/ControllerGauge.java index 938242ef7885..82c4e666e16a 100644 --- a/pinot-common/src/main/java/org/apache/pinot/common/metrics/ControllerGauge.java +++ b/pinot-common/src/main/java/org/apache/pinot/common/metrics/ControllerGauge.java @@ -65,6 +65,7 @@ public enum ControllerGauge implements AbstractMetrics.Gauge { NUM_MINION_SUBTASKS_ERROR("NumMinionSubtasksError", true), PERCENT_MINION_SUBTASKS_IN_QUEUE("PercentMinionSubtasksInQueue", true), PERCENT_MINION_SUBTASKS_IN_ERROR("PercentMinionSubtasksInError", true), + TIER_BACKEND_TABLE_COUNT("TierBackendTableCount", true), // Pinot controller leader PINOT_CONTROLLER_LEADER("PinotControllerLeader", true), diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/SegmentStatusChecker.java b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/SegmentStatusChecker.java index fa265c4f52f7..4bcdc2f51649 100644 --- a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/SegmentStatusChecker.java +++ b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/SegmentStatusChecker.java @@ -19,6 +19,7 @@ package org.apache.pinot.controller.helix; import com.google.common.annotations.VisibleForTesting; +import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; @@ -50,6 +51,7 @@ import org.apache.pinot.controller.util.TableSizeReader; import org.apache.pinot.spi.config.table.TableConfig; import org.apache.pinot.spi.config.table.TableType; +import org.apache.pinot.spi.config.table.TierConfig; import org.apache.pinot.spi.stream.StreamConfig; import org.apache.pinot.spi.utils.IngestionConfigUtils; import org.apache.pinot.spi.utils.builder.TableNameBuilder; @@ -78,6 +80,7 @@ public class SegmentStatusChecker extends ControllerPeriodicTask _tierBackendGauges = new 
HashSet<>(); /** * Constructs the segment status checker. @@ -135,6 +138,17 @@ protected void postprocess(Context context) { _controllerMetrics.setValueOfGlobalGauge(ControllerGauge.UPSERT_TABLE_COUNT, context._upsertTableCount); _controllerMetrics.setValueOfGlobalGauge(ControllerGauge.DISABLED_TABLE_COUNT, context._disabledTables.size()); + _tierBackendGauges.forEach(_controllerMetrics::removeGauge); + // metric for total number of tables using a particular tier backend + context._tierBackendTableCountMap.forEach((tier, count) -> { + String gaugeName = _controllerMetrics.composePluginGaugeName(tier, ControllerGauge.TIER_BACKEND_TABLE_COUNT); + _tierBackendGauges.add(gaugeName); + _controllerMetrics.setOrUpdateGauge(gaugeName, count); + }); + // metric for total number of tables having tier backend configured + _controllerMetrics.setOrUpdateGauge(ControllerGauge.TIER_BACKEND_TABLE_COUNT.getGaugeName(), + context._tierBackendConfiguredTableCount); + //emit a 0 for tables that are not paused/disabled. This makes alert expressions simpler as we don't have to deal // with missing metrics context._processedTables.forEach(tableNameWithType -> { @@ -171,6 +185,18 @@ private void updateTableConfigMetrics(String tableNameWithType, TableConfig tabl if (tableConfig.isUpsertEnabled()) { context._upsertTableCount++; } + List tierConfigList = tableConfig.getTierConfigsList(); + if (tierConfigList != null && !tierConfigList.isEmpty()) { + Set tierBackendSet = new HashSet<>(tierConfigList.size()); + for (TierConfig config : tierConfigList) { + if (config.getTierBackend() != null) { + tierBackendSet.add(config.getTierBackend()); + } + } + tierBackendSet.forEach(tierBackend -> context._tierBackendTableCountMap.put(tierBackend, + context._tierBackendTableCountMap.getOrDefault(tierBackend, 0) + 1)); + context._tierBackendConfiguredTableCount += tierBackendSet.isEmpty() ? 
0 : 1; + } int replication = tableConfig.getReplication(); _controllerMetrics.setValueOfTableGauge(tableNameWithType, ControllerGauge.REPLICATION_FROM_CONFIG, replication); } @@ -391,6 +417,8 @@ public static final class Context { private int _realTimeTableCount; private int _offlineTableCount; private int _upsertTableCount; + private int _tierBackendConfiguredTableCount; + private Map _tierBackendTableCountMap = new HashMap<>(); private Set _processedTables = new HashSet<>(); private Set _disabledTables = new HashSet<>(); private Set _pausedTables = new HashSet<>(); From e9cba4991ad4542f1c5a62c197533ffbea1e48bb Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 26 Apr 2024 10:16:48 -0700 Subject: [PATCH 082/102] Bump aws.sdk.version from 2.25.38 to 2.25.39 (#13012) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 978320c58331..8a9e80ae6e45 100644 --- a/pom.xml +++ b/pom.xml @@ -172,7 +172,7 @@ 0.15.0 0.4.4 4.2.2 - 2.25.38 + 2.25.39 2.12.7 3.1.12 7.10.1 From cb687834c18d39fc7e59025188ceb68634b56789 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 26 Apr 2024 10:17:07 -0700 Subject: [PATCH 083/102] Bump circe.version from 0.14.6 to 0.14.7 (#13013) --- pinot-connectors/pinot-spark-common/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pinot-connectors/pinot-spark-common/pom.xml b/pinot-connectors/pinot-spark-common/pom.xml index 4da9dd3d5571..f26f6a0de25f 100644 --- a/pinot-connectors/pinot-spark-common/pom.xml +++ b/pinot-connectors/pinot-spark-common/pom.xml @@ -33,7 +33,7 @@ https://pinot.apache.org/ ${basedir}/../.. 
- 0.14.6 + 0.14.7 2.8 2.3.0 3.2.18 From 5fc89ce4530f856756a3ca6357d90deea9365032 Mon Sep 17 00:00:00 2001 From: "Xiaotian (Jackie) Jiang" <17555551+Jackie-Jiang@users.noreply.github.com> Date: Fri, 26 Apr 2024 13:06:25 -0700 Subject: [PATCH 084/102] Support NOT in StarTree Index (#12988) --- .../SortedIndexBasedFilterOperator.java | 6 +- ...BaseDictionaryBasedPredicateEvaluator.java | 55 ++++++-- .../predicate/BasePredicateEvaluator.java | 10 -- .../EqualsPredicateEvaluatorFactory.java | 16 +-- ...TBasedRegexpPredicateEvaluatorFactory.java | 30 ++--- .../InPredicateEvaluatorFactory.java | 37 ++---- .../NotEqualsPredicateEvaluatorFactory.java | 41 +----- .../NotInPredicateEvaluatorFactory.java | 62 +++------ .../filter/predicate/PredicateEvaluator.java | 23 +--- .../filter/predicate/PredicateUtils.java | 34 +++++ .../RangePredicateEvaluatorFactory.java | 96 ++++++++------ .../RegexpLikePredicateEvaluatorFactory.java | 22 +-- .../startree/CompositePredicateEvaluator.java | 17 +-- .../pinot/core/startree/StarTreeUtils.java | 125 ++++++++++++------ .../operator/StarTreeFilterOperator.java | 68 +++++++--- .../core/startree/v2/BaseStarTreeV2Test.java | 20 ++- .../perf/BenchmarkScanDocIdIterators.java | 10 -- 17 files changed, 357 insertions(+), 315 deletions(-) diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/SortedIndexBasedFilterOperator.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/SortedIndexBasedFilterOperator.java index d32c68c7e53e..09144af9ff6d 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/SortedIndexBasedFilterOperator.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/SortedIndexBasedFilterOperator.java @@ -20,7 +20,6 @@ import com.google.common.base.Preconditions; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collections; import java.util.List; import org.apache.pinot.core.common.BlockDocIdSet; @@ -90,10 +89,7 @@ protected 
BlockDocIdSet getNextBlockWithoutNullHandling() { return new SortedDocIdSet(Collections.singletonList(docIdRange)); } } else { - // Sort the dictIds in ascending order so that their respective docIdRanges are adjacent if they are adjacent - Arrays.sort(dictIds); - - // Merge adjacent docIdRanges + // Merge adjacent docIdRanges (dictIds are already sorted) List docIdRanges = new ArrayList<>(); IntPair lastDocIdRange = _sortedIndexReader.getDocIds(dictIds[0]); for (int i = 1; i < numDictIds; i++) { diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/BaseDictionaryBasedPredicateEvaluator.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/BaseDictionaryBasedPredicateEvaluator.java index 92050c3cefa0..18d15d367353 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/BaseDictionaryBasedPredicateEvaluator.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/BaseDictionaryBasedPredicateEvaluator.java @@ -18,17 +18,34 @@ */ package org.apache.pinot.core.operator.filter.predicate; +import it.unimi.dsi.fastutil.ints.IntArrayList; +import it.unimi.dsi.fastutil.ints.IntList; import java.math.BigDecimal; import org.apache.pinot.common.request.context.predicate.Predicate; +import org.apache.pinot.segment.spi.index.reader.Dictionary; import org.apache.pinot.spi.data.FieldSpec.DataType; public abstract class BaseDictionaryBasedPredicateEvaluator extends BasePredicateEvaluator { + protected final Dictionary _dictionary; protected boolean _alwaysTrue; protected boolean _alwaysFalse; + protected int[] _matchingDictIds; + protected int[] _nonMatchingDictIds; - protected BaseDictionaryBasedPredicateEvaluator(Predicate predicate) { + protected BaseDictionaryBasedPredicateEvaluator(Predicate predicate, Dictionary dictionary) { super(predicate); + _dictionary = dictionary; + } + + @Override + public final boolean isDictionaryBased() { + return true; + } + + 
@Override + public DataType getDataType() { + return DataType.INT; } @Override @@ -42,13 +59,33 @@ public boolean isAlwaysFalse() { } @Override - public final boolean isDictionaryBased() { - return true; + public int[] getMatchingDictIds() { + if (_matchingDictIds == null) { + _matchingDictIds = calculateMatchingDictIds(); + } + return _matchingDictIds; } - @Override - public DataType getDataType() { - return DataType.INT; + protected int[] calculateMatchingDictIds() { + IntList matchingDictIds = new IntArrayList(); + int dictionarySize = _dictionary.length(); + for (int dictId = 0; dictId < dictionarySize; dictId++) { + if (applySV(dictId)) { + matchingDictIds.add(dictId); + } + } + return matchingDictIds.toIntArray(); + } + + public int[] getNonMatchingDictIds() { + if (_nonMatchingDictIds == null) { + _nonMatchingDictIds = calculateNonMatchingDictIds(); + } + return _nonMatchingDictIds; + } + + protected int[] calculateNonMatchingDictIds() { + return PredicateUtils.flipDictIds(getMatchingDictIds(), _dictionary.length()); } @Override @@ -106,12 +143,6 @@ public final boolean applyMV(byte[][] values, int length) { throw new UnsupportedOperationException(); } - // NOTE: override it for exclusive predicate - @Override - public int[] getNonMatchingDictIds() { - throw new UnsupportedOperationException(); - } - /** * Apply a single-value entry to the predicate. 
* diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/BasePredicateEvaluator.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/BasePredicateEvaluator.java index 0e04954675c7..407e619ae33d 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/BasePredicateEvaluator.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/BasePredicateEvaluator.java @@ -42,14 +42,4 @@ public Predicate.Type getPredicateType() { public final boolean isExclusive() { return getPredicateType().isExclusive(); } - - @Override - public int getNumMatchingDictIds() { - return getMatchingDictIds().length; - } - - @Override - public int getNumNonMatchingDictIds() { - return getNonMatchingDictIds().length; - } } diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/EqualsPredicateEvaluatorFactory.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/EqualsPredicateEvaluatorFactory.java index 14616b36be78..bf99e6c933e5 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/EqualsPredicateEvaluatorFactory.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/EqualsPredicateEvaluatorFactory.java @@ -62,8 +62,7 @@ public static BaseDictionaryBasedPredicateEvaluator newDictionaryBasedEvaluator( * @param dataType Data type for the column * @return Raw value based EQ predicate evaluator */ - public static EqRawPredicateEvaluator newRawValueBasedEvaluator(EqPredicate eqPredicate, - DataType dataType) { + public static EqRawPredicateEvaluator newRawValueBasedEvaluator(EqPredicate eqPredicate, DataType dataType) { String value = eqPredicate.getValue(); switch (dataType) { case INT: @@ -92,10 +91,9 @@ public static EqRawPredicateEvaluator newRawValueBasedEvaluator(EqPredicate eqPr private static final class DictionaryBasedEqPredicateEvaluator extends 
BaseDictionaryBasedPredicateEvaluator implements IntValue { final int _matchingDictId; - final int[] _matchingDictIds; DictionaryBasedEqPredicateEvaluator(EqPredicate eqPredicate, Dictionary dictionary, DataType dataType) { - super(eqPredicate); + super(eqPredicate, dictionary); String predicateValue = PredicateUtils.getStoredValue(eqPredicate.getValue(), dataType); _matchingDictId = dictionary.indexOf(predicateValue); if (_matchingDictId >= 0) { @@ -109,6 +107,11 @@ private static final class DictionaryBasedEqPredicateEvaluator extends BaseDicti } } + @Override + protected int[] calculateNonMatchingDictIds() { + return PredicateUtils.getDictIds(_dictionary.length(), _matchingDictId); + } + @Override public int getNumMatchingItems() { return 1; @@ -132,11 +135,6 @@ public int applySV(int limit, int[] docIds, int[] values) { return matches; } - @Override - public int[] getMatchingDictIds() { - return _matchingDictIds; - } - @Override public int getInt() { return _matchingDictId; diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/FSTBasedRegexpPredicateEvaluatorFactory.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/FSTBasedRegexpPredicateEvaluatorFactory.java index b1a0559a92a2..11dbe7aa995c 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/FSTBasedRegexpPredicateEvaluatorFactory.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/FSTBasedRegexpPredicateEvaluatorFactory.java @@ -50,30 +50,29 @@ public static BaseDictionaryBasedPredicateEvaluator newFSTBasedEvaluator(RegexpL * Matches regexp query using FSTIndexReader. 
*/ private static class FSTBasedRegexpPredicateEvaluator extends BaseDictionaryBasedPredicateEvaluator { - final Dictionary _dictionary; - final ImmutableRoaringBitmap _dictIds; + final ImmutableRoaringBitmap _matchingDictIdBitmap; public FSTBasedRegexpPredicateEvaluator(RegexpLikePredicate regexpLikePredicate, TextIndexReader fstIndexReader, Dictionary dictionary) { - super(regexpLikePredicate); - _dictionary = dictionary; + super(regexpLikePredicate, dictionary); String searchQuery = RegexpPatternConverterUtils.regexpLikeToLuceneRegExp(regexpLikePredicate.getValue()); - _dictIds = fstIndexReader.getDictIds(searchQuery); - } - - @Override - public boolean isAlwaysFalse() { - return _dictIds.isEmpty(); + _matchingDictIdBitmap = fstIndexReader.getDictIds(searchQuery); + int numMatchingDictIds = _matchingDictIdBitmap.getCardinality(); + if (numMatchingDictIds == 0) { + _alwaysFalse = true; + } else if (dictionary.length() == numMatchingDictIds) { + _alwaysTrue = true; + } } @Override - public boolean isAlwaysTrue() { - return _dictIds.getCardinality() == _dictionary.length(); + protected int[] calculateMatchingDictIds() { + return _matchingDictIdBitmap.toArray(); } @Override public boolean applySV(int dictId) { - return _dictIds.contains(dictId); + return _matchingDictIdBitmap.contains(dictId); } @Override @@ -88,10 +87,5 @@ public int applySV(int limit, int[] docIds, int[] values) { } return matches; } - - @Override - public int[] getMatchingDictIds() { - return _dictIds.toArray(); - } } } diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/InPredicateEvaluatorFactory.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/InPredicateEvaluatorFactory.java index 9ad0a78014c2..5ebc9a1a6beb 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/InPredicateEvaluatorFactory.java +++ 
b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/InPredicateEvaluatorFactory.java @@ -71,8 +71,7 @@ public static BaseDictionaryBasedPredicateEvaluator newDictionaryBasedEvaluator( * @param dataType Data type for the column * @return Raw value based IN predicate evaluator */ - public static InRawPredicateEvaluator newRawValueBasedEvaluator(InPredicate inPredicate, - DataType dataType) { + public static InRawPredicateEvaluator newRawValueBasedEvaluator(InPredicate inPredicate, DataType dataType) { switch (dataType) { case INT: { int[] intValues = inPredicate.getIntValues(); @@ -157,42 +156,34 @@ public static InRawPredicateEvaluator newRawValueBasedEvaluator(InPredicate inPr private static final class DictionaryBasedInPredicateEvaluator extends BaseDictionaryBasedPredicateEvaluator { final IntSet _matchingDictIdSet; - final int _numMatchingDictIds; - int[] _matchingDictIds; DictionaryBasedInPredicateEvaluator(InPredicate inPredicate, Dictionary dictionary, DataType dataType, @Nullable QueryContext queryContext) { - super(inPredicate); + super(inPredicate, dictionary); _matchingDictIdSet = PredicateUtils.getDictIdSet(inPredicate, dictionary, dataType, queryContext); - _numMatchingDictIds = _matchingDictIdSet.size(); - if (_numMatchingDictIds == 0) { + int numMatchingDictIds = _matchingDictIdSet.size(); + if (numMatchingDictIds == 0) { _alwaysFalse = true; - } else if (dictionary.length() == _numMatchingDictIds) { + } else if (dictionary.length() == numMatchingDictIds) { _alwaysTrue = true; } } @Override - public boolean applySV(int dictId) { - return _matchingDictIdSet.contains(dictId); - } - - @Override - public int getNumMatchingDictIds() { - return _numMatchingDictIds; + protected int[] calculateMatchingDictIds() { + int[] matchingDictIds = _matchingDictIdSet.toIntArray(); + Arrays.sort(matchingDictIds); + return matchingDictIds; } @Override public int getNumMatchingItems() { - return getNumMatchingDictIds(); + return 
_matchingDictIdSet.size(); } @Override - public int[] getMatchingDictIds() { - if (_matchingDictIds == null) { - _matchingDictIds = _matchingDictIdSet.toIntArray(); - } - return _matchingDictIds; + public boolean applySV(int dictId) { + return _matchingDictIdSet.contains(dictId); } @Override @@ -477,9 +468,7 @@ public boolean applySV(byte[] value) { @Override public R accept(MultiValueVisitor visitor) { - byte[][] bytes = _matchingValues.stream() - .map(ByteArray::getBytes) - .toArray(byte[][]::new); + byte[][] bytes = _matchingValues.stream().map(ByteArray::getBytes).toArray(byte[][]::new); return visitor.visitBytes(bytes); } } diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/NotEqualsPredicateEvaluatorFactory.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/NotEqualsPredicateEvaluatorFactory.java index 54ce7df58cb1..fdcff7c579c3 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/NotEqualsPredicateEvaluatorFactory.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/NotEqualsPredicateEvaluatorFactory.java @@ -58,8 +58,7 @@ public static BaseDictionaryBasedPredicateEvaluator newDictionaryBasedEvaluator( * @param dataType Data type for the column * @return Raw value based NOT_EQ predicate evaluator */ - public static NeqRawPredicateEvaluator newRawValueBasedEvaluator(NotEqPredicate notEqPredicate, - DataType dataType) { + public static NeqRawPredicateEvaluator newRawValueBasedEvaluator(NotEqPredicate notEqPredicate, DataType dataType) { String value = notEqPredicate.getValue(); switch (dataType) { case INT: @@ -87,12 +86,9 @@ public static NeqRawPredicateEvaluator newRawValueBasedEvaluator(NotEqPredicate private static final class DictionaryBasedNeqPredicateEvaluator extends BaseDictionaryBasedPredicateEvaluator { final int _nonMatchingDictId; - final int[] _nonMatchingDictIds; - final Dictionary _dictionary; - int[] _matchingDictIds; 
DictionaryBasedNeqPredicateEvaluator(NotEqPredicate notEqPredicate, Dictionary dictionary, DataType dataType) { - super(notEqPredicate); + super(notEqPredicate, dictionary); String predicateValue = PredicateUtils.getStoredValue(notEqPredicate.getValue(), dataType); _nonMatchingDictId = dictionary.indexOf(predicateValue); if (_nonMatchingDictId >= 0) { @@ -104,7 +100,11 @@ private static final class DictionaryBasedNeqPredicateEvaluator extends BaseDict _nonMatchingDictIds = new int[0]; _alwaysTrue = true; } - _dictionary = dictionary; + } + + @Override + protected int[] calculateMatchingDictIds() { + return PredicateUtils.getDictIds(_dictionary.length(), _nonMatchingDictId); } @Override @@ -129,33 +129,6 @@ public int applySV(int limit, int[] docIds, int[] values) { } return matches; } - - @Override - public int[] getMatchingDictIds() { - if (_matchingDictIds == null) { - int dictionarySize = _dictionary.length(); - if (_nonMatchingDictId >= 0) { - _matchingDictIds = new int[dictionarySize - 1]; - int index = 0; - for (int dictId = 0; dictId < dictionarySize; dictId++) { - if (dictId != _nonMatchingDictId) { - _matchingDictIds[index++] = dictId; - } - } - } else { - _matchingDictIds = new int[dictionarySize]; - for (int dictId = 0; dictId < dictionarySize; dictId++) { - _matchingDictIds[dictId] = dictId; - } - } - } - return _matchingDictIds; - } - - @Override - public int[] getNonMatchingDictIds() { - return _nonMatchingDictIds; - } } public static abstract class NeqRawPredicateEvaluator extends BaseRawValueBasedPredicateEvaluator { diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/NotInPredicateEvaluatorFactory.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/NotInPredicateEvaluatorFactory.java index 5fe7b51d3571..4682225aa7e1 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/NotInPredicateEvaluatorFactory.java +++ 
b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/NotInPredicateEvaluatorFactory.java @@ -71,8 +71,7 @@ public static BaseDictionaryBasedPredicateEvaluator newDictionaryBasedEvaluator( * @param dataType Data type for the column * @return Raw value based NOT_IN predicate evaluator */ - public static NotInRawPredicateEvaluator newRawValueBasedEvaluator(NotInPredicate notInPredicate, - DataType dataType) { + public static NotInRawPredicateEvaluator newRawValueBasedEvaluator(NotInPredicate notInPredicate, DataType dataType) { switch (dataType) { case INT: { int[] intValues = notInPredicate.getIntValues(); @@ -157,27 +156,34 @@ public static NotInRawPredicateEvaluator newRawValueBasedEvaluator(NotInPredicat public static final class DictionaryBasedNotInPredicateEvaluator extends BaseDictionaryBasedPredicateEvaluator { final IntSet _nonMatchingDictIdSet; - final int _numNonMatchingDictIds; - final Dictionary _dictionary; - int[] _matchingDictIds; - int[] _nonMatchingDictIds; DictionaryBasedNotInPredicateEvaluator(NotInPredicate notInPredicate, Dictionary dictionary, DataType dataType, @Nullable QueryContext queryContext) { - super(notInPredicate); + super(notInPredicate, dictionary); _nonMatchingDictIdSet = PredicateUtils.getDictIdSet(notInPredicate, dictionary, dataType, queryContext); - _numNonMatchingDictIds = _nonMatchingDictIdSet.size(); - if (_numNonMatchingDictIds == 0) { + int numNonMatchingDictIds = _nonMatchingDictIdSet.size(); + if (numNonMatchingDictIds == 0) { _alwaysTrue = true; - } else if (dictionary.length() == _numNonMatchingDictIds) { + } else if (dictionary.length() == numNonMatchingDictIds) { _alwaysFalse = true; } - _dictionary = dictionary; + } + + @Override + protected int[] calculateMatchingDictIds() { + return PredicateUtils.flipDictIds(getNonMatchingDictIds(), _dictionary.length()); + } + + @Override + protected int[] calculateNonMatchingDictIds() { + int[] nonMatchingDictIds = _nonMatchingDictIdSet.toIntArray(); + 
Arrays.sort(nonMatchingDictIds); + return nonMatchingDictIds; } @Override public int getNumMatchingItems() { - return -_numNonMatchingDictIds; + return -_nonMatchingDictIdSet.size(); } @Override @@ -197,34 +203,6 @@ public int applySV(int limit, int[] docIds, int[] values) { } return matches; } - - @Override - public int[] getMatchingDictIds() { - if (_matchingDictIds == null) { - int dictionarySize = _dictionary.length(); - _matchingDictIds = new int[dictionarySize - _numNonMatchingDictIds]; - int index = 0; - for (int dictId = 0; dictId < dictionarySize; dictId++) { - if (!_nonMatchingDictIdSet.contains(dictId)) { - _matchingDictIds[index++] = dictId; - } - } - } - return _matchingDictIds; - } - - @Override - public int getNumNonMatchingDictIds() { - return _numNonMatchingDictIds; - } - - @Override - public int[] getNonMatchingDictIds() { - if (_nonMatchingDictIds == null) { - _nonMatchingDictIds = _nonMatchingDictIdSet.toIntArray(); - } - return _nonMatchingDictIds; - } } public static abstract class NotInRawPredicateEvaluator extends BaseRawValueBasedPredicateEvaluator { @@ -491,9 +469,7 @@ public boolean applySV(byte[] value) { @Override public R accept(MultiValueVisitor visitor) { - byte[][] bytes = _nonMatchingValues.stream() - .map(ByteArray::getBytes) - .toArray(byte[][]::new); + byte[][] bytes = _nonMatchingValues.stream().map(ByteArray::getBytes).toArray(byte[][]::new); return visitor.visitBytes(bytes); } } diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/PredicateEvaluator.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/PredicateEvaluator.java index 889e28710794..09b420f48f04 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/PredicateEvaluator.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/PredicateEvaluator.java @@ -102,35 +102,24 @@ default int applySV(int limit, int[] docIds, int[] values) { boolean 
applyMV(int[] values, int length); /** - * APIs for dictionary based predicate evaluator - */ - - /** - * return the number of matching items specified by predicate - * negative number indicates exclusive (not eq, not in) match - * return {@code Integer.MIN_VALUE} for not applicable + * Get the number of matching items. Negative number indicates exclusive (e.g. NOT_EQ, NOT_IN) match. Returns + * {@code Integer.MIN_VALUE} if not applicable. */ default int getNumMatchingItems() { return Integer.MIN_VALUE; - }; + } /** - * Get the number of matching dictionary ids. + * APIs for dictionary based predicate evaluator */ - int getNumMatchingDictIds(); /** - * Get the matching dictionary ids. + * Get the matching dictionary ids. The returned ids should be sorted. */ int[] getMatchingDictIds(); /** - * Get the number of non-matching dictionary ids. - */ - int getNumNonMatchingDictIds(); - - /** - * Get the non-matching dictionary ids. + * Get the non-matching dictionary ids. The returned ids should be sorted. 
*/ int[] getNonMatchingDictIds(); diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/PredicateUtils.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/PredicateUtils.java index 9135a85f785b..c7b93cf086c7 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/PredicateUtils.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/PredicateUtils.java @@ -190,4 +190,38 @@ public static IntSet getDictIdSet(BaseInPredicate inPredicate, Dictionary dictio } return dictIdSet; } + + public static int[] flipDictIds(int[] dictIds, int length) { + int numDictIds = dictIds.length; + int[] flippedDictIds = new int[length - numDictIds]; + int flippedDictIdsIndex = 0; + int dictIdsIndex = 0; + for (int dictId = 0; dictId < length; dictId++) { + if (dictIdsIndex < numDictIds && dictId == dictIds[dictIdsIndex]) { + dictIdsIndex++; + } else { + flippedDictIds[flippedDictIdsIndex++] = dictId; + } + } + return flippedDictIds; + } + + public static int[] getDictIds(int length, int excludeId) { + int[] dictIds; + if (excludeId >= 0) { + dictIds = new int[length - 1]; + int index = 0; + for (int dictId = 0; dictId < length; dictId++) { + if (dictId != excludeId) { + dictIds[index++] = dictId; + } + } + } else { + dictIds = new int[length]; + for (int dictId = 0; dictId < length; dictId++) { + dictIds[dictId] = dictId; + } + } + return dictIds; + } } diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/RangePredicateEvaluatorFactory.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/RangePredicateEvaluatorFactory.java index ca8e1f126a2c..e9bd3b4b0a7d 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/RangePredicateEvaluatorFactory.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/RangePredicateEvaluatorFactory.java @@ -122,11 +122,10 @@ 
public static final class SortedDictionaryBasedRangePredicateEvaluator extends B // Exclusive final int _endDictId; final int _numMatchingDictIds; - int[] _matchingDictIds; SortedDictionaryBasedRangePredicateEvaluator(RangePredicate rangePredicate, Dictionary dictionary, DataType dataType) { - super(rangePredicate); + super(rangePredicate, dictionary); String lowerBound = rangePredicate.getLowerBound(); String upperBound = rangePredicate.getUpperBound(); boolean lowerInclusive = rangePredicate.isLowerInclusive(); @@ -161,8 +160,8 @@ public static final class SortedDictionaryBasedRangePredicateEvaluator extends B } } - _numMatchingDictIds = _endDictId - _startDictId; - if (_numMatchingDictIds <= 0) { + _numMatchingDictIds = Integer.max(_endDictId - _startDictId, 0); + if (_numMatchingDictIds == 0) { _alwaysFalse = true; } else if (dictionary.length() == _numMatchingDictIds) { _alwaysTrue = true; @@ -178,46 +177,61 @@ public int getEndDictId() { } @Override - public boolean applySV(int dictId) { - return _startDictId <= dictId && _endDictId > dictId; + protected int[] calculateMatchingDictIds() { + if (_numMatchingDictIds == 0) { + return new int[0]; + } else { + int[] matchingDictIds = new int[_numMatchingDictIds]; + for (int i = 0; i < _numMatchingDictIds; i++) { + matchingDictIds[i] = _startDictId + i; + } + return matchingDictIds; + } } @Override - public int applySV(int limit, int[] docIds, int[] dictIds) { - // reimplemented here to ensure applySV can be inlined - int matches = 0; - for (int i = 0; i < limit; i++) { - int dictId = dictIds[i]; - if (applySV(dictId)) { - docIds[matches++] = docIds[i]; + protected int[] calculateNonMatchingDictIds() { + int dictionarySize = _dictionary.length(); + if (_numMatchingDictIds == 0) { + int[] nonMatchingDictIds = new int[dictionarySize]; + for (int i = 0; i < dictionarySize; i++) { + nonMatchingDictIds[i] = i; + } + return nonMatchingDictIds; + } else { + int[] nonMatchingDictIds = new int[dictionarySize - 
_numMatchingDictIds]; + int index = 0; + for (int i = 0; i < _startDictId; i++) { + nonMatchingDictIds[index++] = i; + } + for (int i = _endDictId; i < dictionarySize; i++) { + nonMatchingDictIds[index++] = i; } + return nonMatchingDictIds; } - return matches; } @Override - public int getNumMatchingDictIds() { + public int getNumMatchingItems() { return _numMatchingDictIds; } @Override - public int[] getMatchingDictIds() { - if (_matchingDictIds == null) { - if (_numMatchingDictIds <= 0) { - _matchingDictIds = new int[0]; - } else { - _matchingDictIds = new int[_numMatchingDictIds]; - for (int i = 0; i < _numMatchingDictIds; i++) { - _matchingDictIds[i] = _startDictId + i; - } - } - } - return _matchingDictIds; + public boolean applySV(int dictId) { + return _startDictId <= dictId && _endDictId > dictId; } @Override - public int getNumMatchingItems() { - return Math.max(_numMatchingDictIds, 0); + public int applySV(int limit, int[] docIds, int[] dictIds) { + // reimplemented here to ensure applySV can be inlined + int matches = 0; + for (int i = 0; i < limit; i++) { + int dictId = dictIds[i]; + if (applySV(dictId)) { + docIds[matches++] = docIds[i]; + } + } + return matches; } @Override @@ -238,15 +252,13 @@ private static final class UnsortedDictionaryBasedRangePredicateEvaluator // TODO: Tune this threshold private static final int DICT_ID_SET_BASED_CARDINALITY_THRESHOLD = 1000; - final Dictionary _dictionary; final boolean _dictIdSetBased; final IntSet _matchingDictIdSet; final BaseRawValueBasedPredicateEvaluator _rawValueBasedEvaluator; UnsortedDictionaryBasedRangePredicateEvaluator(RangePredicate rangePredicate, Dictionary dictionary, DataType dataType) { - super(rangePredicate); - _dictionary = dictionary; + super(rangePredicate, dictionary); int cardinality = dictionary.length(); if (cardinality < DICT_ID_SET_BASED_CARDINALITY_THRESHOLD) { _dictIdSetBased = true; @@ -274,6 +286,16 @@ private static final class UnsortedDictionaryBasedRangePredicateEvaluator } 
} + @Override + public int[] getMatchingDictIds() { + throw new UnsupportedOperationException(); + } + + @Override + public int getNumMatchingItems() { + return _matchingDictIdSet != null ? _matchingDictIdSet.size() : Integer.MIN_VALUE; + } + @Override public boolean applySV(int dictId) { if (_dictIdSetBased) { @@ -299,16 +321,6 @@ public boolean applySV(int dictId) { } } } - - @Override - public int getNumMatchingItems() { - return _matchingDictIdSet == null ? super.getNumMatchingItems() : _matchingDictIdSet.size(); - } - - @Override - public int[] getMatchingDictIds() { - throw new UnsupportedOperationException(); - } } private static final class IntRawValueBasedRangePredicateEvaluator extends BaseRawValueBasedPredicateEvaluator diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/RegexpLikePredicateEvaluatorFactory.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/RegexpLikePredicateEvaluatorFactory.java index 82477c35600f..09ffdb62f0ac 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/RegexpLikePredicateEvaluatorFactory.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/RegexpLikePredicateEvaluatorFactory.java @@ -19,8 +19,6 @@ package org.apache.pinot.core.operator.filter.predicate; import com.google.common.base.Preconditions; -import it.unimi.dsi.fastutil.ints.IntArrayList; -import it.unimi.dsi.fastutil.ints.IntList; import java.util.regex.Matcher; import org.apache.pinot.common.request.context.predicate.RegexpLikePredicate; import org.apache.pinot.segment.spi.index.reader.Dictionary; @@ -66,12 +64,9 @@ private static final class DictionaryBasedRegexpLikePredicateEvaluator extends B // Reuse matcher to avoid excessive allocation. This is safe to do because the evaluator is always used // within the scope of a single thread. 
final Matcher _matcher; - final Dictionary _dictionary; - int[] _matchingDictIds; public DictionaryBasedRegexpLikePredicateEvaluator(RegexpLikePredicate regexpLikePredicate, Dictionary dictionary) { - super(regexpLikePredicate); - _dictionary = dictionary; + super(regexpLikePredicate, dictionary); _matcher = regexpLikePredicate.getPattern().matcher(""); } @@ -92,21 +87,6 @@ public int applySV(int limit, int[] docIds, int[] values) { } return matches; } - - @Override - public int[] getMatchingDictIds() { - if (_matchingDictIds == null) { - IntList matchingDictIds = new IntArrayList(); - int dictionarySize = _dictionary.length(); - for (int dictId = 0; dictId < dictionarySize; dictId++) { - if (applySV(dictId)) { - matchingDictIds.add(dictId); - } - } - _matchingDictIds = matchingDictIds.toIntArray(); - } - return _matchingDictIds; - } } private static final class RawValueBasedRegexpLikePredicateEvaluator extends BaseRawValueBasedPredicateEvaluator { diff --git a/pinot-core/src/main/java/org/apache/pinot/core/startree/CompositePredicateEvaluator.java b/pinot-core/src/main/java/org/apache/pinot/core/startree/CompositePredicateEvaluator.java index 9424bc0cdbd1..1725364aeeec 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/startree/CompositePredicateEvaluator.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/startree/CompositePredicateEvaluator.java @@ -18,6 +18,7 @@ */ package org.apache.pinot.core.startree; +import it.unimi.dsi.fastutil.objects.ObjectBooleanPair; import java.util.List; import org.apache.pinot.core.operator.filter.predicate.PredicateEvaluator; @@ -26,19 +27,19 @@ * Represents a composite predicate. * * A composite predicate evaluator represents a single predicate evaluator or multiple predicate evaluators conjoined - * with OR. - * Consider the given predicate: (d1 > 10 OR d1 < 50). A composite predicate will represent two predicates -- (d1 > 10) - * and (d1 < 50) and represent that they are related by the operator OR. + * with OR. 
Each predicate evaluator is associated with a boolean value indicating whether the predicate is negated. + * Consider the given predicate: (d1 > 10 OR NOT d1 > 50). A composite predicate will represent two predicates: + * (d1 > 10) and NOT(d1 > 50) and represent that they are related by the operator OR. */ public class CompositePredicateEvaluator { - private final List _predicateEvaluators; + private final List> _predicateEvaluators; - public CompositePredicateEvaluator(List predicateEvaluators) { + public CompositePredicateEvaluator(List> predicateEvaluators) { assert !predicateEvaluators.isEmpty(); _predicateEvaluators = predicateEvaluators; } - public List getPredicateEvaluators() { + public List> getPredicateEvaluators() { return _predicateEvaluators; } @@ -47,8 +48,8 @@ public List getPredicateEvaluators() { * predicate evaluator, {@code false} otherwise. */ public boolean apply(int dictId) { - for (PredicateEvaluator predicateEvaluator : _predicateEvaluators) { - if (predicateEvaluator.applySV(dictId)) { + for (ObjectBooleanPair predicateEvaluator : _predicateEvaluators) { + if (predicateEvaluator.left().applySV(dictId) != predicateEvaluator.rightBoolean()) { return true; } } diff --git a/pinot-core/src/main/java/org/apache/pinot/core/startree/StarTreeUtils.java b/pinot-core/src/main/java/org/apache/pinot/core/startree/StarTreeUtils.java index f79070ae9f6a..68bd26e7801a 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/startree/StarTreeUtils.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/startree/StarTreeUtils.java @@ -18,6 +18,7 @@ */ package org.apache.pinot.core.startree; +import it.unimi.dsi.fastutil.objects.ObjectBooleanPair; import java.util.ArrayDeque; import java.util.ArrayList; import java.util.Collections; @@ -75,13 +76,13 @@ public static AggregationFunctionColumnPair[] extractAggregationFunctionPairs( } /** - * Extracts a map from the column to a list of {@link PredicateEvaluator}s for it. 
Returns {@code null} if the filter - * cannot be solved by the star-tree. + * Extracts a map from the column to a list of {@link CompositePredicateEvaluator}s for it. Returns {@code null} if + * the filter cannot be solved by the star-tree. * * A predicate can be simple (d1 > 10) or composite (d1 > 10 AND d2 < 50) or multi levelled - * (d1 > 50 AND (d2 > 10 OR d2 < 35)). + * (d1 > 50 AND (d2 > 10 OR NOT d2 > 35)). * This method represents a list of CompositePredicates per dimension. For each dimension, all CompositePredicates in - * the list are implicitly ANDed together. Any OR predicates are nested within a CompositePredicate. + * the list are implicitly ANDed together. Any OR and NOT predicates are nested within a CompositePredicate. * * A map from predicates to their evaluators is passed in to accelerate the computation. */ @@ -102,21 +103,50 @@ public static Map> extractPredicateEva queue.addAll(filterNode.getChildren()); break; case OR: - Pair> pair = + Pair pair = isOrClauseValidForStarTree(indexSegment, filterNode, predicateEvaluatorMapping); if (pair == null) { return null; } - List predicateEvaluators = pair.getRight(); - // NOTE: Empty list means always true - if (!predicateEvaluators.isEmpty()) { - predicateEvaluatorsMap.computeIfAbsent(pair.getLeft(), k -> new ArrayList<>()) - .add(new CompositePredicateEvaluator(predicateEvaluators)); + // NOTE: Null identifier means always true + if (pair.getLeft() != null) { + predicateEvaluatorsMap.computeIfAbsent(pair.getLeft(), k -> new ArrayList<>()).add(pair.getRight()); } break; case NOT: - // TODO: Support NOT in star-tree - return null; + boolean negated = true; + FilterContext negatedChild = filterNode.getChildren().get(0); + while (true) { + FilterContext.Type type = negatedChild.getType(); + if (type == FilterContext.Type.PREDICATE) { + Predicate predicate = negatedChild.getPredicate(); + PredicateEvaluator predicateEvaluator = + getPredicateEvaluator(indexSegment, predicate, predicateEvaluatorMapping); + 
// Do not use star-tree when the predicate cannot be solved with star-tree + if (predicateEvaluator == null) { + return null; + } + // Do not use star-tree when the predicate is always false + if ((predicateEvaluator.isAlwaysTrue() && negated) || (predicateEvaluator.isAlwaysFalse() && !negated)) { + return null; + } + // Skip adding always true predicate + if ((predicateEvaluator.isAlwaysTrue() && !negated) || (predicateEvaluator.isAlwaysFalse() && negated)) { + break; + } + predicateEvaluatorsMap.computeIfAbsent(predicate.getLhs().getIdentifier(), k -> new ArrayList<>()) + .add(new CompositePredicateEvaluator(List.of(ObjectBooleanPair.of(predicateEvaluator, negated)))); + break; + } + if (type == FilterContext.Type.NOT) { + negated = !negated; + negatedChild = negatedChild.getChildren().get(0); + continue; + } + // Do not allow nested AND/OR under NOT + return null; + } + break; case PREDICATE: Predicate predicate = filterNode.getPredicate(); PredicateEvaluator predicateEvaluator = @@ -127,7 +157,7 @@ public static Map> extractPredicateEva } if (!predicateEvaluator.isAlwaysTrue()) { predicateEvaluatorsMap.computeIfAbsent(predicate.getLhs().getIdentifier(), k -> new ArrayList<>()) - .add(new CompositePredicateEvaluator(Collections.singletonList(predicateEvaluator))); + .add(new CompositePredicateEvaluator(List.of(ObjectBooleanPair.of(predicateEvaluator, false)))); } break; default: @@ -177,70 +207,91 @@ public static boolean isFitForStarTree(StarTreeV2Metadata starTreeV2Metadata, * StarTree supports OR predicates on a single dimension only (d1 < 10 OR d1 > 50). * * @return The pair of single identifier and predicate evaluators applied to it if true; {@code null} if the OR clause - * cannot be solved with star-tree; empty predicate evaluator list if the OR clause always evaluates to true. + * cannot be solved with star-tree; a pair of nulls if the OR clause always evaluates to true. 
*/ @Nullable - private static Pair> isOrClauseValidForStarTree(IndexSegment indexSegment, + private static Pair isOrClauseValidForStarTree(IndexSegment indexSegment, FilterContext filter, List> predicateEvaluatorMapping) { assert filter.getType() == FilterContext.Type.OR; - List predicates = new ArrayList<>(); + List> predicates = new ArrayList<>(); if (!extractOrClausePredicates(filter, predicates)) { return null; } String identifier = null; - List predicateEvaluators = new ArrayList<>(); - for (Predicate predicate : predicates) { - PredicateEvaluator predicateEvaluator = getPredicateEvaluator(indexSegment, predicate, predicateEvaluatorMapping); + List> predicateEvaluators = new ArrayList<>(); + for (ObjectBooleanPair predicate : predicates) { + PredicateEvaluator predicateEvaluator = + getPredicateEvaluator(indexSegment, predicate.left(), predicateEvaluatorMapping); if (predicateEvaluator == null) { // The predicate cannot be solved with star-tree return null; } - if (predicateEvaluator.isAlwaysTrue()) { - // Use empty predicate evaluators to represent always true - return Pair.of(null, Collections.emptyList()); + boolean negated = predicate.rightBoolean(); + // Use a pair of null values to represent always true + if ((predicateEvaluator.isAlwaysTrue() && !negated) || (predicateEvaluator.isAlwaysFalse() && negated)) { + return Pair.of(null, null); } - if (!predicateEvaluator.isAlwaysFalse()) { - String predicateIdentifier = predicate.getLhs().getIdentifier(); - if (identifier == null) { - identifier = predicateIdentifier; - } else { - if (!identifier.equals(predicateIdentifier)) { - // The predicates are applied to multiple columns - return null; - } + // Skip the always false predicate + if ((predicateEvaluator.isAlwaysTrue() && negated) || (predicateEvaluator.isAlwaysFalse() && !negated)) { + continue; + } + String predicateIdentifier = predicate.left().getLhs().getIdentifier(); + if (identifier == null) { + identifier = predicateIdentifier; + } else { + if 
(!identifier.equals(predicateIdentifier)) { + // The predicates are applied to multiple columns + return null; } - predicateEvaluators.add(predicateEvaluator); } + predicateEvaluators.add(ObjectBooleanPair.of(predicateEvaluator, negated)); } // When all predicates are always false, do not use star-tree if (predicateEvaluators.isEmpty()) { return null; } - return Pair.of(identifier, predicateEvaluators); + return Pair.of(identifier, new CompositePredicateEvaluator(predicateEvaluators)); } /** * Extracts the predicates under the given OR clause, returns {@code false} if there is nested AND or NOT under OR * clause. - * TODO: Support NOT in star-tree */ - private static boolean extractOrClausePredicates(FilterContext filter, List predicates) { + private static boolean extractOrClausePredicates(FilterContext filter, + List> predicates) { assert filter.getType() == FilterContext.Type.OR; for (FilterContext child : filter.getChildren()) { switch (child.getType()) { case AND: - case NOT: return false; case OR: if (!extractOrClausePredicates(child, predicates)) { return false; } break; + case NOT: + boolean negated = true; + FilterContext negatedChild = child.getChildren().get(0); + while (true) { + FilterContext.Type type = negatedChild.getType(); + if (type == FilterContext.Type.PREDICATE) { + predicates.add(ObjectBooleanPair.of(negatedChild.getPredicate(), negated)); + break; + } + if (type == FilterContext.Type.NOT) { + negated = !negated; + negatedChild = negatedChild.getChildren().get(0); + continue; + } + // Do not allow nested AND/OR under NOT + return false; + } + break; case PREDICATE: - predicates.add(child.getPredicate()); + predicates.add(ObjectBooleanPair.of(child.getPredicate(), false)); break; default: throw new IllegalStateException(); diff --git a/pinot-core/src/main/java/org/apache/pinot/core/startree/operator/StarTreeFilterOperator.java b/pinot-core/src/main/java/org/apache/pinot/core/startree/operator/StarTreeFilterOperator.java index 
583dad5e0ac8..107390fecd60 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/startree/operator/StarTreeFilterOperator.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/startree/operator/StarTreeFilterOperator.java @@ -18,9 +18,11 @@ */ package org.apache.pinot.core.startree.operator; +import it.unimi.dsi.fastutil.ints.IntImmutableList; import it.unimi.dsi.fastutil.ints.IntIterator; import it.unimi.dsi.fastutil.ints.IntOpenHashSet; import it.unimi.dsi.fastutil.ints.IntSet; +import it.unimi.dsi.fastutil.objects.ObjectBooleanPair; import java.util.ArrayDeque; import java.util.ArrayList; import java.util.Collections; @@ -175,19 +177,17 @@ private BaseFilterOperator getFilterOperator() { _predicateEvaluatorsMap.get(remainingPredicateColumn); DataSource dataSource = _starTreeV2.getDataSource(remainingPredicateColumn); for (CompositePredicateEvaluator compositePredicateEvaluator : compositePredicateEvaluators) { - List predicateEvaluators = compositePredicateEvaluator.getPredicateEvaluators(); + List> predicateEvaluators = + compositePredicateEvaluator.getPredicateEvaluators(); int numPredicateEvaluators = predicateEvaluators.size(); if (numPredicateEvaluators == 1) { // Single predicate evaluator - childFilterOperators.add( - FilterOperatorUtils.getLeafFilterOperator(_queryContext, predicateEvaluators.get(0), dataSource, - numDocs)); + childFilterOperators.add(getFilterOperator(predicateEvaluators.get(0), dataSource, numDocs)); } else { // Predicate evaluators conjoined with OR List orChildFilterOperators = new ArrayList<>(numPredicateEvaluators); - for (PredicateEvaluator childPredicateEvaluator : predicateEvaluators) { - orChildFilterOperators.add( - FilterOperatorUtils.getLeafFilterOperator(_queryContext, childPredicateEvaluator, dataSource, numDocs)); + for (ObjectBooleanPair childPredicateEvaluator : predicateEvaluators) { + orChildFilterOperators.add(getFilterOperator(childPredicateEvaluator, dataSource, numDocs)); } childFilterOperators.add( 
FilterOperatorUtils.getOrFilterOperator(_queryContext, orChildFilterOperators, numDocs)); @@ -198,6 +198,17 @@ private BaseFilterOperator getFilterOperator() { return FilterOperatorUtils.getAndFilterOperator(_queryContext, childFilterOperators, numDocs); } + private BaseFilterOperator getFilterOperator(ObjectBooleanPair predicateEvaluator, + DataSource dataSource, int numDocs) { + BaseFilterOperator leafFilterOperator = + FilterOperatorUtils.getLeafFilterOperator(_queryContext, predicateEvaluator.left(), dataSource, numDocs); + if (predicateEvaluator.rightBoolean()) { + return FilterOperatorUtils.getNotFilterOperator(_queryContext, leafFilterOperator, numDocs); + } else { + return leafFilterOperator; + } + } + /** * Helper method to traverse the star tree, get matching documents and keep track of all the predicate columns that * are not matched. Returns {@code null} if no matching dictionary id found for a column (i.e. the result for the @@ -386,24 +397,28 @@ public int compare(CompositePredicateEvaluator o1, CompositePredicateEvaluator o } int getPriority(CompositePredicateEvaluator compositePredicateEvaluator) { - List predicateEvaluators = compositePredicateEvaluator.getPredicateEvaluators(); + List> predicateEvaluators = + compositePredicateEvaluator.getPredicateEvaluators(); if (predicateEvaluators.size() == 1) { - switch (predicateEvaluators.get(0).getPredicateType()) { + ObjectBooleanPair predicateEvaluator = predicateEvaluators.get(0); + boolean negated = predicateEvaluator.rightBoolean(); + switch (predicateEvaluator.left().getPredicateType()) { case EQ: - return 1; + return negated ? 5 : 1; case IN: - return 2; + return negated ? 4 : 2; case RANGE: return 3; - case NOT_EQ: case NOT_IN: - return 4; + return negated ? 2 : 4; + case NOT_EQ: + return negated ? 
1 : 5; default: - throw new UnsupportedOperationException(); + throw new IllegalStateException(); } } else { // Process OR at last - return 5; + return 6; } } }); @@ -433,12 +448,25 @@ int getPriority(CompositePredicateEvaluator compositePredicateEvaluator) { * Returns the matching dictionary ids for the given composite predicate evaluator. */ private IntSet getMatchingDictIds(CompositePredicateEvaluator compositePredicateEvaluator) { - IntSet matchingDictIds = new IntOpenHashSet(); - for (PredicateEvaluator predicateEvaluator : compositePredicateEvaluator.getPredicateEvaluators()) { - for (int matchingDictId : predicateEvaluator.getMatchingDictIds()) { - matchingDictIds.add(matchingDictId); + List> predicateEvaluators = + compositePredicateEvaluator.getPredicateEvaluators(); + if (predicateEvaluators.size() == 1) { + ObjectBooleanPair predicateEvaluator = predicateEvaluators.get(0); + if (predicateEvaluator.rightBoolean()) { + return new IntOpenHashSet(predicateEvaluator.left().getNonMatchingDictIds()); + } else { + return new IntOpenHashSet(predicateEvaluator.left().getMatchingDictIds()); } + } else { + IntSet matchingDictIds = new IntOpenHashSet(); + for (ObjectBooleanPair predicateEvaluator : predicateEvaluators) { + if (predicateEvaluator.rightBoolean()) { + matchingDictIds.addAll(new IntImmutableList(predicateEvaluator.left().getNonMatchingDictIds())); + } else { + matchingDictIds.addAll(new IntImmutableList(predicateEvaluator.left().getMatchingDictIds())); + } + } + return matchingDictIds; } - return matchingDictIds; } } diff --git a/pinot-core/src/test/java/org/apache/pinot/core/startree/v2/BaseStarTreeV2Test.java b/pinot-core/src/test/java/org/apache/pinot/core/startree/v2/BaseStarTreeV2Test.java index d4e6d3da4694..d323f6d55042 100644 --- a/pinot-core/src/test/java/org/apache/pinot/core/startree/v2/BaseStarTreeV2Test.java +++ b/pinot-core/src/test/java/org/apache/pinot/core/startree/v2/BaseStarTreeV2Test.java @@ -103,20 +103,25 @@ abstract class 
BaseStarTreeV2Test { private static final String QUERY_FILTER_AND = " WHERE d1__COLUMN_NAME = 0 AND __d2 < 10"; // StarTree supports OR predicates only on a single dimension private static final String QUERY_FILTER_OR = " WHERE d1__COLUMN_NAME > 10 OR d1__COLUMN_NAME < 50"; + private static final String QUERY_FILTER_NOT = " WHERE NOT d1__COLUMN_NAME > 10"; + private static final String QUERY_FILTER_AND_NOT = " WHERE d1__COLUMN_NAME > 10 AND NOT __d2 < 10"; + private static final String QUERY_FILTER_OR_NOT = " WHERE d1__COLUMN_NAME > 50 OR NOT d1__COLUMN_NAME > 10"; + private static final String QUERY_NOT_NOT = " WHERE NOT NOT d1__COLUMN_NAME > 10"; private static final String QUERY_FILTER_COMPLEX_OR_MULTIPLE_DIMENSIONS = - " WHERE __d2 < 95 AND (d1__COLUMN_NAME > 10 OR d1__COLUMN_NAME < 50)"; + " WHERE __d2 < 95 AND (NOT d1__COLUMN_NAME > 10 OR d1__COLUMN_NAME > 50)"; private static final String QUERY_FILTER_COMPLEX_AND_MULTIPLE_DIMENSIONS_THREE_PREDICATES = - " WHERE __d2 < 95 AND __d2 > 25 AND (d1__COLUMN_NAME > 10 OR d1__COLUMN_NAME < 50)"; + " WHERE __d2 < 95 AND NOT __d2 < 25 AND (d1__COLUMN_NAME > 10 OR d1__COLUMN_NAME < 50)"; private static final String QUERY_FILTER_COMPLEX_OR_MULTIPLE_DIMENSIONS_THREE_PREDICATES = " WHERE (__d2 > 95 OR __d2 < 25) AND (d1__COLUMN_NAME > 10 OR d1__COLUMN_NAME < 50)"; private static final String QUERY_FILTER_COMPLEX_OR_SINGLE_DIMENSION = - " WHERE d1__COLUMN_NAME = 95 AND (d1__COLUMN_NAME > 90 OR d1__COLUMN_NAME < 100)"; + " WHERE NOT d1__COLUMN_NAME = 95 AND (d1__COLUMN_NAME > 90 OR d1__COLUMN_NAME < 100)"; // Unsupported filters private static final String QUERY_FILTER_OR_MULTIPLE_DIMENSIONS = " WHERE d1__COLUMN_NAME > 10 OR __d2 < 50"; private static final String QUERY_FILTER_OR_ON_AND = " WHERE (d1__COLUMN_NAME > 10 AND d1__COLUMN_NAME < 50) OR d1__COLUMN_NAME < 50"; - private static final String QUERY_FILTER_OR_ON_NOT = " WHERE (NOT d1__COLUMN_NAME > 10) OR d1__COLUMN_NAME < 50"; + private static final String 
QUERY_FILTER_NOT_ON_AND = " WHERE NOT (d1__COLUMN_NAME > 10 AND d1__COLUMN_NAME < 50)"; + private static final String QUERY_FILTER_NOT_ON_OR = " WHERE NOT (d1__COLUMN_NAME < 10 OR d1__COLUMN_NAME > 50)"; // Always false filters private static final String QUERY_FILTER_ALWAYS_FALSE = " WHERE d1__COLUMN_NAME > 100"; private static final String QUERY_FILTER_OR_ALWAYS_FALSE = " WHERE d1__COLUMN_NAME > 100 OR d1__COLUMN_NAME < 0"; @@ -199,7 +204,8 @@ public void testUnsupportedFilters() { String query = String.format("SELECT %s FROM %s", _aggregation, TABLE_NAME); testUnsupportedFilter(query + QUERY_FILTER_OR_MULTIPLE_DIMENSIONS); testUnsupportedFilter(query + QUERY_FILTER_OR_ON_AND); - testUnsupportedFilter(query + QUERY_FILTER_OR_ON_NOT); + testUnsupportedFilter(query + QUERY_FILTER_NOT_ON_AND); + testUnsupportedFilter(query + QUERY_FILTER_NOT_ON_OR); testUnsupportedFilter(query + QUERY_FILTER_ALWAYS_FALSE); testUnsupportedFilter(query + QUERY_FILTER_OR_ALWAYS_FALSE); } @@ -213,6 +219,10 @@ public void testQueries() testQuery(query); testQuery(query + QUERY_FILTER_AND); testQuery(query + QUERY_FILTER_OR); + testQuery(query + QUERY_FILTER_NOT); + testQuery(query + QUERY_FILTER_AND_NOT); + testQuery(query + QUERY_FILTER_OR_NOT); + testQuery(query + QUERY_NOT_NOT); testQuery(query + QUERY_FILTER_COMPLEX_OR_MULTIPLE_DIMENSIONS); testQuery(query + QUERY_FILTER_COMPLEX_AND_MULTIPLE_DIMENSIONS_THREE_PREDICATES); testQuery(query + QUERY_FILTER_COMPLEX_OR_MULTIPLE_DIMENSIONS_THREE_PREDICATES); diff --git a/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkScanDocIdIterators.java b/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkScanDocIdIterators.java index 718d99e9baf5..9669b82d91bb 100644 --- a/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkScanDocIdIterators.java +++ b/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkScanDocIdIterators.java @@ -179,21 +179,11 @@ public boolean applyMV(int[] values, int length) { return false; } - @Override - 
public int getNumMatchingDictIds() { - return 0; - } - @Override public int[] getMatchingDictIds() { return new int[0]; } - @Override - public int getNumNonMatchingDictIds() { - return 0; - } - @Override public int[] getNonMatchingDictIds() { return new int[0]; From 0be51ca9558580e5f03979a1b65972459d83aaf7 Mon Sep 17 00:00:00 2001 From: Xiang Fu Date: Sat, 27 Apr 2024 14:22:37 +0800 Subject: [PATCH 085/102] Allow apply both environment variables and system properties to user and table configs, Environment variables take precedence over system properties (#13011) --- .../common/metadata/ZKMetadataProvider.java | 4 +- .../apache/pinot/spi/config/ConfigUtils.java | 41 ++++++++++++------- .../pinot/spi/config/ConfigUtilsTest.java | 30 +++++++++++--- 3 files changed, 52 insertions(+), 23 deletions(-) diff --git a/pinot-common/src/main/java/org/apache/pinot/common/metadata/ZKMetadataProvider.java b/pinot-common/src/main/java/org/apache/pinot/common/metadata/ZKMetadataProvider.java index d69d386a261f..0fdf94388a0b 100644 --- a/pinot-common/src/main/java/org/apache/pinot/common/metadata/ZKMetadataProvider.java +++ b/pinot-common/src/main/java/org/apache/pinot/common/metadata/ZKMetadataProvider.java @@ -313,7 +313,7 @@ public static UserConfig getUserConfig(ZkHelixPropertyStore propertySt } try { UserConfig userConfig = AccessControlUserConfigUtils.fromZNRecord(znRecord); - return ConfigUtils.applyConfigWithEnvVariables(userConfig); + return ConfigUtils.applyConfigWithEnvVariablesAndSystemProperties(userConfig); } catch (Exception e) { LOGGER.error("Caught exception while getting user configuration for user: {}", username, e); return null; @@ -422,7 +422,7 @@ private static TableConfig toTableConfig(@Nullable ZNRecord znRecord) { } try { TableConfig tableConfig = TableConfigUtils.fromZNRecord(znRecord); - return ConfigUtils.applyConfigWithEnvVariables(tableConfig); + return ConfigUtils.applyConfigWithEnvVariablesAndSystemProperties(tableConfig); } catch (Exception e) { 
LOGGER.error("Caught exception while creating table config from ZNRecord: {}", znRecord.getId(), e); return null; diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/config/ConfigUtils.java b/pinot-spi/src/main/java/org/apache/pinot/spi/config/ConfigUtils.java index 5bd637411bbd..289eef7db00a 100644 --- a/pinot-spi/src/main/java/org/apache/pinot/spi/config/ConfigUtils.java +++ b/pinot-spi/src/main/java/org/apache/pinot/spi/config/ConfigUtils.java @@ -23,6 +23,7 @@ import com.fasterxml.jackson.databind.node.JsonNodeFactory; import com.fasterxml.jackson.databind.node.JsonNodeType; import java.io.IOException; +import java.util.HashMap; import java.util.Iterator; import java.util.Map; import org.apache.pinot.spi.utils.JsonUtils; @@ -35,37 +36,47 @@ private ConfigUtils() { private static final Map ENVIRONMENT_VARIABLES = System.getenv(); /** - * Apply environment variables to any given BaseJsonConfig. + * Apply system properties and environment variables to any given BaseJsonConfig. + * Environment variables take precedence over system properties. + * Since the System properties are mutable, this method will read it at runtime. * - * @return Config with environment variable applied. + * @return Config with both system properties and environment variables applied. */ - public static T applyConfigWithEnvVariables(T config) { - return applyConfigWithEnvVariables(ENVIRONMENT_VARIABLES, config); + public static T applyConfigWithEnvVariablesAndSystemProperties(T config) { + Map combinedMap = new HashMap<>(); + // Add all system properties to the map + System.getProperties().forEach((key, value) -> combinedMap.put(String.valueOf(key), String.valueOf(value))); + // Add all environment variables to the map, potentially overwriting system properties + combinedMap.putAll(ENVIRONMENT_VARIABLES); + return applyConfigWithEnvVariablesAndSystemProperties(combinedMap, config); } /** - * Apply environment variables to any given BaseJsonConfig. 
+ * Apply a map of config to any given BaseJsonConfig with templates. * - * @return Config with environment variable applied. + * @return Config with the configs applied. */ - public static T applyConfigWithEnvVariables(Map environment, T config) { + public static T applyConfigWithEnvVariablesAndSystemProperties( + Map configValues, T configTemplate) { JsonNode jsonNode; try { - jsonNode = applyConfigWithEnvVariables(environment, config.toJsonNode()); + jsonNode = applyConfigWithEnvVariablesAndSystemProperties(configValues, configTemplate.toJsonNode()); } catch (RuntimeException e) { throw new RuntimeException(String - .format("Unable to apply environment variables on json config class [%s].", config.getClass().getName()), e); + .format("Unable to apply environment variables on json config class [%s].", + configTemplate.getClass().getName()), e); } try { - return (T) JsonUtils.jsonNodeToObject(jsonNode, config.getClass()); + return (T) JsonUtils.jsonNodeToObject(jsonNode, configTemplate.getClass()); } catch (IOException e) { throw new RuntimeException(String .format("Unable to read JsonConfig to class [%s] after applying environment variables, jsonConfig is: '%s'.", - config.getClass().getName(), jsonNode.toString()), e); + configTemplate.getClass().getName(), jsonNode.toString()), e); } } - private static JsonNode applyConfigWithEnvVariables(Map environment, JsonNode jsonNode) { + private static JsonNode applyConfigWithEnvVariablesAndSystemProperties(Map configValues, + JsonNode jsonNode) { final JsonNodeType nodeType = jsonNode.getNodeType(); switch (nodeType) { case OBJECT: @@ -73,7 +84,7 @@ private static JsonNode applyConfigWithEnvVariables(Map environm Iterator> iterator = jsonNode.fields(); while (iterator.hasNext()) { final Map.Entry next = iterator.next(); - next.setValue(applyConfigWithEnvVariables(environment, next.getValue())); + next.setValue(applyConfigWithEnvVariablesAndSystemProperties(configValues, next.getValue())); } } break; @@ -82,7 +93,7 @@ 
private static JsonNode applyConfigWithEnvVariables(Map environm ArrayNode arrayNode = (ArrayNode) jsonNode; for (int i = 0; i < arrayNode.size(); i++) { JsonNode arrayElement = arrayNode.get(i); - arrayNode.set(i, applyConfigWithEnvVariables(environment, arrayElement)); + arrayNode.set(i, applyConfigWithEnvVariablesAndSystemProperties(configValues, arrayElement)); } } break; @@ -91,7 +102,7 @@ private static JsonNode applyConfigWithEnvVariables(Map environm if (field.startsWith("${") && field.endsWith("}")) { String[] envVarSplits = field.substring(2, field.length() - 1).split(":", 2); String envVarKey = envVarSplits[0]; - String value = environment.get(envVarKey); + String value = configValues.get(envVarKey); if (value != null) { return JsonNodeFactory.instance.textNode(value); } else if (envVarSplits.length > 1) { diff --git a/pinot-spi/src/test/java/org/apache/pinot/spi/config/ConfigUtilsTest.java b/pinot-spi/src/test/java/org/apache/pinot/spi/config/ConfigUtilsTest.java index 07028659faf1..296bd00d2774 100644 --- a/pinot-spi/src/test/java/org/apache/pinot/spi/config/ConfigUtilsTest.java +++ b/pinot-spi/src/test/java/org/apache/pinot/spi/config/ConfigUtilsTest.java @@ -39,6 +39,25 @@ public class ConfigUtilsTest { @Test public void testIndexing() { + Map environment = + ImmutableMap.of("LOAD_MODE", "MMAP", "AWS_ACCESS_KEY", "default_aws_access_key", "AWS_SECRET_KEY", + "default_aws_secret_key"); + testIndexingWithConfig(environment); + } + + @Test + public void testIndexingWithSystemProperties() { + // Use default System properties + System.setProperty("LOAD_MODE", "MMAP"); + System.setProperty("AWS_ACCESS_KEY", "default_aws_access_key"); + System.setProperty("AWS_SECRET_KEY", "default_aws_secret_key"); + testIndexingWithConfig(null); + System.clearProperty("LOAD_MODE"); + System.clearProperty("AWS_ACCESS_KEY"); + System.clearProperty("AWS_SECRET_KEY"); + } + + private void testIndexingWithConfig(Map configOverride) { IndexingConfig indexingConfig = new 
IndexingConfig(); indexingConfig.setLoadMode("${LOAD_MODE}"); indexingConfig.setAggregateMetrics(true); @@ -80,12 +99,11 @@ public void testIndexing() { streamConfigMap.put(StreamConfigProperties.constructStreamProperty(streamType, "aws.secretKey"), "${AWS_SECRET_KEY}"); indexingConfig.setStreamConfigs(streamConfigMap); - - Map environment = - ImmutableMap.of("LOAD_MODE", "MMAP", "AWS_ACCESS_KEY", "default_aws_access_key", "AWS_SECRET_KEY", - "default_aws_secret_key"); - - indexingConfig = ConfigUtils.applyConfigWithEnvVariables(environment, indexingConfig); + if (configOverride != null) { + indexingConfig = ConfigUtils.applyConfigWithEnvVariablesAndSystemProperties(configOverride, indexingConfig); + } else { + indexingConfig = ConfigUtils.applyConfigWithEnvVariablesAndSystemProperties(indexingConfig); + } assertEquals(indexingConfig.getLoadMode(), "MMAP"); assertTrue(indexingConfig.isAggregateMetrics()); assertEquals(indexingConfig.getInvertedIndexColumns(), invertedIndexColumns); From fc967d0d15ff0296b58c52d28d472b95569ffd37 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 29 Apr 2024 11:03:13 -0700 Subject: [PATCH 086/102] Bump org.testng:testng from 7.10.1 to 7.10.2 (#13021) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 8a9e80ae6e45..a3128a7db4d8 100644 --- a/pom.xml +++ b/pom.xml @@ -175,7 +175,7 @@ 2.25.39 2.12.7 3.1.12 - 7.10.1 + 7.10.2 6.6.2 8.3.4 From 2a7f3205935db8d637c2512652ebe97eebf74f9d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 29 Apr 2024 11:04:42 -0700 Subject: [PATCH 087/102] Bump aws.sdk.version from 2.25.39 to 2.25.40 (#13022) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index a3128a7db4d8..0a3a2940b4fb 100644 --- a/pom.xml +++ b/pom.xml @@ -172,7 +172,7 @@ 0.15.0 0.4.4 4.2.2 - 2.25.39 + 2.25.40 2.12.7 3.1.12 
7.10.2 From e2cadfabf11ea2effdc0b45cdec96c4d86819676 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 29 Apr 2024 11:05:04 -0700 Subject: [PATCH 088/102] Bump com.google.errorprone:error_prone_annotations from 2.26.1 to 2.27.0 (#13023) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 0a3a2940b4fb..6de9cfc63a94 100644 --- a/pom.xml +++ b/pom.xml @@ -884,7 +884,7 @@ com.google.errorprone error_prone_annotations - 2.26.1 + 2.27.0 From bbf63c7d72c6aa0aaf79e282d8e373f26a0a5bfa Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 29 Apr 2024 11:05:25 -0700 Subject: [PATCH 089/102] Bump org.apache.datasketches:datasketches-java from 5.0.2 to 6.0.0 (#13024) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 6de9cfc63a94..b0e6fa2cb885 100644 --- a/pom.xml +++ b/pom.xml @@ -1223,7 +1223,7 @@ org.apache.datasketches datasketches-java - 5.0.2 + 6.0.0 com.dynatrace.hash4j From bdfb34a4b82f2c700dfd476c7734bb6cb076ec18 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 29 Apr 2024 11:05:42 -0700 Subject: [PATCH 090/102] Bump commons-codec:commons-codec from 1.16.1 to 1.17.0 (#13025) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index b0e6fa2cb885..656d28d4baef 100644 --- a/pom.xml +++ b/pom.xml @@ -192,7 +192,7 @@ 1.10.0 2.10.1 2.16.1 - 1.16.1 + 1.17.0 1.7.0 3.10.0 1.8.0 From 14651a2fef171141848f603cc1997972cabd7f66 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 29 Apr 2024 11:13:04 -0700 Subject: [PATCH 091/102] Bump com.puppycrawl.tools:checkstyle from 10.15.0 to 10.16.0 (#13027) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 
656d28d4baef..e362a240fced 100644 --- a/pom.xml +++ b/pom.xml @@ -1942,7 +1942,7 @@ com.puppycrawl.tools checkstyle - 10.15.0 + 10.16.0 From 7b06b9f5c3b945c585eba0fa3a7118845e58802e Mon Sep 17 00:00:00 2001 From: Aditya Mahajan Date: Tue, 30 Apr 2024 01:54:15 +0530 Subject: [PATCH 092/102] Issue #12367 (#12922) --- .../function/CaseTransformFunctionTest.java | 31 ++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/CaseTransformFunctionTest.java b/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/CaseTransformFunctionTest.java index 79a415e5a92f..315b53e9e08d 100644 --- a/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/CaseTransformFunctionTest.java +++ b/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/CaseTransformFunctionTest.java @@ -36,6 +36,7 @@ import org.testng.annotations.Test; import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertNotEquals; public class CaseTransformFunctionTest extends BaseTransformFunctionTest { @@ -106,7 +107,8 @@ public void testCaseTransformFunctionWithIntResults() { testCaseQueries(String.format("%s(%s, %s)", functionType.getName(), LONG_SV_COLUMN, String.format("%d", _longSVValues[INDEX_TO_COMPARE])), getPredicateResults(LONG_SV_COLUMN, functionType)); testCaseQueries(String.format("%s(%s, %s)", functionType.getName(), FLOAT_SV_COLUMN, - String.format("%f", _floatSVValues[INDEX_TO_COMPARE])), getPredicateResults(FLOAT_SV_COLUMN, functionType)); + "CAST(" + String.format("%f", _floatSVValues[INDEX_TO_COMPARE]) + " AS FLOAT)"), + getPredicateResults(FLOAT_SV_COLUMN, functionType)); testCaseQueries(String.format("%s(%s, %s)", functionType.getName(), DOUBLE_SV_COLUMN, String.format("%.20f", _doubleSVValues[INDEX_TO_COMPARE])), getPredicateResults(DOUBLE_SV_COLUMN, functionType)); @@ -116,6 +118,33 @@ public void 
testCaseTransformFunctionWithIntResults() { } } + @Test + public void testCaseTransformFunctionWithoutCastForFloatValues() { + boolean[] predicateResults = new boolean[1]; + Arrays.fill(predicateResults, true); + int[] expectedValues = new int[1]; + int index = -1; + for (int i = 0; i < NUM_ROWS; i++) { + if (Double.compare(_floatSVValues[i], Double.parseDouble(String.format("%f", _floatSVValues[i]))) != 0) { + index = i; + expectedValues[0] = predicateResults[0] ? _intSVValues[i] : 10; + break; + } + } + + if (index != -1) { + String predicate = String.format("%s(%s, %s)", TransformFunctionType.EQUALS, FLOAT_SV_COLUMN, + String.format("%f", _floatSVValues[index])); + String expression = String.format("CASE WHEN %s THEN %s ELSE 10 END", predicate, INT_SV_COLUMN); + ExpressionContext expressionContext = RequestContextUtils.getExpression(expression); + TransformFunction transformFunction = TransformFunctionFactory.get(expressionContext, _dataSourceMap); + Assert.assertTrue(transformFunction instanceof CaseTransformFunction); + assertEquals(transformFunction.getResultMetadata().getDataType(), DataType.INT); + int[] intValues = transformFunction.transformToIntValuesSV(_projectionBlock); + assertNotEquals(intValues[index], expectedValues[0]); + } + } + @DataProvider public static String[] illegalExpressions() { //@formatter:off From 475708f14f3f27a3a8b03ae710d84e2cb70eda74 Mon Sep 17 00:00:00 2001 From: Yash Mayya Date: Wed, 1 May 2024 00:31:22 +0530 Subject: [PATCH 093/102] Use try-with-resources to close file walk stream in LocalPinotFS (#13029) --- .../apache/pinot/spi/filesystem/LocalPinotFS.java | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/filesystem/LocalPinotFS.java b/pinot-spi/src/main/java/org/apache/pinot/spi/filesystem/LocalPinotFS.java index 5eae4d92671f..7fd8ca5906ea 100644 --- a/pinot-spi/src/main/java/org/apache/pinot/spi/filesystem/LocalPinotFS.java +++ 
b/pinot-spi/src/main/java/org/apache/pinot/spi/filesystem/LocalPinotFS.java @@ -32,6 +32,7 @@ import java.util.Arrays; import java.util.List; import java.util.stream.Collectors; +import java.util.stream.Stream; import org.apache.commons.io.FileUtils; import org.apache.pinot.spi.env.PinotConfiguration; @@ -112,8 +113,9 @@ public String[] listFiles(URI fileUri, boolean recursive) if (!recursive) { return Arrays.stream(file.list()).map(s -> new File(file, s)).map(File::getAbsolutePath).toArray(String[]::new); } else { - return Files.walk(Paths.get(fileUri)). - filter(s -> !s.equals(file.toPath())).map(Path::toString).toArray(String[]::new); + try (Stream pathStream = Files.walk(Paths.get(fileUri))) { + return pathStream.filter(s -> !s.equals(file.toPath())).map(Path::toString).toArray(String[]::new); + } } } @@ -124,8 +126,10 @@ public List listFilesWithMetadata(URI fileUri, boolean recursive) if (!recursive) { return Arrays.stream(file.list()).map(s -> getFileMetadata(new File(file, s))).collect(Collectors.toList()); } else { - return Files.walk(Paths.get(fileUri)).filter(s -> !s.equals(file.toPath())).map(p -> getFileMetadata(p.toFile())) - .collect(Collectors.toList()); + try (Stream pathStream = Files.walk(Paths.get(fileUri))) { + return pathStream.filter(s -> !s.equals(file.toPath())).map(p -> getFileMetadata(p.toFile())) + .collect(Collectors.toList()); + } } } From 7413e993ee784e34dcc00942cb24f594333d206e Mon Sep 17 00:00:00 2001 From: "Xiaotian (Jackie) Jiang" <17555551+Jackie-Jiang@users.noreply.github.com> Date: Tue, 30 Apr 2024 12:29:03 -0700 Subject: [PATCH 094/102] Upgrade s3mock to 2.17.0 (#13028) --- pinot-plugins/pinot-file-system/pinot-s3/pom.xml | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/pinot-plugins/pinot-file-system/pinot-s3/pom.xml b/pinot-plugins/pinot-file-system/pinot-s3/pom.xml index 0f4d2eea78f0..d94330574fa8 100644 --- a/pinot-plugins/pinot-file-system/pinot-s3/pom.xml +++ 
b/pinot-plugins/pinot-file-system/pinot-s3/pom.xml @@ -35,15 +35,10 @@ https://pinot.apache.org ${basedir}/../../.. - 2.12.2 + 2.17.0 - - org.apache.pinot - pinot-spi - - software.amazon.awssdk s3 From ea0c71b3c88e800ba2fb610826a85e188d896150 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 30 Apr 2024 12:37:53 -0700 Subject: [PATCH 095/102] Bump org.scala-lang:scala-library from 2.11.11 to 2.11.12 and from 2.12.18 to 2.12.19 (#13034) --- .../pinot-batch-ingestion-spark-2.4/pom.xml | 2 +- pom.xml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-spark-2.4/pom.xml b/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-spark-2.4/pom.xml index 748d6de20c34..bf0d055afa3e 100644 --- a/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-spark-2.4/pom.xml +++ b/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-spark-2.4/pom.xml @@ -38,7 +38,7 @@ package 2.11 2.4.6 - 2.11.11 + 2.11.12 diff --git a/pom.xml b/pom.xml index e362a240fced..9683078cbf3d 100644 --- a/pom.xml +++ b/pom.xml @@ -237,7 +237,7 @@ 1.61.1 - 2.12.18 + 2.12.19 2.12 From f1530112ec7d5a07d564637e425dc611f557a93c Mon Sep 17 00:00:00 2001 From: Abhishek Sharma Date: Tue, 30 Apr 2024 17:16:29 -0400 Subject: [PATCH 096/102] Upgrade jna to version 5.14.0 for Mac M1/M2 local execution support (#13018) --- LICENSE-binary | 4 ++-- pinot-plugins/pinot-file-system/pinot-adls/pom.xml | 10 ---------- pom.xml | 8 +++++++- 3 files changed, 9 insertions(+), 13 deletions(-) diff --git a/LICENSE-binary b/LICENSE-binary index 5944cc2bba50..0303ffe9c16b 100644 --- a/LICENSE-binary +++ b/LICENSE-binary @@ -371,8 +371,8 @@ jakarta.validation:jakarta.validation-api:2.0.2 javax.inject:javax.inject:1 javax.validation:validation-api:2.0.1.Final joda-time:joda-time:2.12.5 -net.java.dev.jna:jna-platform:5.6.0 -net.java.dev.jna:jna:5.5.0 +net.java.dev.jna:jna-platform:5.14.0 
+net.java.dev.jna:jna:5.14.0 net.minidev:accessors-smart:2.5.0 net.minidev:json-smart:2.5.0 net.openhft:chronicle-analytics:2.24ea0 diff --git a/pinot-plugins/pinot-file-system/pinot-adls/pom.xml b/pinot-plugins/pinot-file-system/pinot-adls/pom.xml index f2f31cb65f6a..33767b2f63f1 100644 --- a/pinot-plugins/pinot-file-system/pinot-adls/pom.xml +++ b/pinot-plugins/pinot-file-system/pinot-adls/pom.xml @@ -64,16 +64,6 @@ reactor-core 3.6.5 - - net.java.dev.jna - jna-platform - 5.14.0 - - - net.java.dev.jna - jna - 5.6.0 - com.microsoft.azure msal4j diff --git a/pom.xml b/pom.xml index 9683078cbf3d..e3d1f26a9bfc 100644 --- a/pom.xml +++ b/pom.xml @@ -251,6 +251,7 @@ 9.37.3 1.78 0.26 + 5.14.0 @@ -1442,10 +1443,15 @@ asm 9.7 + + net.java.dev.jna + jna-platform + ${jna.version} + net.java.dev.jna jna - 5.5.0 + ${jna.version} From 087fca37d03bf18930ecaad55b2955ef5d20ecac Mon Sep 17 00:00:00 2001 From: "Xiaotian (Jackie) Jiang" <17555551+Jackie-Jiang@users.noreply.github.com> Date: Tue, 30 Apr 2024 14:17:08 -0700 Subject: [PATCH 097/102] Ensure all the lists used in PinotQuery are ArrayList (#13017) --- .../BaseBrokerRequestHandler.java | 20 +- .../common/utils/request/RequestUtils.java | 39 ++ .../pinot/sql/parsers/CalciteSqlParser.java | 83 +-- .../sql/parsers/rewriter/ClpRewriter.java | 77 +-- .../parsers/rewriter/ExprMinMaxRewriter.java | 4 +- ...egationGroupByToDistinctQueryRewriter.java | 11 +- .../sql/parsers/rewriter/OrdinalsUpdater.java | 8 +- .../rewriter/PredicateComparisonRewriter.java | 20 +- .../sql/parsers/CalciteSqlCompilerTest.java | 654 +++++++++--------- .../filter/MergeEqInFilterOptimizer.java | 9 +- .../filter/MergeRangeFilterOptimizer.java | 7 +- .../filter/TextMatchFilterOptimizer.java | 22 +- .../filter/TimePredicateFilterOptimizer.java | 19 +- .../maker/QueryOverrideWithHintsTest.java | 12 +- .../query/optimizer/QueryOptimizerTest.java | 12 +- .../plan/server/ServerPlanRequestUtils.java | 48 +- 16 files changed, 505 insertions(+), 540 deletions(-) 
diff --git a/pinot-broker/src/main/java/org/apache/pinot/broker/requesthandler/BaseBrokerRequestHandler.java b/pinot-broker/src/main/java/org/apache/pinot/broker/requesthandler/BaseBrokerRequestHandler.java index b2d4e24d3f32..2fdc36e1ea3f 100644 --- a/pinot-broker/src/main/java/org/apache/pinot/broker/requesthandler/BaseBrokerRequestHandler.java +++ b/pinot-broker/src/main/java/org/apache/pinot/broker/requesthandler/BaseBrokerRequestHandler.java @@ -24,7 +24,6 @@ import com.google.common.collect.ImmutableMap; import java.net.URI; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collections; import java.util.HashSet; import java.util.List; @@ -1361,8 +1360,7 @@ private static void handleApproximateFunctionOverride(Expression expression) { try { int percentile = Integer.parseInt(remainingFunctionName); function.setOperator("percentilesmarttdigest"); - function.setOperands( - Arrays.asList(function.getOperands().get(0), RequestUtils.getLiteralExpression(percentile))); + function.addToOperands(RequestUtils.getLiteralExpression(percentile)); } catch (Exception e) { throw new BadQueryRequestException("Illegal function name: " + functionName); } @@ -1370,8 +1368,7 @@ private static void handleApproximateFunctionOverride(Expression expression) { try { int percentile = Integer.parseInt(remainingFunctionName.substring(0, remainingFunctionName.length() - 2)); function.setOperator("percentilesmarttdigest"); - function.setOperands( - Arrays.asList(function.getOperands().get(0), RequestUtils.getLiteralExpression(percentile))); + function.addToOperands(RequestUtils.getLiteralExpression(percentile)); } catch (Exception e) { throw new BadQueryRequestException("Illegal function name: " + functionName); } @@ -1849,18 +1846,17 @@ static void validateRequest(PinotQuery pinotQuery, int queryResponseLimit) { */ private static void attachTimeBoundary(PinotQuery pinotQuery, TimeBoundaryInfo timeBoundaryInfo, boolean isOfflineRequest) { + String functionName = 
isOfflineRequest ? FilterKind.LESS_THAN_OR_EQUAL.name() : FilterKind.GREATER_THAN.name(); String timeColumn = timeBoundaryInfo.getTimeColumn(); String timeValue = timeBoundaryInfo.getTimeValue(); - Expression timeFilterExpression = RequestUtils.getFunctionExpression( - isOfflineRequest ? FilterKind.LESS_THAN_OR_EQUAL.name() : FilterKind.GREATER_THAN.name()); - timeFilterExpression.getFunctionCall().setOperands( - Arrays.asList(RequestUtils.getIdentifierExpression(timeColumn), RequestUtils.getLiteralExpression(timeValue))); + Expression timeFilterExpression = + RequestUtils.getFunctionExpression(functionName, RequestUtils.getIdentifierExpression(timeColumn), + RequestUtils.getLiteralExpression(timeValue)); Expression filterExpression = pinotQuery.getFilterExpression(); if (filterExpression != null) { - Expression andFilterExpression = RequestUtils.getFunctionExpression(FilterKind.AND.name()); - andFilterExpression.getFunctionCall().setOperands(Arrays.asList(filterExpression, timeFilterExpression)); - pinotQuery.setFilterExpression(andFilterExpression); + pinotQuery.setFilterExpression( + RequestUtils.getFunctionExpression(FilterKind.AND.name(), filterExpression, timeFilterExpression)); } else { pinotQuery.setFilterExpression(timeFilterExpression); } diff --git a/pinot-common/src/main/java/org/apache/pinot/common/utils/request/RequestUtils.java b/pinot-common/src/main/java/org/apache/pinot/common/utils/request/RequestUtils.java index 42abb0b80dd9..f6818371a059 100644 --- a/pinot-common/src/main/java/org/apache/pinot/common/utils/request/RequestUtils.java +++ b/pinot-common/src/main/java/org/apache/pinot/common/utils/request/RequestUtils.java @@ -24,8 +24,10 @@ import com.google.common.base.Splitter; import com.google.common.collect.ImmutableSet; import java.math.BigDecimal; +import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; +import java.util.List; import java.util.Map; import java.util.Set; import java.util.stream.Collectors; @@ -301,6 
+303,43 @@ public static Expression getLiteralExpression(Object object) { return RequestUtils.getLiteralExpression(object.toString()); } + public static Function getFunction(String canonicalName, List operands) { + Function function = new Function(canonicalName); + function.setOperands(operands); + return function; + } + + public static Function getFunction(String canonicalName, Expression operand) { + // NOTE: Create an ArrayList because we might need to modify the list later + List operands = new ArrayList<>(1); + operands.add(operand); + return getFunction(canonicalName, operands); + } + + public static Function getFunction(String canonicalName, Expression... operands) { + // NOTE: Create an ArrayList because we might need to modify the list later + return getFunction(canonicalName, new ArrayList<>(Arrays.asList(operands))); + } + + public static Expression getFunctionExpression(Function function) { + Expression expression = new Expression(ExpressionType.FUNCTION); + expression.setFunctionCall(function); + return expression; + } + + public static Expression getFunctionExpression(String canonicalName, List operands) { + return getFunctionExpression(getFunction(canonicalName, operands)); + } + + public static Expression getFunctionExpression(String canonicalName, Expression operand) { + return getFunctionExpression(getFunction(canonicalName, operand)); + } + + public static Expression getFunctionExpression(String canonicalName, Expression... 
operands) { + return getFunctionExpression(getFunction(canonicalName, operands)); + } + + @Deprecated public static Expression getFunctionExpression(String canonicalName) { assert canonicalName.equalsIgnoreCase(canonicalizeFunctionNamePreservingSpecialKey(canonicalName)); Expression expression = new Expression(ExpressionType.FUNCTION); diff --git a/pinot-common/src/main/java/org/apache/pinot/sql/parsers/CalciteSqlParser.java b/pinot-common/src/main/java/org/apache/pinot/sql/parsers/CalciteSqlParser.java index 3232cfad2cc9..0787aacf9fe8 100644 --- a/pinot-common/src/main/java/org/apache/pinot/sql/parsers/CalciteSqlParser.java +++ b/pinot-common/src/main/java/org/apache/pinot/sql/parsers/CalciteSqlParser.java @@ -23,10 +23,8 @@ import com.google.common.collect.ImmutableSet; import java.io.StringReader; import java.util.ArrayList; -import java.util.Collections; import java.util.HashMap; import java.util.HashSet; -import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; @@ -379,8 +377,7 @@ public static Set extractIdentifiers(List expressions, boole Function function = expression.getFunctionCall(); if (function != null) { if (excludeAs && function.getOperator().equals("as")) { - identifiers.addAll( - extractIdentifiers(new ArrayList<>(Collections.singletonList(function.getOperands().get(0))), true)); + identifiers.addAll(extractIdentifiers(List.of(function.getOperands().get(0)), true)); } else { identifiers.addAll(extractIdentifiers(function.getOperands(), excludeAs)); } @@ -589,25 +586,22 @@ private static Map extractOptionsMap(List optionsStateme } private static List convertDistinctSelectList(SqlNodeList selectList) { - List selectExpr = new ArrayList<>(); + // NOTE: Create an ArrayList because we might need to modify the list later + List selectExpr = new ArrayList<>(1); selectExpr.add(convertDistinctAndSelectListToFunctionExpression(selectList)); return selectExpr; } private static List convertSelectList(SqlNodeList selectList) 
{ - List selectExpr = new ArrayList<>(); - - final Iterator iterator = selectList.iterator(); - while (iterator.hasNext()) { - final SqlNode next = iterator.next(); - selectExpr.add(toExpression(next)); + List selectExpr = new ArrayList<>(selectList.size()); + for (SqlNode sqlNode : selectList) { + selectExpr.add(toExpression(sqlNode)); } - return selectExpr; } private static List convertOrderByList(SqlNodeList orderList) { - List orderByExpr = new ArrayList<>(); + List orderByExpr = new ArrayList<>(orderList.size()); for (SqlNode sqlNode : orderList) { orderByExpr.add(convertOrderBy(sqlNode, true)); } @@ -621,19 +615,17 @@ private static Expression convertOrderBy(SqlNode node, boolean createAscExpressi Expression expression; if (node.getKind() == SqlKind.NULLS_LAST) { SqlBasicCall basicCall = (SqlBasicCall) node; - expression = RequestUtils.getFunctionExpression(NULLS_LAST); - expression.getFunctionCall().addToOperands(convertOrderBy(basicCall.getOperandList().get(0), true)); + expression = + RequestUtils.getFunctionExpression(NULLS_LAST, convertOrderBy(basicCall.getOperandList().get(0), true)); } else if (node.getKind() == SqlKind.NULLS_FIRST) { SqlBasicCall basicCall = (SqlBasicCall) node; - expression = RequestUtils.getFunctionExpression(NULLS_FIRST); - expression.getFunctionCall().addToOperands(convertOrderBy(basicCall.getOperandList().get(0), true)); + expression = + RequestUtils.getFunctionExpression(NULLS_FIRST, convertOrderBy(basicCall.getOperandList().get(0), true)); } else if (node.getKind() == SqlKind.DESCENDING) { SqlBasicCall basicCall = (SqlBasicCall) node; - expression = RequestUtils.getFunctionExpression(DESC); - expression.getFunctionCall().addToOperands(convertOrderBy(basicCall.getOperandList().get(0), false)); + expression = RequestUtils.getFunctionExpression(DESC, convertOrderBy(basicCall.getOperandList().get(0), false)); } else if (createAscExpression) { - expression = RequestUtils.getFunctionExpression(ASC); - 
expression.getFunctionCall().addToOperands(toExpression(node)); + expression = RequestUtils.getFunctionExpression(ASC, toExpression(node)); } else { return toExpression(node); } @@ -648,7 +640,7 @@ private static Expression convertOrderBy(SqlNode node, boolean createAscExpressi * @return DISTINCT function expression */ private static Expression convertDistinctAndSelectListToFunctionExpression(SqlNodeList selectList) { - Expression functionExpression = RequestUtils.getFunctionExpression("distinct"); + List operands = new ArrayList<>(selectList.size()); for (SqlNode node : selectList) { Expression columnExpression = toExpression(node); if (columnExpression.getType() == ExpressionType.IDENTIFIER && columnExpression.getIdentifier().getName() @@ -662,9 +654,9 @@ private static Expression convertDistinctAndSelectListToFunctionExpression(SqlNo "Syntax error: Use of DISTINCT with aggregation functions is not supported"); } } - functionExpression.getFunctionCall().addToOperands(columnExpression); + operands.add(columnExpression); } - return functionExpression; + return RequestUtils.getFunctionExpression("distinct", operands); } private static Expression toExpression(SqlNode node) { @@ -705,10 +697,7 @@ private static Expression toExpression(SqlNode node) { return leftExpr; } } - Expression asFuncExpr = RequestUtils.getFunctionExpression("as"); - asFuncExpr.getFunctionCall().addToOperands(leftExpr); - asFuncExpr.getFunctionCall().addToOperands(rightExpr); - return asFuncExpr; + return RequestUtils.getFunctionExpression("as", leftExpr, rightExpr); case CASE: // CASE WHEN Statement is model as a function with variable length parameters. // Assume N is number of WHEN Statements, total number of parameters is (2 * N + 1). @@ -717,26 +706,22 @@ private static Expression toExpression(SqlNode node) { // - 1: Convert ELSE Statement into an Expression. 
SqlCase caseSqlNode = (SqlCase) node; SqlNodeList whenOperands = caseSqlNode.getWhenOperands(); + int numWhenOperands = whenOperands.size(); SqlNodeList thenOperands = caseSqlNode.getThenOperands(); + Preconditions.checkState(numWhenOperands == thenOperands.size()); SqlNode elseOperand = caseSqlNode.getElseOperand(); - Expression caseFuncExpr = RequestUtils.getFunctionExpression("case"); - Preconditions.checkState(whenOperands.size() == thenOperands.size()); - for (int i = 0; i < whenOperands.size(); i++) { - SqlNode whenSqlNode = whenOperands.get(i); - Expression whenExpression = toExpression(whenSqlNode); - caseFuncExpr.getFunctionCall().addToOperands(whenExpression); - - SqlNode thenSqlNode = thenOperands.get(i); - Expression thenExpression = toExpression(thenSqlNode); - caseFuncExpr.getFunctionCall().addToOperands(thenExpression); + List caseOperands = new ArrayList<>(2 * numWhenOperands + 1); + for (int i = 0; i < numWhenOperands; i++) { + caseOperands.add(toExpression(whenOperands.get(i))); + caseOperands.add(toExpression(thenOperands.get(i))); } Expression elseExpression = toExpression(elseOperand); if (isAggregateExpression(elseExpression)) { throw new SqlCompilationException( "Aggregation functions inside ELSE Clause is not supported - " + elseExpression); } - caseFuncExpr.getFunctionCall().addToOperands(elseExpression); - return caseFuncExpr; + caseOperands.add(elseExpression); + return RequestUtils.getFunctionExpression("case", caseOperands); default: if (node instanceof SqlDataTypeSpec) { // This is to handle expression like: CAST(col AS INT) @@ -808,15 +793,9 @@ private static Expression compileFunctionExpression(SqlBasicCall functionNode) { } } ParserUtils.validateFunction(canonicalName, operands); - Expression functionExpression = RequestUtils.getFunctionExpression(canonicalName); - functionExpression.getFunctionCall().setOperands(operands); + Expression functionExpression = RequestUtils.getFunctionExpression(canonicalName, operands); if (negated) { 
- Expression negatedFunctionExpression = RequestUtils.getFunctionExpression(FilterKind.NOT.name()); - // Do not use `Collections.singletonList()` because we might modify the operand later - List negatedFunctionOperands = new ArrayList<>(1); - negatedFunctionOperands.add(functionExpression); - negatedFunctionExpression.getFunctionCall().setOperands(negatedFunctionOperands); - return negatedFunctionExpression; + return RequestUtils.getFunctionExpression(FilterKind.NOT.name(), functionExpression); } else { return functionExpression; } @@ -886,9 +865,7 @@ private static Expression compileAndExpression(SqlBasicCall andNode) { operands.add(toExpression(childNode)); } } - Expression andExpression = RequestUtils.getFunctionExpression(FilterKind.AND.name()); - andExpression.getFunctionCall().setOperands(operands); - return andExpression; + return RequestUtils.getFunctionExpression(FilterKind.AND.name(), operands); } /** @@ -904,9 +881,7 @@ private static Expression compileOrExpression(SqlBasicCall orNode) { operands.add(toExpression(childNode)); } } - Expression andExpression = RequestUtils.getFunctionExpression(FilterKind.OR.name()); - andExpression.getFunctionCall().setOperands(operands); - return andExpression; + return RequestUtils.getFunctionExpression(FilterKind.OR.name(), operands); } public static boolean isLiteralOnlyExpression(Expression e) { diff --git a/pinot-common/src/main/java/org/apache/pinot/sql/parsers/rewriter/ClpRewriter.java b/pinot-common/src/main/java/org/apache/pinot/sql/parsers/rewriter/ClpRewriter.java index 4fdddac57d79..599545897520 100644 --- a/pinot-common/src/main/java/org/apache/pinot/sql/parsers/rewriter/ClpRewriter.java +++ b/pinot-common/src/main/java/org/apache/pinot/sql/parsers/rewriter/ClpRewriter.java @@ -23,6 +23,7 @@ import com.yscope.clp.compressorfrontend.ByteSegment; import com.yscope.clp.compressorfrontend.EightByteClpEncodedSubquery; import com.yscope.clp.compressorfrontend.EightByteClpWildcardQueryEncoder; +import 
java.util.ArrayList; import java.util.List; import javax.annotation.Nullable; import org.apache.calcite.sql.SqlKind; @@ -403,58 +404,39 @@ private void rewriteCLPDecodeFunction(Expression expression) { private ClpSqlSubqueryGenerationResult convertSubqueryToSql(String logtypeColumnName, String dictionaryVarsColumnName, String encodedVarsColumnName, String wildcardQuery, int subqueryIdx, EightByteClpEncodedSubquery[] subqueries) { EightByteClpEncodedSubquery subquery = subqueries[subqueryIdx]; - + Function logtypeMatchFunction = createLogtypeMatchFunction(logtypeColumnName, subquery.getLogtypeQueryAsString(), + subquery.logtypeQueryContainsWildcards()); if (!subquery.containsVariables()) { - Function f = createLogtypeMatchFunction(logtypeColumnName, subquery.getLogtypeQueryAsString(), - subquery.logtypeQueryContainsWildcards()); - return new ClpSqlSubqueryGenerationResult(false, f); + return new ClpSqlSubqueryGenerationResult(false, logtypeMatchFunction); } - Function subqueryFunc = new Function(SqlKind.AND.name()); - - Expression e; + List subqueryFunctionOperands = new ArrayList<>(); // Add logtype query - Function f = createLogtypeMatchFunction(logtypeColumnName, subquery.getLogtypeQueryAsString(), - subquery.logtypeQueryContainsWildcards()); - e = new Expression(ExpressionType.FUNCTION); - e.setFunctionCall(f); - subqueryFunc.addToOperands(e); + subqueryFunctionOperands.add(RequestUtils.getFunctionExpression(logtypeMatchFunction)); // Add any dictionary variables int numDictVars = 0; for (ByteSegment dictVar : subquery.getDictVars()) { - f = createStringColumnMatchFunction(SqlKind.EQUALS.name(), dictionaryVarsColumnName, dictVar.toString()); - e = new Expression(ExpressionType.FUNCTION); - e.setFunctionCall(f); - subqueryFunc.addToOperands(e); - - ++numDictVars; + subqueryFunctionOperands.add(RequestUtils.getFunctionExpression( + createStringColumnMatchFunction(SqlKind.EQUALS.name(), dictionaryVarsColumnName, dictVar.toString()))); + numDictVars++; } // Add any 
encoded variables int numEncodedVars = 0; for (long encodedVar : subquery.getEncodedVars()) { - f = new Function(SqlKind.EQUALS.name()); - f.addToOperands(RequestUtils.getIdentifierExpression(encodedVarsColumnName)); - f.addToOperands(RequestUtils.getLiteralExpression(encodedVar)); - - e = new Expression(ExpressionType.FUNCTION); - e.setFunctionCall(f); - subqueryFunc.addToOperands(e); - - ++numEncodedVars; + subqueryFunctionOperands.add(RequestUtils.getFunctionExpression(SqlKind.EQUALS.name(), + RequestUtils.getIdentifierExpression(encodedVarsColumnName), RequestUtils.getLiteralExpression(encodedVar))); + numEncodedVars++; } // Add any wildcard dictionary variables for (VariableWildcardQuery varWildcardQuery : subquery.getDictVarWildcardQueries()) { - f = createStringColumnMatchFunction(_REGEXP_LIKE_LOWERCASE_FUNCTION_NAME, dictionaryVarsColumnName, - wildcardQueryToRegex(varWildcardQuery.getQuery().toString())); - e = new Expression(ExpressionType.FUNCTION); - e.setFunctionCall(f); - subqueryFunc.addToOperands(e); - - ++numDictVars; + subqueryFunctionOperands.add(RequestUtils.getFunctionExpression( + createStringColumnMatchFunction(_REGEXP_LIKE_LOWERCASE_FUNCTION_NAME, dictionaryVarsColumnName, + wildcardQueryToRegex(varWildcardQuery.getQuery().toString())))); + numDictVars++; } // Add any wildcard encoded variables @@ -464,20 +446,14 @@ private ClpSqlSubqueryGenerationResult convertSubqueryToSql(String logtypeColumn // Create call to clpEncodedVarsMatch Expression clpEncodedVarsExp = RequestUtils.getFunctionExpression( RequestUtils.canonicalizeFunctionNamePreservingSpecialKey( - TransformFunctionType.CLP_ENCODED_VARS_MATCH.getName())); - f = clpEncodedVarsExp.getFunctionCall(); - f.addToOperands(RequestUtils.getIdentifierExpression(logtypeColumnName)); - f.addToOperands(RequestUtils.getIdentifierExpression(encodedVarsColumnName)); - f.addToOperands(RequestUtils.getLiteralExpression(wildcardQuery)); - 
f.addToOperands(RequestUtils.getLiteralExpression(subqueryIdx)); + TransformFunctionType.CLP_ENCODED_VARS_MATCH.getName()), + RequestUtils.getIdentifierExpression(logtypeColumnName), + RequestUtils.getIdentifierExpression(encodedVarsColumnName), RequestUtils.getLiteralExpression(wildcardQuery), + RequestUtils.getLiteralExpression(subqueryIdx)); // Create `clpEncodedVarsMatch(...) = true` - e = RequestUtils.getFunctionExpression(SqlKind.EQUALS.name()); - f = e.getFunctionCall(); - f.addToOperands(clpEncodedVarsExp); - f.addToOperands(RequestUtils.getLiteralExpression(true)); - - subqueryFunc.addToOperands(e); + subqueryFunctionOperands.add(RequestUtils.getFunctionExpression(SqlKind.EQUALS.name(), clpEncodedVarsExp, + RequestUtils.getLiteralExpression(true))); } // We require a decompress and match in the following cases: @@ -494,7 +470,8 @@ private ClpSqlSubqueryGenerationResult convertSubqueryToSql(String logtypeColumn // value "user dv123 joined" but it could also match "user dv456 joined dv123". 
boolean requiresDecompAndMatch = !(numDictVars < 2 && numEncodedVars < 2 && !subquery.logtypeQueryContainsWildcards()); - return new ClpSqlSubqueryGenerationResult(requiresDecompAndMatch, subqueryFunc); + return new ClpSqlSubqueryGenerationResult(requiresDecompAndMatch, + RequestUtils.getFunction(SqlKind.AND.name(), subqueryFunctionOperands)); } private Function createLogtypeMatchFunction(String columnName, String query, boolean containsWildcards) { @@ -511,10 +488,8 @@ private Function createLogtypeMatchFunction(String columnName, String query, boo } private Function createStringColumnMatchFunction(String canonicalName, String columnName, String query) { - Function func = new Function(canonicalName); - func.addToOperands(RequestUtils.getIdentifierExpression(columnName)); - func.addToOperands(RequestUtils.getLiteralExpression(query)); - return func; + return RequestUtils.getFunction(canonicalName, RequestUtils.getIdentifierExpression(columnName), + RequestUtils.getLiteralExpression(query)); } /** diff --git a/pinot-common/src/main/java/org/apache/pinot/sql/parsers/rewriter/ExprMinMaxRewriter.java b/pinot-common/src/main/java/org/apache/pinot/sql/parsers/rewriter/ExprMinMaxRewriter.java index 2363ae5c42ac..62ce44540a74 100644 --- a/pinot-common/src/main/java/org/apache/pinot/sql/parsers/rewriter/ExprMinMaxRewriter.java +++ b/pinot-common/src/main/java/org/apache/pinot/sql/parsers/rewriter/ExprMinMaxRewriter.java @@ -101,14 +101,12 @@ private void appendParentExprMinMaxFunctions(boolean isMax, List sel for (Map.Entry, Set> entry : exprMinMaxFunctionMap.entrySet()) { List measuringColumns = entry.getKey(); Set projectionColumns = entry.getValue(); - Expression functionExpression = RequestUtils.getFunctionExpression(isMax ? 
EXPR_MAX_PARENT : EXPR_MIN_PARENT); List operands = new ArrayList<>(2 + measuringColumns.size() + projectionColumns.size()); operands.add(RequestUtils.getLiteralExpression((int) exprMinMaxFunctionIDMap.get(measuringColumns))); operands.add(RequestUtils.getLiteralExpression(measuringColumns.size())); operands.addAll(measuringColumns); operands.addAll(projectionColumns); - functionExpression.getFunctionCall().setOperands(operands); - selectList.add(functionExpression); + selectList.add(RequestUtils.getFunctionExpression(isMax ? EXPR_MAX_PARENT : EXPR_MIN_PARENT, operands)); } } diff --git a/pinot-common/src/main/java/org/apache/pinot/sql/parsers/rewriter/NonAggregationGroupByToDistinctQueryRewriter.java b/pinot-common/src/main/java/org/apache/pinot/sql/parsers/rewriter/NonAggregationGroupByToDistinctQueryRewriter.java index 8005dbd759b4..2a51829f7ebf 100644 --- a/pinot-common/src/main/java/org/apache/pinot/sql/parsers/rewriter/NonAggregationGroupByToDistinctQueryRewriter.java +++ b/pinot-common/src/main/java/org/apache/pinot/sql/parsers/rewriter/NonAggregationGroupByToDistinctQueryRewriter.java @@ -18,8 +18,9 @@ */ package org.apache.pinot.sql.parsers.rewriter; -import java.util.Collections; +import java.util.ArrayList; import java.util.HashSet; +import java.util.List; import java.util.Set; import org.apache.pinot.common.request.Expression; import org.apache.pinot.common.request.Function; @@ -76,9 +77,11 @@ public PinotQuery rewrite(PinotQuery pinotQuery) { } Set groupByExpressions = new HashSet<>(pinotQuery.getGroupByList()); if (selectExpressions.equals(groupByExpressions)) { - Expression distinct = RequestUtils.getFunctionExpression("distinct"); - distinct.getFunctionCall().setOperands(pinotQuery.getSelectList()); - pinotQuery.setSelectList(Collections.singletonList(distinct)); + Expression distinct = RequestUtils.getFunctionExpression("distinct", pinotQuery.getSelectList()); + // NOTE: Create an ArrayList because we might need to modify the list later + List 
newSelectList = new ArrayList<>(1); + newSelectList.add(distinct); + pinotQuery.setSelectList(newSelectList); pinotQuery.setGroupByList(null); return pinotQuery; } else { diff --git a/pinot-common/src/main/java/org/apache/pinot/sql/parsers/rewriter/OrdinalsUpdater.java b/pinot-common/src/main/java/org/apache/pinot/sql/parsers/rewriter/OrdinalsUpdater.java index 204ccca2b3da..605f9488a589 100644 --- a/pinot-common/src/main/java/org/apache/pinot/sql/parsers/rewriter/OrdinalsUpdater.java +++ b/pinot-common/src/main/java/org/apache/pinot/sql/parsers/rewriter/OrdinalsUpdater.java @@ -18,7 +18,7 @@ */ package org.apache.pinot.sql.parsers.rewriter; -import java.util.Collections; +import java.util.ArrayList; import java.util.List; import org.apache.pinot.common.request.Expression; import org.apache.pinot.common.request.Function; @@ -49,8 +49,10 @@ public PinotQuery rewrite(PinotQuery pinotQuery) { if (isNullsLast != null) { functionToSet = functionToSet.getOperands().get(0).getFunctionCall(); } - functionToSet.setOperands( - Collections.singletonList(getExpressionFromOrdinal(pinotQuery.getSelectList(), ordinal))); + // NOTE: Create an ArrayList because we might need to modify the list later + List newOperands = new ArrayList<>(1); + newOperands.add(getExpressionFromOrdinal(pinotQuery.getSelectList(), ordinal)); + functionToSet.setOperands(newOperands); } } return pinotQuery; diff --git a/pinot-common/src/main/java/org/apache/pinot/sql/parsers/rewriter/PredicateComparisonRewriter.java b/pinot-common/src/main/java/org/apache/pinot/sql/parsers/rewriter/PredicateComparisonRewriter.java index c59b5126ec4a..929ef701a3bb 100644 --- a/pinot-common/src/main/java/org/apache/pinot/sql/parsers/rewriter/PredicateComparisonRewriter.java +++ b/pinot-common/src/main/java/org/apache/pinot/sql/parsers/rewriter/PredicateComparisonRewriter.java @@ -19,8 +19,6 @@ package org.apache.pinot.sql.parsers.rewriter; import com.google.common.base.Preconditions; -import java.util.ArrayList; -import 
java.util.Arrays; import java.util.List; import org.apache.commons.lang3.EnumUtils; import org.apache.pinot.common.request.Expression; @@ -96,10 +94,7 @@ private static Expression updateFunctionExpression(Expression expression) { case AND: case OR: case NOT: - for (int i = 0; i < operands.size(); i++) { - Expression operand = operands.get(i); - operands.set(i, updatePredicate(operand)); - } + operands.replaceAll(PredicateComparisonRewriter::updatePredicate); break; case EQUALS: case NOT_EQUALS: @@ -122,8 +117,7 @@ private static Expression updateFunctionExpression(Expression expression) { // Handle predicate like 'a > b' -> 'a - b > 0' if (!secondOperand.isSetLiteral()) { - Expression minusExpression = RequestUtils.getFunctionExpression("minus"); - minusExpression.getFunctionCall().setOperands(Arrays.asList(firstOperand, secondOperand)); + Expression minusExpression = RequestUtils.getFunctionExpression("minus", firstOperand, secondOperand); operands.set(0, minusExpression); operands.set(1, RequestUtils.getLiteralExpression(0)); break; @@ -181,14 +175,8 @@ private static Expression updateFunctionExpression(Expression expression) { * @return Rewritten expression */ private static Expression convertPredicateToEqualsBooleanExpression(Expression expression) { - Expression newExpression; - newExpression = RequestUtils.getFunctionExpression(FilterKind.EQUALS.name()); - List operands = new ArrayList<>(); - operands.add(expression); - operands.add(RequestUtils.getLiteralExpression(true)); - newExpression.getFunctionCall().setOperands(operands); - - return newExpression; + return RequestUtils.getFunctionExpression(FilterKind.EQUALS.name(), expression, + RequestUtils.getLiteralExpression(true)); } /** diff --git a/pinot-common/src/test/java/org/apache/pinot/sql/parsers/CalciteSqlCompilerTest.java b/pinot-common/src/test/java/org/apache/pinot/sql/parsers/CalciteSqlCompilerTest.java index 55d607b6ed09..574d6ad429da 100644 --- 
a/pinot-common/src/test/java/org/apache/pinot/sql/parsers/CalciteSqlCompilerTest.java +++ b/pinot-common/src/test/java/org/apache/pinot/sql/parsers/CalciteSqlCompilerTest.java @@ -21,6 +21,7 @@ import java.time.Instant; import java.time.ZoneId; import java.time.format.DateTimeFormatter; +import java.util.ArrayList; import java.util.List; import java.util.concurrent.TimeUnit; import org.apache.calcite.sql.SqlNumericLiteral; @@ -48,21 +49,66 @@ public class CalciteSqlCompilerTest { private static final long ONE_HOUR_IN_MS = TimeUnit.HOURS.toMillis(1); + /* Verify all lists in PinotQuery are ArrayLists because we might need to modify them during query optimization */ + + private Expression compileToExpression(String expressionStr) { + Expression expression = CalciteSqlParser.compileToExpression(expressionStr); + verifyListInExpression(expression); + return expression; + } + + private void verifyListInExpression(Expression expression) { + Function function = expression.getFunctionCall(); + if (function != null) { + verifyListInExpressions(function.getOperands()); + } + } + + private void verifyListInExpressions(List expressions) { + Assert.assertTrue(expressions instanceof ArrayList); + for (Expression expression : expressions) { + verifyListInExpression(expression); + } + } + + private PinotQuery compileToPinotQuery(String sql) { + PinotQuery query = CalciteSqlParser.compileToPinotQuery(sql); + List selectList = query.getSelectList(); + verifyListInExpressions(selectList); + Expression filterExpression = query.getFilterExpression(); + if (filterExpression != null) { + verifyListInExpression(filterExpression); + } + List groupByList = query.getGroupByList(); + if (groupByList != null) { + verifyListInExpressions(groupByList); + } + List orderByList = query.getOrderByList(); + if (orderByList != null) { + verifyListInExpressions(orderByList); + } + Expression havingExpression = query.getHavingExpression(); + if (havingExpression != null) { + 
verifyListInExpression(havingExpression); + } + return query; + } + @Test public void testCanonicalFunctionName() { - Expression expression = CalciteSqlParser.compileToExpression("dIsTiNcT_cOuNt(AbC)"); + Expression expression = compileToExpression("dIsTiNcT_cOuNt(AbC)"); Function function = expression.getFunctionCall(); Assert.assertEquals(function.getOperator(), AggregationFunctionType.DISTINCTCOUNT.name().toLowerCase()); Assert.assertEquals(function.getOperands().size(), 1); Assert.assertEquals(function.getOperands().get(0).getIdentifier().getName(), "AbC"); - expression = CalciteSqlParser.compileToExpression("ReGeXpLiKe(AbC)"); + expression = compileToExpression("ReGeXpLiKe(AbC)"); function = expression.getFunctionCall(); Assert.assertEquals(function.getOperator(), FilterKind.REGEXP_LIKE.name()); Assert.assertEquals(function.getOperands().size(), 1); Assert.assertEquals(function.getOperands().get(0).getIdentifier().getName(), "AbC"); - expression = CalciteSqlParser.compileToExpression("aBc > DeF"); + expression = compileToExpression("aBc > DeF"); function = expression.getFunctionCall(); Assert.assertEquals(function.getOperator(), FilterKind.GREATER_THAN.name()); Assert.assertEquals(function.getOperands().size(), 2); @@ -73,7 +119,7 @@ public void testCanonicalFunctionName() { @Test public void testCaseWhenStatements() { //@formatter:off - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery( + PinotQuery pinotQuery = compileToPinotQuery( "SELECT OrderID, Quantity,\n" + "CASE\n" + " WHEN Quantity > 30 THEN 'The quantity is greater than 30'\n" @@ -102,7 +148,7 @@ public void testCaseWhenStatements() { Assert.assertEquals(caseFunc.getOperands().get(4).getLiteral().getFieldValue(), "The quantity is under 30"); //@formatter:off - pinotQuery = CalciteSqlParser.compileToPinotQuery( + pinotQuery = compileToPinotQuery( "SELECT Quantity,\n" + "SUM(CASE\n" + " WHEN Quantity > 30 THEN 3\n" @@ -141,7 +187,7 @@ public void testCaseWhenStatements() { @Test public void 
testAggregationInCaseWhenStatementsWithGroupBy() { //@formatter:off - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery( + PinotQuery pinotQuery = compileToPinotQuery( "SELECT OrderID, SUM(Quantity),\n" + "CASE\n" + " WHEN sum(Quantity) > 30 THEN 'The quantity is greater than 30'\n" @@ -168,7 +214,7 @@ public void testAggregationInCaseWhenStatementsWithGroupBy() { @Test public void testAggregationInCaseWhenStatements() { //@formatter:off - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery( + PinotQuery pinotQuery = compileToPinotQuery( "SELECT sum(Quantity),\n" + "CASE\n" + " WHEN sum(Quantity) > 30 THEN 'The quantity is greater than 30'\n" @@ -193,24 +239,22 @@ public void testAggregationInCaseWhenStatements() { @Test public void testQuotedStrings() { - PinotQuery pinotQuery = - CalciteSqlParser.compileToPinotQuery("select * from vegetables where origin = 'Martha''s Vineyard'"); + PinotQuery pinotQuery = compileToPinotQuery("select * from vegetables where origin = 'Martha''s Vineyard'"); Assert.assertEquals( pinotQuery.getFilterExpression().getFunctionCall().getOperands().get(1).getLiteral().getStringValue(), "Martha's Vineyard"); - pinotQuery = CalciteSqlParser.compileToPinotQuery("select * from vegetables where origin = 'Martha\"\"s Vineyard'"); + pinotQuery = compileToPinotQuery("select * from vegetables where origin = 'Martha\"\"s Vineyard'"); Assert.assertEquals( pinotQuery.getFilterExpression().getFunctionCall().getOperands().get(1).getLiteral().getStringValue(), "Martha\"\"s Vineyard"); - pinotQuery = - CalciteSqlParser.compileToPinotQuery("select * from vegetables where origin = \"Martha\"\"s Vineyard\""); + pinotQuery = compileToPinotQuery("select * from vegetables where origin = \"Martha\"\"s Vineyard\""); Assert.assertEquals( pinotQuery.getFilterExpression().getFunctionCall().getOperands().get(0).getFunctionCall().getOperands().get(1) .getIdentifier().getName(), "Martha\"s Vineyard"); - pinotQuery = 
CalciteSqlParser.compileToPinotQuery("select * from vegetables where origin = \"Martha''s Vineyard\""); + pinotQuery = compileToPinotQuery("select * from vegetables where origin = \"Martha''s Vineyard\""); Assert.assertEquals( pinotQuery.getFilterExpression().getFunctionCall().getOperands().get(0).getFunctionCall().getOperands().get(1) .getIdentifier().getName(), "Martha''s Vineyard"); @@ -220,28 +264,28 @@ public void testQuotedStrings() { public void testExtract() { { // Case 1 -- Year and date format ('2017-06-15') - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery("SELECT EXTRACT(YEAR FROM '2017-06-15')"); + PinotQuery pinotQuery = compileToPinotQuery("SELECT EXTRACT(YEAR FROM '2017-06-15')"); Function function = pinotQuery.getSelectList().get(0).getFunctionCall(); Assert.assertEquals(function.getOperands().get(0).getLiteral().getStringValue(), "YEAR"); Assert.assertEquals(function.getOperands().get(1).getLiteral().getStringValue(), "2017-06-15"); } { // Case 2 -- date format ('2017-06-15 09:34:21') - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery("SELECT EXTRACT(YEAR FROM '2017-06-15 09:34:21')"); + PinotQuery pinotQuery = compileToPinotQuery("SELECT EXTRACT(YEAR FROM '2017-06-15 09:34:21')"); Function function = pinotQuery.getSelectList().get(0).getFunctionCall(); Assert.assertEquals(function.getOperands().get(0).getLiteral().getStringValue(), "YEAR"); Assert.assertEquals(function.getOperands().get(1).getLiteral().getStringValue(), "2017-06-15 09:34:21"); } { // Case 3 -- Month - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery("SELECT EXTRACT(MONTH FROM '2017-06-15')"); + PinotQuery pinotQuery = compileToPinotQuery("SELECT EXTRACT(MONTH FROM '2017-06-15')"); Function function = pinotQuery.getSelectList().get(0).getFunctionCall(); Assert.assertEquals(function.getOperands().get(0).getLiteral().getStringValue(), "MONTH"); Assert.assertEquals(function.getOperands().get(1).getLiteral().getStringValue(), "2017-06-15"); } { // 
Case 4 -- Day - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery("SELECT EXTRACT(DAY FROM '2017-06-15')"); + PinotQuery pinotQuery = compileToPinotQuery("SELECT EXTRACT(DAY FROM '2017-06-15')"); Function function = pinotQuery.getSelectList().get(0).getFunctionCall(); Assert.assertEquals(function.getOperands().get(0).getLiteral().getStringValue(), "DAY"); Assert.assertEquals(function.getOperands().get(1).getLiteral().getStringValue(), "2017-06-15"); @@ -251,7 +295,7 @@ public void testExtract() { @Test public void testFilterClauses() { { - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery("select * from vegetables where a > 1.5"); + PinotQuery pinotQuery = compileToPinotQuery("select * from vegetables where a > 1.5"); Function func = pinotQuery.getFilterExpression().getFunctionCall(); Assert.assertEquals(func.getOperator(), FilterKind.GREATER_THAN.name()); Assert.assertEquals(func.getOperands().get(0).getIdentifier().getName(), "a"); @@ -259,7 +303,7 @@ public void testFilterClauses() { } { - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery("select * from vegetables where b < 100"); + PinotQuery pinotQuery = compileToPinotQuery("select * from vegetables where b < 100"); Function func = pinotQuery.getFilterExpression().getFunctionCall(); Assert.assertEquals(func.getOperator(), FilterKind.LESS_THAN.name()); Assert.assertEquals(func.getOperands().get(0).getIdentifier().getName(), "b"); @@ -267,7 +311,7 @@ public void testFilterClauses() { } { - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery("select * from vegetables where c >= 10"); + PinotQuery pinotQuery = compileToPinotQuery("select * from vegetables where c >= 10"); Function func = pinotQuery.getFilterExpression().getFunctionCall(); Assert.assertEquals(func.getOperator(), FilterKind.GREATER_THAN_OR_EQUAL.name()); Assert.assertEquals(func.getOperands().get(0).getIdentifier().getName(), "c"); @@ -275,7 +319,7 @@ public void testFilterClauses() { } { - PinotQuery 
pinotQuery = CalciteSqlParser.compileToPinotQuery("select * from vegetables where d <= 50"); + PinotQuery pinotQuery = compileToPinotQuery("select * from vegetables where d <= 50"); Function func = pinotQuery.getFilterExpression().getFunctionCall(); Assert.assertEquals(func.getOperator(), FilterKind.LESS_THAN_OR_EQUAL.name()); Assert.assertEquals(func.getOperands().get(0).getIdentifier().getName(), "d"); @@ -283,8 +327,7 @@ public void testFilterClauses() { } { - PinotQuery pinotQuery = - CalciteSqlParser.compileToPinotQuery("select * from vegetables where e BETWEEN 70 AND 80"); + PinotQuery pinotQuery = compileToPinotQuery("select * from vegetables where e BETWEEN 70 AND 80"); Function func = pinotQuery.getFilterExpression().getFunctionCall(); Assert.assertEquals(func.getOperator(), FilterKind.BETWEEN.name()); Assert.assertEquals(func.getOperands().get(0).getIdentifier().getName(), "e"); @@ -293,8 +336,7 @@ public void testFilterClauses() { } { - PinotQuery pinotQuery = - CalciteSqlParser.compileToPinotQuery("select * from vegetables where regexp_like(E, '^U.*')"); + PinotQuery pinotQuery = compileToPinotQuery("select * from vegetables where regexp_like(E, '^U.*')"); Function func = pinotQuery.getFilterExpression().getFunctionCall(); Assert.assertEquals(func.getOperator(), "REGEXP_LIKE"); Assert.assertEquals(func.getOperands().get(0).getIdentifier().getName(), "E"); @@ -302,8 +344,7 @@ public void testFilterClauses() { } { - PinotQuery pinotQuery = - CalciteSqlParser.compileToPinotQuery("select * from vegetables where g IN (12, 13, 15.2, 17)"); + PinotQuery pinotQuery = compileToPinotQuery("select * from vegetables where g IN (12, 13, 15.2, 17)"); Function func = pinotQuery.getFilterExpression().getFunctionCall(); Assert.assertEquals(func.getOperator(), FilterKind.IN.name()); Assert.assertEquals(func.getOperands().get(0).getIdentifier().getName(), "g"); @@ -314,7 +355,7 @@ public void testFilterClauses() { } { - PinotQuery pinotQuery = 
CalciteSqlParser.compileToPinotQuery("select * from vegetable where g"); + PinotQuery pinotQuery = compileToPinotQuery("select * from vegetable where g"); Function func = pinotQuery.getFilterExpression().getFunctionCall(); Assert.assertEquals(func.getOperator(), FilterKind.EQUALS.name()); Assert.assertEquals(func.getOperands().get(0).getIdentifier().getName(), "g"); @@ -322,7 +363,7 @@ public void testFilterClauses() { } { - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery("select * from vegetable where g or f = true"); + PinotQuery pinotQuery = compileToPinotQuery("select * from vegetable where g or f = true"); Function func = pinotQuery.getFilterExpression().getFunctionCall(); Assert.assertEquals(func.getOperator(), FilterKind.OR.name()); List operands = func.getOperands(); @@ -337,8 +378,7 @@ public void testFilterClauses() { } { - PinotQuery pinotQuery = - CalciteSqlParser.compileToPinotQuery("select * from vegetable where startsWith(g, 'str')"); + PinotQuery pinotQuery = compileToPinotQuery("select * from vegetable where startsWith(g, 'str')"); Function func = pinotQuery.getFilterExpression().getFunctionCall(); Assert.assertEquals(func.getOperator(), FilterKind.EQUALS.name()); Assert.assertEquals(func.getOperands().get(0).getFunctionCall().getOperator(), "startswith"); @@ -346,8 +386,8 @@ public void testFilterClauses() { } { - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery( - "select * from vegetable where startsWith(g, 'str')=true and startsWith(f, 'str')"); + PinotQuery pinotQuery = + compileToPinotQuery("select * from vegetable where startsWith(g, 'str')=true and startsWith(f, 'str')"); Function func = pinotQuery.getFilterExpression().getFunctionCall(); Assert.assertEquals(func.getOperator(), FilterKind.AND.name()); List operands = func.getOperands(); @@ -365,7 +405,7 @@ public void testFilterClauses() { } { - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery( + PinotQuery pinotQuery = compileToPinotQuery( "select * 
from vegetable where (startsWith(g, 'str')=true and startsWith(f, 'str')) AND (e and d=true)"); Function func = pinotQuery.getFilterExpression().getFunctionCall(); Assert.assertEquals(func.getOperator(), FilterKind.AND.name()); @@ -394,8 +434,7 @@ public void testFilterClauses() { } { - PinotQuery pinotQuery = - CalciteSqlParser.compileToPinotQuery("select * from vegetable where isSubnetOf('192.168.0.1/24', foo)"); + PinotQuery pinotQuery = compileToPinotQuery("select * from vegetable where isSubnetOf('192.168.0.1/24', foo)"); Function func = pinotQuery.getFilterExpression().getFunctionCall(); Assert.assertEquals(func.getOperator(), FilterKind.EQUALS.name()); List operands = func.getOperands(); @@ -405,7 +444,7 @@ public void testFilterClauses() { } { - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery( + PinotQuery pinotQuery = compileToPinotQuery( "select * from vegetable where isSubnetOf('192.168.0.1/24', foo)=true AND isSubnetOf('192.168.0.1/24', " + "foo)"); Function func = pinotQuery.getFilterExpression().getFunctionCall(); @@ -429,7 +468,7 @@ public void testFilterClauses() { @Test public void testFilterClausesWithRightExpression() { - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery("select * from vegetables where a > b"); + PinotQuery pinotQuery = compileToPinotQuery("select * from vegetables where a > b"); Function func = pinotQuery.getFilterExpression().getFunctionCall(); Assert.assertEquals(func.getOperator(), FilterKind.GREATER_THAN.name()); Assert.assertEquals(func.getOperands().get(0).getFunctionCall().getOperator(), "minus"); @@ -438,7 +477,7 @@ public void testFilterClausesWithRightExpression() { Assert.assertEquals(func.getOperands().get(0).getFunctionCall().getOperands().get(1).getIdentifier().getName(), "b"); Assert.assertEquals(func.getOperands().get(1).getLiteral().getLongValue(), 0L); - pinotQuery = CalciteSqlParser.compileToPinotQuery("select * from vegetables where 0 < a-b"); + pinotQuery = compileToPinotQuery("select 
* from vegetables where 0 < a-b"); func = pinotQuery.getFilterExpression().getFunctionCall(); Assert.assertEquals(func.getOperator(), FilterKind.GREATER_THAN.name()); Assert.assertEquals(func.getOperands().get(0).getFunctionCall().getOperator(), "minus"); @@ -448,7 +487,7 @@ public void testFilterClausesWithRightExpression() { "b"); Assert.assertEquals(func.getOperands().get(1).getLiteral().getLongValue(), 0L); - pinotQuery = CalciteSqlParser.compileToPinotQuery("select * from vegetables where b < 100 + c"); + pinotQuery = compileToPinotQuery("select * from vegetables where b < 100 + c"); func = pinotQuery.getFilterExpression().getFunctionCall(); Assert.assertEquals(func.getOperator(), FilterKind.LESS_THAN.name()); Assert.assertEquals(func.getOperands().get(0).getFunctionCall().getOperator(), "minus"); @@ -463,7 +502,7 @@ public void testFilterClausesWithRightExpression() { func.getOperands().get(0).getFunctionCall().getOperands().get(1).getFunctionCall().getOperands().get(1) .getIdentifier().getName(), "c"); Assert.assertEquals(func.getOperands().get(1).getLiteral().getLongValue(), 0L); - pinotQuery = CalciteSqlParser.compileToPinotQuery("select * from vegetables where b -(100+c)< 0"); + pinotQuery = compileToPinotQuery("select * from vegetables where b -(100+c)< 0"); func = pinotQuery.getFilterExpression().getFunctionCall(); Assert.assertEquals(func.getOperator(), FilterKind.LESS_THAN.name()); Assert.assertEquals(func.getOperands().get(0).getFunctionCall().getOperator(), "minus"); @@ -479,8 +518,7 @@ public void testFilterClausesWithRightExpression() { .getIdentifier().getName(), "c"); Assert.assertEquals(func.getOperands().get(1).getLiteral().getLongValue(), 0L); - pinotQuery = - CalciteSqlParser.compileToPinotQuery("select * from vegetables where foo1(bar1(a-b)) <= foo2(bar2(c+d))"); + pinotQuery = compileToPinotQuery("select * from vegetables where foo1(bar1(a-b)) <= foo2(bar2(c+d))"); func = pinotQuery.getFilterExpression().getFunctionCall(); 
Assert.assertEquals(func.getOperator(), FilterKind.LESS_THAN_OR_EQUAL.name()); Assert.assertEquals(func.getOperands().get(0).getFunctionCall().getOperator(), "minus"); @@ -517,8 +555,7 @@ public void testFilterClausesWithRightExpression() { .getFunctionCall().getOperands().get(0).getFunctionCall().getOperands().get(1).getIdentifier().getName(), "d"); Assert.assertEquals(func.getOperands().get(1).getLiteral().getLongValue(), 0L); - pinotQuery = - CalciteSqlParser.compileToPinotQuery("select * from vegetables where foo1(bar1(a-b)) - foo2(bar2(c+d)) <= 0"); + pinotQuery = compileToPinotQuery("select * from vegetables where foo1(bar1(a-b)) - foo2(bar2(c+d)) <= 0"); func = pinotQuery.getFilterExpression().getFunctionCall(); Assert.assertEquals(func.getOperator(), FilterKind.LESS_THAN_OR_EQUAL.name()); Assert.assertEquals(func.getOperands().get(0).getFunctionCall().getOperator(), "minus"); @@ -556,12 +593,12 @@ public void testFilterClausesWithRightExpression() { "d"); Assert.assertEquals(func.getOperands().get(1).getLiteral().getLongValue(), 0L); - pinotQuery = CalciteSqlParser.compileToPinotQuery("select * from vegetables where c >= 10"); + pinotQuery = compileToPinotQuery("select * from vegetables where c >= 10"); func = pinotQuery.getFilterExpression().getFunctionCall(); Assert.assertEquals(func.getOperator(), FilterKind.GREATER_THAN_OR_EQUAL.name()); Assert.assertEquals(func.getOperands().get(0).getIdentifier().getName(), "c"); Assert.assertEquals(func.getOperands().get(1).getLiteral().getLongValue(), 10L); - pinotQuery = CalciteSqlParser.compileToPinotQuery("select * from vegetables where 10 <= c"); + pinotQuery = compileToPinotQuery("select * from vegetables where 10 <= c"); func = pinotQuery.getFilterExpression().getFunctionCall(); Assert.assertEquals(func.getOperator(), FilterKind.GREATER_THAN_OR_EQUAL.name()); Assert.assertEquals(func.getOperands().get(0).getIdentifier().getName(), "c"); @@ -584,7 +621,7 @@ public void testInvalidFilterClauses() { private void 
testInvalidFilterClause(String filter) { try { - CalciteSqlParser.compileToPinotQuery("select * from vegetables where " + filter); + compileToPinotQuery("select * from vegetables where " + filter); } catch (SqlCompilationException e) { // Expected return; @@ -614,8 +651,7 @@ public void testTopZero() { public void testLimitOffsets() { PinotQuery pinotQuery; try { - pinotQuery = - CalciteSqlParser.compileToPinotQuery("select a, b, c from meetupRsvp order by a, b, c limit 100 offset 200"); + pinotQuery = compileToPinotQuery("select a, b, c from meetupRsvp order by a, b, c limit 100 offset 200"); } catch (SqlCompilationException e) { throw e; } @@ -626,8 +662,7 @@ public void testLimitOffsets() { Assert.assertEquals(200, pinotQuery.getOffset()); try { - pinotQuery = - CalciteSqlParser.compileToPinotQuery("select a, b, c from meetupRsvp order by a, b, c limit 200,100"); + pinotQuery = compileToPinotQuery("select a, b, c from meetupRsvp order by a, b, c limit 200,100"); } catch (SqlCompilationException e) { throw e; } @@ -643,7 +678,7 @@ public void testGroupbys() { PinotQuery pinotQuery; try { - pinotQuery = CalciteSqlParser.compileToPinotQuery( + pinotQuery = compileToPinotQuery( "select sum(rsvp_count), count(*), group_city from meetupRsvp group by group_city order by sum(rsvp_count) " + "limit 10"); } catch (SqlCompilationException e) { @@ -660,7 +695,7 @@ public void testGroupbys() { Assert.assertEquals(10, pinotQuery.getLimit()); try { - pinotQuery = CalciteSqlParser.compileToPinotQuery( + pinotQuery = compileToPinotQuery( "select sum(rsvp_count), count(*) from meetupRsvp group by group_city order by sum(rsvp_count) limit 10"); } catch (SqlCompilationException e) { throw e; @@ -676,7 +711,7 @@ public void testGroupbys() { Assert.assertEquals(10, pinotQuery.getLimit()); try { - pinotQuery = CalciteSqlParser.compileToPinotQuery( + pinotQuery = compileToPinotQuery( "select group_city, sum(rsvp_count), count(*) from meetupRsvp group by group_city order by 
sum(rsvp_count)," + " count(*) limit 10"); } catch (SqlCompilationException e) { @@ -703,10 +738,9 @@ public void testGroupbys() { // nested functions in group by try { - pinotQuery = CalciteSqlParser.compileToPinotQuery( - "select concat(upper(playerName), lower(teamID), '-') playerTeam, " - + "upper(league) leagueUpper, count(playerName) cnt from baseballStats group by playerTeam, lower" - + "(teamID), leagueUpper having cnt > 1 order by cnt desc limit 10"); + pinotQuery = compileToPinotQuery("select concat(upper(playerName), lower(teamID), '-') playerTeam, " + + "upper(league) leagueUpper, count(playerName) cnt from baseballStats group by playerTeam, lower" + + "(teamID), leagueUpper having cnt > 1 order by cnt desc limit 10"); } catch (SqlCompilationException e) { throw e; } @@ -723,7 +757,7 @@ public void testGroupbys() { private void assertCompilationFails(String query) { try { - CalciteSqlParser.compileToPinotQuery(query); + compileToPinotQuery(query); } catch (SqlCompilationException e) { // Expected return; @@ -735,7 +769,7 @@ private void assertCompilationFails(String query) { private void testTopZeroFor(String s, final int expectedTopN, boolean parseException) { PinotQuery pinotQuery; try { - pinotQuery = CalciteSqlParser.compileToPinotQuery(s); + pinotQuery = compileToPinotQuery(s); } catch (SqlCompilationException e) { if (parseException) { return; @@ -766,7 +800,7 @@ public void testParseExceptionHasCharacterPosition() { final String query = "select foo from bar where baz ? 
2"; try { - CalciteSqlParser.compileToPinotQuery(query); + compileToPinotQuery(query); } catch (SqlCompilationException e) { // Expected Assert.assertTrue(e.getCause().getMessage().contains("at line 1, column 31."), @@ -780,11 +814,10 @@ public void testParseExceptionHasCharacterPosition() { @Test public void testCStyleInequalityOperator() { - PinotQuery pinotQuery = - CalciteSqlParser.compileToPinotQuery("select * from vegetables where name <> 'Brussels sprouts'"); + PinotQuery pinotQuery = compileToPinotQuery("select * from vegetables where name <> 'Brussels sprouts'"); Assert.assertEquals(pinotQuery.getFilterExpression().getFunctionCall().getOperator(), "NOT_EQUALS"); - pinotQuery = CalciteSqlParser.compileToPinotQuery("select * from vegetables where name != 'Brussels sprouts'"); + pinotQuery = compileToPinotQuery("select * from vegetables where name != 'Brussels sprouts'"); Assert.assertEquals(pinotQuery.getFilterExpression().getFunctionCall().getOperator(), "NOT_EQUALS"); } @@ -792,18 +825,17 @@ public void testCStyleInequalityOperator() { @Deprecated // TODO: to be removed once OPTIONS REGEX match is deprecated public void testQueryOptions() { - PinotQuery pinotQuery = - CalciteSqlParser.compileToPinotQuery("select * from vegetables where name <> 'Brussels sprouts'"); + PinotQuery pinotQuery = compileToPinotQuery("select * from vegetables where name <> 'Brussels sprouts'"); Assert.assertEquals(pinotQuery.getQueryOptionsSize(), 0); Assert.assertNull(pinotQuery.getQueryOptions()); - pinotQuery = CalciteSqlParser.compileToPinotQuery( - "select * from vegetables where name <> 'Brussels sprouts' OPTION (delicious=yes)"); + pinotQuery = + compileToPinotQuery("select * from vegetables where name <> 'Brussels sprouts' OPTION (delicious=yes)"); Assert.assertEquals(pinotQuery.getQueryOptionsSize(), 1); Assert.assertTrue(pinotQuery.getQueryOptions().containsKey("delicious")); Assert.assertEquals(pinotQuery.getQueryOptions().get("delicious"), "yes"); - pinotQuery = 
CalciteSqlParser.compileToPinotQuery( + pinotQuery = compileToPinotQuery( "select * from vegetables where name <> 'Brussels sprouts' OPTION (delicious=yes, foo=1234, bar='potato')"); Assert.assertEquals(pinotQuery.getQueryOptionsSize(), 3); Assert.assertTrue(pinotQuery.getQueryOptions().containsKey("delicious")); @@ -813,7 +845,7 @@ public void testQueryOptions() { // Assert that wrongly inserted query option will not be parsed. try { - CalciteSqlParser.compileToPinotQuery( + compileToPinotQuery( "select * from vegetables where name <> 'Brussels sprouts' OPTION (delicious=yes) option(foo=1234) option" + "(bar='potato')"); } catch (SqlCompilationException e) { @@ -821,7 +853,7 @@ public void testQueryOptions() { Assert.assertTrue(e.getCause().getMessage().contains("OPTION")); } try { - CalciteSqlParser.compileToPinotQuery("select * from vegetables where name <> 'Brussels OPTION (delicious=yes)"); + compileToPinotQuery("select * from vegetables where name <> 'Brussels OPTION (delicious=yes)"); } catch (SqlCompilationException e) { Assert.assertTrue(e.getCause() instanceof ParseException); } @@ -829,18 +861,16 @@ public void testQueryOptions() { @Test public void testQuerySetOptions() { - PinotQuery pinotQuery = - CalciteSqlParser.compileToPinotQuery("select * from vegetables where name <> 'Brussels sprouts'"); + PinotQuery pinotQuery = compileToPinotQuery("select * from vegetables where name <> 'Brussels sprouts'"); Assert.assertEquals(pinotQuery.getQueryOptionsSize(), 0); Assert.assertNull(pinotQuery.getQueryOptions()); - pinotQuery = CalciteSqlParser.compileToPinotQuery( - "SET delicious='yes'; select * from vegetables where name <> 'Brussels sprouts'"); + pinotQuery = compileToPinotQuery("SET delicious='yes'; select * from vegetables where name <> 'Brussels sprouts'"); Assert.assertEquals(pinotQuery.getQueryOptionsSize(), 1); Assert.assertTrue(pinotQuery.getQueryOptions().containsKey("delicious")); Assert.assertEquals(pinotQuery.getQueryOptions().get("delicious"), 
"yes"); - pinotQuery = CalciteSqlParser.compileToPinotQuery("SET delicious='yes'; SET foo='1234'; SET bar='''potato''';" + pinotQuery = compileToPinotQuery("SET delicious='yes'; SET foo='1234'; SET bar='''potato''';" + "select * from vegetables where name <> 'Brussels sprouts' "); Assert.assertEquals(pinotQuery.getQueryOptionsSize(), 3); Assert.assertTrue(pinotQuery.getQueryOptions().containsKey("delicious")); @@ -848,7 +878,7 @@ public void testQuerySetOptions() { Assert.assertEquals(pinotQuery.getQueryOptions().get("foo"), "1234"); Assert.assertEquals(pinotQuery.getQueryOptions().get("bar"), "'potato'"); - pinotQuery = CalciteSqlParser.compileToPinotQuery("SET delicious='yes'; SET foo='1234'; " + pinotQuery = compileToPinotQuery("SET delicious='yes'; SET foo='1234'; " + "SET bar='''potato'''; select * from vegetables where name <> 'Brussels sprouts' "); Assert.assertEquals(pinotQuery.getQueryOptionsSize(), 3); Assert.assertTrue(pinotQuery.getQueryOptions().containsKey("delicious")); @@ -856,7 +886,7 @@ public void testQuerySetOptions() { Assert.assertEquals(pinotQuery.getQueryOptions().get("foo"), "1234"); Assert.assertEquals(pinotQuery.getQueryOptions().get("bar"), "'potato'"); - pinotQuery = CalciteSqlParser.compileToPinotQuery("SET delicious='yes'; SET foo='1234'; " + pinotQuery = compileToPinotQuery("SET delicious='yes'; SET foo='1234'; " + "select * from vegetables where name <> 'Brussels sprouts'; SET bar='''potato'''; "); Assert.assertEquals(pinotQuery.getQueryOptionsSize(), 3); Assert.assertTrue(pinotQuery.getQueryOptions().containsKey("delicious")); @@ -866,23 +896,21 @@ public void testQuerySetOptions() { // test invalid options try { - CalciteSqlParser.compileToPinotQuery( - "select * from vegetables SET delicious='yes', foo='1234' where name <> 'Brussels sprouts'"); + compileToPinotQuery("select * from vegetables SET delicious='yes', foo='1234' where name <> 'Brussels sprouts'"); Assert.fail("SQL should not be compiled"); } catch 
(SqlCompilationException sce) { // expected. } try { - CalciteSqlParser.compileToPinotQuery( - "select * from vegetables where name <> 'Brussels sprouts'; SET (delicious='yes', foo=1234)"); + compileToPinotQuery("select * from vegetables where name <> 'Brussels sprouts'; SET (delicious='yes', foo=1234)"); Assert.fail("SQL should not be compiled"); } catch (SqlCompilationException sce) { // expected. } try { - CalciteSqlParser.compileToPinotQuery( + compileToPinotQuery( "select * from vegetables where name <> 'Brussels sprouts'; SET (delicious='yes', foo=1234); select * from " + "meat"); Assert.fail("SQL should not be compiled"); @@ -934,15 +962,15 @@ public void testRemoveComments() { } private void testRemoveComments(String sqlWithComments, String expectedSqlWithoutComments) { - PinotQuery commentedResult = CalciteSqlParser.compileToPinotQuery(sqlWithComments); - PinotQuery expectedResult = CalciteSqlParser.compileToPinotQuery(expectedSqlWithoutComments); + PinotQuery commentedResult = compileToPinotQuery(sqlWithComments); + PinotQuery expectedResult = compileToPinotQuery(expectedSqlWithoutComments); Assert.assertEquals(commentedResult, expectedResult); } @Test public void testIdentifierQuoteCharacter() { - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery( - "select avg(attributes.age) as avg_age from person group by attributes.address_city"); + PinotQuery pinotQuery = + compileToPinotQuery("select avg(attributes.age) as avg_age from person group by attributes.address_city"); Assert.assertEquals( pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(0).getFunctionCall().getOperands().get(0) .getIdentifier().getName(), "attributes.age"); @@ -955,8 +983,7 @@ public void testStringLiteral() { assertCompilationFails("SELECT 'foo' FROM table"); // Allow string literal column in aggregation and group-by query - PinotQuery pinotQuery = - CalciteSqlParser.compileToPinotQuery("SELECT SUM('foo'), MAX(bar) FROM myTable GROUP BY 'foo', bar"); + 
PinotQuery pinotQuery = compileToPinotQuery("SELECT SUM('foo'), MAX(bar) FROM myTable GROUP BY 'foo', bar"); List selectFunctionList = pinotQuery.getSelectList(); Assert.assertEquals(selectFunctionList.size(), 2); Assert.assertEquals(selectFunctionList.get(0).getFunctionCall().getOperands().get(0).getLiteral().getStringValue(), @@ -969,8 +996,7 @@ public void testStringLiteral() { Assert.assertEquals(groupbyList.get(1).getIdentifier().getName(), "bar"); // For UDF, string literal won't be treated as column but as LITERAL - pinotQuery = CalciteSqlParser.compileToPinotQuery( - "SELECT SUM(ADD(foo, 'bar')) FROM myTable GROUP BY sub(foo, bar), SUB(BAR, FOO)"); + pinotQuery = compileToPinotQuery("SELECT SUM(ADD(foo, 'bar')) FROM myTable GROUP BY sub(foo, bar), SUB(BAR, FOO)"); selectFunctionList = pinotQuery.getSelectList(); Assert.assertEquals(selectFunctionList.size(), 1); Assert.assertEquals(selectFunctionList.get(0).getFunctionCall().getOperator(), "sum"); @@ -1000,8 +1026,7 @@ public void testStringLiteral() { @Test public void testFilterUdf() { - PinotQuery pinotQuery = - CalciteSqlParser.compileToPinotQuery("select count(*) from baseballStats where DIV(numberOfGames,10) = 100"); + PinotQuery pinotQuery = compileToPinotQuery("select count(*) from baseballStats where DIV(numberOfGames,10) = 100"); Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator(), "count"); Assert.assertEquals( pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(0).getIdentifier().getName(), "*"); @@ -1017,7 +1042,7 @@ public void testFilterUdf() { Assert.assertEquals( pinotQuery.getFilterExpression().getFunctionCall().getOperands().get(1).getLiteral().getLongValue(), 100); - pinotQuery = CalciteSqlParser.compileToPinotQuery( + pinotQuery = compileToPinotQuery( "SELECT count(*) FROM mytable WHERE timeConvert(DaysSinceEpoch,'DAYS','SECONDS') = 1394323200"); Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator(), 
"count"); Assert.assertEquals( @@ -1042,8 +1067,8 @@ public void testFilterUdf() { @Test public void testSelectionTransformFunction() { - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery( - " select mapKey(mapField,k1) from baseballStats where mapKey(mapField,k1) = 'v1'"); + PinotQuery pinotQuery = + compileToPinotQuery(" select mapKey(mapField,k1) from baseballStats where mapKey(mapField,k1) = 'v1'"); Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator(), "mapkey"); Assert.assertEquals( pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(0).getIdentifier().getName(), "mapField"); @@ -1067,7 +1092,7 @@ public void testSelectionTransformFunction() { @Test public void testTimeTransformFunction() { PinotQuery pinotQuery = - CalciteSqlParser.compileToPinotQuery(" select hour(ts), d1, sum(m1) from baseballStats group by hour(ts), d1"); + compileToPinotQuery(" select hour(ts), d1, sum(m1) from baseballStats group by hour(ts), d1"); Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator(), "hour"); Assert.assertEquals( pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(0).getIdentifier().getName(), "ts"); @@ -1083,7 +1108,7 @@ public void testTimeTransformFunction() { public void testSqlDistinctQueryCompilation() { // test single column DISTINCT String sql = "SELECT DISTINCT c1 FROM foo"; - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(sql); + PinotQuery pinotQuery = compileToPinotQuery(sql); List selectListExpressions = pinotQuery.getSelectList(); Assert.assertEquals(selectListExpressions.size(), 1); Assert.assertEquals(selectListExpressions.get(0).getType(), ExpressionType.FUNCTION); @@ -1097,7 +1122,7 @@ public void testSqlDistinctQueryCompilation() { // test multi column DISTINCT sql = "SELECT DISTINCT c1, c2 FROM foo"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(sql); + pinotQuery = compileToPinotQuery(sql); selectListExpressions = 
pinotQuery.getSelectList(); Assert.assertEquals(selectListExpressions.size(), 1); Assert.assertEquals(selectListExpressions.get(0).getType(), ExpressionType.FUNCTION); @@ -1113,7 +1138,7 @@ public void testSqlDistinctQueryCompilation() { // test multi column DISTINCT with filter sql = "SELECT DISTINCT c1, c2, c3 FROM foo WHERE c3 > 100"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(sql); + pinotQuery = compileToPinotQuery(sql); selectListExpressions = pinotQuery.getSelectList(); Assert.assertEquals(selectListExpressions.size(), 1); @@ -1138,7 +1163,7 @@ public void testSqlDistinctQueryCompilation() { // not supported by Calcite SQL (this is in compliance with SQL standard) try { sql = "SELECT sum(c1), DISTINCT c2 FROM foo"; - CalciteSqlParser.compileToPinotQuery(sql); + compileToPinotQuery(sql); Assert.fail("Query should have failed compilation"); } catch (Exception e) { Assert.assertTrue(e instanceof SqlCompilationException); @@ -1147,7 +1172,7 @@ public void testSqlDistinctQueryCompilation() { // not supported by Calcite SQL (this is in compliance with SQL standard) try { sql = "SELECT c1, DISTINCT c2 FROM foo"; - CalciteSqlParser.compileToPinotQuery(sql); + compileToPinotQuery(sql); Assert.fail("Query should have failed compilation"); } catch (Exception e) { Assert.assertTrue(e instanceof SqlCompilationException); @@ -1156,7 +1181,7 @@ public void testSqlDistinctQueryCompilation() { // not supported by Calcite SQL (this is in compliance with SQL standard) try { sql = "SELECT DIV(c1,c2), DISTINCT c3 FROM foo"; - CalciteSqlParser.compileToPinotQuery(sql); + compileToPinotQuery(sql); Assert.fail("Query should have failed compilation"); } catch (Exception e) { Assert.assertTrue(e instanceof SqlCompilationException); @@ -1173,7 +1198,7 @@ public void testSqlDistinctQueryCompilation() { // transform try { sql = "SELECT DISTINCT c1, sum(c2) FROM foo"; - CalciteSqlParser.compileToPinotQuery(sql); + compileToPinotQuery(sql); Assert.fail("Query should have failed 
compilation"); } catch (Exception e) { Assert.assertTrue(e instanceof SqlCompilationException); @@ -1184,7 +1209,7 @@ public void testSqlDistinctQueryCompilation() { // same reason as above try { sql = "SELECT DISTINCT sum(c1) FROM foo"; - CalciteSqlParser.compileToPinotQuery(sql); + compileToPinotQuery(sql); Assert.fail("Query should have failed compilation"); } catch (Exception e) { Assert.assertTrue(e instanceof SqlCompilationException); @@ -1195,7 +1220,7 @@ public void testSqlDistinctQueryCompilation() { // Pinot currently does not support DISTINCT * syntax try { sql = "SELECT DISTINCT * FROM foo"; - CalciteSqlParser.compileToPinotQuery(sql); + compileToPinotQuery(sql); Assert.fail("Query should have failed compilation"); } catch (Exception e) { Assert.assertTrue(e instanceof SqlCompilationException); @@ -1207,7 +1232,7 @@ public void testSqlDistinctQueryCompilation() { // Pinot currently does not support DISTINCT * syntax try { sql = "SELECT DISTINCT *, C1 FROM foo"; - CalciteSqlParser.compileToPinotQuery(sql); + compileToPinotQuery(sql); Assert.fail("Query should have failed compilation"); } catch (Exception e) { Assert.assertTrue(e instanceof SqlCompilationException); @@ -1219,7 +1244,7 @@ public void testSqlDistinctQueryCompilation() { // Pinot currently does not support GROUP BY with DISTINCT try { sql = "SELECT DISTINCT C1, C2 FROM foo GROUP BY C1"; - CalciteSqlParser.compileToPinotQuery(sql); + compileToPinotQuery(sql); Assert.fail("Query should have failed compilation"); } catch (Exception e) { Assert.assertTrue(e instanceof SqlCompilationException); @@ -1231,7 +1256,7 @@ public void testSqlDistinctQueryCompilation() { // test DISTINCT with single transform function sql = "SELECT DISTINCT add(col1,col2) FROM foo"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(sql); + pinotQuery = compileToPinotQuery(sql); selectListExpressions = pinotQuery.getSelectList(); Assert.assertEquals(selectListExpressions.size(), 1); 
Assert.assertEquals(selectListExpressions.get(0).getType(), ExpressionType.FUNCTION); @@ -1250,7 +1275,7 @@ public void testSqlDistinctQueryCompilation() { // multi-column distinct with multiple transform functions sql = "SELECT DISTINCT add(div(col1, col2), mul(col3, col4)), sub(col3, col4) FROM foo"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(sql); + pinotQuery = compileToPinotQuery(sql); selectListExpressions = pinotQuery.getSelectList(); Assert.assertEquals(selectListExpressions.size(), 1); Assert.assertEquals(selectListExpressions.get(0).getType(), ExpressionType.FUNCTION); @@ -1293,7 +1318,7 @@ public void testSqlDistinctQueryCompilation() { // multi-column distinct with multiple transform columns and additional identifiers sql = "SELECT DISTINCT add(div(col1, col2), mul(col3, col4)), sub(col3, col4), col5, col6 FROM foo"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(sql); + pinotQuery = compileToPinotQuery(sql); selectListExpressions = pinotQuery.getSelectList(); Assert.assertEquals(selectListExpressions.size(), 1); Assert.assertEquals(selectListExpressions.get(0).getType(), ExpressionType.FUNCTION); @@ -1349,7 +1374,7 @@ public void testQueryValidation() { String sql = "select group_country, sum(rsvp_count), count(*) from meetupRsvp group by group_city, group_country ORDER BY " + "sum(rsvp_count), count(*) limit 50"; - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(sql); + PinotQuery pinotQuery = compileToPinotQuery(sql); Assert.assertEquals(pinotQuery.getGroupByListSize(), 2); Assert.assertEquals(pinotQuery.getSelectListSize(), 3); @@ -1357,7 +1382,7 @@ public void testQueryValidation() { try { sql = "select group_city, group_country, sum(rsvp_count), count(*) from meetupRsvp group by group_country ORDER " + "BY sum(rsvp_count), count(*) limit 50"; - CalciteSqlParser.compileToPinotQuery(sql); + compileToPinotQuery(sql); Assert.fail("Query should have failed compilation"); } catch (Exception e) { Assert.assertTrue(e instanceof 
SqlCompilationException); @@ -1367,7 +1392,7 @@ public void testQueryValidation() { // Valid groupBy non-aggregate function should pass. sql = "select dateConvert(secondsSinceEpoch), sum(rsvp_count), count(*) from meetupRsvp group by dateConvert" + "(secondsSinceEpoch) limit 50"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(sql); + pinotQuery = compileToPinotQuery(sql); Assert.assertEquals(pinotQuery.getGroupByListSize(), 1); Assert.assertEquals(pinotQuery.getSelectListSize(), 3); @@ -1375,7 +1400,7 @@ public void testQueryValidation() { try { sql = "select secondsSinceEpoch, dateConvert(secondsSinceEpoch), sum(rsvp_count), count(*) from meetupRsvp" + " group by dateConvert(secondsSinceEpoch) limit 50"; - CalciteSqlParser.compileToPinotQuery(sql); + compileToPinotQuery(sql); Assert.fail("Query should have failed compilation"); } catch (Exception e) { Assert.assertTrue(e instanceof SqlCompilationException); @@ -1386,7 +1411,7 @@ public void testQueryValidation() { try { sql = "select sum(rsvp_count), count(*) from meetupRsvp group by group_country, sum(rsvp_count), count(*) limit " + "50"; - CalciteSqlParser.compileToPinotQuery(sql); + compileToPinotQuery(sql); Assert.fail("Query should have failed compilation"); } catch (Exception e) { Assert.assertTrue(e instanceof SqlCompilationException); @@ -1401,7 +1426,7 @@ public void testAliasQuery() { // Valid alias in query. sql = "select secondsSinceEpoch, sum(rsvp_count) as sum_rsvp_count, count(*) as cnt from meetupRsvp" + " group by secondsSinceEpoch order by cnt, sum_rsvp_count DESC limit 50"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(sql); + pinotQuery = compileToPinotQuery(sql); Assert.assertEquals(pinotQuery.getSelectListSize(), 3); Assert.assertEquals(pinotQuery.getGroupByListSize(), 1); Assert.assertEquals(pinotQuery.getOrderByListSize(), 2); @@ -1423,7 +1448,7 @@ public void testAliasQuery() { // Valid mixed alias expressions in query. 
sql = "select secondsSinceEpoch, sum(rsvp_count), count(*) as cnt from meetupRsvp group by secondsSinceEpoch" + " order by cnt, sum(rsvp_count) DESC limit 50"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(sql); + pinotQuery = compileToPinotQuery(sql); Assert.assertEquals(pinotQuery.getSelectListSize(), 3); Assert.assertEquals(pinotQuery.getGroupByListSize(), 1); Assert.assertEquals(pinotQuery.getOrderByListSize(), 2); @@ -1445,7 +1470,7 @@ public void testAliasQuery() { sql = "select secondsSinceEpoch/86400 AS daysSinceEpoch, sum(rsvp_count) as sum_rsvp_count, count(*) as cnt" + " from meetupRsvp where daysSinceEpoch = 18523 group by daysSinceEpoch order by cnt, sum_rsvp_count DESC" + " limit 50"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(sql); + pinotQuery = compileToPinotQuery(sql); Assert.assertEquals(pinotQuery.getSelectListSize(), 3); // Alias should not be applied to filter Assert.assertEquals(pinotQuery.getFilterExpression().getFunctionCall().getOperator(), FilterKind.EQUALS.name()); @@ -1466,7 +1491,7 @@ public void testAliasQuery() { // Invalid groupBy clause shouldn't contain aggregate expression, like sum(rsvp_count), count(*). 
try { sql = "select sum(rsvp_count), count(*) as cnt from meetupRsvp group by group_country, cnt limit 50"; - CalciteSqlParser.compileToPinotQuery(sql); + compileToPinotQuery(sql); Assert.fail("Query should have failed compilation"); } catch (Exception e) { Assert.assertTrue(e instanceof SqlCompilationException); @@ -1478,7 +1503,7 @@ public void testAliasQuery() { public void testAliasInSelection() { // Alias should not be applied String sql = "SELECT C1 AS ALIAS_C1, C2 AS ALIAS_C2, ALIAS_C1 + ALIAS_C2 FROM Foo"; - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(sql); + PinotQuery pinotQuery = compileToPinotQuery(sql); Assert.assertEquals(pinotQuery.getSelectListSize(), 3); Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator(), "as"); Assert.assertEquals( @@ -1504,7 +1529,7 @@ public void testSameAliasInSelection() { String sql; PinotQuery pinotQuery; sql = "SELECT C1 AS C1, C2 AS C2 FROM Foo"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(sql); + pinotQuery = compileToPinotQuery(sql); Assert.assertEquals(pinotQuery.getSelectListSize(), 2); Assert.assertEquals(pinotQuery.getSelectList().get(0).getIdentifier().getName(), "C1"); Assert.assertEquals(pinotQuery.getSelectList().get(1).getIdentifier().getName(), "C2"); @@ -1514,7 +1539,7 @@ public void testSameAliasInSelection() { public void testAliasInFilter() { // Alias should not be applied String sql = "SELECT C1 AS ALIAS_CI FROM Foo WHERE ALIAS_CI > 10"; - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(sql); + PinotQuery pinotQuery = compileToPinotQuery(sql); Assert.assertEquals( pinotQuery.getFilterExpression().getFunctionCall().getOperands().get(0).getIdentifier().getName(), "ALIAS_CI"); } @@ -1522,7 +1547,7 @@ public void testAliasInFilter() { @Test public void testColumnOverride() { String sql = "SELECT C1 + 1 AS C1, COUNT(*) AS cnt FROM Foo GROUP BY 1"; - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(sql); + PinotQuery pinotQuery 
= compileToPinotQuery(sql); Assert.assertEquals(pinotQuery.getGroupByList().get(0).getFunctionCall().getOperator(), "plus"); Assert.assertEquals( pinotQuery.getGroupByList().get(0).getFunctionCall().getOperands().get(0).getIdentifier().getName(), "C1"); @@ -1532,7 +1557,7 @@ public void testColumnOverride() { @Test public void testArithmeticOperator() { - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery("select a,b+2,c*5,(d+5)*2 from myTable"); + PinotQuery pinotQuery = compileToPinotQuery("select a,b+2,c*5,(d+5)*2 from myTable"); Assert.assertEquals(pinotQuery.getSelectListSize(), 4); Assert.assertEquals(pinotQuery.getSelectList().get(1).getFunctionCall().getOperator(), "plus"); Assert.assertEquals( @@ -1557,7 +1582,7 @@ public void testArithmeticOperator() { Assert.assertEquals( pinotQuery.getSelectList().get(3).getFunctionCall().getOperands().get(1).getLiteral().getLongValue(), 2); - pinotQuery = CalciteSqlParser.compileToPinotQuery("select a % 200 + b * 5 from myTable"); + pinotQuery = compileToPinotQuery("select a % 200 + b * 5 from myTable"); Assert.assertEquals(pinotQuery.getSelectListSize(), 1); Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator(), "plus"); Assert.assertEquals( @@ -1588,7 +1613,7 @@ public void testArithmeticOperator() { public void testReservedKeywords() { // min, max, avg, sum, value, count, groups - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery( + PinotQuery pinotQuery = compileToPinotQuery( "select max(value) as max, min(value) as min, sum(value) as sum, count(*) as count, avg(value) as avg from " + "myTable where groups = 'foo'"); Assert.assertEquals(pinotQuery.getSelectListSize(), 5); @@ -1644,7 +1669,7 @@ public void testReservedKeywords() { pinotQuery.getFilterExpression().getFunctionCall().getOperands().get(1).getLiteral().getStringValue(), "foo"); // language, module, return, position, system - pinotQuery = CalciteSqlParser.compileToPinotQuery( + pinotQuery = 
compileToPinotQuery( "select * from myTable where (language = 'en' or return > 100) and position < 10 order by module, system desc"); Assert.assertEquals(pinotQuery.getSelectListSize(), 1); Assert.assertEquals(pinotQuery.getFilterExpression().getFunctionCall().getOperator(), "AND"); @@ -1664,7 +1689,7 @@ public void testReservedKeywords() { // table - need to escape try { - CalciteSqlParser.compileToPinotQuery("select count(*) from myTable where table = 'foo'"); + compileToPinotQuery("select count(*) from myTable where table = 'foo'"); Assert.fail("Query should have failed to compile"); } catch (Exception e) { Assert.assertTrue(e instanceof SqlCompilationException); @@ -1672,7 +1697,7 @@ public void testReservedKeywords() { } // date - need to escape try { - CalciteSqlParser.compileToPinotQuery("select count(*) from myTable group by Date"); + compileToPinotQuery("select count(*) from myTable group by Date"); Assert.fail("Query should have failed to compile"); } catch (Exception e) { Assert.assertTrue(e instanceof SqlCompilationException); @@ -1680,7 +1705,7 @@ public void testReservedKeywords() { // timestamp - need to escape try { - CalciteSqlParser.compileToPinotQuery("select count(*) from myTable where timestamp < 1000"); + compileToPinotQuery("select count(*) from myTable where timestamp < 1000"); Assert.fail("Query should have failed to compile"); } catch (Exception e) { Assert.assertTrue(e instanceof SqlCompilationException); @@ -1688,7 +1713,7 @@ public void testReservedKeywords() { // time - need to escape try { - CalciteSqlParser.compileToPinotQuery("select count(*) from myTable where time > 100"); + compileToPinotQuery("select count(*) from myTable where time > 100"); Assert.fail("Query should have failed to compile"); } catch (Exception e) { Assert.assertTrue(e instanceof SqlCompilationException); @@ -1696,14 +1721,14 @@ public void testReservedKeywords() { // group - need to escape try { - CalciteSqlParser.compileToPinotQuery("select group from myTable 
where bar = 'foo'"); + compileToPinotQuery("select group from myTable where bar = 'foo'"); Assert.fail("Query should have failed to compile"); } catch (Exception e) { Assert.assertTrue(e instanceof SqlCompilationException); } // escaping the above works - pinotQuery = CalciteSqlParser.compileToPinotQuery( + pinotQuery = compileToPinotQuery( "select sum(foo) from \"table\" where \"Date\" = 2019 and (\"timestamp\" < 100 or \"time\" > 200) group by " + "\"group\""); Assert.assertEquals(pinotQuery.getSelectListSize(), 1); @@ -1722,23 +1747,23 @@ public void testReservedKeywords() { @Test public void testCastTransformation() { - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery("select CAST(25.65 AS int) from myTable"); + PinotQuery pinotQuery = compileToPinotQuery("select CAST(25.65 AS int) from myTable"); Assert.assertEquals(pinotQuery.getSelectListSize(), 1); Assert.assertEquals(pinotQuery.getSelectList().get(0).getLiteral().getLongValue(), 25); - pinotQuery = CalciteSqlParser.compileToPinotQuery("SELECT CAST('20170825' AS LONG) from myTable"); + pinotQuery = compileToPinotQuery("SELECT CAST('20170825' AS LONG) from myTable"); Assert.assertEquals(pinotQuery.getSelectListSize(), 1); Assert.assertEquals(pinotQuery.getSelectList().get(0).getLiteral().getLongValue(), 20170825); - pinotQuery = CalciteSqlParser.compileToPinotQuery("SELECT CAST(20170825.0 AS Float) from myTable"); + pinotQuery = compileToPinotQuery("SELECT CAST(20170825.0 AS Float) from myTable"); Assert.assertEquals(pinotQuery.getSelectListSize(), 1); Assert.assertEquals((float) pinotQuery.getSelectList().get(0).getLiteral().getDoubleValue(), 20170825.0F); - pinotQuery = CalciteSqlParser.compileToPinotQuery("SELECT CAST(20170825.0 AS dOuble) from myTable"); + pinotQuery = compileToPinotQuery("SELECT CAST(20170825.0 AS dOuble) from myTable"); Assert.assertEquals(pinotQuery.getSelectListSize(), 1); Assert.assertEquals((float) pinotQuery.getSelectList().get(0).getLiteral().getDoubleValue(), 
20170825.0F); - pinotQuery = CalciteSqlParser.compileToPinotQuery("SELECT CAST(column1 AS STRING) from myTable"); + pinotQuery = compileToPinotQuery("SELECT CAST(column1 AS STRING) from myTable"); Assert.assertEquals(pinotQuery.getSelectListSize(), 1); Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator(), "cast"); Assert.assertEquals( @@ -1747,7 +1772,7 @@ public void testCastTransformation() { pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(1).getLiteral().getStringValue(), "STRING"); - pinotQuery = CalciteSqlParser.compileToPinotQuery("SELECT CAST(column1 AS varchar) from myTable"); + pinotQuery = compileToPinotQuery("SELECT CAST(column1 AS varchar) from myTable"); Assert.assertEquals(pinotQuery.getSelectListSize(), 1); Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator(), "cast"); Assert.assertEquals( @@ -1756,7 +1781,7 @@ public void testCastTransformation() { pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(1).getLiteral().getStringValue(), "VARCHAR"); - pinotQuery = CalciteSqlParser.compileToPinotQuery( + pinotQuery = compileToPinotQuery( "SELECT SUM(CAST(CAST(ArrTime AS STRING) AS LONG)) FROM mytable WHERE DaysSinceEpoch <> 16312 AND Carrier = " + "'DL'"); Assert.assertEquals(pinotQuery.getSelectListSize(), 1); @@ -1772,21 +1797,21 @@ public void testCastTransformation() { @Test public void testDistinctCountRewrite() { String query = "SELECT count(distinct bar) FROM foo"; - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + PinotQuery pinotQuery = compileToPinotQuery(query); Assert.assertEquals(pinotQuery.getSelectList().size(), 1); Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator(), "distinctcount"); Assert.assertEquals( pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(0).getIdentifier().getName(), "bar"); query = "SELECT count(distinct bar) FROM foo GROUP BY city"; - 
pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); Assert.assertEquals(pinotQuery.getSelectList().size(), 1); Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator(), "distinctcount"); Assert.assertEquals( pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(0).getIdentifier().getName(), "bar"); query = "SELECT count(distinct bar), distinctCount(bar) FROM foo GROUP BY city"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); Assert.assertEquals(pinotQuery.getSelectList().size(), 2); Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator(), "distinctcount"); Assert.assertEquals( @@ -1797,14 +1822,14 @@ public void testDistinctCountRewrite() { pinotQuery.getSelectList().get(1).getFunctionCall().getOperands().get(0).getIdentifier().getName(), "bar"); query = "SELECT count(distinct bar), count(*), sum(a),min(a),max(b) FROM foo GROUP BY city"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); Assert.assertEquals(pinotQuery.getSelectList().size(), 5); Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator(), "distinctcount"); Assert.assertEquals( pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(0).getIdentifier().getName(), "bar"); query = "SELECT count(distinct bar) AS distinct_bar, count(*), sum(a),min(a),max(b) FROM foo GROUP BY city"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); Assert.assertEquals(pinotQuery.getSelectList().size(), 5); Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator(), "as"); Assert.assertEquals( @@ -1821,21 +1846,21 @@ public void testDistinctCountRewrite() { @Test public void testDistinctSumRewrite() { String query = "SELECT sum(distinct bar) FROM foo"; - PinotQuery pinotQuery = 
CalciteSqlParser.compileToPinotQuery(query); + PinotQuery pinotQuery = compileToPinotQuery(query); Assert.assertEquals(pinotQuery.getSelectList().size(), 1); Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator(), "distinctsum"); Assert.assertEquals( pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(0).getIdentifier().getName(), "bar"); query = "SELECT sum(distinct bar) FROM foo GROUP BY city"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); Assert.assertEquals(pinotQuery.getSelectList().size(), 1); Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator(), "distinctsum"); Assert.assertEquals( pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(0).getIdentifier().getName(), "bar"); query = "SELECT sum(distinct bar), distinctSum(bar) FROM foo GROUP BY city"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); Assert.assertEquals(pinotQuery.getSelectList().size(), 2); Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator(), "distinctsum"); Assert.assertEquals( @@ -1846,14 +1871,14 @@ public void testDistinctSumRewrite() { pinotQuery.getSelectList().get(1).getFunctionCall().getOperands().get(0).getIdentifier().getName(), "bar"); query = "SELECT sum(distinct bar), count(*), sum(a),min(a),max(b) FROM foo GROUP BY city"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); Assert.assertEquals(pinotQuery.getSelectList().size(), 5); Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator(), "distinctsum"); Assert.assertEquals( pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(0).getIdentifier().getName(), "bar"); query = "SELECT sum(distinct bar) AS distinct_bar, count(*), sum(a),min(a),max(b) FROM foo GROUP BY city"; - pinotQuery = 
CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); Assert.assertEquals(pinotQuery.getSelectList().size(), 5); Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator(), "as"); Assert.assertEquals( @@ -1868,7 +1893,7 @@ public void testDistinctSumRewrite() { query = "SELECT sum(distinct bar) AS distinct_bar, count(*), sum(a),min(a),max(b) FROM foo GROUP BY city ORDER BY " + "distinct_bar"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); Assert.assertEquals(pinotQuery.getSelectList().size(), 5); Function selectFunctionCall = pinotQuery.getSelectList().get(0).getFunctionCall(); Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator(), "as"); @@ -1889,21 +1914,21 @@ public void testDistinctSumRewrite() { @Test public void testDistinctAvgRewrite() { String query = "SELECT avg(distinct bar) FROM foo"; - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + PinotQuery pinotQuery = compileToPinotQuery(query); Assert.assertEquals(pinotQuery.getSelectList().size(), 1); Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator(), "distinctavg"); Assert.assertEquals( pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(0).getIdentifier().getName(), "bar"); query = "SELECT avg(distinct bar) FROM foo GROUP BY city"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); Assert.assertEquals(pinotQuery.getSelectList().size(), 1); Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator(), "distinctavg"); Assert.assertEquals( pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(0).getIdentifier().getName(), "bar"); query = "SELECT avg(distinct bar), distinctAvg(bar) FROM foo GROUP BY city"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = 
compileToPinotQuery(query); Assert.assertEquals(pinotQuery.getSelectList().size(), 2); Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator(), "distinctavg"); Assert.assertEquals( @@ -1914,14 +1939,14 @@ public void testDistinctAvgRewrite() { pinotQuery.getSelectList().get(1).getFunctionCall().getOperands().get(0).getIdentifier().getName(), "bar"); query = "SELECT avg(distinct bar), count(*), avg(a),min(a),max(b) FROM foo GROUP BY city"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); Assert.assertEquals(pinotQuery.getSelectList().size(), 5); Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator(), "distinctavg"); Assert.assertEquals( pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(0).getIdentifier().getName(), "bar"); query = "SELECT avg(distinct bar) AS distinct_bar, count(*), avg(a),min(a),max(b) FROM foo GROUP BY city"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); Assert.assertEquals(pinotQuery.getSelectList().size(), 5); Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator(), "as"); Assert.assertEquals( @@ -1936,7 +1961,7 @@ public void testDistinctAvgRewrite() { query = "SELECT avg(distinct bar) AS distinct_bar, count(*), avg(a),min(a),max(b) FROM foo GROUP BY city ORDER BY" + " distinct_bar"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); Assert.assertEquals(pinotQuery.getSelectList().size(), 5); Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator(), "as"); Assert.assertEquals( @@ -1961,7 +1986,7 @@ public void testDistinctAvgRewrite() { public void testInvalidDistinctAggregationRewrite() { String query = "SELECT max(distinct bar) FROM foo"; try { - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + PinotQuery pinotQuery = 
compileToPinotQuery(query); } catch (Exception e) { Assert.assertTrue(e instanceof SqlCompilationException); Assert.assertEquals(e.getMessage(), "Function 'max' on DISTINCT is not supported."); @@ -1971,7 +1996,7 @@ public void testInvalidDistinctAggregationRewrite() { @Test public void testOrdinalsQueryRewrite() { String query = "SELECT foo, bar, count(*) FROM t GROUP BY 1, 2 ORDER BY 1, 2 DESC"; - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + PinotQuery pinotQuery = compileToPinotQuery(query); Assert.assertEquals(pinotQuery.getSelectList().get(0).getIdentifier().getName(), "foo"); Assert.assertEquals(pinotQuery.getSelectList().get(1).getIdentifier().getName(), "bar"); Assert.assertEquals(pinotQuery.getGroupByList().get(0).getIdentifier().getName(), "foo"); @@ -1982,7 +2007,7 @@ public void testOrdinalsQueryRewrite() { pinotQuery.getOrderByList().get(1).getFunctionCall().getOperands().get(0).getIdentifier().getName(), "bar"); query = "SELECT foo, bar, count(*) FROM t GROUP BY 2, 1 ORDER BY 2, 1 DESC"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); Assert.assertEquals(pinotQuery.getSelectList().get(0).getIdentifier().getName(), "foo"); Assert.assertEquals(pinotQuery.getSelectList().get(1).getIdentifier().getName(), "bar"); Assert.assertEquals(pinotQuery.getGroupByList().get(0).getIdentifier().getName(), "bar"); @@ -1993,7 +2018,7 @@ public void testOrdinalsQueryRewrite() { pinotQuery.getOrderByList().get(1).getFunctionCall().getOperands().get(0).getIdentifier().getName(), "foo"); query = "SELECT foo as f, bar as b, count(*) FROM t GROUP BY 2, 1 ORDER BY 2, 1 DESC"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); Assert.assertEquals(pinotQuery.getGroupByList().get(0).getIdentifier().getName(), "bar"); Assert.assertEquals(pinotQuery.getGroupByList().get(1).getIdentifier().getName(), "foo"); Assert.assertEquals( @@ -2002,7 +2027,7 @@ 
public void testOrdinalsQueryRewrite() { pinotQuery.getOrderByList().get(1).getFunctionCall().getOperands().get(0).getIdentifier().getName(), "foo"); query = "select a, b + 2, array_sum(c) as array_sum_c, count(*) from data group by a, 2, array_sum_c"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); Assert.assertEquals(pinotQuery.getGroupByList().get(0).getIdentifier().getName(), "a"); Assert.assertEquals(pinotQuery.getGroupByList().get(1).getFunctionCall().getOperator(), "plus"); Assert.assertEquals( @@ -2014,9 +2039,9 @@ public void testOrdinalsQueryRewrite() { pinotQuery.getGroupByList().get(2).getFunctionCall().getOperands().get(0).getIdentifier().getName(), "c"); Assert.expectThrows(SqlCompilationException.class, - () -> CalciteSqlParser.compileToPinotQuery("SELECT foo, bar, count(*) FROM t GROUP BY 0")); + () -> compileToPinotQuery("SELECT foo, bar, count(*) FROM t GROUP BY 0")); Assert.expectThrows(SqlCompilationException.class, - () -> CalciteSqlParser.compileToPinotQuery("SELECT foo, bar, count(*) FROM t GROUP BY 3")); + () -> compileToPinotQuery("SELECT foo, bar, count(*) FROM t GROUP BY 3")); } @Test @@ -2024,7 +2049,7 @@ public void testOrdinalsQueryRewriteWithDistinctOrderBy() { String query = "SELECT baseballStats.playerName AS playerName FROM baseballStats GROUP BY baseballStats.playerName ORDER BY " + "1 ASC"; - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + PinotQuery pinotQuery = compileToPinotQuery(query); Assert.assertEquals( pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(0).getFunctionCall().getOperands().get(0) .getIdentifier().getName(), "baseballStats.playerName"); @@ -2037,17 +2062,17 @@ public void testOrdinalsQueryRewriteWithDistinctOrderBy() { @Test public void testNoArgFunction() { String query = "SELECT noArgFunc() FROM foo "; - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + PinotQuery pinotQuery = 
compileToPinotQuery(query); Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator(), "noargfunc"); query = "SELECT a FROM foo where time_col > noArgFunc()"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); Function greaterThan = pinotQuery.getFilterExpression().getFunctionCall(); Function minus = greaterThan.getOperands().get(0).getFunctionCall(); Assert.assertEquals(minus.getOperands().get(1).getFunctionCall().getOperator(), "noargfunc"); query = "SELECT sum(a), noArgFunc() FROM foo group by noArgFunc()"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); Assert.assertEquals(pinotQuery.getGroupByList().get(0).getFunctionCall().getOperator(), "noargfunc"); } @@ -2055,7 +2080,7 @@ public void testNoArgFunction() { public void testCompilationInvokedFunction() { String query = "SELECT now() FROM foo"; long lowerBound = System.currentTimeMillis(); - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + PinotQuery pinotQuery = compileToPinotQuery(query); long nowTs = pinotQuery.getSelectList().get(0).getLiteral().getLongValue(); long upperBound = System.currentTimeMillis(); Assert.assertTrue(nowTs >= lowerBound); @@ -2063,7 +2088,7 @@ public void testCompilationInvokedFunction() { query = "SELECT a FROM foo where time_col > now()"; lowerBound = System.currentTimeMillis(); - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); Function greaterThan = pinotQuery.getFilterExpression().getFunctionCall(); nowTs = greaterThan.getOperands().get(1).getLiteral().getLongValue(); upperBound = System.currentTimeMillis(); @@ -2071,14 +2096,14 @@ public void testCompilationInvokedFunction() { Assert.assertTrue(nowTs <= upperBound); query = "SELECT a FROM foo where time_col > fromDateTime('2020-01-01 UTC', 'yyyy-MM-dd z')"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + 
pinotQuery = compileToPinotQuery(query); greaterThan = pinotQuery.getFilterExpression().getFunctionCall(); nowTs = greaterThan.getOperands().get(1).getLiteral().getLongValue(); Assert.assertEquals(nowTs, 1577836800000L); query = "SELECT ago('PT1H') FROM foo"; lowerBound = System.currentTimeMillis() - ONE_HOUR_IN_MS; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); nowTs = pinotQuery.getSelectList().get(0).getLiteral().getLongValue(); upperBound = System.currentTimeMillis() - ONE_HOUR_IN_MS; Assert.assertTrue(nowTs >= lowerBound); @@ -2086,7 +2111,7 @@ public void testCompilationInvokedFunction() { query = "SELECT a FROM foo where time_col > ago('PT1H')"; lowerBound = System.currentTimeMillis() - ONE_HOUR_IN_MS; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); greaterThan = pinotQuery.getFilterExpression().getFunctionCall(); nowTs = greaterThan.getOperands().get(1).getLiteral().getLongValue(); upperBound = System.currentTimeMillis() - ONE_HOUR_IN_MS; @@ -2095,7 +2120,7 @@ public void testCompilationInvokedFunction() { query = "select encodeUrl('key1=value 1&key2=value@!$2&key3=value%3'), " + "decodeUrl('key1%3Dvalue+1%26key2%3Dvalue%40%21%242%26key3%3Dvalue%253') from mytable"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); String encoded = pinotQuery.getSelectList().get(0).getLiteral().getStringValue(); String decoded = pinotQuery.getSelectList().get(1).getLiteral().getStringValue(); Assert.assertEquals(encoded, "key1%3Dvalue+1%26key2%3Dvalue%40%21%242%26key3%3Dvalue%253"); @@ -2105,7 +2130,7 @@ public void testCompilationInvokedFunction() { + "encodeUrl('key1=val1 key2=45% key3=#47 key4={''key'':[3,5]} + key5=1;2;3;4 key6=(a|b)&c key7= " + "key8=5*(6/4) key9=https://pinot@pinot.com key10=CFLAGS=\"-O2 -mcpu=pentiumpro\" key12=$JAVA_HOME'),'') " + "from mytable"; - pinotQuery = 
CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); encoded = pinotQuery.getSelectList().get(0).getLiteral().getStringValue(); Assert.assertEquals(encoded, "https://www.google.com/search?q=key1%3Dval1+key2%3D45%25+key3%3D%2347+" + "key4%3D%7B%27key%27%3A%5B3%2C5%5D%7D+%2B+key5%3D1%3B2%3B3%3B4+" @@ -2116,7 +2141,7 @@ public void testCompilationInvokedFunction() { + "key4%3D%7B%27key%27%3A%5B3%2C5%5D%7D+%2B+key5%3D1%3B2%3B3%3B4+key6%3D%28a%7Cb%29%26c+" + "key7%3D+key8%3D5*%286%2F4%29+key9%3Dhttps%3A%2F%2Fpinot%40pinot.com+" + "key10%3DCFLAGS%3D%22-O2+-mcpu%3Dpentiumpro%22+key12%3D%24JAVA_HOME') from mytable"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); decoded = pinotQuery.getSelectList().get(0).getLiteral().getStringValue(); Assert.assertEquals(decoded, "https://www.google.com/search?q=key1=val1 key2=45% key3=#47 " + "key4={'key':[3,5]} + key5=1;2;3;4 key6=(a|b)&c key7= " @@ -2124,7 +2149,7 @@ public void testCompilationInvokedFunction() { query = "select a from mytable where foo=encodeUrl('key1=value 1&key2=value@!$2&key3=value%3') and" + " bar=decodeUrl('key1%3Dvalue+1%26key2%3Dvalue%40%21%242%26key3%3Dvalue%253')"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); Function and = pinotQuery.getFilterExpression().getFunctionCall(); encoded = and.getOperands().get(0).getFunctionCall().getOperands().get(1).getLiteral().getStringValue(); decoded = and.getOperands().get(1).getFunctionCall().getOperands().get(1).getLiteral().getStringValue(); @@ -2132,14 +2157,14 @@ public void testCompilationInvokedFunction() { Assert.assertEquals(decoded, "key1=value 1&key2=value@!$2&key3=value%3"); query = "select toBase64(toUtf8('hello!')), fromUtf8(fromBase64('aGVsbG8h')) from mytable"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); String encodedBase64 = 
pinotQuery.getSelectList().get(0).getLiteral().getStringValue(); String decodedBase64 = pinotQuery.getSelectList().get(1).getLiteral().getStringValue(); Assert.assertEquals(encodedBase64, "aGVsbG8h"); Assert.assertEquals(decodedBase64, "hello!"); query = "select toBase64(fromBase64('aGVsbG8h')), fromUtf8(fromBase64(toBase64(toUtf8('hello!')))) from mytable"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); encodedBase64 = pinotQuery.getSelectList().get(0).getLiteral().getStringValue(); decodedBase64 = pinotQuery.getSelectList().get(1).getLiteral().getStringValue(); Assert.assertEquals(encodedBase64, "aGVsbG8h"); @@ -2147,7 +2172,7 @@ public void testCompilationInvokedFunction() { query = "select toBase64(toUtf8(upper('hello!'))), fromUtf8(fromBase64(toBase64(toUtf8(upper('hello!'))))) from " + "mytable"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); encodedBase64 = pinotQuery.getSelectList().get(0).getLiteral().getStringValue(); decodedBase64 = pinotQuery.getSelectList().get(1).getLiteral().getStringValue(); Assert.assertEquals(encodedBase64, "SEVMTE8h"); @@ -2155,7 +2180,7 @@ public void testCompilationInvokedFunction() { query = "select reverse(fromUtf8(fromBase64(toBase64(toUtf8(upper('hello!')))))) from mytable where " + "fromUtf8(fromBase64(toBase64(toUtf8(upper('hello!'))))) = bar"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); String arg1 = pinotQuery.getSelectList().get(0).getLiteral().getStringValue(); String leftOp = pinotQuery.getFilterExpression().getFunctionCall().getOperands().get(1).getLiteral().getStringValue(); @@ -2163,7 +2188,7 @@ public void testCompilationInvokedFunction() { Assert.assertEquals(leftOp, "HELLO!"); query = "select a from mytable where foo = toBase64(toUtf8('hello!')) and bar = fromUtf8(fromBase64('aGVsbG8h'))"; - pinotQuery = 
CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); and = pinotQuery.getFilterExpression().getFunctionCall(); encoded = and.getOperands().get(0).getFunctionCall().getOperands().get(1).getLiteral().getStringValue(); decoded = and.getOperands().get(1).getFunctionCall().getOperands().get(1).getLiteral().getStringValue(); @@ -2173,7 +2198,7 @@ public void testCompilationInvokedFunction() { query = "select fromBase64('hello') from mytable"; Exception expectedError = null; try { - CalciteSqlParser.compileToPinotQuery(query); + compileToPinotQuery(query); } catch (Exception e) { expectedError = e; } @@ -2183,7 +2208,7 @@ public void testCompilationInvokedFunction() { query = "select toBase64('hello!') from mytable"; expectedError = null; try { - CalciteSqlParser.compileToPinotQuery(query); + compileToPinotQuery(query); } catch (Exception e) { expectedError = e; } @@ -2191,108 +2216,108 @@ public void testCompilationInvokedFunction() { Assert.assertTrue(expectedError instanceof SqlCompilationException); query = "select isSubnetOf('192.168.0.1/24', '192.168.0.225') from mytable"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); boolean result = pinotQuery.getSelectList().get(0).getLiteral().getBoolValue(); Assert.assertTrue(result); query = "select isSubnetOf('192.168.0.1/24', '192.168.0.1') from mytable"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); result = pinotQuery.getSelectList().get(0).getLiteral().getBoolValue(); Assert.assertTrue(result); query = "select isSubnetOf('130.191.23.32/27', '130.191.23.40') from mytable"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); result = pinotQuery.getSelectList().get(0).getLiteral().getBoolValue(); Assert.assertTrue(result); query = "select isSubnetOf('130.191.23.32/26', '130.192.23.33') from mytable"; - pinotQuery = 
CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); result = pinotQuery.getSelectList().get(0).getLiteral().getBoolValue(); Assert.assertFalse(result); query = "select isSubnetOf('153.87.199.160/28', '153.87.199.166') from mytable"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); result = pinotQuery.getSelectList().get(0).getLiteral().getBoolValue(); Assert.assertTrue(result); query = "select isSubnetOf('2001:4800:7825:103::/64', '2001:4800:7825:103::2050') from mytable"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); result = pinotQuery.getSelectList().get(0).getLiteral().getBoolValue(); Assert.assertTrue(result); query = "select isSubnetOf('130.191.23.32/26', '130.191.23.33') from mytable"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); result = pinotQuery.getSelectList().get(0).getLiteral().getBoolValue(); Assert.assertTrue(result); query = "select isSubnetOf('2001:4801:7825:103:be76:4efe::/96', '2001:4801:7825:103:be76:4efe::e15') from mytable"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); result = pinotQuery.getSelectList().get(0).getLiteral().getBoolValue(); Assert.assertTrue(result); query = "select isSubnetOf('122.152.15.0/26', '122.152.15.28') from mytable"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); result = pinotQuery.getSelectList().get(0).getLiteral().getBoolValue(); Assert.assertTrue(result); query = "select isSubnetOf('96.141.228.254/26', '96.141.228.254') from mytable"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); result = pinotQuery.getSelectList().get(0).getLiteral().getBoolValue(); Assert.assertTrue(result); query = "select isSubnetOf('3.175.47.128/26', '3.175.48.178') from 
mytable"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); result = pinotQuery.getSelectList().get(0).getLiteral().getBoolValue(); Assert.assertFalse(result); query = "select isSubnetOf('192.168.0.1/24', '192.168.0.0') from mytable"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); result = pinotQuery.getSelectList().get(0).getLiteral().getBoolValue(); Assert.assertTrue(result); query = "select isSubnetOf('10.3.128.1/22', '10.3.128.123') from mytable"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); result = pinotQuery.getSelectList().get(0).getLiteral().getBoolValue(); Assert.assertTrue(result); query = "select isSubnetOf('10.3.128.1/22', '10.3.131.255') from mytable"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); result = pinotQuery.getSelectList().get(0).getLiteral().getBoolValue(); Assert.assertTrue(result); query = "select isSubnetOf('10.3.128.1/22', '1.2.3.1') from mytable"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); result = pinotQuery.getSelectList().get(0).getLiteral().getBoolValue(); Assert.assertFalse(result); query = "select isSubnetOf('1.2.3.128/1', '127.255.255.255') from mytable"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); result = pinotQuery.getSelectList().get(0).getLiteral().getBoolValue(); Assert.assertTrue(result); query = "select isSubnetOf('1.2.3.128/0', '192.168.5.1') from mytable"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); result = pinotQuery.getSelectList().get(0).getLiteral().getBoolValue(); Assert.assertTrue(result); query = "select isSubnetOf('2001:db8:85a3::8a2e:370:7334/62', '2001:0db8:85a3:0003:ffff:ffff:ffff:ffff') from " + "mytable"; - 
pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); result = pinotQuery.getSelectList().get(0).getLiteral().getBoolValue(); Assert.assertTrue(result); query = "select isSubnetOf('123:db8:85a3::8a2e:370:7334/72', '124:db8:85a3::8a2e:370:7334') from mytable"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); result = pinotQuery.getSelectList().get(0).getLiteral().getBoolValue(); Assert.assertFalse(result); query = "select isSubnetOf('7890:db8:113::8a2e:370:7334/127', '7890:db8:113::8a2e:370:7336') from mytable"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); result = pinotQuery.getSelectList().get(0).getLiteral().getBoolValue(); Assert.assertFalse(result); query = "select isSubnetOf('7890:db8:113::8a2e:370:7334/127', '7890:db8:113::8a2e:370:7335') from mytable"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); result = pinotQuery.getSelectList().get(0).getLiteral().getBoolValue(); Assert.assertTrue(result); } @@ -2300,7 +2325,7 @@ public void testCompilationInvokedFunction() { @Test public void testCompilationInvokedNestedFunctions() { String query = "SELECT a FROM foo where time_col > toDateTime(now(), 'yyyy-MM-dd z')"; - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + PinotQuery pinotQuery = compileToPinotQuery(query); Function greaterThan = pinotQuery.getFilterExpression().getFunctionCall(); String today = greaterThan.getOperands().get(1).getLiteral().getStringValue(); String expectedTodayStr = @@ -2312,7 +2337,7 @@ public void testCompilationInvokedNestedFunctions() { public void testCompileTimeExpression() { final CompileTimeFunctionsInvoker compileTimeFunctionsInvoker = new CompileTimeFunctionsInvoker(); long lowerBound = System.currentTimeMillis(); - Expression expression = CalciteSqlParser.compileToExpression("now()"); + 
Expression expression = compileToExpression("now()"); Assert.assertNotNull(expression.getFunctionCall()); PinotQuery pinotQuery = new PinotQuery(); pinotQuery.setFilterExpression(expression); @@ -2324,7 +2349,7 @@ public void testCompileTimeExpression() { Assert.assertTrue(result >= lowerBound && result <= upperBound); lowerBound = TimeUnit.MILLISECONDS.toHours(System.currentTimeMillis()) + 1; - expression = CalciteSqlParser.compileToExpression("to_epoch_hours(now() + 3600000)"); + expression = compileToExpression("to_epoch_hours(now() + 3600000)"); Assert.assertNotNull(expression.getFunctionCall()); pinotQuery.setFilterExpression(expression); pinotQuery = compileTimeFunctionsInvoker.rewrite(pinotQuery); @@ -2335,7 +2360,7 @@ public void testCompileTimeExpression() { Assert.assertTrue(result >= lowerBound && result <= upperBound); lowerBound = System.currentTimeMillis() - ONE_HOUR_IN_MS; - expression = CalciteSqlParser.compileToExpression("ago('PT1H')"); + expression = compileToExpression("ago('PT1H')"); Assert.assertNotNull(expression.getFunctionCall()); pinotQuery.setFilterExpression(expression); pinotQuery = compileTimeFunctionsInvoker.rewrite(pinotQuery); @@ -2346,7 +2371,7 @@ public void testCompileTimeExpression() { Assert.assertTrue(result >= lowerBound && result <= upperBound); lowerBound = System.currentTimeMillis() + ONE_HOUR_IN_MS; - expression = CalciteSqlParser.compileToExpression("ago('PT-1H')"); + expression = compileToExpression("ago('PT-1H')"); Assert.assertNotNull(expression.getFunctionCall()); pinotQuery.setFilterExpression(expression); pinotQuery = compileTimeFunctionsInvoker.rewrite(pinotQuery); @@ -2356,7 +2381,7 @@ public void testCompileTimeExpression() { result = expression.getLiteral().getLongValue(); Assert.assertTrue(result >= lowerBound && result <= upperBound); - expression = CalciteSqlParser.compileToExpression("toDateTime(millisSinceEpoch)"); + expression = compileToExpression("toDateTime(millisSinceEpoch)"); 
Assert.assertNotNull(expression.getFunctionCall()); pinotQuery.setFilterExpression(expression); pinotQuery = compileTimeFunctionsInvoker.rewrite(pinotQuery); @@ -2366,7 +2391,7 @@ public void testCompileTimeExpression() { Assert.assertEquals(expression.getFunctionCall().getOperands().get(0).getIdentifier().getName(), "millisSinceEpoch"); - expression = CalciteSqlParser.compileToExpression("encodeUrl('key1=value 1&key2=value@!$2&key3=value%3')"); + expression = compileToExpression("encodeUrl('key1=value 1&key2=value@!$2&key3=value%3')"); Assert.assertNotNull(expression.getFunctionCall()); pinotQuery.setFilterExpression(expression); pinotQuery = compileTimeFunctionsInvoker.rewrite(pinotQuery); @@ -2375,8 +2400,7 @@ public void testCompileTimeExpression() { Assert.assertEquals(expression.getLiteral().getFieldValue(), "key1%3Dvalue+1%26key2%3Dvalue%40%21%242%26key3%3Dvalue%253"); - expression = - CalciteSqlParser.compileToExpression("decodeUrl('key1%3Dvalue+1%26key2%3Dvalue%40%21%242%26key3%3Dvalue%253')"); + expression = compileToExpression("decodeUrl('key1%3Dvalue+1%26key2%3Dvalue%40%21%242%26key3%3Dvalue%253')"); Assert.assertNotNull(expression.getFunctionCall()); pinotQuery.setFilterExpression(expression); pinotQuery = compileTimeFunctionsInvoker.rewrite(pinotQuery); @@ -2384,7 +2408,7 @@ public void testCompileTimeExpression() { Assert.assertNotNull(expression.getLiteral()); Assert.assertEquals(expression.getLiteral().getFieldValue(), "key1=value 1&key2=value@!$2&key3=value%3"); - expression = CalciteSqlParser.compileToExpression("reverse(playerName)"); + expression = compileToExpression("reverse(playerName)"); Assert.assertNotNull(expression.getFunctionCall()); pinotQuery.setFilterExpression(expression); pinotQuery = compileTimeFunctionsInvoker.rewrite(pinotQuery); @@ -2393,7 +2417,7 @@ public void testCompileTimeExpression() { Assert.assertEquals(expression.getFunctionCall().getOperator(), "reverse"); 
Assert.assertEquals(expression.getFunctionCall().getOperands().get(0).getIdentifier().getName(), "playerName"); - expression = CalciteSqlParser.compileToExpression("reverse('playerName')"); + expression = compileToExpression("reverse('playerName')"); Assert.assertNotNull(expression.getFunctionCall()); pinotQuery.setFilterExpression(expression); pinotQuery = compileTimeFunctionsInvoker.rewrite(pinotQuery); @@ -2401,7 +2425,7 @@ public void testCompileTimeExpression() { Assert.assertNotNull(expression.getLiteral()); Assert.assertEquals(expression.getLiteral().getFieldValue(), "emaNreyalp"); - expression = CalciteSqlParser.compileToExpression("reverse(123)"); + expression = compileToExpression("reverse(123)"); Assert.assertNotNull(expression.getFunctionCall()); pinotQuery.setFilterExpression(expression); pinotQuery = compileTimeFunctionsInvoker.rewrite(pinotQuery); @@ -2409,7 +2433,7 @@ public void testCompileTimeExpression() { Assert.assertNotNull(expression.getLiteral()); Assert.assertEquals(expression.getLiteral().getFieldValue(), "321"); - expression = CalciteSqlParser.compileToExpression("count(*)"); + expression = compileToExpression("count(*)"); Assert.assertNotNull(expression.getFunctionCall()); pinotQuery.setFilterExpression(expression); pinotQuery = compileTimeFunctionsInvoker.rewrite(pinotQuery); @@ -2418,7 +2442,7 @@ public void testCompileTimeExpression() { Assert.assertEquals(expression.getFunctionCall().getOperator(), "count"); Assert.assertEquals(expression.getFunctionCall().getOperands().get(0).getIdentifier().getName(), "*"); - expression = CalciteSqlParser.compileToExpression("toBase64(toUtf8('hello!'))"); + expression = compileToExpression("toBase64(toUtf8('hello!'))"); Assert.assertNotNull(expression.getFunctionCall()); pinotQuery.setFilterExpression(expression); pinotQuery = compileTimeFunctionsInvoker.rewrite(pinotQuery); @@ -2426,7 +2450,7 @@ public void testCompileTimeExpression() { Assert.assertNotNull(expression.getLiteral()); 
Assert.assertEquals(expression.getLiteral().getFieldValue(), "aGVsbG8h"); - expression = CalciteSqlParser.compileToExpression("fromUtf8(fromBase64('aGVsbG8h'))"); + expression = compileToExpression("fromUtf8(fromBase64('aGVsbG8h'))"); Assert.assertNotNull(expression.getFunctionCall()); pinotQuery.setFilterExpression(expression); pinotQuery = compileTimeFunctionsInvoker.rewrite(pinotQuery); @@ -2434,7 +2458,7 @@ public void testCompileTimeExpression() { Assert.assertNotNull(expression.getLiteral()); Assert.assertEquals(expression.getLiteral().getFieldValue(), "hello!"); - expression = CalciteSqlParser.compileToExpression("fromBase64(foo)"); + expression = compileToExpression("fromBase64(foo)"); Assert.assertNotNull(expression.getFunctionCall()); pinotQuery.setFilterExpression(expression); pinotQuery = compileTimeFunctionsInvoker.rewrite(pinotQuery); @@ -2443,7 +2467,7 @@ public void testCompileTimeExpression() { Assert.assertEquals(expression.getFunctionCall().getOperator(), "frombase64"); Assert.assertEquals(expression.getFunctionCall().getOperands().get(0).getIdentifier().getName(), "foo"); - expression = CalciteSqlParser.compileToExpression("toBase64(foo)"); + expression = compileToExpression("toBase64(foo)"); Assert.assertNotNull(expression.getFunctionCall()); pinotQuery.setFilterExpression(expression); pinotQuery = compileTimeFunctionsInvoker.rewrite(pinotQuery); @@ -2455,56 +2479,54 @@ public void testCompileTimeExpression() { @Test public void testLiteralExpressionCheck() { - Assert.assertTrue(CalciteSqlParser.isLiteralOnlyExpression(CalciteSqlParser.compileToExpression("1123"))); - Assert.assertTrue(CalciteSqlParser.isLiteralOnlyExpression(CalciteSqlParser.compileToExpression("'ab'"))); - Assert.assertTrue( - CalciteSqlParser.isLiteralOnlyExpression(CalciteSqlParser.compileToExpression("AS('ab', randomStr)"))); - Assert.assertTrue( - CalciteSqlParser.isLiteralOnlyExpression(CalciteSqlParser.compileToExpression("AS(123, randomTime)"))); - 
Assert.assertFalse(CalciteSqlParser.isLiteralOnlyExpression(CalciteSqlParser.compileToExpression("sum(abc)"))); - Assert.assertFalse(CalciteSqlParser.isLiteralOnlyExpression(CalciteSqlParser.compileToExpression("count(*)"))); - Assert.assertFalse(CalciteSqlParser.isLiteralOnlyExpression(CalciteSqlParser.compileToExpression("a+B"))); - Assert.assertFalse(CalciteSqlParser.isLiteralOnlyExpression(CalciteSqlParser.compileToExpression("c+1"))); + Assert.assertTrue(CalciteSqlParser.isLiteralOnlyExpression(compileToExpression("1123"))); + Assert.assertTrue(CalciteSqlParser.isLiteralOnlyExpression(compileToExpression("'ab'"))); + Assert.assertTrue(CalciteSqlParser.isLiteralOnlyExpression(compileToExpression("AS('ab', randomStr)"))); + Assert.assertTrue(CalciteSqlParser.isLiteralOnlyExpression(compileToExpression("AS(123, randomTime)"))); + Assert.assertFalse(CalciteSqlParser.isLiteralOnlyExpression(compileToExpression("sum(abc)"))); + Assert.assertFalse(CalciteSqlParser.isLiteralOnlyExpression(compileToExpression("count(*)"))); + Assert.assertFalse(CalciteSqlParser.isLiteralOnlyExpression(compileToExpression("a+B"))); + Assert.assertFalse(CalciteSqlParser.isLiteralOnlyExpression(compileToExpression("c+1"))); } @Test public void testCaseInsensitiveFilter() { String query = "SELECT count(*) FROM foo where text_match(col, 'expr')"; - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + PinotQuery pinotQuery = compileToPinotQuery(query); Assert.assertEquals(pinotQuery.getFilterExpression().getFunctionCall().getOperator(), "TEXT_MATCH"); query = "SELECT count(*) FROM foo where TEXT_MATCH(col, 'expr')"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); Assert.assertEquals(pinotQuery.getFilterExpression().getFunctionCall().getOperator(), "TEXT_MATCH"); query = "SELECT count(*) FROM foo where regexp_like(col, 'expr')"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = 
compileToPinotQuery(query); Assert.assertEquals(pinotQuery.getFilterExpression().getFunctionCall().getOperator(), "REGEXP_LIKE"); query = "SELECT count(*) FROM foo where REGEXP_LIKE(col, 'expr')"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); Assert.assertEquals(pinotQuery.getFilterExpression().getFunctionCall().getOperator(), "REGEXP_LIKE"); query = "SELECT count(*) FROM foo where col is not null"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); Assert.assertEquals(pinotQuery.getFilterExpression().getFunctionCall().getOperator(), "IS_NOT_NULL"); Assert.assertEquals( pinotQuery.getFilterExpression().getFunctionCall().getOperands().get(0).getIdentifier().getName(), "col"); query = "SELECT count(*) FROM foo where col IS NOT NULL"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); Assert.assertEquals(pinotQuery.getFilterExpression().getFunctionCall().getOperator(), "IS_NOT_NULL"); Assert.assertEquals( pinotQuery.getFilterExpression().getFunctionCall().getOperands().get(0).getIdentifier().getName(), "col"); query = "SELECT count(*) FROM foo where col is null"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); Assert.assertEquals(pinotQuery.getFilterExpression().getFunctionCall().getOperator(), "IS_NULL"); Assert.assertEquals( pinotQuery.getFilterExpression().getFunctionCall().getOperands().get(0).getIdentifier().getName(), "col"); query = "SELECT count(*) FROM foo where col IS NULL"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); Assert.assertEquals(pinotQuery.getFilterExpression().getFunctionCall().getOperator(), "IS_NULL"); Assert.assertEquals( pinotQuery.getFilterExpression().getFunctionCall().getOperands().get(0).getIdentifier().getName(), "col"); @@ -2513,14 +2535,14 @@ public void 
testCaseInsensitiveFilter() { @Test public void testNonAggregationGroupByQuery() { String query = "SELECT col1 FROM foo GROUP BY col1"; - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + PinotQuery pinotQuery = compileToPinotQuery(query); Assert.assertEquals(pinotQuery.getSelectListSize(), 1); Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator().toUpperCase(), "DISTINCT"); Assert.assertEquals( pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(0).getIdentifier().getName(), "col1"); query = "SELECT col1, col2 FROM foo GROUP BY col1, col2"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); Assert.assertEquals(pinotQuery.getSelectListSize(), 1); Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator().toUpperCase(), "DISTINCT"); Assert.assertEquals( @@ -2529,7 +2551,7 @@ public void testNonAggregationGroupByQuery() { pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(1).getIdentifier().getName(), "col2"); query = "SELECT col1+col2*5 FROM foo GROUP BY col1+col2*5"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); Assert.assertEquals(pinotQuery.getSelectListSize(), 1); Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator().toUpperCase(), "DISTINCT"); Assert.assertEquals( @@ -2549,7 +2571,7 @@ public void testNonAggregationGroupByQuery() { .getFunctionCall().getOperands().get(1).getLiteral().getLongValue(), 5L); query = "SELECT col1+col2*5 AS col3 FROM foo GROUP BY col3"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); Assert.assertEquals(pinotQuery.getSelectListSize(), 1); Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator().toUpperCase(), "DISTINCT"); Assert.assertEquals( @@ -2579,20 +2601,20 @@ public void 
testNonAggregationGroupByQuery() { @Test public void testInvalidNonAggregationGroupBy() { Assert.assertThrows(SqlCompilationException.class, - () -> CalciteSqlParser.compileToPinotQuery("SELECT col1 FROM foo GROUP BY col1, col2")); + () -> compileToPinotQuery("SELECT col1 FROM foo GROUP BY col1, col2")); Assert.assertThrows(SqlCompilationException.class, - () -> CalciteSqlParser.compileToPinotQuery("SELECT col1, col2 FROM foo GROUP BY col1")); + () -> compileToPinotQuery("SELECT col1, col2 FROM foo GROUP BY col1")); Assert.assertThrows(SqlCompilationException.class, - () -> CalciteSqlParser.compileToPinotQuery("SELECT col1 + col2 FROM foo GROUP BY col1")); + () -> compileToPinotQuery("SELECT col1 + col2 FROM foo GROUP BY col1")); Assert.assertThrows(SqlCompilationException.class, - () -> CalciteSqlParser.compileToPinotQuery("SELECT col1+col2 FROM foo GROUP BY col1,col2")); + () -> compileToPinotQuery("SELECT col1+col2 FROM foo GROUP BY col1,col2")); } @Test public void testFlattenAndOr() { { String query = "SELECT * FROM foo WHERE col1 > 0 AND (col2 > 0 AND col3 > 0) AND col4 > 0"; - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + PinotQuery pinotQuery = compileToPinotQuery(query); Function functionCall = pinotQuery.getFilterExpression().getFunctionCall(); Assert.assertEquals(functionCall.getOperator(), FilterKind.AND.name()); List operands = functionCall.getOperands(); @@ -2603,7 +2625,7 @@ public void testFlattenAndOr() { } { String query = "SELECT * FROM foo WHERE col1 > 0 AND (col2 AND col3 > 0) AND startsWith(col4, 'myStr')"; - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + PinotQuery pinotQuery = compileToPinotQuery(query); Function functionCall = pinotQuery.getFilterExpression().getFunctionCall(); Assert.assertEquals(functionCall.getOperator(), FilterKind.AND.name()); List operands = functionCall.getOperands(); @@ -2621,7 +2643,7 @@ public void testFlattenAndOr() { } { String query = "SELECT * FROM foo WHERE 
col1 > 0 AND (col2 AND col3 > 0) AND col4 = true"; - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + PinotQuery pinotQuery = compileToPinotQuery(query); Function functionCall = pinotQuery.getFilterExpression().getFunctionCall(); Assert.assertEquals(functionCall.getOperator(), FilterKind.AND.name()); List operands = functionCall.getOperands(); @@ -2639,7 +2661,7 @@ public void testFlattenAndOr() { } { String query = "SELECT * FROM foo WHERE col1 <= 0 OR col2 <= 0 OR (col3 <= 0 OR col4 <= 0)"; - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + PinotQuery pinotQuery = compileToPinotQuery(query); Function functionCall = pinotQuery.getFilterExpression().getFunctionCall(); Assert.assertEquals(functionCall.getOperator(), FilterKind.OR.name()); List operands = functionCall.getOperands(); @@ -2650,7 +2672,7 @@ public void testFlattenAndOr() { } { String query = "SELECT * FROM foo WHERE col1 <= 0 OR col2 OR (col3 <= 0 OR col4)"; - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + PinotQuery pinotQuery = compileToPinotQuery(query); Function functionCall = pinotQuery.getFilterExpression().getFunctionCall(); Assert.assertEquals(functionCall.getOperator(), FilterKind.OR.name()); List operands = functionCall.getOperands(); @@ -2666,7 +2688,7 @@ public void testFlattenAndOr() { { String query = "SELECT * FROM foo WHERE col1 > 0 AND ((col2 > 0 AND col3 > 0) AND (col1 <= 0 OR (col2 <= 0 OR " + "(col3 <= 0 OR col4 <= 0) OR (col3 > 0 AND col4 > 0))))"; - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + PinotQuery pinotQuery = compileToPinotQuery(query); Function functionCall = pinotQuery.getFilterExpression().getFunctionCall(); Assert.assertEquals(functionCall.getOperator(), FilterKind.AND.name()); List operands = functionCall.getOperands(); @@ -2695,7 +2717,7 @@ public void testFlattenAndOr() { public void testHavingClause() { { String query = "SELECT SUM(col1), col2 FROM foo WHERE true GROUP BY 
col2 HAVING SUM(col1) > 10"; - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + PinotQuery pinotQuery = compileToPinotQuery(query); Function functionCall = pinotQuery.getHavingExpression().getFunctionCall(); Assert.assertEquals(functionCall.getOperator(), FilterKind.GREATER_THAN.name()); List operands = functionCall.getOperands(); @@ -2706,7 +2728,7 @@ public void testHavingClause() { { String query = "SELECT SUM(col1), col2 FROM foo WHERE true GROUP BY col2 " + "HAVING SUM(col1) > 10 AND SUM(col3) > 5 AND SUM(col4) > 15"; - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + PinotQuery pinotQuery = compileToPinotQuery(query); Function functionCall = pinotQuery.getHavingExpression().getFunctionCall(); Assert.assertEquals(functionCall.getOperator(), FilterKind.AND.name()); List operands = functionCall.getOperands(); @@ -2721,7 +2743,7 @@ public void testHavingClause() { public void testPostAggregation() { { String query = "SELECT SUM(col1) * SUM(col2) FROM foo"; - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + PinotQuery pinotQuery = compileToPinotQuery(query); List selectList = pinotQuery.getSelectList(); Assert.assertEquals(selectList.size(), 1); Function functionCall = selectList.get(0).getFunctionCall(); @@ -2734,7 +2756,7 @@ public void testPostAggregation() { } { String query = "SELECT SUM(col1), col2 FROM foo GROUP BY col2 ORDER BY MAX(col1) - MAX(col3)"; - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + PinotQuery pinotQuery = compileToPinotQuery(query); List orderByList = pinotQuery.getOrderByList(); Assert.assertEquals(orderByList.size(), 1); Function functionCall = orderByList.get(0).getFunctionCall(); @@ -2752,7 +2774,7 @@ public void testPostAggregation() { { // Having will be rewritten to (SUM(col1) + SUM(col3)) - MAX(col4) > 0 String query = "SELECT SUM(col1), col2 FROM foo GROUP BY col2 HAVING SUM(col1) + SUM(col3) > MAX(col4)"; - PinotQuery pinotQuery = 
CalciteSqlParser.compileToPinotQuery(query); + PinotQuery pinotQuery = compileToPinotQuery(query); Function functionCall = pinotQuery.getHavingExpression().getFunctionCall(); Assert.assertEquals(functionCall.getOperator(), FilterKind.GREATER_THAN.name()); List operands = functionCall.getOperands(); @@ -2777,7 +2799,7 @@ public void testArrayAggregationRewrite() { String sql; PinotQuery pinotQuery; sql = "SELECT sum(array_sum(a)) FROM Foo"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(sql); + pinotQuery = compileToPinotQuery(sql); Assert.assertEquals(pinotQuery.getSelectListSize(), 1); Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator(), "summv"); Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().size(), 1); @@ -2785,7 +2807,7 @@ public void testArrayAggregationRewrite() { pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(0).getIdentifier().getName(), "a"); sql = "SELECT MIN(ARRAYMIN(a)) FROM Foo"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(sql); + pinotQuery = compileToPinotQuery(sql); Assert.assertEquals(pinotQuery.getSelectListSize(), 1); Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator(), "minmv"); Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().size(), 1); @@ -2793,7 +2815,7 @@ public void testArrayAggregationRewrite() { pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(0).getIdentifier().getName(), "a"); sql = "SELECT Max(ArrayMax(a)) FROM Foo"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(sql); + pinotQuery = compileToPinotQuery(sql); Assert.assertEquals(pinotQuery.getSelectListSize(), 1); Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator(), "maxmv"); Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().size(), 1); @@ -2801,7 +2823,7 @@ public void testArrayAggregationRewrite() { 
pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(0).getIdentifier().getName(), "a"); sql = "SELECT Max(ArrayMax(a)) + 1 FROM Foo"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(sql); + pinotQuery = compileToPinotQuery(sql); Assert.assertEquals(pinotQuery.getSelectListSize(), 1); Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator(), "plus"); Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().size(), 2); @@ -2880,7 +2902,7 @@ public void testSupportedDistinctQueries() { private void testUnsupportedDistinctQuery(String query, String errorMessage) { try { - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + PinotQuery pinotQuery = compileToPinotQuery(query); Assert.fail("Query should have failed"); } catch (Exception e) { Assert.assertEquals(errorMessage, e.getMessage()); @@ -2888,7 +2910,7 @@ private void testUnsupportedDistinctQuery(String query, String errorMessage) { } private void testSupportedDistinctQuery(String query) { - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + PinotQuery pinotQuery = compileToPinotQuery(query); Assert.assertNotNull(pinotQuery); } @@ -2897,27 +2919,27 @@ public void testQueryWithSemicolon() { String sql; PinotQuery pinotQuery; sql = "SELECT col1, col2 FROM foo;"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(sql); + pinotQuery = compileToPinotQuery(sql); Assert.assertEquals(pinotQuery.getSelectListSize(), 2); Assert.assertEquals(pinotQuery.getSelectList().get(0).getIdentifier().getName(), "col1"); Assert.assertEquals(pinotQuery.getSelectList().get(1).getIdentifier().getName(), "col2"); // Query having extra white spaces before the semicolon sql = "SELECT col1, col2 FROM foo ;"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(sql); + pinotQuery = compileToPinotQuery(sql); Assert.assertEquals(pinotQuery.getSelectListSize(), 2); 
Assert.assertEquals(pinotQuery.getSelectList().get(0).getIdentifier().getName(), "col1"); Assert.assertEquals(pinotQuery.getSelectList().get(1).getIdentifier().getName(), "col2"); // Query having leading and trailing whitespaces sql = " SELECT col1, col2 FROM foo; "; - pinotQuery = CalciteSqlParser.compileToPinotQuery(sql); + pinotQuery = compileToPinotQuery(sql); Assert.assertEquals(pinotQuery.getSelectListSize(), 2); Assert.assertEquals(pinotQuery.getSelectList().get(0).getIdentifier().getName(), "col1"); Assert.assertEquals(pinotQuery.getSelectList().get(1).getIdentifier().getName(), "col2"); sql = "SELECT col1, count(*) FROM foo group by col1;"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(sql); + pinotQuery = compileToPinotQuery(sql); Assert.assertEquals(pinotQuery.getSelectListSize(), 2); Assert.assertEquals(pinotQuery.getSelectList().get(0).getIdentifier().getName(), "col1"); Assert.assertEquals(pinotQuery.getGroupByListSize(), 1); @@ -2927,14 +2949,14 @@ public void testQueryWithSemicolon() { // Check for Option SQL Query // TODO: change to SET syntax sql = "SELECT col1, count(*) FROM foo group by col1 option(skipUpsert=true);"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(sql); + pinotQuery = compileToPinotQuery(sql); Assert.assertEquals(pinotQuery.getQueryOptionsSize(), 1); Assert.assertTrue(pinotQuery.getQueryOptions().containsKey("skipUpsert")); // Check for the query where the literal has semicolon // TODO: change to SET syntax sql = "select col1, count(*) from foo where col1 = 'x;y' GROUP BY col1 option(skipUpsert=true);"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(sql); + pinotQuery = compileToPinotQuery(sql); Assert.assertEquals(pinotQuery.getQueryOptionsSize(), 1); Assert.assertTrue(pinotQuery.getQueryOptions().containsKey("skipUpsert")); } @@ -2943,41 +2965,40 @@ public void testQueryWithSemicolon() { public void testCatalogNameResolvedToDefault() { // Pinot doesn't support catalog. 
However, for backward compatibility, if a catalog is provided, we will resolve // the table from our default catalog. this means `a.foo` will be equivalent to `foo`. - PinotQuery randomCatalogQuery = CalciteSqlParser.compileToPinotQuery("SELECT count(*) FROM rand_catalog.foo"); - PinotQuery defaultCatalogQuery = CalciteSqlParser.compileToPinotQuery("SELECT count(*) FROM default.foo"); + PinotQuery randomCatalogQuery = compileToPinotQuery("SELECT count(*) FROM rand_catalog.foo"); + PinotQuery defaultCatalogQuery = compileToPinotQuery("SELECT count(*) FROM default.foo"); Assert.assertEquals(randomCatalogQuery.getDataSource().getTableName(), "rand_catalog.foo"); Assert.assertEquals(defaultCatalogQuery.getDataSource().getTableName(), "default.foo"); } @Test public void testInvalidQueryWithSemicolon() { - Assert.expectThrows(SqlCompilationException.class, () -> CalciteSqlParser.compileToPinotQuery(";")); + Assert.expectThrows(SqlCompilationException.class, () -> compileToPinotQuery(";")); - Assert.expectThrows(SqlCompilationException.class, () -> CalciteSqlParser.compileToPinotQuery(";;;;")); + Assert.expectThrows(SqlCompilationException.class, () -> compileToPinotQuery(";;;;")); Assert.expectThrows(SqlCompilationException.class, - () -> CalciteSqlParser.compileToPinotQuery("SELECT col1, count(*) FROM foo GROUP BY ; col1")); + () -> compileToPinotQuery("SELECT col1, count(*) FROM foo GROUP BY ; col1")); // Query having multiple SQL statements - Assert.expectThrows(SqlCompilationException.class, () -> CalciteSqlParser.compileToPinotQuery( + Assert.expectThrows(SqlCompilationException.class, () -> compileToPinotQuery( "SELECT col1, count(*) FROM foo GROUP BY col1; SELECT col2, count(*) FROM foo GROUP BY col2")); // Query having multiple SQL statements with trailing and leading whitespaces - Assert.expectThrows(SqlCompilationException.class, () -> CalciteSqlParser.compileToPinotQuery( + Assert.expectThrows(SqlCompilationException.class, () -> compileToPinotQuery( " SELECT 
col1, count(*) FROM foo GROUP BY col1; " + "SELECT col2, count(*) FROM foo GROUP BY col2 ")); } @Test public void testInvalidQueryWithAggregateFunction() { - Assert.expectThrows(SqlCompilationException.class, - () -> CalciteSqlParser.compileToPinotQuery("SELECT col1, count(*) from foo")); + Assert.expectThrows(SqlCompilationException.class, () -> compileToPinotQuery("SELECT col1, count(*) from foo")); Assert.expectThrows(SqlCompilationException.class, - () -> CalciteSqlParser.compileToPinotQuery("SELECT UPPER(col1), count(*) from foo")); + () -> compileToPinotQuery("SELECT UPPER(col1), count(*) from foo")); Assert.expectThrows(SqlCompilationException.class, - () -> CalciteSqlParser.compileToPinotQuery("SELECT UPPER(col1), avg(col2) from foo")); + () -> compileToPinotQuery("SELECT UPPER(col1), avg(col2) from foo")); } /** @@ -2986,7 +3007,8 @@ public void testInvalidQueryWithAggregateFunction() { @Test public void testParserExtensionImpl() { String customSql = "INSERT INTO db.tbl FROM FILE 'file:///tmp/file1', FILE 'file:///tmp/file2'"; - SqlNodeAndOptions sqlNodeAndOptions = CalciteSqlParser.compileToSqlNodeAndOptions(customSql);; + SqlNodeAndOptions sqlNodeAndOptions = CalciteSqlParser.compileToSqlNodeAndOptions(customSql); + ; Assert.assertTrue(sqlNodeAndOptions.getSqlNode() instanceof SqlInsertFromFile); Assert.assertEquals(sqlNodeAndOptions.getSqlType(), PinotSqlType.DML); } @@ -2997,7 +3019,7 @@ public void shouldParseBasicAtTimeZoneExtension() { String sql = "SELECT ts AT TIME ZONE 'pst' FROM myTable;"; // When: - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(sql); + PinotQuery pinotQuery = compileToPinotQuery(sql); // Then: Assert.assertEquals(pinotQuery.getSelectListSize(), 1); @@ -3014,7 +3036,7 @@ public void shouldParseNestedTimeExprAtTimeZoneExtension() { String sql = "SELECT ts + 123 AT TIME ZONE 'pst' FROM myTable;"; // When: - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(sql); + PinotQuery pinotQuery = 
compileToPinotQuery(sql); // Then: Assert.assertEquals(pinotQuery.getSelectListSize(), 1); @@ -3036,7 +3058,7 @@ public void shouldParseOutsideExprAtTimeZoneExtension() { String sql = "SELECT ts AT TIME ZONE 'pst' > 123 FROM myTable;"; // When: - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(sql); + PinotQuery pinotQuery = compileToPinotQuery(sql); // Then: Assert.assertEquals(pinotQuery.getSelectListSize(), 1); @@ -3054,7 +3076,7 @@ public void shouldParseOutsideExprAtTimeZoneExtension() { @Test public void testJoin() { String query = "SELECT T1.a, T2.b FROM T1 JOIN T2"; - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + PinotQuery pinotQuery = compileToPinotQuery(query); DataSource dataSource = pinotQuery.getDataSource(); Assert.assertNull(dataSource.getTableName()); Assert.assertNull(dataSource.getSubquery()); @@ -3066,7 +3088,7 @@ public void testJoin() { Assert.assertNull(join.getCondition()); query = "SELECT T1.a, T2.b FROM T1 INNER JOIN T2 ON T1.key = T2.key"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); dataSource = pinotQuery.getDataSource(); Assert.assertNull(dataSource.getTableName()); Assert.assertNull(dataSource.getSubquery()); @@ -3075,10 +3097,10 @@ public void testJoin() { Assert.assertEquals(join.getType(), JoinType.INNER); Assert.assertEquals(join.getLeft().getTableName(), "T1"); Assert.assertEquals(join.getRight().getTableName(), "T2"); - Assert.assertEquals(join.getCondition(), CalciteSqlParser.compileToExpression("T1.key = T2.key")); + Assert.assertEquals(join.getCondition(), compileToExpression("T1.key = T2.key")); query = "SELECT T1.a, T2.b FROM T1 FULL JOIN T2 ON T1.key = T2.key"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); dataSource = pinotQuery.getDataSource(); Assert.assertNull(dataSource.getTableName()); Assert.assertNull(dataSource.getSubquery()); @@ -3087,10 +3109,10 @@ public void 
testJoin() { Assert.assertEquals(join.getType(), JoinType.FULL); Assert.assertEquals(join.getLeft().getTableName(), "T1"); Assert.assertEquals(join.getRight().getTableName(), "T2"); - Assert.assertEquals(join.getCondition(), CalciteSqlParser.compileToExpression("T1.key = T2.key")); + Assert.assertEquals(join.getCondition(), compileToExpression("T1.key = T2.key")); query = "SELECT T1.a, T2.b FROM T1 LEFT JOIN T2 ON T1.a > T2.b"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); dataSource = pinotQuery.getDataSource(); Assert.assertNull(dataSource.getTableName()); Assert.assertNull(dataSource.getSubquery()); @@ -3099,11 +3121,11 @@ public void testJoin() { Assert.assertEquals(join.getType(), JoinType.LEFT); Assert.assertEquals(join.getLeft().getTableName(), "T1"); Assert.assertEquals(join.getRight().getTableName(), "T2"); - Assert.assertEquals(join.getCondition(), CalciteSqlParser.compileToExpression("T1.a > T2.b")); + Assert.assertEquals(join.getCondition(), compileToExpression("T1.a > T2.b")); query = "SELECT T1.a, T2.b FROM T1 RIGHT JOIN (SELECT a, COUNT(*) AS b FROM T3 GROUP BY a) AS T2 ON T1.key = T2.key"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); dataSource = pinotQuery.getDataSource(); Assert.assertNull(dataSource.getTableName()); Assert.assertNull(dataSource.getSubquery()); @@ -3114,13 +3136,12 @@ public void testJoin() { DataSource right = join.getRight(); Assert.assertEquals(right.getTableName(), "T2"); PinotQuery rightSubquery = right.getSubquery(); - Assert.assertEquals(rightSubquery, - CalciteSqlParser.compileToPinotQuery("SELECT a, COUNT(*) AS b FROM T3 GROUP BY a")); - Assert.assertEquals(join.getCondition(), CalciteSqlParser.compileToExpression("T1.key = T2.key")); + Assert.assertEquals(rightSubquery, compileToPinotQuery("SELECT a, COUNT(*) AS b FROM T3 GROUP BY a")); + Assert.assertEquals(join.getCondition(), compileToExpression("T1.key = 
T2.key")); query = "SELECT T1.a, T2.b FROM T1 JOIN (SELECT key, COUNT(*) AS b FROM T3 JOIN T4 GROUP BY key) AS T2 " + "ON T1.key = T2.key"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); dataSource = pinotQuery.getDataSource(); Assert.assertNull(dataSource.getTableName()); Assert.assertNull(dataSource.getSubquery()); @@ -3131,13 +3152,12 @@ public void testJoin() { right = join.getRight(); Assert.assertEquals(right.getTableName(), "T2"); rightSubquery = right.getSubquery(); - Assert.assertEquals(rightSubquery, - CalciteSqlParser.compileToPinotQuery("SELECT key, COUNT(*) AS b FROM T3 JOIN T4 GROUP BY key")); - Assert.assertEquals(join.getCondition(), CalciteSqlParser.compileToExpression("T1.key = T2.key")); + Assert.assertEquals(rightSubquery, compileToPinotQuery("SELECT key, COUNT(*) AS b FROM T3 JOIN T4 GROUP BY key")); + Assert.assertEquals(join.getCondition(), compileToExpression("T1.key = T2.key")); // test for self join queries. 
query = "SELECT T1.a FROM T1 JOIN(SELECT key FROM T1) as self ON T1.key=self.key"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); dataSource = pinotQuery.getDataSource(); Assert.assertNull(dataSource.getTableName()); Assert.assertNull(dataSource.getSubquery()); @@ -3148,72 +3168,72 @@ public void testJoin() { right = join.getRight(); Assert.assertEquals(right.getTableName(), "self"); rightSubquery = right.getSubquery(); - Assert.assertEquals(rightSubquery, CalciteSqlParser.compileToPinotQuery("SELECT key FROM T1")); - Assert.assertEquals(join.getCondition(), CalciteSqlParser.compileToExpression("T1.key = self.key")); + Assert.assertEquals(rightSubquery, compileToPinotQuery("SELECT key FROM T1")); + Assert.assertEquals(join.getCondition(), compileToExpression("T1.key = self.key")); } @Test public void testInPredicateWithOutNullPasses() { - CalciteSqlParser.compileToPinotQuery("SELECT * FROM testTable WHERE column1 IN (1, 2) AND column2 = 1"); + compileToPinotQuery("SELECT * FROM testTable WHERE column1 IN (1, 2) AND column2 = 1"); } @Test(expectedExceptions = {IllegalStateException.class}, expectedExceptionsMessageRegExp = "Using NULL in IN " + "filter is not supported") public void testSingleInPredicateWithNullFails() { - CalciteSqlParser.compileToPinotQuery("SELECT * FROM testTable WHERE column1 IN (1, 2, NULL)"); + compileToPinotQuery("SELECT * FROM testTable WHERE column1 IN (1, 2, NULL)"); } @Test(expectedExceptions = {IllegalStateException.class}, expectedExceptionsMessageRegExp = "Using NULL in NOT_IN " + "filter is not supported") public void testSingleNotInPredicateWithNullFails() { - CalciteSqlParser.compileToPinotQuery("SELECT * FROM testTable WHERE column1 NOT IN (1, 2, NULL)"); + compileToPinotQuery("SELECT * FROM testTable WHERE column1 NOT IN (1, 2, NULL)"); } @Test(expectedExceptions = {IllegalStateException.class}, expectedExceptionsMessageRegExp = "Using NULL in IN " + "filter is not 
supported") public void testAndFilterWithNullFails() { - CalciteSqlParser.compileToPinotQuery("SELECT * FROM testTable WHERE column1 IN (1, 2, NULL) AND column2 = 1"); + compileToPinotQuery("SELECT * FROM testTable WHERE column1 IN (1, 2, NULL) AND column2 = 1"); } @Test(expectedExceptions = {IllegalStateException.class}, expectedExceptionsMessageRegExp = "Using NULL in NOT_IN " + "filter is not supported") public void testOrFilterWithNullFails() { - CalciteSqlParser.compileToPinotQuery("SELECT * FROM testTable WHERE column1 NOT IN (1, 2, NULL) OR column2 = 1"); + compileToPinotQuery("SELECT * FROM testTable WHERE column1 NOT IN (1, 2, NULL) OR column2 = 1"); } @Test(expectedExceptions = {IllegalStateException.class}, expectedExceptionsMessageRegExp = "Using NULL in IN " + "filter is not supported") public void testNotFilterWithNullFails() { - CalciteSqlParser.compileToPinotQuery("SELECT * FROM testTable WHERE NOT(column1 IN (NULL, 1, 2))"); + compileToPinotQuery("SELECT * FROM testTable WHERE NOT(column1 IN (NULL, 1, 2))"); } @Test(expectedExceptions = {IllegalStateException.class}, expectedExceptionsMessageRegExp = "Using NULL in " + "GREATER_THAN filter is not supported") public void testGreaterThanNullFilterFails() { - CalciteSqlParser.compileToPinotQuery("SELECT * FROM testTable WHERE column1 > null"); + compileToPinotQuery("SELECT * FROM testTable WHERE column1 > null"); } @Test(expectedExceptions = {IllegalStateException.class}, expectedExceptionsMessageRegExp = "Using NULL in " + "LESS_THAN_OR_EQUAL filter is not supported") public void testLessThanOrEqualNullFilterFails() { - CalciteSqlParser.compileToPinotQuery("SELECT * FROM testTable WHERE column1 <= null"); + compileToPinotQuery("SELECT * FROM testTable WHERE column1 <= null"); } @Test(expectedExceptions = {IllegalStateException.class}, expectedExceptionsMessageRegExp = "Using NULL in LIKE " + "filter is not supported") public void testLikeFilterWithNullFails() { - 
CalciteSqlParser.compileToPinotQuery("SELECT * FROM testTable WHERE column1 LIKE null"); + compileToPinotQuery("SELECT * FROM testTable WHERE column1 LIKE null"); } @Test(expectedExceptions = {IllegalStateException.class}, expectedExceptionsMessageRegExp = "Using NULL in EQUALS " + "filter is not supported") public void testEqualFilterWithNullFails() { - CalciteSqlParser.compileToPinotQuery("SELECT * FROM testTable WHERE column1 = null"); + compileToPinotQuery("SELECT * FROM testTable WHERE column1 = null"); } @Test(expectedExceptions = {IllegalStateException.class}, expectedExceptionsMessageRegExp = "Using NULL in " + "NOT_EQUALS filter is not supported") public void testInEqualFilterWithNullFails() { - CalciteSqlParser.compileToPinotQuery("SELECT * FROM testTable WHERE column1 != null"); + compileToPinotQuery("SELECT * FROM testTable WHERE column1 != null"); } } diff --git a/pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/filter/MergeEqInFilterOptimizer.java b/pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/filter/MergeEqInFilterOptimizer.java index 49a9ad171f2c..6836f8022617 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/filter/MergeEqInFilterOptimizer.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/filter/MergeEqInFilterOptimizer.java @@ -19,7 +19,6 @@ package org.apache.pinot.core.query.optimizer.filter; import java.util.ArrayList; -import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; import java.util.List; @@ -164,16 +163,12 @@ private Expression optimize(Expression filterExpression) { private static Expression getFilterExpression(Expression lhs, Set values) { int numValues = values.size(); if (numValues == 1) { - Expression eqFilter = RequestUtils.getFunctionExpression(FilterKind.EQUALS.name()); - eqFilter.getFunctionCall().setOperands(Arrays.asList(lhs, values.iterator().next())); - return eqFilter; + return 
RequestUtils.getFunctionExpression(FilterKind.EQUALS.name(), lhs, values.iterator().next()); } else { - Expression inFilter = RequestUtils.getFunctionExpression(FilterKind.IN.name()); List operands = new ArrayList<>(numValues + 1); operands.add(lhs); operands.addAll(values); - inFilter.getFunctionCall().setOperands(operands); - return inFilter; + return RequestUtils.getFunctionExpression(FilterKind.IN.name(), operands); } } } diff --git a/pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/filter/MergeRangeFilterOptimizer.java b/pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/filter/MergeRangeFilterOptimizer.java index 1d0b91cc8809..68f895e0d69e 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/filter/MergeRangeFilterOptimizer.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/filter/MergeRangeFilterOptimizer.java @@ -19,7 +19,6 @@ package org.apache.pinot.core.query.optimizer.filter; import java.util.ArrayList; -import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -154,9 +153,7 @@ private static Comparable getComparable(Expression literalExpression, DataType d * Helper method to construct a RANGE predicate filter Expression from the given column and range. 
*/ private static Expression getRangeFilterExpression(String column, Range range) { - Expression rangeFilter = RequestUtils.getFunctionExpression(FilterKind.RANGE.name()); - rangeFilter.getFunctionCall().setOperands(Arrays.asList(RequestUtils.getIdentifierExpression(column), - RequestUtils.getLiteralExpression(range.getRangeString()))); - return rangeFilter; + return RequestUtils.getFunctionExpression(FilterKind.RANGE.name(), RequestUtils.getIdentifierExpression(column), + RequestUtils.getLiteralExpression(range.getRangeString())); } } diff --git a/pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/filter/TextMatchFilterOptimizer.java b/pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/filter/TextMatchFilterOptimizer.java index 8c742cfc98a4..c6b2e29838f6 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/filter/TextMatchFilterOptimizer.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/filter/TextMatchFilterOptimizer.java @@ -19,8 +19,6 @@ package org.apache.pinot.core.query.optimizer.filter; import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -161,26 +159,20 @@ private Expression getNewFilter(String operator, List newChildren, } else { mergedTextMatchFilter = String.join(SPACE + operator + SPACE, literals); } - Expression mergedTextMatchExpression = RequestUtils.getFunctionExpression(FilterKind.TEXT_MATCH.name()); - Expression mergedTextMatchFilterExpression = RequestUtils.getLiteralExpression("(" + mergedTextMatchFilter + ")"); - mergedTextMatchExpression.getFunctionCall() - .setOperands(Arrays.asList(entry.getKey(), mergedTextMatchFilterExpression)); - + Expression mergedTextMatchExpression = + RequestUtils.getFunctionExpression(FilterKind.TEXT_MATCH.name(), entry.getKey(), + RequestUtils.getLiteralExpression("(" + mergedTextMatchFilter + ")")); if (allNot) { - Expression notExpression 
= RequestUtils.getFunctionExpression(FilterKind.NOT.name()); - notExpression.getFunctionCall().setOperands(Collections.singletonList(mergedTextMatchExpression)); - newChildren.add(notExpression); - continue; + newChildren.add(RequestUtils.getFunctionExpression(FilterKind.NOT.name(), mergedTextMatchExpression)); + } else { + newChildren.add(mergedTextMatchExpression); } - newChildren.add(mergedTextMatchExpression); } if (newChildren.size() == 1) { return newChildren.get(0); } assert operator.equals(FilterKind.OR.name()) || operator.equals(FilterKind.AND.name()); - Expression newExpression = RequestUtils.getFunctionExpression(operator); - newExpression.getFunctionCall().setOperands(newChildren); - return newExpression; + return RequestUtils.getFunctionExpression(operator, newChildren); } } diff --git a/pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/filter/TimePredicateFilterOptimizer.java b/pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/filter/TimePredicateFilterOptimizer.java index 7a2603ce9089..3979eb029d0d 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/filter/TimePredicateFilterOptimizer.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/filter/TimePredicateFilterOptimizer.java @@ -20,7 +20,7 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; -import java.util.Arrays; +import java.util.ArrayList; import java.util.List; import java.util.concurrent.TimeUnit; import javax.annotation.Nullable; @@ -234,9 +234,7 @@ private void optimizeTimeConvert(Function filterFunction, FilterKind filterKind) // Step 3: Rewrite the filter function String rangeString = new Range(lowerValue, lowerInclusive, upperValue, upperInclusive).getRangeString(); - filterFunction.setOperator(FilterKind.RANGE.name()); - filterFunction.setOperands( - Arrays.asList(timeConvertOperands.get(0), RequestUtils.getLiteralExpression(rangeString))); + 
rewriteToRange(filterFunction, timeConvertOperands.get(0), rangeString); } catch (Exception e) { LOGGER.warn("Caught exception while optimizing TIME_CONVERT predicate: {}, skipping the optimization", filterFunction, e); @@ -400,9 +398,7 @@ && isStringLiteral(dateTimeConvertOperands.get(3)), // Step 3: Rewrite the filter function String rangeString = new Range(lowerValue, lowerInclusive, upperValue, upperInclusive).getRangeString(); - filterFunction.setOperator(FilterKind.RANGE.name()); - filterFunction.setOperands( - Arrays.asList(dateTimeConvertOperands.get(0), RequestUtils.getLiteralExpression(rangeString))); + rewriteToRange(filterFunction, dateTimeConvertOperands.get(0), rangeString); } catch (Exception e) { LOGGER.warn("Caught exception while optimizing DATE_TIME_CONVERT predicate: {}, skipping the optimization", filterFunction, e); @@ -419,4 +415,13 @@ private boolean isStringLiteral(Expression expression) { private long ceil(long millisValue, long granularityMillis) { return (millisValue + granularityMillis - 1) / granularityMillis * granularityMillis; } + + private static void rewriteToRange(Function filterFunction, Expression expression, String rangeString) { + filterFunction.setOperator(FilterKind.RANGE.name()); + // NOTE: Create an ArrayList because we might need to modify the list later + List newOperands = new ArrayList<>(2); + newOperands.add(expression); + newOperands.add(RequestUtils.getLiteralExpression(rangeString)); + filterFunction.setOperands(newOperands); + } } diff --git a/pinot-core/src/test/java/org/apache/pinot/core/plan/maker/QueryOverrideWithHintsTest.java b/pinot-core/src/test/java/org/apache/pinot/core/plan/maker/QueryOverrideWithHintsTest.java index 57f169258ca8..522f5881916b 100644 --- a/pinot-core/src/test/java/org/apache/pinot/core/plan/maker/QueryOverrideWithHintsTest.java +++ b/pinot-core/src/test/java/org/apache/pinot/core/plan/maker/QueryOverrideWithHintsTest.java @@ -223,9 +223,9 @@ public void 
testNotOverrideWithExpressionOverrideHints() { public void testRewriteExpressionsWithHints() { PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery( "SELECT datetrunc('MONTH', ts), count(*), sum(abc) from myTable group by datetrunc('MONTH', ts) "); - Expression dateTruncFunctionExpr = RequestUtils.getFunctionExpression("datetrunc"); - dateTruncFunctionExpr.getFunctionCall().setOperands(new ArrayList<>( - ImmutableList.of(RequestUtils.getLiteralExpression("MONTH"), RequestUtils.getIdentifierExpression("ts")))); + Expression dateTruncFunctionExpr = + RequestUtils.getFunctionExpression("datetrunc", RequestUtils.getLiteralExpression("MONTH"), + RequestUtils.getIdentifierExpression("ts")); Expression timestampIndexColumn = RequestUtils.getIdentifierExpression("$ts$MONTH"); pinotQuery.setExpressionOverrideHints(ImmutableMap.of(dateTruncFunctionExpr, timestampIndexColumn)); QueryContext queryContext = QueryContextConverterUtils.getQueryContext(pinotQuery); @@ -238,9 +238,9 @@ public void testRewriteExpressionsWithHints() { public void testNotRewriteExpressionsWithHints() { PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery( "SELECT datetrunc('DAY', ts), count(*), sum(abc) from myTable group by datetrunc('DAY', ts)"); - Expression dateTruncFunctionExpr = RequestUtils.getFunctionExpression("datetrunc"); - dateTruncFunctionExpr.getFunctionCall().setOperands(new ArrayList<>( - ImmutableList.of(RequestUtils.getLiteralExpression("DAY"), RequestUtils.getIdentifierExpression("ts")))); + Expression dateTruncFunctionExpr = + RequestUtils.getFunctionExpression("datetrunc", RequestUtils.getLiteralExpression("DAY"), + RequestUtils.getIdentifierExpression("ts")); Expression timestampIndexColumn = RequestUtils.getIdentifierExpression("$ts$DAY"); pinotQuery.setExpressionOverrideHints(ImmutableMap.of(dateTruncFunctionExpr, timestampIndexColumn)); QueryContext queryContext = QueryContextConverterUtils.getQueryContext(pinotQuery); diff --git 
a/pinot-core/src/test/java/org/apache/pinot/core/query/optimizer/QueryOptimizerTest.java b/pinot-core/src/test/java/org/apache/pinot/core/query/optimizer/QueryOptimizerTest.java index 848f458742d2..62cf2e2b44b8 100644 --- a/pinot-core/src/test/java/org/apache/pinot/core/query/optimizer/QueryOptimizerTest.java +++ b/pinot-core/src/test/java/org/apache/pinot/core/query/optimizer/QueryOptimizerTest.java @@ -79,10 +79,8 @@ public void testFlattenAndOrFilter() { } private static Expression getEqFilterExpression(String column, Object value) { - Expression eqFilterExpression = RequestUtils.getFunctionExpression(FilterKind.EQUALS.name()); - eqFilterExpression.getFunctionCall().setOperands( - Arrays.asList(RequestUtils.getIdentifierExpression(column), RequestUtils.getLiteralExpression(value))); - return eqFilterExpression; + return RequestUtils.getFunctionExpression(FilterKind.EQUALS.name(), RequestUtils.getIdentifierExpression(column), + RequestUtils.getLiteralExpression(value)); } @Test @@ -182,10 +180,8 @@ public void testMergeTextMatchFilter() { } private static Expression getRangeFilterExpression(String column, String rangeString) { - Expression rangeFilterExpression = RequestUtils.getFunctionExpression(FilterKind.RANGE.name()); - rangeFilterExpression.getFunctionCall().setOperands( - Arrays.asList(RequestUtils.getIdentifierExpression(column), RequestUtils.getLiteralExpression(rangeString))); - return rangeFilterExpression; + return RequestUtils.getFunctionExpression(FilterKind.RANGE.name(), RequestUtils.getIdentifierExpression(column), + RequestUtils.getLiteralExpression(rangeString)); } @Test diff --git a/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/plan/server/ServerPlanRequestUtils.java b/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/plan/server/ServerPlanRequestUtils.java index 9b8d19d77de5..f8cb04469805 100644 --- a/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/plan/server/ServerPlanRequestUtils.java +++ 
b/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/plan/server/ServerPlanRequestUtils.java @@ -18,7 +18,6 @@ */ package org.apache.pinot.query.runtime.plan.server; -import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; import java.util.ArrayList; import java.util.Arrays; @@ -229,14 +228,13 @@ private static void attachTimeBoundary(PinotQuery pinotQuery, TimeBoundaryInfo t String timeColumn = timeBoundaryInfo.getTimeColumn(); String timeValue = timeBoundaryInfo.getTimeValue(); Expression timeFilterExpression = RequestUtils.getFunctionExpression( - isOfflineRequest ? FilterKind.LESS_THAN_OR_EQUAL.name() : FilterKind.GREATER_THAN.name()); - timeFilterExpression.getFunctionCall().setOperands( - Arrays.asList(RequestUtils.getIdentifierExpression(timeColumn), RequestUtils.getLiteralExpression(timeValue))); + isOfflineRequest ? FilterKind.LESS_THAN_OR_EQUAL.name() : FilterKind.GREATER_THAN.name(), + RequestUtils.getIdentifierExpression(timeColumn), RequestUtils.getLiteralExpression(timeValue)); Expression filterExpression = pinotQuery.getFilterExpression(); if (filterExpression != null) { - Expression andFilterExpression = RequestUtils.getFunctionExpression(FilterKind.AND.name()); - andFilterExpression.getFunctionCall().setOperands(Arrays.asList(filterExpression, timeFilterExpression)); + Expression andFilterExpression = + RequestUtils.getFunctionExpression(FilterKind.AND.name(), filterExpression, timeFilterExpression); pinotQuery.setFilterExpression(andFilterExpression); } else { pinotQuery.setFilterExpression(timeFilterExpression); @@ -253,21 +251,26 @@ static void attachDynamicFilter(PinotQuery pinotQuery, JoinNode.JoinKeys joinKey List expressions = new ArrayList<>(); for (int i = 0; i < leftJoinKeys.size(); i++) { Expression leftExpr = pinotQuery.getSelectList().get(leftJoinKeys.get(i)); - if (dataContainer.size() == 0) { + if (dataContainer.isEmpty()) { // put a constant false expression - Expression 
constantFalseExpr = RequestUtils.getLiteralExpression(false); - expressions.add(constantFalseExpr); + expressions.add(RequestUtils.getLiteralExpression(false)); } else { int rightIdx = rightJoinKeys.get(i); - Expression inFilterExpr = RequestUtils.getFunctionExpression(FilterKind.IN.name()); List operands = new ArrayList<>(dataContainer.size() + 1); operands.add(leftExpr); operands.addAll(computeInOperands(dataContainer, dataSchema, rightIdx)); - inFilterExpr.getFunctionCall().setOperands(operands); - expressions.add(inFilterExpr); + expressions.add(RequestUtils.getFunctionExpression(FilterKind.IN.name(), operands)); } } - attachFilterExpression(pinotQuery, FilterKind.AND, expressions); + Expression filterExpression = pinotQuery.getFilterExpression(); + if (filterExpression != null) { + expressions.add(filterExpression); + } + if (expressions.size() > 1) { + pinotQuery.setFilterExpression(RequestUtils.getFunctionExpression(FilterKind.AND.name(), expressions)); + } else { + pinotQuery.setFilterExpression(expressions.get(0)); + } } private static List computeInOperands(List dataContainer, DataSchema dataSchema, int colIdx) { @@ -335,23 +338,4 @@ private static List computeInOperands(List dataContainer, } return expressions; } - - /** - * Attach Filter Expression to existing PinotQuery. 
- */ - private static void attachFilterExpression(PinotQuery pinotQuery, FilterKind attachKind, List exprs) { - Preconditions.checkState(attachKind == FilterKind.AND || attachKind == FilterKind.OR); - Expression filterExpression = pinotQuery.getFilterExpression(); - List arrayList = new ArrayList<>(exprs); - if (filterExpression != null) { - arrayList.add(filterExpression); - } - if (arrayList.size() > 1) { - Expression attachFilterExpression = RequestUtils.getFunctionExpression(attachKind.name()); - attachFilterExpression.getFunctionCall().setOperands(arrayList); - pinotQuery.setFilterExpression(attachFilterExpression); - } else { - pinotQuery.setFilterExpression(arrayList.get(0)); - } - } } From bf28a83958e069f868e794f4a5271328d5a5f525 Mon Sep 17 00:00:00 2001 From: Yash Mayya Date: Wed, 1 May 2024 07:35:34 +0530 Subject: [PATCH 098/102] Use more efficient variants of URLEncoder::encode and URLDecoder::decode (#13030) --- .../function/scalar/StringFunctions.java | 15 ++++------- .../apache/pinot/common/utils/URIUtils.java | 15 +++-------- .../SegmentGenerationUtilsTest.java | 8 +++--- .../pinot/integration/tests/ClusterTest.java | 8 +++--- .../tests/HybridClusterIntegrationTest.java | 6 ++--- .../hadoop/HadoopSegmentCreationMapper.java | 6 ++--- .../SparkSegmentGenerationJobRunner.java | 6 ++--- .../SparkSegmentGenerationJobRunner.java | 6 ++--- .../SegmentGenerationJobRunner.java | 4 +-- .../plugin/filesystem/AzurePinotFSUtil.java | 7 +++-- .../filesystem/test/AzurePinotFSUtilTest.java | 6 +++-- .../pinot/plugin/filesystem/S3PinotFS.java | 15 ++--------- .../ParquetNativeRecordReaderFullTest.java | 4 ++- .../segment/local/utils/SegmentPushUtils.java | 4 +-- .../server/api/resources/TablesResource.java | 20 +++----------- .../pinot/spi/filesystem/LocalPinotFS.java | 8 ++---- .../builder/ControllerRequestURLBuilder.java | 26 ++++++------------- 17 files changed, 56 insertions(+), 108 deletions(-) diff --git 
a/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/StringFunctions.java b/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/StringFunctions.java index 31baeb5d2d44..21c086ffb71e 100644 --- a/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/StringFunctions.java +++ b/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/StringFunctions.java @@ -21,8 +21,6 @@ import it.unimi.dsi.fastutil.objects.ObjectLinkedOpenHashSet; import it.unimi.dsi.fastutil.objects.ObjectSet; import java.io.UnsupportedEncodingException; -import java.net.URLDecoder; -import java.net.URLEncoder; import java.nio.ByteBuffer; import java.nio.charset.StandardCharsets; import java.text.Normalizer; @@ -33,6 +31,7 @@ import javax.annotation.Nullable; import org.apache.commons.lang3.StringUtils; import org.apache.pinot.common.utils.RegexpPatternConverterUtils; +import org.apache.pinot.common.utils.URIUtils; import org.apache.pinot.spi.annotations.ScalarFunction; import org.apache.pinot.spi.utils.JsonUtils; @@ -804,24 +803,20 @@ public static int strcmp(String input1, String input2) { * * @param input plaintext string * @return url encoded string - * @throws UnsupportedEncodingException */ @ScalarFunction - public static String encodeUrl(String input) - throws UnsupportedEncodingException { - return URLEncoder.encode(input, StandardCharsets.UTF_8.toString()); + public static String encodeUrl(String input) { + return URIUtils.encode(input); } /** * * @param input url encoded string * @return plaintext string - * @throws UnsupportedEncodingException */ @ScalarFunction - public static String decodeUrl(String input) - throws UnsupportedEncodingException { - return URLDecoder.decode(input, StandardCharsets.UTF_8.toString()); + public static String decodeUrl(String input) { + return URIUtils.decode(input); } /** diff --git a/pinot-common/src/main/java/org/apache/pinot/common/utils/URIUtils.java 
b/pinot-common/src/main/java/org/apache/pinot/common/utils/URIUtils.java index 5aff60b07fe2..a857981f2a2d 100644 --- a/pinot-common/src/main/java/org/apache/pinot/common/utils/URIUtils.java +++ b/pinot-common/src/main/java/org/apache/pinot/common/utils/URIUtils.java @@ -23,6 +23,7 @@ import java.net.URISyntaxException; import java.net.URLDecoder; import java.net.URLEncoder; +import java.nio.charset.StandardCharsets; import java.util.Map; import java.util.StringJoiner; import org.apache.http.client.utils.URIBuilder; @@ -91,21 +92,11 @@ public static String constructDownloadUrl(String baseUrl, String rawTableName, S } public static String encode(String string) { - try { - return URLEncoder.encode(string, "UTF-8"); - } catch (Exception e) { - // Should never happen - throw new RuntimeException(e); - } + return URLEncoder.encode(string, StandardCharsets.UTF_8); } public static String decode(String string) { - try { - return URLDecoder.decode(string, "UTF-8"); - } catch (Exception e) { - // Should never happen - throw new RuntimeException(e); - } + return URLDecoder.decode(string, StandardCharsets.UTF_8); } /** diff --git a/pinot-common/src/test/java/org/apache/pinot/common/segment/generation/SegmentGenerationUtilsTest.java b/pinot-common/src/test/java/org/apache/pinot/common/segment/generation/SegmentGenerationUtilsTest.java index 983d8ededd33..d33c046207b1 100644 --- a/pinot-common/src/test/java/org/apache/pinot/common/segment/generation/SegmentGenerationUtilsTest.java +++ b/pinot-common/src/test/java/org/apache/pinot/common/segment/generation/SegmentGenerationUtilsTest.java @@ -22,13 +22,12 @@ import com.google.common.collect.Lists; import java.io.File; import java.io.IOException; -import java.io.UnsupportedEncodingException; import java.net.URI; import java.net.URISyntaxException; -import java.net.URLEncoder; import java.nio.file.Files; import java.util.List; import org.apache.commons.io.FileUtils; +import org.apache.pinot.common.utils.URIUtils; import 
org.apache.pinot.spi.filesystem.PinotFS; import org.apache.pinot.spi.filesystem.PinotFSFactory; import org.testng.Assert; @@ -73,7 +72,7 @@ public void testRelativeURIs() // Invalid segment tar name with space @Test public void testInvalidRelativeURIs() - throws URISyntaxException, UnsupportedEncodingException { + throws URISyntaxException { URI inputDirURI = new URI("hdfs://namenode1:9999/path/to/"); URI inputFileURI = new URI("hdfs://namenode1:9999/path/to/subdir/file"); URI outputDirURI = new URI("hdfs://namenode2/output/dir/"); @@ -85,8 +84,7 @@ public void testInvalidRelativeURIs() Assert.assertTrue(e instanceof URISyntaxException); } URI outputSegmentTarURI = SegmentGenerationUtils.getRelativeOutputPath(inputDirURI, inputFileURI, outputDirURI) - .resolve(new URI( - URLEncoder.encode("table_OFFLINE_2021-02-01_09:39:00.000_2021-02-01_11:59:00.000_2.tar.gz", "UTF-8"))); + .resolve(new URI(URIUtils.encode("table_OFFLINE_2021-02-01_09:39:00.000_2021-02-01_11:59:00.000_2.tar.gz"))); Assert.assertEquals(outputSegmentTarURI.toString(), "hdfs://namenode2/output/dir/subdir/table_OFFLINE_2021-02-01_09%3A39%3A00.000_2021-02-01_11%3A59%3A00.000_2" + ".tar.gz"); diff --git a/pinot-integration-test-base/src/test/java/org/apache/pinot/integration/tests/ClusterTest.java b/pinot-integration-test-base/src/test/java/org/apache/pinot/integration/tests/ClusterTest.java index 278f378b1ab7..49edf86264ed 100644 --- a/pinot-integration-test-base/src/test/java/org/apache/pinot/integration/tests/ClusterTest.java +++ b/pinot-integration-test-base/src/test/java/org/apache/pinot/integration/tests/ClusterTest.java @@ -25,8 +25,6 @@ import java.io.File; import java.io.IOException; import java.net.URI; -import java.net.URLEncoder; -import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -56,6 +54,7 @@ import org.apache.pinot.broker.broker.helix.HelixBrokerStarter; import 
org.apache.pinot.common.exception.HttpErrorStatusException; import org.apache.pinot.common.utils.FileUploadDownloadClient; +import org.apache.pinot.common.utils.URIUtils; import org.apache.pinot.common.utils.http.HttpClient; import org.apache.pinot.controller.helix.ControllerTest; import org.apache.pinot.minion.BaseMinionStarter; @@ -456,8 +455,9 @@ private int uploadSegmentWithOnlyMetadata(String tableName, TableType tableType, FileUploadDownloadClient fileUploadDownloadClient, File segmentTarFile) throws IOException, HttpErrorStatusException { List

headers = ImmutableList.of(new BasicHeader(FileUploadDownloadClient.CustomHeaders.DOWNLOAD_URI, - "file://" + segmentTarFile.getParentFile().getAbsolutePath() + "/" + URLEncoder.encode(segmentTarFile.getName(), - StandardCharsets.UTF_8.toString())), new BasicHeader(FileUploadDownloadClient.CustomHeaders.UPLOAD_TYPE, + "file://" + segmentTarFile.getParentFile().getAbsolutePath() + "/" + + URIUtils.encode(segmentTarFile.getName())), + new BasicHeader(FileUploadDownloadClient.CustomHeaders.UPLOAD_TYPE, FileUploadDownloadClient.FileUploadType.METADATA.toString())); // Add table name and table type as request parameters NameValuePair tableNameValuePair = diff --git a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/HybridClusterIntegrationTest.java b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/HybridClusterIntegrationTest.java index 73d0dcb9bdfc..f66c194d8538 100644 --- a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/HybridClusterIntegrationTest.java +++ b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/HybridClusterIntegrationTest.java @@ -20,7 +20,6 @@ import com.fasterxml.jackson.databind.JsonNode; import java.io.File; -import java.net.URLEncoder; import java.util.Collections; import java.util.List; import java.util.Map; @@ -28,6 +27,7 @@ import org.apache.helix.model.ExternalView; import org.apache.helix.model.IdealState; import org.apache.pinot.broker.broker.helix.BaseBrokerStarter; +import org.apache.pinot.common.utils.URIUtils; import org.apache.pinot.common.utils.config.TagNameUtils; import org.apache.pinot.controller.ControllerConf; import org.apache.pinot.spi.config.table.TableConfig; @@ -269,9 +269,9 @@ public void testBrokerDebugRoutingTableSQL(boolean useMultiStageQueryEngine) String offlineTableName = TableNameBuilder.OFFLINE.tableNameWithType(tableName); String realtimeTableName = TableNameBuilder.REALTIME.tableNameWithType(tableName); String 
encodedSQL; - encodedSQL = URLEncoder.encode("select * from " + realtimeTableName, "UTF-8"); + encodedSQL = URIUtils.encode("select * from " + realtimeTableName); Assert.assertNotNull(getDebugInfo("debug/routingTable/sql?query=" + encodedSQL)); - encodedSQL = URLEncoder.encode("select * from " + offlineTableName, "UTF-8"); + encodedSQL = URIUtils.encode("select * from " + offlineTableName); Assert.assertNotNull(getDebugInfo("debug/routingTable/sql?query=" + encodedSQL)); } diff --git a/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-hadoop/src/main/java/org/apache/pinot/plugin/ingestion/batch/hadoop/HadoopSegmentCreationMapper.java b/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-hadoop/src/main/java/org/apache/pinot/plugin/ingestion/batch/hadoop/HadoopSegmentCreationMapper.java index 778276434006..3efad6b05d81 100644 --- a/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-hadoop/src/main/java/org/apache/pinot/plugin/ingestion/batch/hadoop/HadoopSegmentCreationMapper.java +++ b/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-hadoop/src/main/java/org/apache/pinot/plugin/ingestion/batch/hadoop/HadoopSegmentCreationMapper.java @@ -22,7 +22,6 @@ import java.io.File; import java.io.IOException; import java.net.URI; -import java.net.URLEncoder; import java.nio.file.Files; import java.util.List; import java.util.UUID; @@ -34,6 +33,7 @@ import org.apache.hadoop.mapreduce.Mapper; import org.apache.pinot.common.segment.generation.SegmentGenerationUtils; import org.apache.pinot.common.utils.TarGzCompressionUtils; +import org.apache.pinot.common.utils.URIUtils; import org.apache.pinot.plugin.ingestion.batch.common.SegmentGenerationJobUtils; import org.apache.pinot.plugin.ingestion.batch.common.SegmentGenerationTaskRunner; import org.apache.pinot.spi.env.PinotConfiguration; @@ -175,7 +175,7 @@ protected void map(LongWritable key, Text value, Context context) { // Tar segment directory to compress file File localSegmentDir = new 
File(localOutputTempDir, segmentName); - String segmentTarFileName = URLEncoder.encode(segmentName + Constants.TAR_GZ_FILE_EXT, "UTF-8"); + String segmentTarFileName = URIUtils.encode(segmentName + Constants.TAR_GZ_FILE_EXT); File localSegmentTarFile = new File(localOutputTempDir, segmentTarFileName); LOGGER.info("Tarring segment from: {} to: {}", localSegmentDir, localSegmentTarFile); TarGzCompressionUtils.createTarGzFile(localSegmentDir, localSegmentTarFile); @@ -190,7 +190,7 @@ protected void map(LongWritable key, Text value, Context context) { _spec.isOverwriteOutput()); // Create and upload segment metadata tar file - String metadataTarFileName = URLEncoder.encode(segmentName + Constants.METADATA_TAR_GZ_FILE_EXT, "UTF-8"); + String metadataTarFileName = URIUtils.encode(segmentName + Constants.METADATA_TAR_GZ_FILE_EXT); URI outputMetadataTarURI = relativeOutputPath.resolve(metadataTarFileName); if (outputDirFS.exists(outputMetadataTarURI) && (_spec.isOverwriteOutput() || !_spec.isCreateMetadataTarGz())) { LOGGER.info("Deleting existing metadata tar gz file: {}", outputMetadataTarURI); diff --git a/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-spark-2.4/src/main/java/org/apache/pinot/plugin/ingestion/batch/spark/SparkSegmentGenerationJobRunner.java b/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-spark-2.4/src/main/java/org/apache/pinot/plugin/ingestion/batch/spark/SparkSegmentGenerationJobRunner.java index 204884ab8d37..6ae7ff97ab23 100644 --- a/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-spark-2.4/src/main/java/org/apache/pinot/plugin/ingestion/batch/spark/SparkSegmentGenerationJobRunner.java +++ b/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-spark-2.4/src/main/java/org/apache/pinot/plugin/ingestion/batch/spark/SparkSegmentGenerationJobRunner.java @@ -22,7 +22,6 @@ import java.io.IOException; import java.io.Serializable; import java.net.URI; -import java.net.URLEncoder; import java.nio.file.Path; import 
java.nio.file.Paths; import java.util.ArrayList; @@ -34,6 +33,7 @@ import org.apache.commons.io.FileUtils; import org.apache.pinot.common.segment.generation.SegmentGenerationUtils; import org.apache.pinot.common.utils.TarGzCompressionUtils; +import org.apache.pinot.common.utils.URIUtils; import org.apache.pinot.plugin.ingestion.batch.common.SegmentGenerationJobUtils; import org.apache.pinot.plugin.ingestion.batch.common.SegmentGenerationTaskRunner; import org.apache.pinot.spi.env.PinotConfiguration; @@ -283,7 +283,7 @@ public void call(String pathAndIdx) // Tar segment directory to compress file File localSegmentDir = new File(localOutputTempDir, segmentName); - String segmentTarFileName = URLEncoder.encode(segmentName + Constants.TAR_GZ_FILE_EXT, "UTF-8"); + String segmentTarFileName = URIUtils.encode(segmentName + Constants.TAR_GZ_FILE_EXT); File localSegmentTarFile = new File(localOutputTempDir, segmentTarFileName); LOGGER.info("Tarring segment from: {} to: {}", localSegmentDir, localSegmentTarFile); TarGzCompressionUtils.createTarGzFile(localSegmentDir, localSegmentTarFile); @@ -299,7 +299,7 @@ public void call(String pathAndIdx) _spec.isOverwriteOutput()); // Create and upload segment metadata tar file - String metadataTarFileName = URLEncoder.encode(segmentName + Constants.METADATA_TAR_GZ_FILE_EXT, "UTF-8"); + String metadataTarFileName = URIUtils.encode(segmentName + Constants.METADATA_TAR_GZ_FILE_EXT); URI outputMetadataTarURI = relativeOutputPath.resolve(metadataTarFileName); if (finalOutputDirFS.exists(outputMetadataTarURI) && (_spec.isOverwriteOutput() diff --git a/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-spark-3/src/main/java/org/apache/pinot/plugin/ingestion/batch/spark3/SparkSegmentGenerationJobRunner.java b/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-spark-3/src/main/java/org/apache/pinot/plugin/ingestion/batch/spark3/SparkSegmentGenerationJobRunner.java index d595da66b5a6..ef1f6cea5d9a 100644 --- 
a/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-spark-3/src/main/java/org/apache/pinot/plugin/ingestion/batch/spark3/SparkSegmentGenerationJobRunner.java +++ b/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-spark-3/src/main/java/org/apache/pinot/plugin/ingestion/batch/spark3/SparkSegmentGenerationJobRunner.java @@ -22,7 +22,6 @@ import java.io.IOException; import java.io.Serializable; import java.net.URI; -import java.net.URLEncoder; import java.nio.file.Path; import java.nio.file.Paths; import java.util.ArrayList; @@ -34,6 +33,7 @@ import org.apache.commons.io.FileUtils; import org.apache.pinot.common.segment.generation.SegmentGenerationUtils; import org.apache.pinot.common.utils.TarGzCompressionUtils; +import org.apache.pinot.common.utils.URIUtils; import org.apache.pinot.plugin.ingestion.batch.common.SegmentGenerationJobUtils; import org.apache.pinot.plugin.ingestion.batch.common.SegmentGenerationTaskRunner; import org.apache.pinot.spi.env.PinotConfiguration; @@ -281,7 +281,7 @@ public void call(String pathAndIdx) // Tar segment directory to compress file File localSegmentDir = new File(localOutputTempDir, segmentName); - String segmentTarFileName = URLEncoder.encode(segmentName + Constants.TAR_GZ_FILE_EXT, "UTF-8"); + String segmentTarFileName = URIUtils.encode(segmentName + Constants.TAR_GZ_FILE_EXT); File localSegmentTarFile = new File(localOutputTempDir, segmentTarFileName); LOGGER.info("Tarring segment from: {} to: {}", localSegmentDir, localSegmentTarFile); TarGzCompressionUtils.createTarGzFile(localSegmentDir, localSegmentTarFile); @@ -297,7 +297,7 @@ public void call(String pathAndIdx) _spec.isOverwriteOutput()); // Create and upload segment metadata tar file - String metadataTarFileName = URLEncoder.encode(segmentName + Constants.METADATA_TAR_GZ_FILE_EXT, "UTF-8"); + String metadataTarFileName = URIUtils.encode(segmentName + Constants.METADATA_TAR_GZ_FILE_EXT); URI outputMetadataTarURI = 
relativeOutputPath.resolve(metadataTarFileName); if (finalOutputDirFS.exists(outputMetadataTarURI) && (_spec.isOverwriteOutput() || !_spec.isCreateMetadataTarGz())) { diff --git a/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-standalone/src/main/java/org/apache/pinot/plugin/ingestion/batch/standalone/SegmentGenerationJobRunner.java b/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-standalone/src/main/java/org/apache/pinot/plugin/ingestion/batch/standalone/SegmentGenerationJobRunner.java index 974316607768..a85247fc3d9b 100644 --- a/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-standalone/src/main/java/org/apache/pinot/plugin/ingestion/batch/standalone/SegmentGenerationJobRunner.java +++ b/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-standalone/src/main/java/org/apache/pinot/plugin/ingestion/batch/standalone/SegmentGenerationJobRunner.java @@ -22,7 +22,6 @@ import java.io.IOException; import java.net.URI; import java.net.URISyntaxException; -import java.net.URLEncoder; import java.nio.file.Paths; import java.util.ArrayList; import java.util.Collections; @@ -37,6 +36,7 @@ import org.apache.commons.io.FileUtils; import org.apache.pinot.common.segment.generation.SegmentGenerationUtils; import org.apache.pinot.common.utils.TarGzCompressionUtils; +import org.apache.pinot.common.utils.URIUtils; import org.apache.pinot.plugin.ingestion.batch.common.SegmentGenerationJobUtils; import org.apache.pinot.plugin.ingestion.batch.common.SegmentGenerationTaskRunner; import org.apache.pinot.segment.local.utils.ConsistentDataPushUtils; @@ -265,7 +265,7 @@ private void submitSegmentGenTask(File localTempDir, URI inputFileURI, int seqId String segmentName = taskRunner.run(); // Tar segment directory to compress file localSegmentDir = new File(localOutputTempDir, segmentName); - String segmentTarFileName = URLEncoder.encode(segmentName + Constants.TAR_GZ_FILE_EXT, "UTF-8"); + String segmentTarFileName = URIUtils.encode(segmentName + 
Constants.TAR_GZ_FILE_EXT); localSegmentTarFile = new File(localOutputTempDir, segmentTarFileName); LOGGER.info("Tarring segment from: {} to: {}", localSegmentDir, localSegmentTarFile); TarGzCompressionUtils.createTarGzFile(localSegmentDir, localSegmentTarFile); diff --git a/pinot-plugins/pinot-file-system/pinot-adls/src/main/java/org/apache/pinot/plugin/filesystem/AzurePinotFSUtil.java b/pinot-plugins/pinot-file-system/pinot-adls/src/main/java/org/apache/pinot/plugin/filesystem/AzurePinotFSUtil.java index 282b4346d6d6..c9665e7093b6 100644 --- a/pinot-plugins/pinot-file-system/pinot-adls/src/main/java/org/apache/pinot/plugin/filesystem/AzurePinotFSUtil.java +++ b/pinot-plugins/pinot-file-system/pinot-adls/src/main/java/org/apache/pinot/plugin/filesystem/AzurePinotFSUtil.java @@ -23,6 +23,7 @@ import java.io.IOException; import java.net.URI; import java.net.URLDecoder; +import java.nio.charset.StandardCharsets; /** @@ -41,12 +42,10 @@ private AzurePinotFSUtil() { * * @param uri a uri path * @return path in Azure Data Lake Gen2 format - * @throws IOException */ - public static String convertUriToAzureStylePath(URI uri) - throws IOException { + public static String convertUriToAzureStylePath(URI uri) { // Pinot side code uses `URLEncoder` when building uri - String path = URLDecoder.decode(uri.getRawPath(), "UTF-8"); + String path = URLDecoder.decode(uri.getRawPath(), StandardCharsets.UTF_8); if (path.startsWith(DIRECTORY_DELIMITER)) { path = path.substring(1); } diff --git a/pinot-plugins/pinot-file-system/pinot-adls/src/test/java/org/apache/pinot/plugin/filesystem/test/AzurePinotFSUtilTest.java b/pinot-plugins/pinot-file-system/pinot-adls/src/test/java/org/apache/pinot/plugin/filesystem/test/AzurePinotFSUtilTest.java index 80e68a659952..057a48147096 100644 --- a/pinot-plugins/pinot-file-system/pinot-adls/src/test/java/org/apache/pinot/plugin/filesystem/test/AzurePinotFSUtilTest.java +++ 
b/pinot-plugins/pinot-file-system/pinot-adls/src/test/java/org/apache/pinot/plugin/filesystem/test/AzurePinotFSUtilTest.java @@ -23,6 +23,7 @@ import java.io.IOException; import java.net.URI; import java.net.URLEncoder; +import java.nio.charset.StandardCharsets; import org.apache.pinot.plugin.filesystem.AzurePinotFSUtil; import org.testng.Assert; import org.testng.annotations.Test; @@ -67,11 +68,12 @@ public void testUriToAzureStylePath(String tableName, String segmentName, boolea throws Exception { // "/encode(dir)/encode(segment)" String expectedPath = String.join(File.separator, tableName, segmentName); - URI uri = createUri(URLEncoder.encode(tableName, "UTF-8"), URLEncoder.encode(segmentName, "UTF-8")); + URI uri = createUri(URLEncoder.encode(tableName, StandardCharsets.UTF_8), URLEncoder.encode(segmentName, + StandardCharsets.UTF_8)); checkUri(uri, expectedPath, urlEncoded); // "/encode(dir/segment)" - uri = createUri(URLEncoder.encode(String.join(File.separator, tableName, segmentName), "UTF-8")); + uri = createUri(URLEncoder.encode(String.join(File.separator, tableName, segmentName), StandardCharsets.UTF_8)); checkUri(uri, expectedPath, urlEncoded); // "/encode(dir/segment)" diff --git a/pinot-plugins/pinot-file-system/pinot-s3/src/main/java/org/apache/pinot/plugin/filesystem/S3PinotFS.java b/pinot-plugins/pinot-file-system/pinot-s3/src/main/java/org/apache/pinot/plugin/filesystem/S3PinotFS.java index e3a7daa55eef..4fc84f3541c2 100644 --- a/pinot-plugins/pinot-file-system/pinot-s3/src/main/java/org/apache/pinot/plugin/filesystem/S3PinotFS.java +++ b/pinot-plugins/pinot-file-system/pinot-s3/src/main/java/org/apache/pinot/plugin/filesystem/S3PinotFS.java @@ -26,7 +26,6 @@ import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; -import java.io.UnsupportedEncodingException; import java.net.URI; import java.net.URISyntaxException; import java.net.URLEncoder; @@ -325,12 +324,7 @@ private boolean isEmptyDirectory(URI uri) private 
boolean copyFile(URI srcUri, URI dstUri) throws IOException { try { - String encodedUrl = null; - try { - encodedUrl = URLEncoder.encode(srcUri.getHost() + srcUri.getPath(), StandardCharsets.UTF_8.toString()); - } catch (UnsupportedEncodingException e) { - throw new RuntimeException(e); - } + String encodedUrl = URLEncoder.encode(srcUri.getHost() + srcUri.getPath(), StandardCharsets.UTF_8); String dstPath = sanitizePath(dstUri.getPath()); CopyObjectRequest copyReq = generateCopyObjectRequest(encodedUrl, dstUri, dstPath, null); @@ -674,12 +668,7 @@ public boolean touch(URI uri) throws IOException { try { HeadObjectResponse s3ObjectMetadata = getS3ObjectMetadata(uri); - String encodedUrl = null; - try { - encodedUrl = URLEncoder.encode(uri.getHost() + uri.getPath(), StandardCharsets.UTF_8.toString()); - } catch (UnsupportedEncodingException e) { - throw new RuntimeException(e); - } + String encodedUrl = URLEncoder.encode(uri.getHost() + uri.getPath(), StandardCharsets.UTF_8); String path = sanitizePath(uri.getPath()); CopyObjectRequest request = generateCopyObjectRequest(encodedUrl, uri, path, diff --git a/pinot-plugins/pinot-input-format/pinot-parquet/src/test/java/org/apache/pinot/plugin/inputformat/parquet/ParquetNativeRecordReaderFullTest.java b/pinot-plugins/pinot-input-format/pinot-parquet/src/test/java/org/apache/pinot/plugin/inputformat/parquet/ParquetNativeRecordReaderFullTest.java index f0438a050835..6e84b19c9a12 100644 --- a/pinot-plugins/pinot-input-format/pinot-parquet/src/test/java/org/apache/pinot/plugin/inputformat/parquet/ParquetNativeRecordReaderFullTest.java +++ b/pinot-plugins/pinot-input-format/pinot-parquet/src/test/java/org/apache/pinot/plugin/inputformat/parquet/ParquetNativeRecordReaderFullTest.java @@ -20,6 +20,7 @@ import java.io.File; import java.net.URLDecoder; +import java.nio.charset.StandardCharsets; import org.apache.commons.io.FileUtils; import org.testng.annotations.Test; @@ -125,7 +126,8 @@ protected void testReadDataSet2() 
protected void testParquetFile(String filePath) throws Exception { - File dataFile = new File(URLDecoder.decode(getClass().getClassLoader().getResource(filePath).getFile(), "UTF-8")); + File dataFile = new File(URLDecoder.decode(getClass().getClassLoader().getResource(filePath).getFile(), + StandardCharsets.UTF_8)); ParquetNativeRecordReader recordReader = new ParquetNativeRecordReader(); recordReader.init(dataFile, null, null); while (recordReader.hasNext()) { diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/SegmentPushUtils.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/SegmentPushUtils.java index 0a6be1905b22..6ca93f24918e 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/SegmentPushUtils.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/SegmentPushUtils.java @@ -24,7 +24,6 @@ import java.io.Serializable; import java.net.URI; import java.net.URISyntaxException; -import java.net.URLDecoder; import java.nio.file.FileSystems; import java.nio.file.PathMatcher; import java.nio.file.Paths; @@ -44,6 +43,7 @@ import org.apache.pinot.common.utils.FileUploadDownloadClient; import org.apache.pinot.common.utils.SimpleHttpResponse; import org.apache.pinot.common.utils.TarGzCompressionUtils; +import org.apache.pinot.common.utils.URIUtils; import org.apache.pinot.common.utils.http.HttpClient; import org.apache.pinot.segment.spi.V1Constants; import org.apache.pinot.segment.spi.creator.name.SegmentNameUtils; @@ -407,7 +407,7 @@ public static File generateSegmentMetadataFile(PinotFS fileSystem, URI tarFileUR try { if (fileSystem instanceof LocalPinotFS) { // For local file system, we don't need to copy the tar file. 
- tarFile = new File(URLDecoder.decode(tarFileURI.getRawPath(), "UTF-8")); + tarFile = new File(URIUtils.decode(tarFileURI.getRawPath())); } else { // For other file systems, we need to download the file to local file system fileSystem.copyToLocalFile(tarFileURI, tarFile); diff --git a/pinot-server/src/main/java/org/apache/pinot/server/api/resources/TablesResource.java b/pinot-server/src/main/java/org/apache/pinot/server/api/resources/TablesResource.java index 1baa52ffa4c9..1c35dc21ece9 100644 --- a/pinot-server/src/main/java/org/apache/pinot/server/api/resources/TablesResource.java +++ b/pinot-server/src/main/java/org/apache/pinot/server/api/resources/TablesResource.java @@ -29,9 +29,7 @@ import io.swagger.annotations.SecurityDefinition; import io.swagger.annotations.SwaggerDefinition; import java.io.File; -import java.io.UnsupportedEncodingException; import java.net.URI; -import java.net.URLDecoder; import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.util.ArrayList; @@ -207,11 +205,7 @@ public String getSegmentMetadata( List decodedColumns = new ArrayList<>(columns.size()); for (String column : columns) { - try { - decodedColumns.add(URLDecoder.decode(column, StandardCharsets.UTF_8.name())); - } catch (UnsupportedEncodingException e) { - throw new RuntimeException(e.getCause()); - } + decodedColumns.add(URIUtils.decode(column)); } boolean allColumns = false; @@ -380,19 +374,11 @@ public String getSegmentMetadata( List columns, @Context HttpHeaders headers) { tableName = DatabaseUtils.translateTableName(tableName, headers); for (int i = 0; i < columns.size(); i++) { - try { - columns.set(i, URLDecoder.decode(columns.get(i), StandardCharsets.UTF_8.name())); - } catch (UnsupportedEncodingException e) { - throw new RuntimeException(e.getCause()); - } + columns.set(i, URIUtils.decode(columns.get(i))); } TableDataManager tableDataManager = ServerResourceUtils.checkGetTableDataManager(_serverInstance, tableName); - try { - segmentName = 
URLDecoder.decode(segmentName, StandardCharsets.UTF_8.name()); - } catch (UnsupportedEncodingException e) { - throw new RuntimeException(e.getCause()); - } + segmentName = URIUtils.decode(segmentName); SegmentDataManager segmentDataManager = tableDataManager.acquireSegment(segmentName); if (segmentDataManager == null) { throw new WebApplicationException(String.format("Table %s segments %s does not exist", tableName, segmentName), diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/filesystem/LocalPinotFS.java b/pinot-spi/src/main/java/org/apache/pinot/spi/filesystem/LocalPinotFS.java index 7fd8ca5906ea..b82490b5e028 100644 --- a/pinot-spi/src/main/java/org/apache/pinot/spi/filesystem/LocalPinotFS.java +++ b/pinot-spi/src/main/java/org/apache/pinot/spi/filesystem/LocalPinotFS.java @@ -23,9 +23,9 @@ import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; -import java.io.UnsupportedEncodingException; import java.net.URI; import java.net.URLDecoder; +import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; @@ -188,11 +188,7 @@ public InputStream open(URI uri) private static File toFile(URI uri) { // NOTE: Do not use new File(uri) because scheme might not exist and it does not decode '+' to ' ' // Do not use uri.getPath() because it does not decode '+' to ' ' - try { - return new File(URLDecoder.decode(uri.getRawPath(), "UTF-8")); - } catch (UnsupportedEncodingException e) { - throw new RuntimeException(e); - } + return new File(URLDecoder.decode(uri.getRawPath(), StandardCharsets.UTF_8)); } private static void copy(File srcFile, File dstFile, boolean recursive) diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/utils/builder/ControllerRequestURLBuilder.java b/pinot-spi/src/main/java/org/apache/pinot/spi/utils/builder/ControllerRequestURLBuilder.java index 3b22e04941f1..f4133fee59d2 100644 --- 
a/pinot-spi/src/main/java/org/apache/pinot/spi/utils/builder/ControllerRequestURLBuilder.java +++ b/pinot-spi/src/main/java/org/apache/pinot/spi/utils/builder/ControllerRequestURLBuilder.java @@ -18,7 +18,6 @@ */ package org.apache.pinot.spi.utils.builder; -import java.io.UnsupportedEncodingException; import java.net.URLEncoder; import java.nio.charset.StandardCharsets; import java.util.List; @@ -488,31 +487,27 @@ public String forInstanceReplace(String tableName, @Nullable InstancePartitionsT return url; } - public String forIngestFromFile(String tableNameWithType, String batchConfigMapStr) - throws UnsupportedEncodingException { + public String forIngestFromFile(String tableNameWithType, String batchConfigMapStr) { return String.format("%s?tableNameWithType=%s&batchConfigMapStr=%s", StringUtil.join("/", _baseUrl, "ingestFromFile"), tableNameWithType, - URLEncoder.encode(batchConfigMapStr, StandardCharsets.UTF_8.toString())); + URLEncoder.encode(batchConfigMapStr, StandardCharsets.UTF_8)); } - public String forIngestFromFile(String tableNameWithType, Map batchConfigMap) - throws UnsupportedEncodingException { + public String forIngestFromFile(String tableNameWithType, Map batchConfigMap) { String batchConfigMapStr = batchConfigMap.entrySet().stream().map(e -> String.format("\"%s\":\"%s\"", e.getKey(), e.getValue())) .collect(Collectors.joining(",", "{", "}")); return forIngestFromFile(tableNameWithType, batchConfigMapStr); } - public String forIngestFromURI(String tableNameWithType, String batchConfigMapStr, String sourceURIStr) - throws UnsupportedEncodingException { + public String forIngestFromURI(String tableNameWithType, String batchConfigMapStr, String sourceURIStr) { return String.format("%s?tableNameWithType=%s&batchConfigMapStr=%s&sourceURIStr=%s", StringUtil.join("/", _baseUrl, "ingestFromURI"), tableNameWithType, - URLEncoder.encode(batchConfigMapStr, StandardCharsets.UTF_8.toString()), - URLEncoder.encode(sourceURIStr, 
StandardCharsets.UTF_8.toString())); + URLEncoder.encode(batchConfigMapStr, StandardCharsets.UTF_8), + URLEncoder.encode(sourceURIStr, StandardCharsets.UTF_8)); } - public String forIngestFromURI(String tableNameWithType, Map batchConfigMap, String sourceURIStr) - throws UnsupportedEncodingException { + public String forIngestFromURI(String tableNameWithType, Map batchConfigMap, String sourceURIStr) { String batchConfigMapStr = batchConfigMap.entrySet().stream().map(e -> String.format("\"%s\":\"%s\"", e.getKey(), e.getValue())) .collect(Collectors.joining(",", "{", "}")); @@ -573,12 +568,7 @@ public String forUpdateTagsValidation() { } private static String encode(String s) { - try { - return URLEncoder.encode(s, "UTF-8"); - } catch (Exception e) { - // Should never happen - throw new RuntimeException(e); - } + return URLEncoder.encode(s, StandardCharsets.UTF_8); } public String forSegmentUpload() { From ad7068619a0c1c7152a707f4cb59fd8dbff2b06d Mon Sep 17 00:00:00 2001 From: David Cromberge Date: Wed, 1 May 2024 20:04:31 +0100 Subject: [PATCH 099/102] Enhancement: Sketch value aggregator performance (#13020) --- .../DistinctCountCPCSketchStarTreeV2Test.java | 24 +++- ...ntIntegerSumTupleSketchStarTreeV2Test.java | 25 +++- ...istinctCountThetaSketchStarTreeV2Test.java | 23 ++- ...DistinctCountCPCSketchValueAggregator.java | 88 +++++++----- ...stinctCountThetaSketchValueAggregator.java | 133 +++++++++--------- .../IntegerTupleSketchValueAggregator.java | 90 ++++++++---- ...inctCountCPCSketchValueAggregatorTest.java | 64 +++++---- ...ctCountThetaSketchValueAggregatorTest.java | 89 +++++++----- ...IntegerTupleSketchValueAggregatorTest.java | 27 ++-- .../pinot/spi/utils/CommonConstants.java | 4 +- 10 files changed, 353 insertions(+), 214 deletions(-) diff --git a/pinot-core/src/test/java/org/apache/pinot/core/startree/v2/DistinctCountCPCSketchStarTreeV2Test.java b/pinot-core/src/test/java/org/apache/pinot/core/startree/v2/DistinctCountCPCSketchStarTreeV2Test.java index 
3732d3553b57..c7129a71f21b 100644 --- a/pinot-core/src/test/java/org/apache/pinot/core/startree/v2/DistinctCountCPCSketchStarTreeV2Test.java +++ b/pinot-core/src/test/java/org/apache/pinot/core/startree/v2/DistinctCountCPCSketchStarTreeV2Test.java @@ -21,6 +21,7 @@ import java.util.Collections; import java.util.Random; import org.apache.datasketches.cpc.CpcSketch; +import org.apache.datasketches.cpc.CpcUnion; import org.apache.pinot.segment.local.aggregator.DistinctCountCPCSketchValueAggregator; import org.apache.pinot.segment.local.aggregator.ValueAggregator; import org.apache.pinot.spi.data.FieldSpec.DataType; @@ -28,10 +29,10 @@ import static org.testng.Assert.assertEquals; -public class DistinctCountCPCSketchStarTreeV2Test extends BaseStarTreeV2Test { +public class DistinctCountCPCSketchStarTreeV2Test extends BaseStarTreeV2Test { @Override - ValueAggregator getValueAggregator() { + ValueAggregator getValueAggregator() { return new DistinctCountCPCSketchValueAggregator(Collections.emptyList()); } @@ -46,7 +47,22 @@ Object getRandomRawValue(Random random) { } @Override - void assertAggregatedValue(CpcSketch starTreeResult, CpcSketch nonStarTreeResult) { - assertEquals((long) starTreeResult.getEstimate(), (long) nonStarTreeResult.getEstimate()); + void assertAggregatedValue(Object starTreeResult, Object nonStarTreeResult) { + // Use error at (lgK=12, stddev=2) from: + // https://datasketches.apache.org/docs/CPC/CpcPerformance.html + double delta = (1 << 12) * 0.01; + assertEquals((long) toSketch(starTreeResult).getEstimate(), (long) toSketch(nonStarTreeResult).getEstimate(), + delta); + } + + private CpcSketch toSketch(Object value) { + if (value instanceof CpcUnion) { + return ((CpcUnion) value).getResult(); + } else if (value instanceof CpcSketch) { + return (CpcSketch) value; + } else { + throw new IllegalStateException( + "Unsupported data type for CPC Sketch aggregation: " + value.getClass().getSimpleName()); + } } } diff --git 
a/pinot-core/src/test/java/org/apache/pinot/core/startree/v2/DistinctCountIntegerSumTupleSketchStarTreeV2Test.java b/pinot-core/src/test/java/org/apache/pinot/core/startree/v2/DistinctCountIntegerSumTupleSketchStarTreeV2Test.java index b9c52bf95875..d10efb94595a 100644 --- a/pinot-core/src/test/java/org/apache/pinot/core/startree/v2/DistinctCountIntegerSumTupleSketchStarTreeV2Test.java +++ b/pinot-core/src/test/java/org/apache/pinot/core/startree/v2/DistinctCountIntegerSumTupleSketchStarTreeV2Test.java @@ -20,6 +20,7 @@ import java.util.Random; import org.apache.datasketches.tuple.Sketch; +import org.apache.datasketches.tuple.Union; import org.apache.datasketches.tuple.aninteger.IntegerSketch; import org.apache.datasketches.tuple.aninteger.IntegerSummary; import org.apache.pinot.core.common.ObjectSerDeUtils; @@ -30,11 +31,10 @@ import static org.testng.Assert.assertEquals; -public class DistinctCountIntegerSumTupleSketchStarTreeV2Test - extends BaseStarTreeV2Test> { +public class DistinctCountIntegerSumTupleSketchStarTreeV2Test extends BaseStarTreeV2Test { @Override - ValueAggregator> getValueAggregator() { + ValueAggregator getValueAggregator() { return new IntegerTupleSketchValueAggregator(IntegerSummary.Mode.Sum); } @@ -51,7 +51,22 @@ byte[] getRandomRawValue(Random random) { } @Override - void assertAggregatedValue(Sketch starTreeResult, Sketch nonStarTreeResult) { - assertEquals(starTreeResult.getEstimate(), nonStarTreeResult.getEstimate()); + void assertAggregatedValue(Object starTreeResult, Object nonStarTreeResult) { + // Use error at (lgK=14, stddev=2) from: + // https://datasketches.apache.org/docs/Theta/ThetaErrorTable.html + double delta = (1 << 14) * 0.01563; + assertEquals(toSketch(starTreeResult).getEstimate(), toSketch(nonStarTreeResult).getEstimate(), delta); + } + + @SuppressWarnings("unchecked") + private Sketch toSketch(Object value) { + if (value instanceof Union) { + return ((Union) value).getResult(); + } else if (value instanceof Sketch) { + 
return ((Sketch) value); + } else { + throw new IllegalStateException( + "Unsupported data type for Integer Tuple Sketch aggregation: " + value.getClass().getSimpleName()); + } } } diff --git a/pinot-core/src/test/java/org/apache/pinot/core/startree/v2/DistinctCountThetaSketchStarTreeV2Test.java b/pinot-core/src/test/java/org/apache/pinot/core/startree/v2/DistinctCountThetaSketchStarTreeV2Test.java index 4e924c9d0c65..9fd34dc8c075 100644 --- a/pinot-core/src/test/java/org/apache/pinot/core/startree/v2/DistinctCountThetaSketchStarTreeV2Test.java +++ b/pinot-core/src/test/java/org/apache/pinot/core/startree/v2/DistinctCountThetaSketchStarTreeV2Test.java @@ -20,6 +20,7 @@ import java.util.Random; import org.apache.datasketches.theta.Sketch; +import org.apache.datasketches.theta.Union; import org.apache.pinot.segment.local.aggregator.DistinctCountThetaSketchValueAggregator; import org.apache.pinot.segment.local.aggregator.ValueAggregator; import org.apache.pinot.spi.data.FieldSpec.DataType; @@ -27,10 +28,10 @@ import static org.testng.Assert.assertEquals; -public class DistinctCountThetaSketchStarTreeV2Test extends BaseStarTreeV2Test { +public class DistinctCountThetaSketchStarTreeV2Test extends BaseStarTreeV2Test { @Override - ValueAggregator getValueAggregator() { + ValueAggregator getValueAggregator() { return new DistinctCountThetaSketchValueAggregator(); } @@ -45,7 +46,21 @@ Object getRandomRawValue(Random random) { } @Override - void assertAggregatedValue(Sketch starTreeResult, Sketch nonStarTreeResult) { - assertEquals(starTreeResult.getEstimate(), nonStarTreeResult.getEstimate()); + void assertAggregatedValue(Object starTreeResult, Object nonStarTreeResult) { + // Use error at (lgK=14, stddev=2) from: + // https://datasketches.apache.org/docs/Theta/ThetaErrorTable.html + double delta = (1 << 14) * 0.01563; + assertEquals(toSketch(starTreeResult).getEstimate(), toSketch(nonStarTreeResult).getEstimate(), delta); + } + + private Sketch toSketch(Object value) { + 
if (value instanceof Union) { + return ((Union) value).getResult(); + } else if (value instanceof Sketch) { + return (Sketch) value; + } else { + throw new IllegalStateException( + "Unsupported data type for Theta Sketch aggregation: " + value.getClass().getSimpleName()); + } } } diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/aggregator/DistinctCountCPCSketchValueAggregator.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/aggregator/DistinctCountCPCSketchValueAggregator.java index 7ac3090188da..203b900a32da 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/aggregator/DistinctCountCPCSketchValueAggregator.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/aggregator/DistinctCountCPCSketchValueAggregator.java @@ -28,13 +28,11 @@ import org.apache.pinot.spi.utils.CommonConstants; -public class DistinctCountCPCSketchValueAggregator implements ValueAggregator { +public class DistinctCountCPCSketchValueAggregator implements ValueAggregator { public static final DataType AGGREGATED_VALUE_TYPE = DataType.BYTES; private final int _lgK; - private int _maxByteSize; - public DistinctCountCPCSketchValueAggregator(List arguments) { // length 1 means we use the Helix default if (arguments.size() <= 1) { @@ -55,64 +53,61 @@ public DataType getAggregatedValueType() { } @Override - public CpcSketch getInitialAggregatedValue(Object rawValue) { - CpcSketch initialValue; + public Object getInitialAggregatedValue(Object rawValue) { + CpcUnion cpcUnion = new CpcUnion(_lgK); if (rawValue instanceof byte[]) { // Serialized Sketch byte[] bytes = (byte[]) rawValue; - initialValue = deserializeAggregatedValue(bytes); - _maxByteSize = Math.max(_maxByteSize, bytes.length); + cpcUnion.update(deserializeAggregatedValue(bytes)); } else if (rawValue instanceof byte[][]) { // Multiple Serialized Sketches byte[][] serializedSketches = (byte[][]) rawValue; - CpcUnion union = new CpcUnion(_lgK); 
for (byte[] bytes : serializedSketches) { - union.update(deserializeAggregatedValue(bytes)); + cpcUnion.update(deserializeAggregatedValue(bytes)); } - initialValue = union.getResult(); - updateMaxByteSize(initialValue); } else { - initialValue = empty(); - addObjectToSketch(rawValue, initialValue); - updateMaxByteSize(initialValue); + CpcSketch pristineSketch = empty(); + addObjectToSketch(rawValue, pristineSketch); + cpcUnion.update(pristineSketch); } - return initialValue; + return cpcUnion; } @Override - public CpcSketch applyRawValue(CpcSketch value, Object rawValue) { + public Object applyRawValue(Object aggregatedValue, Object rawValue) { + CpcUnion cpcUnion = extractUnion(aggregatedValue); if (rawValue instanceof byte[]) { byte[] bytes = (byte[]) rawValue; - CpcSketch sketch = union(value, deserializeAggregatedValue(bytes)); - updateMaxByteSize(sketch); - return sketch; + CpcSketch sketch = deserializeAggregatedValue(bytes); + cpcUnion.update(sketch); } else { - addObjectToSketch(rawValue, value); - updateMaxByteSize(value); - return value; + CpcSketch pristineSketch = empty(); + addObjectToSketch(rawValue, pristineSketch); + cpcUnion.update(pristineSketch); } + return cpcUnion; } @Override - public CpcSketch applyAggregatedValue(CpcSketch value, CpcSketch aggregatedValue) { - CpcSketch result = union(value, aggregatedValue); - updateMaxByteSize(result); - return result; + public Object applyAggregatedValue(Object value, Object aggregatedValue) { + CpcUnion cpcUnion = extractUnion(aggregatedValue); + CpcSketch sketch = extractSketch(value); + cpcUnion.update(sketch); + return cpcUnion; } @Override - public CpcSketch cloneAggregatedValue(CpcSketch value) { + public Object cloneAggregatedValue(Object value) { return deserializeAggregatedValue(serializeAggregatedValue(value)); } @Override public int getMaxAggregatedValueByteSize() { - // NOTE: For aggregated metrics, initial aggregated value might have not been generated. 
Returns the byte size - // based on lgK. - return _maxByteSize > 0 ? _maxByteSize : CpcSketch.getMaxSerializedBytes(_lgK); + return CpcSketch.getMaxSerializedBytes(_lgK); } @Override - public byte[] serializeAggregatedValue(CpcSketch value) { - return CustomSerDeUtils.DATA_SKETCH_CPC_SER_DE.serialize(value); + public byte[] serializeAggregatedValue(Object value) { + CpcSketch sketch = extractSketch(value); + return CustomSerDeUtils.DATA_SKETCH_CPC_SER_DE.serialize(sketch); } @Override @@ -181,9 +176,32 @@ private void addObjectsToSketch(Object[] rawValues, CpcSketch sketch) { } } - private void updateMaxByteSize(CpcSketch sketch) { - if (sketch != null) { - _maxByteSize = Math.max(_maxByteSize, sketch.toByteArray().length); + private CpcUnion extractUnion(Object value) { + if (value == null) { + return new CpcUnion(_lgK); + } else if (value instanceof CpcUnion) { + return (CpcUnion) value; + } else if (value instanceof CpcSketch) { + CpcSketch sketch = (CpcSketch) value; + CpcUnion cpcUnion = new CpcUnion(_lgK); + cpcUnion.update(sketch); + return cpcUnion; + } else { + throw new IllegalStateException( + "Unsupported data type for CPC Sketch aggregation: " + value.getClass().getSimpleName()); + } + } + + private CpcSketch extractSketch(Object value) { + if (value == null) { + return empty(); + } else if (value instanceof CpcUnion) { + return ((CpcUnion) value).getResult(); + } else if (value instanceof CpcSketch) { + return (CpcSketch) value; + } else { + throw new IllegalStateException( + "Unsupported data type for CPC Sketch aggregation: " + value.getClass().getSimpleName()); } } diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/aggregator/DistinctCountThetaSketchValueAggregator.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/aggregator/DistinctCountThetaSketchValueAggregator.java index f36f9a00e936..3222265f973c 100644 --- 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/aggregator/DistinctCountThetaSketchValueAggregator.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/aggregator/DistinctCountThetaSketchValueAggregator.java @@ -18,29 +18,26 @@ */ package org.apache.pinot.segment.local.aggregator; -import java.util.Arrays; -import java.util.stream.StreamSupport; +import org.apache.datasketches.theta.SetOperationBuilder; import org.apache.datasketches.theta.Sketch; -import org.apache.datasketches.theta.Sketches; import org.apache.datasketches.theta.Union; -import org.apache.datasketches.theta.UpdateSketch; import org.apache.pinot.segment.local.utils.CustomSerDeUtils; import org.apache.pinot.segment.spi.AggregationFunctionType; import org.apache.pinot.spi.data.FieldSpec.DataType; import org.apache.pinot.spi.utils.CommonConstants; -public class DistinctCountThetaSketchValueAggregator implements ValueAggregator { +public class DistinctCountThetaSketchValueAggregator implements ValueAggregator { public static final DataType AGGREGATED_VALUE_TYPE = DataType.BYTES; - private final Union _union; + private final SetOperationBuilder _setOperationBuilder; // This changes a lot similar to the Bitmap aggregator private int _maxByteSize; public DistinctCountThetaSketchValueAggregator() { - // TODO: Handle configurable nominal entries for StarTreeBuilder - _union = Union.builder().setNominalEntries(CommonConstants.Helix.DEFAULT_THETA_SKETCH_NOMINAL_ENTRIES).buildUnion(); + _setOperationBuilder = + Union.builder().setNominalEntries(CommonConstants.Helix.DEFAULT_THETA_SKETCH_NOMINAL_ENTRIES); } @Override @@ -53,51 +50,49 @@ public DataType getAggregatedValueType() { return AGGREGATED_VALUE_TYPE; } - // Utility method to create a theta sketch with one item in it - private Sketch singleItemSketch(Object rawValue) { - // TODO: Handle configurable nominal entries for StarTreeBuilder - UpdateSketch sketch = - 
Sketches.updateSketchBuilder().setNominalEntries(CommonConstants.Helix.DEFAULT_THETA_SKETCH_NOMINAL_ENTRIES) - .build(); + private void singleItemUpdate(Union thetaUnion, Object rawValue) { if (rawValue instanceof String) { - sketch.update((String) rawValue); + thetaUnion.update((String) rawValue); } else if (rawValue instanceof Integer) { - sketch.update((Integer) rawValue); + thetaUnion.update((Integer) rawValue); } else if (rawValue instanceof Long) { - sketch.update((Long) rawValue); + thetaUnion.update((Long) rawValue); } else if (rawValue instanceof Double) { - sketch.update((Double) rawValue); + thetaUnion.update((Double) rawValue); } else if (rawValue instanceof Float) { - sketch.update((Float) rawValue); + thetaUnion.update((Float) rawValue); } else if (rawValue instanceof Object[]) { - addObjectsToSketch((Object[]) rawValue, sketch); + multiItemUpdate(thetaUnion, (Object[]) rawValue); + } else if (rawValue instanceof Sketch) { + thetaUnion.union((Sketch) rawValue); + } else if (rawValue instanceof Union) { + thetaUnion.union(((Union) rawValue).getResult()); } else { throw new IllegalStateException( "Unsupported data type for Theta Sketch aggregation: " + rawValue.getClass().getSimpleName()); } - return sketch.compact(); } - private void addObjectsToSketch(Object[] rawValues, UpdateSketch updateSketch) { + private void multiItemUpdate(Union thetaUnion, Object[] rawValues) { if (rawValues instanceof String[]) { for (String s : (String[]) rawValues) { - updateSketch.update(s); + thetaUnion.update(s); } } else if (rawValues instanceof Integer[]) { for (Integer i : (Integer[]) rawValues) { - updateSketch.update(i); + thetaUnion.update(i); } } else if (rawValues instanceof Long[]) { for (Long l : (Long[]) rawValues) { - updateSketch.update(l); + thetaUnion.update(l); } } else if (rawValues instanceof Double[]) { for (Double d : (Double[]) rawValues) { - updateSketch.update(d); + thetaUnion.update(d); } } else if (rawValues instanceof Float[]) { for (Float f : 
(Float[]) rawValues) { - updateSketch.update(f); + thetaUnion.update(f); } } else { throw new IllegalStateException( @@ -105,59 +100,64 @@ private void addObjectsToSketch(Object[] rawValues, UpdateSketch updateSketch) { } } - // Utility method to merge two sketches - private Sketch union(Sketch left, Sketch right) { - return _union.union(left, right); - } - - // Utility method to make an empty sketch - private Sketch empty() { - // TODO: Handle configurable nominal entries for StarTreeBuilder - return Sketches.updateSketchBuilder().setNominalEntries(CommonConstants.Helix.DEFAULT_THETA_SKETCH_NOMINAL_ENTRIES) - .build().compact(); - } - @Override - public Sketch getInitialAggregatedValue(Object rawValue) { - Sketch initialValue; + public Object getInitialAggregatedValue(Object rawValue) { + Union thetaUnion = _setOperationBuilder.buildUnion(); if (rawValue instanceof byte[]) { // Serialized Sketch byte[] bytes = (byte[]) rawValue; - initialValue = deserializeAggregatedValue(bytes); - _maxByteSize = Math.max(_maxByteSize, bytes.length); + Sketch sketch = deserializeAggregatedValue(bytes); + thetaUnion.union(sketch); } else if (rawValue instanceof byte[][]) { // Multiple Serialized Sketches byte[][] serializedSketches = (byte[][]) rawValue; - initialValue = StreamSupport.stream(Arrays.stream(serializedSketches).spliterator(), false) - .map(this::deserializeAggregatedValue).reduce(this::union).orElseGet(this::empty); - _maxByteSize = Math.max(_maxByteSize, initialValue.getCurrentBytes()); + for (byte[] sketchBytes : serializedSketches) { + thetaUnion.union(deserializeAggregatedValue(sketchBytes)); + } } else { - initialValue = singleItemSketch(rawValue); - _maxByteSize = Math.max(_maxByteSize, initialValue.getCurrentBytes()); + singleItemUpdate(thetaUnion, rawValue); + } + _maxByteSize = Math.max(_maxByteSize, thetaUnion.getCurrentBytes()); + return thetaUnion; + } + + private Union extractUnion(Object value) { + if (value == null) { + return 
_setOperationBuilder.buildUnion(); + } else if (value instanceof Union) { + return (Union) value; + } else if (value instanceof Sketch) { + Sketch sketch = (Sketch) value; + Union thetaUnion = _setOperationBuilder.buildUnion(); + thetaUnion.union(sketch); + return thetaUnion; + } else { + throw new IllegalStateException( + "Unsupported data type for Theta Sketch aggregation: " + value.getClass().getSimpleName()); } - return initialValue; } @Override - public Sketch applyRawValue(Sketch value, Object rawValue) { - Sketch right; + public Object applyRawValue(Object aggregatedValue, Object rawValue) { + Union thetaUnion = extractUnion(aggregatedValue); if (rawValue instanceof byte[]) { - right = deserializeAggregatedValue((byte[]) rawValue); + Sketch sketch = deserializeAggregatedValue((byte[]) rawValue); + thetaUnion.union(sketch); } else { - right = singleItemSketch(rawValue); + singleItemUpdate(thetaUnion, rawValue); } - Sketch result = union(value, right).compact(); - _maxByteSize = Math.max(_maxByteSize, result.getCurrentBytes()); - return result; + _maxByteSize = Math.max(_maxByteSize, thetaUnion.getCurrentBytes()); + return thetaUnion; } @Override - public Sketch applyAggregatedValue(Sketch value, Sketch aggregatedValue) { - Sketch result = union(value, aggregatedValue); - _maxByteSize = Math.max(_maxByteSize, result.getCurrentBytes()); - return result; + public Object applyAggregatedValue(Object value, Object aggregatedValue) { + Union thetaUnion = extractUnion(aggregatedValue); + singleItemUpdate(thetaUnion, value); + _maxByteSize = Math.max(_maxByteSize, thetaUnion.getCurrentBytes()); + return thetaUnion; } @Override - public Sketch cloneAggregatedValue(Sketch value) { + public Object cloneAggregatedValue(Object value) { return deserializeAggregatedValue(serializeAggregatedValue(value)); } @@ -167,8 +167,15 @@ public int getMaxAggregatedValueByteSize() { } @Override - public byte[] serializeAggregatedValue(Sketch value) { - return 
CustomSerDeUtils.DATA_SKETCH_THETA_SER_DE.serialize(value); + public byte[] serializeAggregatedValue(Object value) { + if (value instanceof Union) { + return CustomSerDeUtils.DATA_SKETCH_THETA_SER_DE.serialize(((Union) value).getResult()); + } else if (value instanceof Sketch) { + return CustomSerDeUtils.DATA_SKETCH_THETA_SER_DE.serialize(((Sketch) value)); + } else { + throw new IllegalStateException( + "Unsupported data type for Theta Sketch aggregation: " + value.getClass().getSimpleName()); + } } @Override diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/aggregator/IntegerTupleSketchValueAggregator.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/aggregator/IntegerTupleSketchValueAggregator.java index 1440e738d1d4..87d5c0f97e0c 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/aggregator/IntegerTupleSketchValueAggregator.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/aggregator/IntegerTupleSketchValueAggregator.java @@ -25,17 +25,19 @@ import org.apache.pinot.segment.local.utils.CustomSerDeUtils; import org.apache.pinot.segment.spi.AggregationFunctionType; import org.apache.pinot.spi.data.FieldSpec.DataType; +import org.apache.pinot.spi.utils.CommonConstants; -public class IntegerTupleSketchValueAggregator implements ValueAggregator> { +@SuppressWarnings("unchecked") +public class IntegerTupleSketchValueAggregator implements ValueAggregator { public static final DataType AGGREGATED_VALUE_TYPE = DataType.BYTES; - // This changes a lot similar to the Bitmap aggregator - private int _maxByteSize; + private final int _nominalEntries; private final IntegerSummary.Mode _mode; public IntegerTupleSketchValueAggregator(IntegerSummary.Mode mode) { + _nominalEntries = 1 << CommonConstants.Helix.DEFAULT_TUPLE_SKETCH_LGK; _mode = mode; } @@ -49,47 +51,85 @@ public DataType getAggregatedValueType() { return AGGREGATED_VALUE_TYPE; } - // Utility method to merge two 
sketches - private Sketch union(Sketch a, Sketch b) { - return new Union<>(new IntegerSummarySetOperations(_mode, _mode)).union(a, b); - } - @Override - public Sketch getInitialAggregatedValue(byte[] rawValue) { + public Object getInitialAggregatedValue(byte[] rawValue) { Sketch initialValue = deserializeAggregatedValue(rawValue); - _maxByteSize = Math.max(_maxByteSize, rawValue.length); - return initialValue; + Union tupleUnion = new Union<>(_nominalEntries, new IntegerSummarySetOperations(_mode, _mode)); + tupleUnion.union(initialValue); + return tupleUnion; + } + + private Union extractUnion(Object value) { + if (value == null) { + return new Union<>(_nominalEntries, new IntegerSummarySetOperations(_mode, _mode)); + } else if (value instanceof Union) { + return (Union) value; + } else if (value instanceof Sketch) { + Sketch sketch = (Sketch) value; + Union tupleUnion = new Union<>(_nominalEntries, new IntegerSummarySetOperations(_mode, _mode)); + tupleUnion.union(sketch); + return tupleUnion; + } else { + throw new IllegalStateException( + "Unsupported data type for Integer Tuple Sketch aggregation: " + value.getClass().getSimpleName()); + } + } + + private Sketch extractSketch(Object value) { + if (value instanceof Union) { + return ((Union) value).getResult(); + } else if (value instanceof Sketch) { + return ((Sketch) value); + } else { + throw new IllegalStateException( + "Unsupported data type for Integer Tuple Sketch aggregation: " + value.getClass().getSimpleName()); + } } @Override - public Sketch applyRawValue(Sketch value, byte[] rawValue) { - Sketch right = deserializeAggregatedValue(rawValue); - Sketch result = union(value, right).compact(); - _maxByteSize = Math.max(_maxByteSize, result.toByteArray().length); - return result; + public Object applyRawValue(Object aggregatedValue, byte[] rawValue) { + Union tupleUnion = extractUnion(aggregatedValue); + tupleUnion.union(deserializeAggregatedValue(rawValue)); + return tupleUnion; } @Override - public 
Sketch applyAggregatedValue(Sketch value, - Sketch aggregatedValue) { - Sketch result = union(value, aggregatedValue); - _maxByteSize = Math.max(_maxByteSize, result.toByteArray().length); - return result; + public Object applyAggregatedValue(Object value, Object aggregatedValue) { + Union tupleUnion = extractUnion(aggregatedValue); + Sketch sketch = extractSketch(value); + tupleUnion.union(sketch); + return tupleUnion; } @Override - public Sketch cloneAggregatedValue(Sketch value) { + public Object cloneAggregatedValue(Object value) { return deserializeAggregatedValue(serializeAggregatedValue(value)); } + /** + * Returns the maximum number of storage bytes required for a Compact Integer Tuple Sketch with the given + * number of actual entries. Note that this assumes the worst case of the sketch in + * estimation mode, which requires storing theta and count. + * @return the maximum number of storage bytes required for a Compact Integer Tuple Sketch with the given number + * of entries. + */ @Override public int getMaxAggregatedValueByteSize() { - return _maxByteSize; + if (_nominalEntries == 0) { + return 8; + } + if (_nominalEntries == 1) { + return 16; + } + int longSizeInBytes = Long.BYTES; + int intSizeInBytes = Integer.BYTES; + return (_nominalEntries * (longSizeInBytes + intSizeInBytes)) + 24; } @Override - public byte[] serializeAggregatedValue(Sketch value) { - return CustomSerDeUtils.DATA_SKETCH_INT_TUPLE_SER_DE.serialize(value); + public byte[] serializeAggregatedValue(Object value) { + Sketch sketch = extractSketch(value); + return sketch.compact().toByteArray(); } @Override diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/aggregator/DistinctCountCPCSketchValueAggregatorTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/aggregator/DistinctCountCPCSketchValueAggregatorTest.java index b8dcb701f5ed..c9bc80f8264f 100644 --- 
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/aggregator/DistinctCountCPCSketchValueAggregatorTest.java +++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/aggregator/DistinctCountCPCSketchValueAggregatorTest.java @@ -34,19 +34,18 @@ public class DistinctCountCPCSketchValueAggregatorTest { @Test public void initialShouldCreateSingleItemSketch() { DistinctCountCPCSketchValueAggregator agg = new DistinctCountCPCSketchValueAggregator(Collections.emptyList()); - assertEquals(agg.getInitialAggregatedValue("hello world").getEstimate(), 1.0); + assertEquals(toSketch(agg.getInitialAggregatedValue("hello world")).getEstimate(), 1.0); } @Test public void initialShouldParseASketch() { - CpcSketch input = new CpcSketch(); - IntStream.range(0, 1000).forEach(input::update); + CpcSketch input = new CpcSketch(12); + IntStream.range(0, 100).forEach(input::update); DistinctCountCPCSketchValueAggregator agg = new DistinctCountCPCSketchValueAggregator(Collections.emptyList()); byte[] bytes = agg.serializeAggregatedValue(input); - assertEquals(agg.getInitialAggregatedValue(bytes).getEstimate(), input.getEstimate()); - - // and should update the max size - assertEquals(agg.getMaxAggregatedValueByteSize(), input.toByteArray().length); + assertEquals(Math.round(toSketch(agg.getInitialAggregatedValue(bytes)).getEstimate()), + Math.round(input.getEstimate())); + assertEquals(agg.getMaxAggregatedValueByteSize(), 2580); } @Test @@ -57,7 +56,7 @@ public void initialShouldParseMultiValueSketches() { input2.update("world"); DistinctCountCPCSketchValueAggregator agg = new DistinctCountCPCSketchValueAggregator(Collections.emptyList()); byte[][] bytes = {agg.serializeAggregatedValue(input1), agg.serializeAggregatedValue(input2)}; - assertEquals(Math.round(agg.getInitialAggregatedValue(bytes).getEstimate()), 2); + assertEquals(Math.round(toSketch(agg.getInitialAggregatedValue(bytes)).getEstimate()), 2); } @Test @@ -67,7 +66,7 @@ public void 
applyAggregatedValueShouldUnion() { CpcSketch input2 = new CpcSketch(); IntStream.range(0, 1000).forEach(input2::update); DistinctCountCPCSketchValueAggregator agg = new DistinctCountCPCSketchValueAggregator(Collections.emptyList()); - CpcSketch result = agg.applyAggregatedValue(input1, input2); + CpcSketch result = toSketch(agg.applyAggregatedValue(input1, input2)); CpcUnion union = new CpcUnion(CommonConstants.Helix.DEFAULT_CPC_SKETCH_LGK); union.update(input1); @@ -75,9 +74,7 @@ public void applyAggregatedValueShouldUnion() { CpcSketch merged = union.getResult(); assertEquals(result.getEstimate(), merged.getEstimate()); - - // and should update the max size - assertEquals(agg.getMaxAggregatedValueByteSize(), merged.toByteArray().length); + assertEquals(agg.getMaxAggregatedValueByteSize(), 2580); } @Test @@ -88,7 +85,7 @@ public void applyRawValueShouldUnion() { IntStream.range(0, 1000).forEach(input2::update); DistinctCountCPCSketchValueAggregator agg = new DistinctCountCPCSketchValueAggregator(Collections.emptyList()); byte[] result2bytes = agg.serializeAggregatedValue(input2); - CpcSketch result = agg.applyRawValue(input1, result2bytes); + CpcSketch result = toSketch(agg.applyRawValue(input1, result2bytes)); CpcUnion union = new CpcUnion(CommonConstants.Helix.DEFAULT_CPC_SKETCH_LGK); union.update(input1); @@ -96,9 +93,7 @@ public void applyRawValueShouldUnion() { CpcSketch merged = union.getResult(); assertEquals(result.getEstimate(), merged.getEstimate()); - - // and should update the max size - assertEquals(agg.getMaxAggregatedValueByteSize(), merged.toByteArray().length); + assertEquals(agg.getMaxAggregatedValueByteSize(), 2580); } @Test @@ -106,13 +101,13 @@ public void applyRawValueShouldAdd() { CpcSketch input1 = new CpcSketch(); input1.update("hello".hashCode()); DistinctCountCPCSketchValueAggregator agg = new DistinctCountCPCSketchValueAggregator(Collections.emptyList()); - CpcSketch result = agg.applyRawValue(input1, "world"); + CpcSketch result = 
toSketch(agg.applyRawValue(input1, "world")); assertEquals(Math.round(result.getEstimate()), 2); CpcSketch pristine = new CpcSketch(); pristine.update("hello"); pristine.update("world"); - assertEquals(agg.getMaxAggregatedValueByteSize(), pristine.toByteArray().length); + assertEquals(agg.getMaxAggregatedValueByteSize(), 2580); } @Test @@ -121,7 +116,7 @@ public void applyRawValueShouldSupportMultiValue() { input1.update("hello"); DistinctCountCPCSketchValueAggregator agg = new DistinctCountCPCSketchValueAggregator(Collections.emptyList()); String[] strings = {"hello", "world", "this", "is", "some", "strings"}; - CpcSketch result = agg.applyRawValue(input1, strings); + CpcSketch result = toSketch(agg.applyRawValue(input1, strings)); assertEquals(Math.round(result.getEstimate()), 6); @@ -129,16 +124,16 @@ public void applyRawValueShouldSupportMultiValue() { for (String value : strings) { pristine.update(value); } - assertEquals(agg.getMaxAggregatedValueByteSize(), pristine.toByteArray().length); + assertEquals(agg.getMaxAggregatedValueByteSize(), 2580); } @Test public void getInitialValueShouldSupportDifferentTypes() { DistinctCountCPCSketchValueAggregator agg = new DistinctCountCPCSketchValueAggregator(Collections.emptyList()); - assertEquals(agg.getInitialAggregatedValue(12345).getEstimate(), 1.0); - assertEquals(agg.getInitialAggregatedValue(12345L).getEstimate(), 1.0); - assertEquals(agg.getInitialAggregatedValue(12.345f).getEstimate(), 1.0); - assertEquals(agg.getInitialAggregatedValue(12.345d).getEstimate(), 1.0); + assertEquals(toSketch(agg.getInitialAggregatedValue(12345)).getEstimate(), 1.0); + assertEquals(toSketch(agg.getInitialAggregatedValue(12345L)).getEstimate(), 1.0); + assertEquals(toSketch(agg.getInitialAggregatedValue(12.345f)).getEstimate(), 1.0); + assertEquals(toSketch(agg.getInitialAggregatedValue(12.345d)).getEstimate(), 1.0); assertThrows(() -> agg.getInitialAggregatedValue(new Object())); } @@ -146,16 +141,27 @@ public void 
getInitialValueShouldSupportDifferentTypes() { public void getInitialValueShouldSupportMultiValueTypes() { DistinctCountCPCSketchValueAggregator agg = new DistinctCountCPCSketchValueAggregator(Collections.emptyList()); Integer[] ints = {12345}; - assertEquals(agg.getInitialAggregatedValue(ints).getEstimate(), 1.0); + assertEquals(toSketch(agg.getInitialAggregatedValue(ints)).getEstimate(), 1.0); Long[] longs = {12345L}; - assertEquals(agg.getInitialAggregatedValue(longs).getEstimate(), 1.0); + assertEquals(toSketch(agg.getInitialAggregatedValue(longs)).getEstimate(), 1.0); Float[] floats = {12.345f}; - assertEquals(agg.getInitialAggregatedValue(floats).getEstimate(), 1.0); + assertEquals(toSketch(agg.getInitialAggregatedValue(floats)).getEstimate(), 1.0); Double[] doubles = {12.345d}; - assertEquals(agg.getInitialAggregatedValue(doubles).getEstimate(), 1.0); + assertEquals(toSketch(agg.getInitialAggregatedValue(doubles)).getEstimate(), 1.0); Object[] objects = {new Object()}; assertThrows(() -> agg.getInitialAggregatedValue(objects)); byte[][] zeroSketches = {}; - assertEquals(agg.getInitialAggregatedValue(zeroSketches).getEstimate(), 0.0); + assertEquals(toSketch(agg.getInitialAggregatedValue(zeroSketches)).getEstimate(), 0.0); + } + + private CpcSketch toSketch(Object value) { + if (value instanceof CpcUnion) { + return ((CpcUnion) value).getResult(); + } else if (value instanceof CpcSketch) { + return (CpcSketch) value; + } else { + throw new IllegalStateException( + "Unsupported data type for CPC Sketch aggregation: " + value.getClass().getSimpleName()); + } } } diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/aggregator/DistinctCountThetaSketchValueAggregatorTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/aggregator/DistinctCountThetaSketchValueAggregatorTest.java index fdc820c1200f..8bbdb9443a06 100644 --- 
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/aggregator/DistinctCountThetaSketchValueAggregatorTest.java +++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/aggregator/DistinctCountThetaSketchValueAggregatorTest.java @@ -37,7 +37,7 @@ public class DistinctCountThetaSketchValueAggregatorTest { @Test public void initialShouldCreateSingleItemSketch() { DistinctCountThetaSketchValueAggregator agg = new DistinctCountThetaSketchValueAggregator(); - assertEquals(agg.getInitialAggregatedValue("hello world").getEstimate(), 1.0); + assertEquals(toSketch(agg.getInitialAggregatedValue("hello world")).getEstimate(), 1.0); } @Test @@ -47,10 +47,13 @@ public void initialShouldParseASketch() { Sketch result = input.compact(); DistinctCountThetaSketchValueAggregator agg = new DistinctCountThetaSketchValueAggregator(); byte[] bytes = agg.serializeAggregatedValue(result); - assertEquals(agg.getInitialAggregatedValue(bytes).getEstimate(), result.getEstimate()); - + Sketch initSketch = toSketch(agg.getInitialAggregatedValue(bytes)); + Union union = + Union.builder().setNominalEntries(CommonConstants.Helix.DEFAULT_THETA_SKETCH_NOMINAL_ENTRIES).buildUnion(); + union.union(initSketch); + assertEquals(initSketch.getEstimate(), result.getEstimate()); // and should update the max size - assertEquals(agg.getMaxAggregatedValueByteSize(), result.getCurrentBytes()); + assertEquals(agg.getMaxAggregatedValueByteSize(), union.getCurrentBytes()); } @Test @@ -61,7 +64,7 @@ public void initialShouldParseMultiValueSketches() { input2.update("world"); DistinctCountThetaSketchValueAggregator agg = new DistinctCountThetaSketchValueAggregator(); byte[][] bytes = {agg.serializeAggregatedValue(input1), agg.serializeAggregatedValue(input2)}; - assertEquals(agg.getInitialAggregatedValue(bytes).getEstimate(), 2.0); + assertEquals(toSketch(agg.getInitialAggregatedValue(bytes)).getEstimate(), 2.0); } @Test @@ -73,16 +76,14 @@ public void applyAggregatedValueShouldUnion() { 
IntStream.range(0, 1000).forEach(input2::update); Sketch result2 = input2.compact(); DistinctCountThetaSketchValueAggregator agg = new DistinctCountThetaSketchValueAggregator(); - Sketch result = agg.applyAggregatedValue(result1, result2); + Sketch result = toSketch(agg.applyAggregatedValue(result1, result2)); Union union = Union.builder().setNominalEntries(CommonConstants.Helix.DEFAULT_THETA_SKETCH_NOMINAL_ENTRIES).buildUnion(); - - Sketch merged = union.union(result1, result2); - + union.union(result1); + union.union(result2); + Sketch merged = union.getResult(); assertEquals(result.getEstimate(), merged.getEstimate()); - - // and should update the max size - assertEquals(agg.getMaxAggregatedValueByteSize(), merged.getCurrentBytes()); + assertEquals(agg.getMaxAggregatedValueByteSize(), union.getCurrentBytes()); } @Test @@ -95,16 +96,15 @@ public void applyRawValueShouldUnion() { Sketch result2 = input2.compact(); DistinctCountThetaSketchValueAggregator agg = new DistinctCountThetaSketchValueAggregator(); byte[] result2bytes = agg.serializeAggregatedValue(result2); - Sketch result = agg.applyRawValue(result1, result2bytes); + Sketch result = toSketch(agg.applyRawValue(result1, result2bytes)); Union union = Union.builder().setNominalEntries(CommonConstants.Helix.DEFAULT_THETA_SKETCH_NOMINAL_ENTRIES).buildUnion(); - - Sketch merged = union.union(result1, result2); - + union.union(result1); + union.union(result2); + Sketch merged = union.getResult(); assertEquals(result.getEstimate(), merged.getEstimate()); - // and should update the max size - assertEquals(agg.getMaxAggregatedValueByteSize(), merged.getCurrentBytes()); + assertEquals(agg.getMaxAggregatedValueByteSize(), union.getCurrentBytes()); } @Test @@ -113,13 +113,13 @@ public void applyRawValueShouldAdd() { input1.update("hello".hashCode()); Sketch result1 = input1.compact(); DistinctCountThetaSketchValueAggregator agg = new DistinctCountThetaSketchValueAggregator(); - Sketch result = 
agg.applyRawValue(result1, "world"); - + Sketch result = toSketch(agg.applyRawValue(result1, "world")); + Union union = + Union.builder().setNominalEntries(CommonConstants.Helix.DEFAULT_THETA_SKETCH_NOMINAL_ENTRIES).buildUnion(); + union.union(result); assertEquals(result.getEstimate(), 2.0); - // and should update the max size - assertEquals(agg.getMaxAggregatedValueByteSize(), 32 // may change in future versions of datasketches - ); + assertEquals(agg.getMaxAggregatedValueByteSize(), union.getCurrentBytes()); } @Test @@ -129,22 +129,22 @@ public void applyRawValueShouldSupportMultiValue() { Sketch result1 = input1.compact(); DistinctCountThetaSketchValueAggregator agg = new DistinctCountThetaSketchValueAggregator(); String[] strings = {"hello", "world", "this", "is", "some", "strings"}; - Sketch result = agg.applyRawValue(result1, (Object) strings); - + Sketch result = toSketch(agg.applyRawValue(result1, (Object) strings)); + Union union = + Union.builder().setNominalEntries(CommonConstants.Helix.DEFAULT_THETA_SKETCH_NOMINAL_ENTRIES).buildUnion(); + union.union(result); assertEquals(result.getEstimate(), 6.0); - // and should update the max size - assertEquals(agg.getMaxAggregatedValueByteSize(), 64 // may change in future versions of datasketches - ); + assertEquals(agg.getMaxAggregatedValueByteSize(), union.getCurrentBytes()); } @Test public void getInitialValueShouldSupportDifferentTypes() { DistinctCountThetaSketchValueAggregator agg = new DistinctCountThetaSketchValueAggregator(); - assertEquals(agg.getInitialAggregatedValue(12345).getEstimate(), 1.0); - assertEquals(agg.getInitialAggregatedValue(12345L).getEstimate(), 1.0); - assertEquals(agg.getInitialAggregatedValue(12.345f).getEstimate(), 1.0); - assertEquals(agg.getInitialAggregatedValue(12.345d).getEstimate(), 1.0); + assertEquals(toSketch(agg.getInitialAggregatedValue(12345)).getEstimate(), 1.0); + assertEquals(toSketch(agg.getInitialAggregatedValue(12345L)).getEstimate(), 1.0); + 
assertEquals(toSketch(agg.getInitialAggregatedValue(12.345f)).getEstimate(), 1.0); + assertEquals(toSketch(agg.getInitialAggregatedValue(12.345d)).getEstimate(), 1.0); assertThrows(() -> agg.getInitialAggregatedValue(new Object())); } @@ -152,17 +152,17 @@ public void getInitialValueShouldSupportDifferentTypes() { public void getInitialValueShouldSupportMultiValueTypes() { DistinctCountThetaSketchValueAggregator agg = new DistinctCountThetaSketchValueAggregator(); Integer[] ints = {12345}; - assertEquals(agg.getInitialAggregatedValue(ints).getEstimate(), 1.0); + assertEquals(toSketch(agg.getInitialAggregatedValue(ints)).getEstimate(), 1.0); Long[] longs = {12345L}; - assertEquals(agg.getInitialAggregatedValue(longs).getEstimate(), 1.0); + assertEquals(toSketch(agg.getInitialAggregatedValue(longs)).getEstimate(), 1.0); Float[] floats = {12.345f}; - assertEquals(agg.getInitialAggregatedValue(floats).getEstimate(), 1.0); + assertEquals(toSketch(agg.getInitialAggregatedValue(floats)).getEstimate(), 1.0); Double[] doubles = {12.345d}; - assertEquals(agg.getInitialAggregatedValue(doubles).getEstimate(), 1.0); + assertEquals(toSketch(agg.getInitialAggregatedValue(doubles)).getEstimate(), 1.0); Object[] objects = {new Object()}; assertThrows(() -> agg.getInitialAggregatedValue(objects)); byte[][] zeroSketches = {}; - assertEquals(agg.getInitialAggregatedValue(zeroSketches).getEstimate(), 0.0); + assertEquals(toSketch(agg.getInitialAggregatedValue(zeroSketches)).getEstimate(), 0.0); } @Test @@ -172,7 +172,18 @@ public void shouldRetainSketchOrdering() { Sketch unordered = input.compact(false, null); Sketch ordered = input.compact(true, null); DistinctCountThetaSketchValueAggregator agg = new DistinctCountThetaSketchValueAggregator(); - assertTrue(agg.cloneAggregatedValue(ordered).isOrdered()); - assertFalse(agg.cloneAggregatedValue(unordered).isOrdered()); + assertTrue(toSketch(agg.cloneAggregatedValue(ordered)).isOrdered()); + 
assertFalse(toSketch(agg.cloneAggregatedValue(unordered)).isOrdered()); + } + + private Sketch toSketch(Object value) { + if (value instanceof Union) { + return ((Union) value).getResult(); + } else if (value instanceof Sketch) { + return (Sketch) value; + } else { + throw new IllegalStateException( + "Unsupported data type for Theta Sketch aggregation: " + value.getClass().getSimpleName()); + } } } diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/aggregator/IntegerTupleSketchValueAggregatorTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/aggregator/IntegerTupleSketchValueAggregatorTest.java index d108d799b040..cfc8b88f8ed7 100644 --- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/aggregator/IntegerTupleSketchValueAggregatorTest.java +++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/aggregator/IntegerTupleSketchValueAggregatorTest.java @@ -19,6 +19,7 @@ package org.apache.pinot.segment.local.aggregator; import org.apache.datasketches.tuple.Sketch; +import org.apache.datasketches.tuple.Union; import org.apache.datasketches.tuple.aninteger.IntegerSketch; import org.apache.datasketches.tuple.aninteger.IntegerSummary; import org.testng.annotations.Test; @@ -32,12 +33,12 @@ private byte[] sketchContaining(String key, int value) { IntegerSketch is = new IntegerSketch(16, IntegerSummary.Mode.Sum); is.update(key, value); return is.compact().toByteArray(); - }; + } @Test public void initialShouldParseASketch() { IntegerTupleSketchValueAggregator agg = new IntegerTupleSketchValueAggregator(IntegerSummary.Mode.Sum); - assertEquals(agg.getInitialAggregatedValue(sketchContaining("hello world", 1)).getEstimate(), 1.0); + assertEquals(toSketch(agg.getInitialAggregatedValue(sketchContaining("hello world", 1))).getEstimate(), 1.0); } @Test @@ -47,11 +48,9 @@ public void applyAggregatedValueShouldUnion() { s1.update("a", 1); s2.update("b", 1); IntegerTupleSketchValueAggregator agg = new 
IntegerTupleSketchValueAggregator(IntegerSummary.Mode.Sum); - Sketch merged = agg.applyAggregatedValue(s1, s2); + Sketch merged = toSketch(agg.applyAggregatedValue(s1, s2)); assertEquals(merged.getEstimate(), 2.0); - - // and should update the max size - assertEquals(agg.getMaxAggregatedValueByteSize(), agg.serializeAggregatedValue(merged).length); + assertEquals(agg.getMaxAggregatedValueByteSize(), 196632); } @Test @@ -61,10 +60,20 @@ public void applyRawValueShouldUnion() { s1.update("a", 1); s2.update("b", 1); IntegerTupleSketchValueAggregator agg = new IntegerTupleSketchValueAggregator(IntegerSummary.Mode.Sum); - Sketch merged = agg.applyRawValue(s1, agg.serializeAggregatedValue(s2)); + Sketch merged = toSketch(agg.applyRawValue(s1, agg.serializeAggregatedValue(s2))); assertEquals(merged.getEstimate(), 2.0); + assertEquals(agg.getMaxAggregatedValueByteSize(), 196632); + } - // and should update the max size - assertEquals(agg.getMaxAggregatedValueByteSize(), agg.serializeAggregatedValue(merged).length); + @SuppressWarnings("unchecked") + private Sketch toSketch(Object value) { + if (value instanceof Union) { + return ((Union) value).getResult(); + } else if (value instanceof Sketch) { + return ((Sketch) value); + } else { + throw new IllegalStateException( + "Unsupported data type for Integer Tuple Sketch aggregation: " + value.getClass().getSimpleName()); + } } } diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/utils/CommonConstants.java b/pinot-spi/src/main/java/org/apache/pinot/spi/utils/CommonConstants.java index befd5b57633e..24ea49cfa101 100644 --- a/pinot-spi/src/main/java/org/apache/pinot/spi/utils/CommonConstants.java +++ b/pinot-spi/src/main/java/org/apache/pinot/spi/utils/CommonConstants.java @@ -107,7 +107,9 @@ public static class Helix { // https://datasketches.apache.org/docs/Theta/ThetaErrorTable.html public static final int DEFAULT_THETA_SKETCH_NOMINAL_ENTRIES = 16384; - public static final int DEFAULT_TUPLE_SKETCH_LGK = 16; + // 2 to 
the power of 14, for tradeoffs see datasketches library documentation: + // https://datasketches.apache.org/docs/Theta/ThetaErrorTable.html + public static final int DEFAULT_TUPLE_SKETCH_LGK = 14; public static final int DEFAULT_CPC_SKETCH_LGK = 12; public static final int DEFAULT_ULTRALOGLOG_P = 12; From 0f28a5cc6f58688040c6e20c458581a1077ed0cd Mon Sep 17 00:00:00 2001 From: rohit Date: Thu, 2 May 2024 01:35:14 +0530 Subject: [PATCH 100/102] fix merging null multi value in partial upsert (#13031) --- .../local/upsert/PartialUpsertHandler.java | 16 ++++++++++++++-- .../local/upsert/PartialUpsertHandlerTest.java | 9 +++++++-- 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/upsert/PartialUpsertHandler.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/upsert/PartialUpsertHandler.java index 118412ab7725..ad73de9d70f5 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/upsert/PartialUpsertHandler.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/upsert/PartialUpsertHandler.java @@ -18,6 +18,7 @@ */ package org.apache.pinot.segment.local.upsert; +import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.TreeMap; @@ -47,12 +48,24 @@ public class PartialUpsertHandler { private final TreeMap _fieldSpecMap; private final PartialUpsertMerger _partialUpsertMerger; + private final Map _defaultNullValues = new HashMap<>(); + public PartialUpsertHandler(Schema schema, List comparisonColumns, UpsertConfig upsertConfig) { _primaryKeyColumns = schema.getPrimaryKeyColumns(); _comparisonColumns = comparisonColumns; _fieldSpecMap = schema.getFieldSpecMap(); _partialUpsertMerger = PartialUpsertMergerFactory.getPartialUpsertMerger(_primaryKeyColumns, comparisonColumns, upsertConfig); + // cache default null values to handle null merger results + for (Map.Entry entry : schema.getFieldSpecMap().entrySet()) { 
+ String column = entry.getKey(); + FieldSpec fieldSpec = entry.getValue(); + if (fieldSpec.isSingleValueField()) { + _defaultNullValues.put(column, fieldSpec.getDefaultNullValue()); + } else { + _defaultNullValues.put(column, new Object[]{fieldSpec.getDefaultNullValue()}); + } + } } public void merge(LazyRow previousRow, GenericRow newRow, Map resultHolder) { @@ -83,8 +96,7 @@ private void setMergedValue(GenericRow row, String column, @Nullable Object merg row.removeNullValueField(column); row.putValue(column, mergedValue); } else { - // if column exists but mapped to a null value then merger result was a null value - row.putDefaultNullValue(column, _fieldSpecMap.get(column).getDefaultNullValue()); + row.putDefaultNullValue(column, _defaultNullValues.get(column)); } } } diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/upsert/PartialUpsertHandlerTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/upsert/PartialUpsertHandlerTest.java index 4b954aa1400e..fc8fdbdefbe4 100644 --- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/upsert/PartialUpsertHandlerTest.java +++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/upsert/PartialUpsertHandlerTest.java @@ -86,11 +86,14 @@ public void testCustomPartialUpsertMergerWithNullResult() { newRowData.put("hoursSinceEpoch", null); // testing null comparison column GenericRow newRecord = initGenericRow(new GenericRow(), newRowData); LazyRow prevRecord = mock(LazyRow.class); - mockLazyRow(prevRecord, Map.of("pk", "pk1", "field1", 5L, "field2", "set", "hoursSinceEpoch", 2L)); - Map expectedData = new HashMap<>(Map.of("pk", "pk1", "field2", "reset", "hoursSinceEpoch", 2L)); + mockLazyRow(prevRecord, + Map.of("pk", "pk1", "field1", 5L, "field2", "set", "field3", new Integer[]{0}, "hoursSinceEpoch", 2L)); + Map expectedData = new HashMap<>( + Map.of("pk", "pk1", "field2", "reset", "hoursSinceEpoch", 2L)); expectedData.put("field1", 
Long.MIN_VALUE); GenericRow expectedRecord = initGenericRow(new GenericRow(), expectedData); expectedRecord.addNullValueField("field1"); + expectedRecord.putDefaultNullValue("field3", new Object[]{Integer.MIN_VALUE}); testCustomMerge(prevRecord, newRecord, expectedRecord, getCustomMerger()); } @@ -138,6 +141,7 @@ private void testCustomMerge(LazyRow prevRecord, GenericRow newRecord, GenericRo Schema schema = new Schema.SchemaBuilder().addSingleValueDimension("pk", FieldSpec.DataType.STRING) .addSingleValueDimension("field1", FieldSpec.DataType.LONG) .addSingleValueDimension("field2", FieldSpec.DataType.STRING) + .addMultiValueDimension("field3", FieldSpec.DataType.INT) .addDateTime("hoursSinceEpoch", FieldSpec.DataType.LONG, "1:HOURS:EPOCH", "1:HOURS") .setPrimaryKeyColumns(Arrays.asList("pk")).build(); @@ -169,6 +173,7 @@ public PartialUpsertMerger getCustomMerger() { } if ((newRow.getValue("field2")).equals("reset")) { resultHolder.put("field1", null); + resultHolder.put("field3", null); } }; } From c8b223f1fc2ab8df958175f8ae0c4b17902a2061 Mon Sep 17 00:00:00 2001 From: Abhishek Sharma Date: Wed, 1 May 2024 16:17:48 -0400 Subject: [PATCH 101/102] Upgrade lucene to 9.10.0 and compatibility changes to code. 
(#12866) --- .../impl/vector/MutableVectorIndex.java | 2 +- .../impl/inv/text/LuceneFSTIndexCreator.java | 2 +- .../impl/text/LuceneTextIndexCreator.java | 4 +-- .../impl/vector/HnswVectorIndexCreator.java | 3 +- .../{lucene95 => lucene99}/HnswCodec.java | 14 ++++---- .../HnswVectorsFormat.java | 10 +++--- .../SegmentV1V2ToV3FormatConverter.java | 8 ++--- .../local/segment/index/fst/FstIndexType.java | 3 +- .../loader/invertedindex/FSTIndexHandler.java | 4 +-- .../invertedindex/VectorIndexHandler.java | 5 +-- .../index/readers/LuceneFSTIndexReader.java | 3 +- .../segment/index/text/TextIndexType.java | 3 +- .../segment/index/vector/VectorIndexType.java | 3 +- .../local/segment/store/TextIndexUtils.java | 5 ++- .../local/segment/store/VectorIndexUtils.java | 24 +++++++------ .../segment/local/utils/fst/FSTBuilder.java | 13 ++++--- .../nativefst/NativeFSTIndexCreator.java | 2 +- .../creator/LuceneFSTIndexCreatorTest.java | 4 +-- .../creator/NativeFSTIndexCreatorTest.java | 4 +-- .../segment/index/loader/LoaderTest.java | 36 +++++++++---------- .../index/loader/SegmentPreProcessorTest.java | 4 +-- .../store/FilePerIndexDirectoryTest.java | 4 +-- .../store/SingleFileIndexDirectoryTest.java | 4 +-- .../local/utils/fst/FSTBuilderTest.java | 2 -- .../apache/pinot/segment/spi/V1Constants.java | 4 +++ .../spi/store/SegmentDirectoryPaths.java | 26 +++++++++++--- pom.xml | 2 +- 27 files changed, 117 insertions(+), 81 deletions(-) rename pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/vector/{lucene95 => lucene99}/HnswCodec.java (92%) rename pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/vector/{lucene95 => lucene99}/HnswVectorsFormat.java (92%) diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/vector/MutableVectorIndex.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/vector/MutableVectorIndex.java index 
a591650be4e7..47329a1c2a25 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/vector/MutableVectorIndex.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/vector/MutableVectorIndex.java @@ -83,7 +83,7 @@ public MutableVectorIndex(String segmentName, String vectorColumn, VectorIndexCo // to V3 if segmentVersion is set to V3 in SegmentGeneratorConfig. _indexDir = new File(FileUtils.getTempDirectory(), segmentName); _indexDirectory = FSDirectory.open( - new File(_indexDir, _vectorColumn + V1Constants.Indexes.VECTOR_HNSW_INDEX_FILE_EXTENSION).toPath()); + new File(_indexDir, _vectorColumn + V1Constants.Indexes.VECTOR_V99_HNSW_INDEX_FILE_EXTENSION).toPath()); LOGGER.info("Creating mutable HNSW index for segment: {}, column: {} at path: {} with {}", segmentName, vectorColumn, _indexDir.getAbsolutePath(), vectorIndexConfig.getProperties()); _indexWriter = new IndexWriter(_indexDirectory, VectorIndexUtils.getIndexWriterConfig(vectorIndexConfig)); diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/inv/text/LuceneFSTIndexCreator.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/inv/text/LuceneFSTIndexCreator.java index 60b903739bb1..2e51c19096a1 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/inv/text/LuceneFSTIndexCreator.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/inv/text/LuceneFSTIndexCreator.java @@ -55,7 +55,7 @@ public class LuceneFSTIndexCreator implements FSTIndexCreator { */ public LuceneFSTIndexCreator(File indexDir, String columnName, String[] sortedEntries) throws IOException { - _fstIndexFile = new File(indexDir, columnName + V1Constants.Indexes.LUCENE_V9_FST_INDEX_FILE_EXTENSION); + _fstIndexFile = new File(indexDir, columnName + V1Constants.Indexes.LUCENE_V99_FST_INDEX_FILE_EXTENSION); 
_fstBuilder = new FSTBuilder(); _dictId = 0; diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/text/LuceneTextIndexCreator.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/text/LuceneTextIndexCreator.java index 49306d9404af..2cdbf13f6af4 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/text/LuceneTextIndexCreator.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/text/LuceneTextIndexCreator.java @@ -330,7 +330,7 @@ public void close() } private File getV1TextIndexFile(File indexDir) { - String luceneIndexDirectory = _textColumn + V1Constants.Indexes.LUCENE_V9_TEXT_INDEX_FILE_EXTENSION; + String luceneIndexDirectory = _textColumn + V1Constants.Indexes.LUCENE_V99_TEXT_INDEX_FILE_EXTENSION; return new File(indexDir, luceneIndexDirectory); } @@ -339,7 +339,7 @@ private File getMutableIndexDir(File indexDir) { String tmpSegmentName = indexDir.getParentFile().getName(); String segmentName = tmpSegmentName.substring(tmpSegmentName.indexOf("tmp-") + 4, tmpSegmentName.lastIndexOf('-')); String mutableDir = indexDir.getParentFile().getParentFile().getParent() + "/consumers/" + segmentName + "/" - + _textColumn + V1Constants.Indexes.LUCENE_V9_TEXT_INDEX_FILE_EXTENSION; + + _textColumn + V1Constants.Indexes.LUCENE_V99_TEXT_INDEX_FILE_EXTENSION; return new File(mutableDir); } } diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/vector/HnswVectorIndexCreator.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/vector/HnswVectorIndexCreator.java index d13b45039762..c1f5cbb0b51e 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/vector/HnswVectorIndexCreator.java +++ 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/vector/HnswVectorIndexCreator.java @@ -61,7 +61,8 @@ public HnswVectorIndexCreator(String column, File segmentIndexDir, VectorIndexCo try { // segment generation is always in V1 and later we convert (as part of post creation processing) // to V3 if segmentVersion is set to V3 in SegmentGeneratorConfig. - File indexFile = new File(segmentIndexDir, _vectorColumn + V1Constants.Indexes.VECTOR_HNSW_INDEX_FILE_EXTENSION); + File indexFile = new File(segmentIndexDir, _vectorColumn + + V1Constants.Indexes.VECTOR_V99_HNSW_INDEX_FILE_EXTENSION); _indexDirectory = FSDirectory.open(indexFile.toPath()); LOGGER.info("Creating HNSW index for column: {} at path: {} with {} for segment: {}", column, indexFile.getAbsolutePath(), vectorIndexConfig.getProperties(), segmentIndexDir.getAbsolutePath()); diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/vector/lucene95/HnswCodec.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/vector/lucene99/HnswCodec.java similarity index 92% rename from pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/vector/lucene95/HnswCodec.java rename to pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/vector/lucene99/HnswCodec.java index ee7cf560df31..bfcfcff5ac48 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/vector/lucene95/HnswCodec.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/vector/lucene99/HnswCodec.java @@ -16,8 +16,9 @@ * specific language governing permissions and limitations * under the License. 
*/ -package org.apache.pinot.segment.local.segment.creator.impl.vector.lucene95; +package org.apache.pinot.segment.local.segment.creator.impl.vector.lucene99; +import org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat; import org.apache.lucene.codecs.DocValuesFormat; import org.apache.lucene.codecs.FilterCodec; import org.apache.lucene.codecs.KnnVectorsFormat; @@ -25,8 +26,7 @@ import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat; import org.apache.lucene.codecs.lucene90.Lucene90PointsFormat; -import org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat; -import org.apache.lucene.codecs.lucene95.Lucene95Codec; +import org.apache.lucene.codecs.lucene99.Lucene99Codec; import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat; import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat; import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat; @@ -36,7 +36,7 @@ * Extend the Lucene 9.5 index format * The major change here is to allow custom: @link{org.apache.lucene.codecs.KnnVectorsFormat} * - * @see org.apache.lucene.codecs.lucene95 package documentation for file format details. + * @see org.apache.lucene.codecs.lucene99 package documentation for file format details. */ public class HnswCodec extends FilterCodec { @@ -73,8 +73,8 @@ public KnnVectorsFormat getKnnVectorsFormatForField(String field) { * * @param mode stored fields compression mode to use for newly flushed/merged segments. 
*/ - public HnswCodec(Lucene95Codec.Mode mode, KnnVectorsFormat defaultKnnVectorsFormat) { - super("Lucene95", new Lucene95Codec(mode)); + public HnswCodec(Lucene99Codec.Mode mode, KnnVectorsFormat defaultKnnVectorsFormat) { + super("Lucene99", new Lucene99Codec(mode)); _defaultKnnVectorsFormat = defaultKnnVectorsFormat; _defaultPostingsFormat = new Lucene90PostingsFormat(); _defaultDVFormat = new Lucene90DocValuesFormat(); @@ -123,7 +123,7 @@ public DocValuesFormat getDocValuesFormatForField(String field) { /** * Returns the vectors format that should be used for writing new segments of field * - *

The default implementation always returns "Lucene95". + *

The default implementation always returns "Lucene99". * *

WARNING: if you subclass, you are responsible for index backwards compatibility: * future version of Lucene are only guaranteed to be able to read the default implementation. diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/vector/lucene95/HnswVectorsFormat.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/vector/lucene99/HnswVectorsFormat.java similarity index 92% rename from pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/vector/lucene95/HnswVectorsFormat.java rename to pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/vector/lucene99/HnswVectorsFormat.java index b3f356c51e49..2ba2781445f4 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/vector/lucene95/HnswVectorsFormat.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/vector/lucene99/HnswVectorsFormat.java @@ -16,13 +16,13 @@ * specific language governing permissions and limitations * under the License. 
*/ -package org.apache.pinot.segment.local.segment.creator.impl.vector.lucene95; +package org.apache.pinot.segment.local.segment.creator.impl.vector.lucene99; import java.io.IOException; import org.apache.lucene.codecs.KnnVectorsFormat; import org.apache.lucene.codecs.KnnVectorsReader; import org.apache.lucene.codecs.KnnVectorsWriter; -import org.apache.lucene.codecs.lucene95.Lucene95HnswVectorsFormat; +import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.util.hnsw.HnswGraph; @@ -41,7 +41,7 @@ public final class HnswVectorsFormat extends KnnVectorsFormat { public static final int DEFAULT_MAX_DIMENSIONS = 2048; private final int _maxDimensions; - private final Lucene95HnswVectorsFormat _delegate; + private final Lucene99HnswVectorsFormat _delegate; /** * Constructs a format using the given graph construction parameters. @@ -51,7 +51,7 @@ public final class HnswVectorsFormat extends KnnVectorsFormat { * @param maxDimensions the maximum number of dimensions supported by this format */ public HnswVectorsFormat(int maxConn, int beamWidth, int maxDimensions) { - super("Lucene95HnswVectorsFormat"); + super("Lucene99HnswVectorsFormat"); if (maxDimensions <= 0 || maxDimensions > DEFAULT_MAX_DIMENSIONS) { throw new IllegalArgumentException( "maxDimensions must be postive and less than or equal to" @@ -59,7 +59,7 @@ public HnswVectorsFormat(int maxConn, int beamWidth, int maxDimensions) { + "; maxDimensions=" + maxDimensions); } - _delegate = new Lucene95HnswVectorsFormat(maxConn, beamWidth); + _delegate = new Lucene99HnswVectorsFormat(maxConn, beamWidth); _maxDimensions = maxDimensions; } diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/converter/SegmentV1V2ToV3FormatConverter.java 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/converter/SegmentV1V2ToV3FormatConverter.java index ece4daf604d2..0e84a4b6ad5c 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/converter/SegmentV1V2ToV3FormatConverter.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/converter/SegmentV1V2ToV3FormatConverter.java @@ -110,10 +110,10 @@ private void deleteV2Files(File v2SegmentDirectory) if (file.isFile() && file.exists()) { FileUtils.deleteQuietly(file); } - if (file.isDirectory() && file.getName().endsWith(V1Constants.Indexes.LUCENE_V9_TEXT_INDEX_FILE_EXTENSION)) { + if (file.isDirectory() && file.getName().endsWith(V1Constants.Indexes.LUCENE_V99_TEXT_INDEX_FILE_EXTENSION)) { FileUtils.deleteDirectory(file); } - if (file.isDirectory() && file.getName().endsWith(V1Constants.Indexes.VECTOR_HNSW_INDEX_FILE_EXTENSION)) { + if (file.isDirectory() && file.getName().endsWith(V1Constants.Indexes.VECTOR_V99_HNSW_INDEX_FILE_EXTENSION)) { FileUtils.deleteDirectory(file); } } @@ -226,7 +226,7 @@ private void copyCreationMetadataIfExists(File currentDir, File v3Dir) private void copyLuceneTextIndexIfExists(File segmentDirectory, File v3Dir) throws IOException { // TODO: see if this can be done by reusing some existing methods - String suffix = V1Constants.Indexes.LUCENE_V9_TEXT_INDEX_FILE_EXTENSION; + String suffix = V1Constants.Indexes.LUCENE_V99_TEXT_INDEX_FILE_EXTENSION; File[] textIndexFiles = segmentDirectory.listFiles(new FilenameFilter() { @Override public boolean accept(File dir, String name) { @@ -263,7 +263,7 @@ public boolean accept(File dir, String name) { private void copyVectorIndexIfExists(File segmentDirectory, File v3Dir) throws IOException { // TODO: see if this can be done by reusing some existing methods - String suffix = V1Constants.Indexes.VECTOR_HNSW_INDEX_FILE_EXTENSION; + String suffix = 
V1Constants.Indexes.VECTOR_V99_HNSW_INDEX_FILE_EXTENSION; File[] vectorIndexFiles = segmentDirectory.listFiles(new FilenameFilter() { @Override public boolean accept(File dir, String name) { diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/fst/FstIndexType.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/fst/FstIndexType.java index d04ce7bc97fe..83e755f734ef 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/fst/FstIndexType.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/fst/FstIndexType.java @@ -65,7 +65,8 @@ public class FstIndexType extends AbstractIndexType EXTENSIONS = ImmutableList.of(V1Constants.Indexes.LUCENE_FST_INDEX_FILE_EXTENSION, - V1Constants.Indexes.LUCENE_V9_FST_INDEX_FILE_EXTENSION); + V1Constants.Indexes.LUCENE_V9_FST_INDEX_FILE_EXTENSION, + V1Constants.Indexes.LUCENE_V99_FST_INDEX_FILE_EXTENSION); protected FstIndexType() { super(StandardIndexes.FST_ID); diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/invertedindex/FSTIndexHandler.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/invertedindex/FSTIndexHandler.java index 778e92db0aed..b9d9c5096d84 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/invertedindex/FSTIndexHandler.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/invertedindex/FSTIndexHandler.java @@ -44,7 +44,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import static org.apache.pinot.segment.spi.V1Constants.Indexes.LUCENE_V9_FST_INDEX_FILE_EXTENSION; +import static org.apache.pinot.segment.spi.V1Constants.Indexes.LUCENE_V99_FST_INDEX_FILE_EXTENSION; /** @@ -157,7 +157,7 @@ private void createFSTIndexForColumn(SegmentDirectory.Writer segmentWriter, Colu String 
segmentName = _segmentDirectory.getSegmentMetadata().getName(); String columnName = columnMetadata.getColumnName(); File inProgress = new File(indexDir, columnName + ".fst.inprogress"); - File fstIndexFile = new File(indexDir, columnName + LUCENE_V9_FST_INDEX_FILE_EXTENSION); + File fstIndexFile = new File(indexDir, columnName + LUCENE_V99_FST_INDEX_FILE_EXTENSION); if (!inProgress.exists()) { // Create a marker file. diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/invertedindex/VectorIndexHandler.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/invertedindex/VectorIndexHandler.java index 584d4be1c37c..b3e5d2dfc38f 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/invertedindex/VectorIndexHandler.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/invertedindex/VectorIndexHandler.java @@ -115,9 +115,10 @@ private void createVectorIndexForColumn(SegmentDirectory.Writer segmentWriter, C String columnName = columnMetadata.getColumnName(); File inProgress = - new File(segmentDirectory, columnName + V1Constants.Indexes.VECTOR_HNSW_INDEX_FILE_EXTENSION + ".inprogress"); + new File(segmentDirectory, columnName + + V1Constants.Indexes.VECTOR_V99_HNSW_INDEX_FILE_EXTENSION + ".inprogress"); File vectorIndexFile = - new File(segmentDirectory, columnName + V1Constants.Indexes.VECTOR_HNSW_INDEX_FILE_EXTENSION); + new File(segmentDirectory, columnName + V1Constants.Indexes.VECTOR_V99_HNSW_INDEX_FILE_EXTENSION); if (!inProgress.exists()) { // Marker file does not exist, which means last run ended normally. 
diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/LuceneFSTIndexReader.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/LuceneFSTIndexReader.java index 6e579562c47e..3bf2c2a60134 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/LuceneFSTIndexReader.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/LuceneFSTIndexReader.java @@ -52,7 +52,8 @@ public LuceneFSTIndexReader(PinotDataBuffer pinotDataBuffer) _dataBufferIndexInput = new PinotBufferIndexInput(_dataBuffer, 0L, _dataBuffer.size()); _readFST = - new FST(_dataBufferIndexInput, _dataBufferIndexInput, PositiveIntOutputs.getSingleton(), new OffHeapFSTStore()); + new FST<>(FST.readMetadata(_dataBufferIndexInput, PositiveIntOutputs.getSingleton()), + _dataBufferIndexInput, new OffHeapFSTStore()); } @Override diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/text/TextIndexType.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/text/TextIndexType.java index 596380d81b51..cfbf6271f12e 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/text/TextIndexType.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/text/TextIndexType.java @@ -75,7 +75,8 @@ public class TextIndexType extends AbstractIndexType EXTENSIONS = Lists.newArrayList( V1Constants.Indexes.LUCENE_TEXT_INDEX_FILE_EXTENSION, V1Constants.Indexes.NATIVE_TEXT_INDEX_FILE_EXTENSION, - V1Constants.Indexes.LUCENE_V9_TEXT_INDEX_FILE_EXTENSION + V1Constants.Indexes.LUCENE_V9_TEXT_INDEX_FILE_EXTENSION, + V1Constants.Indexes.LUCENE_V99_TEXT_INDEX_FILE_EXTENSION ); protected TextIndexType() { diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/vector/VectorIndexType.java 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/vector/VectorIndexType.java index 59faf4c88036..cb228b81aa97 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/vector/VectorIndexType.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/vector/VectorIndexType.java @@ -129,7 +129,8 @@ public IndexHandler createIndexHandler(SegmentDirectory segmentDirectory, Map getFileExtensions(@Nullable ColumnMetadata columnMetadata) { - return List.of(V1Constants.Indexes.VECTOR_INDEX_FILE_EXTENSION); + return List.of(V1Constants.Indexes.VECTOR_INDEX_FILE_EXTENSION, + V1Constants.Indexes.VECTOR_V99_INDEX_FILE_EXTENSION); } private static class ReaderFactory implements IndexReaderFactory { diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/TextIndexUtils.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/TextIndexUtils.java index caa47adff73d..0c2369bdd8e3 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/TextIndexUtils.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/TextIndexUtils.java @@ -49,6 +49,8 @@ static void cleanupTextIndex(File segDir, String column) { FileUtils.deleteQuietly(luceneMappingFile); File luceneV9IndexFile = new File(segDir, column + Indexes.LUCENE_V9_TEXT_INDEX_FILE_EXTENSION); FileUtils.deleteQuietly(luceneV9IndexFile); + File luceneV99IndexFile = new File(segDir, column + Indexes.LUCENE_V99_TEXT_INDEX_FILE_EXTENSION); + FileUtils.deleteQuietly(luceneV99IndexFile); File luceneV9MappingFile = new File(segDir, column + Indexes.LUCENE_TEXT_INDEX_DOCID_MAPPING_FILE_EXTENSION); FileUtils.deleteQuietly(luceneV9MappingFile); @@ -61,7 +63,8 @@ static boolean hasTextIndex(File segDir, String column) { //@formatter:off return new File(segDir, column + Indexes.LUCENE_TEXT_INDEX_FILE_EXTENSION).exists() 
|| new File(segDir, column + Indexes.LUCENE_V9_TEXT_INDEX_FILE_EXTENSION).exists() - || new File(segDir, column + Indexes.NATIVE_TEXT_INDEX_FILE_EXTENSION).exists(); + || new File(segDir, column + Indexes.NATIVE_TEXT_INDEX_FILE_EXTENSION).exists() + || new File(segDir, column + Indexes.LUCENE_V99_TEXT_INDEX_FILE_EXTENSION).exists(); //@formatter:on } diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/VectorIndexUtils.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/VectorIndexUtils.java index 15de36cf5fd3..698adcb31896 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/VectorIndexUtils.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/VectorIndexUtils.java @@ -20,12 +20,12 @@ import java.io.File; import org.apache.commons.io.FileUtils; -import org.apache.lucene.codecs.lucene95.Lucene95Codec; -import org.apache.lucene.codecs.lucene95.Lucene95HnswVectorsFormat; +import org.apache.lucene.codecs.lucene99.Lucene99Codec; +import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.VectorSimilarityFunction; -import org.apache.pinot.segment.local.segment.creator.impl.vector.lucene95.HnswCodec; -import org.apache.pinot.segment.local.segment.creator.impl.vector.lucene95.HnswVectorsFormat; +import org.apache.pinot.segment.local.segment.creator.impl.vector.lucene99.HnswCodec; +import org.apache.pinot.segment.local.segment.creator.impl.vector.lucene99.HnswVectorsFormat; import org.apache.pinot.segment.spi.V1Constants.Indexes; import org.apache.pinot.segment.spi.index.creator.VectorIndexConfig; @@ -38,17 +38,21 @@ static void cleanupVectorIndex(File segDir, String column) { // Remove the lucene index file and potentially the docId mapping file. 
File luceneIndexFile = new File(segDir, column + Indexes.VECTOR_HNSW_INDEX_FILE_EXTENSION); FileUtils.deleteQuietly(luceneIndexFile); + File luceneV99IndexFile = new File(segDir, column + Indexes.VECTOR_V99_HNSW_INDEX_FILE_EXTENSION); + FileUtils.deleteQuietly(luceneV99IndexFile); File luceneMappingFile = new File(segDir, column + Indexes.VECTOR_HNSW_INDEX_DOCID_MAPPING_FILE_EXTENSION); FileUtils.deleteQuietly(luceneMappingFile); // Remove the native index file File nativeIndexFile = new File(segDir, column + Indexes.VECTOR_INDEX_FILE_EXTENSION); FileUtils.deleteQuietly(nativeIndexFile); + File nativeV99IndexFile = new File(segDir, column + Indexes.VECTOR_V99_INDEX_FILE_EXTENSION); + FileUtils.deleteQuietly(nativeV99IndexFile); } static boolean hasVectorIndex(File segDir, String column) { - return new File(segDir, column + Indexes.VECTOR_HNSW_INDEX_FILE_EXTENSION).exists() || new File(segDir, - column + Indexes.VECTOR_INDEX_FILE_EXTENSION).exists(); + return new File(segDir, column + Indexes.VECTOR_V99_HNSW_INDEX_FILE_EXTENSION).exists() || new File(segDir, + column + Indexes.VECTOR_V99_INDEX_FILE_EXTENSION).exists(); } public static VectorSimilarityFunction toSimilarityFunction( @@ -81,17 +85,17 @@ public static IndexWriterConfig getIndexWriterConfig(VectorIndexConfig vectorInd indexWriterConfig.setUseCompoundFile(useCompoundFile); int maxCon = Integer.parseInt(vectorIndexConfig.getProperties() - .getOrDefault("maxCon", String.valueOf(Lucene95HnswVectorsFormat.DEFAULT_MAX_CONN))); + .getOrDefault("maxCon", String.valueOf(Lucene99HnswVectorsFormat.DEFAULT_MAX_CONN))); int beamWidth = Integer.parseInt(vectorIndexConfig.getProperties() - .getOrDefault("beamWidth", String.valueOf(Lucene95HnswVectorsFormat.DEFAULT_BEAM_WIDTH))); + .getOrDefault("beamWidth", String.valueOf(Lucene99HnswVectorsFormat.DEFAULT_BEAM_WIDTH))); int maxDimensions = Integer.parseInt(vectorIndexConfig.getProperties() .getOrDefault("maxDimensions", 
String.valueOf(HnswVectorsFormat.DEFAULT_MAX_DIMENSIONS))); HnswVectorsFormat knnVectorsFormat = new HnswVectorsFormat(maxCon, beamWidth, maxDimensions); - Lucene95Codec.Mode mode = Lucene95Codec.Mode.valueOf(vectorIndexConfig.getProperties() - .getOrDefault("mode", Lucene95Codec.Mode.BEST_SPEED.name())); + Lucene99Codec.Mode mode = Lucene99Codec.Mode.valueOf(vectorIndexConfig.getProperties() + .getOrDefault("mode", Lucene99Codec.Mode.BEST_SPEED.name())); indexWriterConfig.setCodec(new HnswCodec(mode, knnVectorsFormat)); return indexWriterConfig; } diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/fst/FSTBuilder.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/fst/FSTBuilder.java index 0a4596d173d4..a64f7817585b 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/fst/FSTBuilder.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/fst/FSTBuilder.java @@ -36,28 +36,31 @@ */ public class FSTBuilder { public static final Logger LOGGER = LoggerFactory.getLogger(FSTBuilder.class); - private final FSTCompiler _builder = new FSTCompiler<>(FST.INPUT_TYPE.BYTE4, PositiveIntOutputs.getSingleton()); + private final FSTCompiler _fstCompiler = + (new FSTCompiler.Builder<>(FST.INPUT_TYPE.BYTE4, PositiveIntOutputs.getSingleton())).build(); private final IntsRefBuilder _scratch = new IntsRefBuilder(); public static FST buildFST(SortedMap input) throws IOException { PositiveIntOutputs fstOutput = PositiveIntOutputs.getSingleton(); - FSTCompiler fstCompiler = new FSTCompiler<>(FST.INPUT_TYPE.BYTE4, fstOutput); + FSTCompiler.Builder fstCompilerBuilder = new FSTCompiler.Builder<>(FST.INPUT_TYPE.BYTE4, fstOutput); + FSTCompiler fstCompiler = fstCompilerBuilder.build(); IntsRefBuilder scratch = new IntsRefBuilder(); for (Map.Entry entry : input.entrySet()) { fstCompiler.add(Util.toUTF16(entry.getKey(), scratch), entry.getValue().longValue()); } - return 
fstCompiler.compile(); + + return FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader()); } public void addEntry(String key, Integer value) throws IOException { - _builder.add(Util.toUTF16(key, _scratch), value.longValue()); + _fstCompiler.add(Util.toUTF16(key, _scratch), value.longValue()); } public FST done() throws IOException { - return _builder.compile(); + return FST.fromFSTReader(_fstCompiler.compile(), _fstCompiler.getFSTReader()); } } diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/nativefst/NativeFSTIndexCreator.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/nativefst/NativeFSTIndexCreator.java index 1f69bc50f1a1..933106b4e8bd 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/nativefst/NativeFSTIndexCreator.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/nativefst/NativeFSTIndexCreator.java @@ -47,7 +47,7 @@ public class NativeFSTIndexCreator implements FSTIndexCreator { * @param sortedEntries Sorted entries of the unique values of the column. 
*/ public NativeFSTIndexCreator(File indexDir, String columnName, String[] sortedEntries) { - _fstIndexFile = new File(indexDir, columnName + V1Constants.Indexes.LUCENE_V9_FST_INDEX_FILE_EXTENSION); + _fstIndexFile = new File(indexDir, columnName + V1Constants.Indexes.LUCENE_V99_FST_INDEX_FILE_EXTENSION); _fstBuilder = new FSTBuilder(); _dictId = 0; diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/creator/LuceneFSTIndexCreatorTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/creator/LuceneFSTIndexCreatorTest.java index b9c568000e8f..e0e6168c9a28 100644 --- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/creator/LuceneFSTIndexCreatorTest.java +++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/creator/LuceneFSTIndexCreatorTest.java @@ -32,7 +32,7 @@ import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; -import static org.apache.pinot.segment.spi.V1Constants.Indexes.LUCENE_V9_FST_INDEX_FILE_EXTENSION; +import static org.apache.pinot.segment.spi.V1Constants.Indexes.LUCENE_V99_FST_INDEX_FILE_EXTENSION; public class LuceneFSTIndexCreatorTest { @@ -62,7 +62,7 @@ public void testIndexWriterReader() LuceneFSTIndexCreator creator = new LuceneFSTIndexCreator( INDEX_DIR, "testFSTColumn", uniqueValues); creator.seal(); - File fstFile = new File(INDEX_DIR, "testFSTColumn" + LUCENE_V9_FST_INDEX_FILE_EXTENSION); + File fstFile = new File(INDEX_DIR, "testFSTColumn" + LUCENE_V99_FST_INDEX_FILE_EXTENSION); PinotDataBuffer pinotDataBuffer = PinotDataBuffer.mapFile(fstFile, true, 0, fstFile.length(), ByteOrder.BIG_ENDIAN, "fstIndexFile"); LuceneFSTIndexReader reader = new LuceneFSTIndexReader(pinotDataBuffer); diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/creator/NativeFSTIndexCreatorTest.java 
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/creator/NativeFSTIndexCreatorTest.java index d77268ef2825..f98324af588f 100644 --- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/creator/NativeFSTIndexCreatorTest.java +++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/creator/NativeFSTIndexCreatorTest.java @@ -29,7 +29,7 @@ import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; -import static org.apache.pinot.segment.spi.V1Constants.Indexes.LUCENE_V9_FST_INDEX_FILE_EXTENSION; +import static org.apache.pinot.segment.spi.V1Constants.Indexes.LUCENE_V99_FST_INDEX_FILE_EXTENSION; public class NativeFSTIndexCreatorTest { @@ -59,7 +59,7 @@ public void testIndexWriterReader() creator.seal(); } - File fstFile = new File(INDEX_DIR, "testFSTColumn" + LUCENE_V9_FST_INDEX_FILE_EXTENSION); + File fstFile = new File(INDEX_DIR, "testFSTColumn" + LUCENE_V99_FST_INDEX_FILE_EXTENSION); try (PinotDataBuffer dataBuffer = PinotDataBuffer.mapReadOnlyBigEndianFile(fstFile); NativeFSTIndexReader reader = new NativeFSTIndexReader(dataBuffer)) { diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/loader/LoaderTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/loader/LoaderTest.java index 87a1e5db2596..98dff135d584 100644 --- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/loader/LoaderTest.java +++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/loader/LoaderTest.java @@ -65,7 +65,7 @@ import org.testng.annotations.Test; import org.testng.collections.Lists; -import static org.apache.pinot.segment.spi.V1Constants.Indexes.LUCENE_V9_FST_INDEX_FILE_EXTENSION; +import static org.apache.pinot.segment.spi.V1Constants.Indexes.LUCENE_V99_FST_INDEX_FILE_EXTENSION; import static org.testng.Assert.assertFalse; import static 
org.testng.Assert.assertTrue; @@ -341,7 +341,7 @@ public void testFSTIndexLoad() fstIndexFile = SegmentDirectoryPaths.findFSTIndexIndexFile(_indexDir, FST_INDEX_COL_NAME); Assert.assertNotNull(fstIndexFile); Assert.assertFalse(fstIndexFile.isDirectory()); - Assert.assertEquals(fstIndexFile.getName(), FST_INDEX_COL_NAME + LUCENE_V9_FST_INDEX_FILE_EXTENSION); + Assert.assertEquals(fstIndexFile.getName(), FST_INDEX_COL_NAME + LUCENE_V99_FST_INDEX_FILE_EXTENSION); Assert.assertEquals(fstIndexFile.getParentFile().getName(), new SegmentMetadataImpl(_indexDir).getName()); indexSegment.destroy(); @@ -360,7 +360,7 @@ public void testFSTIndexLoad() fstIndexFile = SegmentDirectoryPaths.findFSTIndexIndexFile(_indexDir, FST_INDEX_COL_NAME); Assert.assertNotNull(fstIndexFile); Assert.assertFalse(fstIndexFile.isDirectory()); - Assert.assertEquals(fstIndexFile.getName(), FST_INDEX_COL_NAME + LUCENE_V9_FST_INDEX_FILE_EXTENSION); + Assert.assertEquals(fstIndexFile.getName(), FST_INDEX_COL_NAME + LUCENE_V99_FST_INDEX_FILE_EXTENSION); Assert.assertEquals(fstIndexFile.getParentFile().getName(), new SegmentMetadataImpl(_indexDir).getName()); indexSegment.destroy(); @@ -377,7 +377,7 @@ public void testFSTIndexLoad() fstIndexFile = SegmentDirectoryPaths.findFSTIndexIndexFile(_indexDir, FST_INDEX_COL_NAME); Assert.assertNotNull(fstIndexFile); Assert.assertFalse(fstIndexFile.isDirectory()); - Assert.assertEquals(fstIndexFile.getName(), FST_INDEX_COL_NAME + LUCENE_V9_FST_INDEX_FILE_EXTENSION); + Assert.assertEquals(fstIndexFile.getName(), FST_INDEX_COL_NAME + LUCENE_V99_FST_INDEX_FILE_EXTENSION); Assert.assertEquals(fstIndexFile.getParentFile().getName(), new SegmentMetadataImpl(_indexDir).getName()); indexSegment.destroy(); @@ -590,7 +590,7 @@ public void testTextIndexLoad() Assert.assertNotNull(textIndexFile); Assert.assertTrue(textIndexFile.isDirectory()); Assert.assertEquals(textIndexFile.getName(), - TEXT_INDEX_COL_NAME + V1Constants.Indexes.LUCENE_V9_TEXT_INDEX_FILE_EXTENSION); + 
TEXT_INDEX_COL_NAME + V1Constants.Indexes.LUCENE_V99_TEXT_INDEX_FILE_EXTENSION); Assert.assertEquals(textIndexFile.getParentFile().getName(), SegmentDirectoryPaths.V3_SUBDIRECTORY_NAME); // CASE 1: don't set the segment version to load in IndexLoadingConfig @@ -616,7 +616,7 @@ public void testTextIndexLoad() Assert.assertTrue(textIndexFile.isDirectory()); Assert.assertFalse(textIndexDocIdMappingFile.isDirectory()); Assert.assertEquals(textIndexFile.getName(), - TEXT_INDEX_COL_NAME + V1Constants.Indexes.LUCENE_V9_TEXT_INDEX_FILE_EXTENSION); + TEXT_INDEX_COL_NAME + V1Constants.Indexes.LUCENE_V99_TEXT_INDEX_FILE_EXTENSION); Assert.assertEquals(textIndexFile.getParentFile().getName(), SegmentDirectoryPaths.V3_SUBDIRECTORY_NAME); Assert.assertEquals(textIndexDocIdMappingFile.getName(), TEXT_INDEX_COL_NAME + V1Constants.Indexes.LUCENE_TEXT_INDEX_DOCID_MAPPING_FILE_EXTENSION); @@ -644,7 +644,7 @@ public void testTextIndexLoad() Assert.assertTrue(textIndexFile.isDirectory()); Assert.assertFalse(textIndexDocIdMappingFile.isDirectory()); Assert.assertEquals(textIndexFile.getName(), - TEXT_INDEX_COL_NAME + V1Constants.Indexes.LUCENE_V9_TEXT_INDEX_FILE_EXTENSION); + TEXT_INDEX_COL_NAME + V1Constants.Indexes.LUCENE_V99_TEXT_INDEX_FILE_EXTENSION); Assert.assertEquals(textIndexFile.getParentFile().getName(), SegmentDirectoryPaths.V3_SUBDIRECTORY_NAME); Assert.assertEquals(textIndexDocIdMappingFile.getName(), TEXT_INDEX_COL_NAME + V1Constants.Indexes.LUCENE_TEXT_INDEX_DOCID_MAPPING_FILE_EXTENSION); @@ -671,7 +671,7 @@ public void testTextIndexLoad() Assert.assertTrue(textIndexFile.isDirectory()); Assert.assertFalse(textIndexDocIdMappingFile.isDirectory()); Assert.assertEquals(textIndexFile.getName(), - TEXT_INDEX_COL_NAME + V1Constants.Indexes.LUCENE_V9_TEXT_INDEX_FILE_EXTENSION); + TEXT_INDEX_COL_NAME + V1Constants.Indexes.LUCENE_V99_TEXT_INDEX_FILE_EXTENSION); Assert.assertEquals(textIndexFile.getParentFile().getName(), new SegmentMetadataImpl(_indexDir).getName()); // CASE 1: 
don't set the segment version to load in IndexLoadingConfig @@ -694,7 +694,7 @@ public void testTextIndexLoad() Assert.assertNotNull(textIndexDocIdMappingFile); Assert.assertTrue(textIndexFile.isDirectory()); Assert.assertEquals(textIndexFile.getName(), - TEXT_INDEX_COL_NAME + V1Constants.Indexes.LUCENE_V9_TEXT_INDEX_FILE_EXTENSION); + TEXT_INDEX_COL_NAME + V1Constants.Indexes.LUCENE_V99_TEXT_INDEX_FILE_EXTENSION); Assert.assertEquals(textIndexFile.getParentFile().getName(), new SegmentMetadataImpl(_indexDir).getName()); Assert.assertEquals(textIndexDocIdMappingFile.getName(), TEXT_INDEX_COL_NAME + V1Constants.Indexes.LUCENE_TEXT_INDEX_DOCID_MAPPING_FILE_EXTENSION); @@ -720,7 +720,7 @@ public void testTextIndexLoad() Assert.assertNotNull(textIndexDocIdMappingFile); Assert.assertTrue(textIndexFile.isDirectory()); Assert.assertEquals(textIndexFile.getName(), - TEXT_INDEX_COL_NAME + V1Constants.Indexes.LUCENE_V9_TEXT_INDEX_FILE_EXTENSION); + TEXT_INDEX_COL_NAME + V1Constants.Indexes.LUCENE_V99_TEXT_INDEX_FILE_EXTENSION); Assert.assertEquals(textIndexFile.getParentFile().getName(), new SegmentMetadataImpl(_indexDir).getName()); Assert.assertEquals(textIndexDocIdMappingFile.getName(), TEXT_INDEX_COL_NAME + V1Constants.Indexes.LUCENE_TEXT_INDEX_DOCID_MAPPING_FILE_EXTENSION); @@ -746,7 +746,7 @@ public void testTextIndexLoad() Assert.assertNotNull(textIndexDocIdMappingFile); Assert.assertTrue(textIndexFile.isDirectory()); Assert.assertEquals(textIndexFile.getName(), - TEXT_INDEX_COL_NAME + V1Constants.Indexes.LUCENE_V9_TEXT_INDEX_FILE_EXTENSION); + TEXT_INDEX_COL_NAME + V1Constants.Indexes.LUCENE_V99_TEXT_INDEX_FILE_EXTENSION); Assert.assertEquals(textIndexFile.getParentFile().getName(), SegmentDirectoryPaths.V3_SUBDIRECTORY_NAME); Assert.assertEquals(textIndexDocIdMappingFile.getName(), TEXT_INDEX_COL_NAME + V1Constants.Indexes.LUCENE_TEXT_INDEX_DOCID_MAPPING_FILE_EXTENSION); @@ -776,7 +776,7 @@ public void testVectorIndexLoad() Assert.assertNotNull(vectorIndexFile); 
Assert.assertTrue(vectorIndexFile.isDirectory()); Assert.assertEquals(vectorIndexFile.getName(), - VECTOR_INDEX_COL_NAME + V1Constants.Indexes.VECTOR_HNSW_INDEX_FILE_EXTENSION); + VECTOR_INDEX_COL_NAME + V1Constants.Indexes.VECTOR_V99_HNSW_INDEX_FILE_EXTENSION); Assert.assertEquals(vectorIndexFile.getParentFile().getName(), SegmentDirectoryPaths.V3_SUBDIRECTORY_NAME); // CASE 1: don't set the segment version to load in IndexLoadingConfig @@ -801,7 +801,7 @@ public void testVectorIndexLoad() Assert.assertNotNull(vectorIndexFile); Assert.assertTrue(vectorIndexFile.isDirectory()); Assert.assertEquals(vectorIndexFile.getName(), - VECTOR_INDEX_COL_NAME + V1Constants.Indexes.VECTOR_HNSW_INDEX_FILE_EXTENSION); + VECTOR_INDEX_COL_NAME + V1Constants.Indexes.VECTOR_V99_HNSW_INDEX_FILE_EXTENSION); Assert.assertEquals(vectorIndexFile.getParentFile().getName(), SegmentDirectoryPaths.V3_SUBDIRECTORY_NAME); indexSegment.destroy(); @@ -821,7 +821,7 @@ public void testVectorIndexLoad() Assert.assertNotNull(vectorIndexFile); Assert.assertTrue(vectorIndexFile.isDirectory()); Assert.assertEquals(vectorIndexFile.getName(), - VECTOR_INDEX_COL_NAME + V1Constants.Indexes.VECTOR_HNSW_INDEX_FILE_EXTENSION); + VECTOR_INDEX_COL_NAME + V1Constants.Indexes.VECTOR_V99_HNSW_INDEX_FILE_EXTENSION); Assert.assertEquals(vectorIndexFile.getParentFile().getName(), SegmentDirectoryPaths.V3_SUBDIRECTORY_NAME); indexSegment.destroy(); @@ -843,7 +843,7 @@ public void testVectorIndexLoad() Assert.assertNotNull(vectorIndexFile); Assert.assertTrue(vectorIndexFile.isDirectory()); Assert.assertEquals(vectorIndexFile.getName(), - VECTOR_INDEX_COL_NAME + V1Constants.Indexes.VECTOR_HNSW_INDEX_FILE_EXTENSION); + VECTOR_INDEX_COL_NAME + V1Constants.Indexes.VECTOR_V99_HNSW_INDEX_FILE_EXTENSION); Assert.assertEquals(vectorIndexFile.getParentFile().getName(), new SegmentMetadataImpl(_indexDir).getName()); // CASE 1: don't set the segment version to load in IndexLoadingConfig @@ -867,7 +867,7 @@ public void 
testVectorIndexLoad() Assert.assertNotNull(vectorIndexFile); Assert.assertTrue(vectorIndexFile.isDirectory()); Assert.assertEquals(vectorIndexFile.getName(), - VECTOR_INDEX_COL_NAME + V1Constants.Indexes.VECTOR_HNSW_INDEX_FILE_EXTENSION); + VECTOR_INDEX_COL_NAME + V1Constants.Indexes.VECTOR_V99_HNSW_INDEX_FILE_EXTENSION); Assert.assertEquals(vectorIndexFile.getParentFile().getName(), new SegmentMetadataImpl(_indexDir).getName()); indexSegment.destroy(); @@ -886,7 +886,7 @@ public void testVectorIndexLoad() Assert.assertNotNull(vectorIndexFile); Assert.assertTrue(vectorIndexFile.isDirectory()); Assert.assertEquals(vectorIndexFile.getName(), - VECTOR_INDEX_COL_NAME + V1Constants.Indexes.VECTOR_HNSW_INDEX_FILE_EXTENSION); + VECTOR_INDEX_COL_NAME + V1Constants.Indexes.VECTOR_V99_HNSW_INDEX_FILE_EXTENSION); Assert.assertEquals(vectorIndexFile.getParentFile().getName(), new SegmentMetadataImpl(_indexDir).getName()); indexSegment.destroy(); @@ -905,7 +905,7 @@ public void testVectorIndexLoad() Assert.assertNotNull(vectorIndexFile); Assert.assertTrue(vectorIndexFile.isDirectory()); Assert.assertEquals(vectorIndexFile.getName(), - VECTOR_INDEX_COL_NAME + V1Constants.Indexes.VECTOR_HNSW_INDEX_FILE_EXTENSION); + VECTOR_INDEX_COL_NAME + V1Constants.Indexes.VECTOR_V99_HNSW_INDEX_FILE_EXTENSION); Assert.assertEquals(vectorIndexFile.getParentFile().getName(), SegmentDirectoryPaths.V3_SUBDIRECTORY_NAME); indexSegment.destroy(); } diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/loader/SegmentPreProcessorTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/loader/SegmentPreProcessorTest.java index 3349821963a0..acdc67925655 100644 --- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/loader/SegmentPreProcessorTest.java +++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/loader/SegmentPreProcessorTest.java @@ -1441,8 +1441,8 @@ public void 
testV1CleanupIndices() // V1 use separate file for each column index. File iiFile = new File(_indexDir, strColumn + V1Constants.Indexes.BITMAP_INVERTED_INDEX_FILE_EXTENSION); File rgFile = new File(_indexDir, strColumn + V1Constants.Indexes.BITMAP_RANGE_INDEX_FILE_EXTENSION); - File txtFile = new File(_indexDir, strColumn + V1Constants.Indexes.LUCENE_V9_TEXT_INDEX_FILE_EXTENSION); - File fstFile = new File(_indexDir, strColumn + V1Constants.Indexes.LUCENE_V9_FST_INDEX_FILE_EXTENSION); + File txtFile = new File(_indexDir, strColumn + V1Constants.Indexes.LUCENE_V99_TEXT_INDEX_FILE_EXTENSION); + File fstFile = new File(_indexDir, strColumn + V1Constants.Indexes.LUCENE_V99_FST_INDEX_FILE_EXTENSION); File bfFile = new File(_indexDir, strColumn + V1Constants.Indexes.BLOOM_FILTER_FILE_EXTENSION); assertFalse(iiFile.exists()); diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/FilePerIndexDirectoryTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/FilePerIndexDirectoryTest.java index 38eae8436ec8..a385a60b0347 100644 --- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/FilePerIndexDirectoryTest.java +++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/FilePerIndexDirectoryTest.java @@ -231,11 +231,11 @@ public void testRemoveTextIndices() // Both files for TextIndex should be removed. 
fpi.removeIndex("foo", StandardIndexes.text()); - assertFalse(new File(TEMP_DIR, "foo" + V1Constants.Indexes.LUCENE_V9_TEXT_INDEX_FILE_EXTENSION).exists()); + assertFalse(new File(TEMP_DIR, "foo" + V1Constants.Indexes.LUCENE_V99_TEXT_INDEX_FILE_EXTENSION).exists()); assertFalse( new File(TEMP_DIR, "foo" + V1Constants.Indexes.LUCENE_TEXT_INDEX_DOCID_MAPPING_FILE_EXTENSION).exists()); } - assertTrue(new File(TEMP_DIR, "bar" + V1Constants.Indexes.LUCENE_V9_TEXT_INDEX_FILE_EXTENSION).exists()); + assertTrue(new File(TEMP_DIR, "bar" + V1Constants.Indexes.LUCENE_V99_TEXT_INDEX_FILE_EXTENSION).exists()); assertTrue(new File(TEMP_DIR, "bar" + V1Constants.Indexes.LUCENE_TEXT_INDEX_DOCID_MAPPING_FILE_EXTENSION).exists()); // Read indices back and check the content. diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/SingleFileIndexDirectoryTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/SingleFileIndexDirectoryTest.java index 7f0dcebb05f8..3a94ceec1193 100644 --- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/SingleFileIndexDirectoryTest.java +++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/SingleFileIndexDirectoryTest.java @@ -264,11 +264,11 @@ public void testRemoveTextIndices() // Both files for TextIndex should be removed. 
sfd.removeIndex("foo", StandardIndexes.text()); - assertFalse(new File(TEMP_DIR, "foo" + V1Constants.Indexes.LUCENE_V9_TEXT_INDEX_FILE_EXTENSION).exists()); + assertFalse(new File(TEMP_DIR, "foo" + V1Constants.Indexes.LUCENE_V99_TEXT_INDEX_FILE_EXTENSION).exists()); assertFalse( new File(TEMP_DIR, "foo" + V1Constants.Indexes.LUCENE_TEXT_INDEX_DOCID_MAPPING_FILE_EXTENSION).exists()); } - assertTrue(new File(TEMP_DIR, "bar" + V1Constants.Indexes.LUCENE_V9_TEXT_INDEX_FILE_EXTENSION).exists()); + assertTrue(new File(TEMP_DIR, "bar" + V1Constants.Indexes.LUCENE_V99_TEXT_INDEX_FILE_EXTENSION).exists()); assertTrue( new File(TEMP_DIR, "bar" + V1Constants.Indexes.LUCENE_TEXT_INDEX_DOCID_MAPPING_FILE_EXTENSION).exists()); diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/fst/FSTBuilderTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/fst/FSTBuilderTest.java index 493e7b3449e9..edee3ebef21e 100644 --- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/fst/FSTBuilderTest.java +++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/fst/FSTBuilderTest.java @@ -28,7 +28,6 @@ import org.apache.commons.io.FileUtils; import org.apache.lucene.store.OutputStreamDataOutput; import org.apache.lucene.util.fst.FST; -import org.apache.lucene.util.fst.OffHeapFSTStore; import org.apache.lucene.util.fst.Outputs; import org.apache.lucene.util.fst.PositiveIntOutputs; import org.apache.pinot.segment.spi.memory.PinotDataBuffer; @@ -78,7 +77,6 @@ public void testFSTBuilder() PinotDataBuffer pinotDataBuffer = PinotDataBuffer.mapFile(fstFile, true, 0, fstFile.length(), ByteOrder.BIG_ENDIAN, ""); PinotBufferIndexInput indexInput = new PinotBufferIndexInput(pinotDataBuffer, 0L, fstFile.length()); - FST readFST = new FST(indexInput, indexInput, outputs, new OffHeapFSTStore()); List results = RegexpMatcher.regexMatch("hello.*123", fst); Assert.assertEquals(results.size(), 1); diff --git 
a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/V1Constants.java b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/V1Constants.java index 25ded5fa30d9..8827329a7bdd 100644 --- a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/V1Constants.java +++ b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/V1Constants.java @@ -54,8 +54,12 @@ public static class Indexes { public static final String LUCENE_TEXT_INDEX_FILE_EXTENSION = ".lucene.index"; public static final String LUCENE_V9_FST_INDEX_FILE_EXTENSION = ".lucene.v9.fst"; public static final String LUCENE_V9_TEXT_INDEX_FILE_EXTENSION = ".lucene.v9.index"; + public static final String LUCENE_V99_FST_INDEX_FILE_EXTENSION = ".lucene.v99.fst"; + public static final String LUCENE_V99_TEXT_INDEX_FILE_EXTENSION = ".lucene.v99.index"; public static final String VECTOR_INDEX_FILE_EXTENSION = ".vector.index"; public static final String VECTOR_HNSW_INDEX_FILE_EXTENSION = ".vector.hnsw.index"; + public static final String VECTOR_V99_INDEX_FILE_EXTENSION = ".vector.v99.index"; + public static final String VECTOR_V99_HNSW_INDEX_FILE_EXTENSION = ".vector.v99.hnsw.index"; public static final String VECTOR_HNSW_INDEX_DOCID_MAPPING_FILE_EXTENSION = ".vector.hnsw.mapping"; } diff --git a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/store/SegmentDirectoryPaths.java b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/store/SegmentDirectoryPaths.java index b8c09a0329e0..c873ab7e03b5 100644 --- a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/store/SegmentDirectoryPaths.java +++ b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/store/SegmentDirectoryPaths.java @@ -79,8 +79,14 @@ public static File findCreationMetaFile(File indexDir) { */ @Nullable public static File findTextIndexIndexFile(File indexDir, String column) { - String luceneIndexDirectory = column + V1Constants.Indexes.LUCENE_V9_TEXT_INDEX_FILE_EXTENSION; + String 
luceneIndexDirectory = column + V1Constants.Indexes.LUCENE_V99_TEXT_INDEX_FILE_EXTENSION; File indexFormatFile = findFormatFile(indexDir, luceneIndexDirectory); + // check for V9 version, if null + if (indexFormatFile == null) { + luceneIndexDirectory = column + V1Constants.Indexes.LUCENE_V9_TEXT_INDEX_FILE_EXTENSION; + indexFormatFile = findFormatFile(indexDir, luceneIndexDirectory); + } + // check for old version, if null if (indexFormatFile == null) { luceneIndexDirectory = column + V1Constants.Indexes.LUCENE_TEXT_INDEX_FILE_EXTENSION; indexFormatFile = findFormatFile(indexDir, luceneIndexDirectory); @@ -101,8 +107,14 @@ public static File findNativeTextIndexIndexFile(File indexDir, String column) { } public static File findFSTIndexIndexFile(File indexDir, String column) { - String luceneIndexDirectory = column + V1Constants.Indexes.LUCENE_V9_FST_INDEX_FILE_EXTENSION; + String luceneIndexDirectory = column + V1Constants.Indexes.LUCENE_V99_FST_INDEX_FILE_EXTENSION; File formatFile = findFormatFile(indexDir, luceneIndexDirectory); + // check for V9 version, if null + if (formatFile == null) { + luceneIndexDirectory = column + V1Constants.Indexes.LUCENE_V9_FST_INDEX_FILE_EXTENSION; + formatFile = findFormatFile(indexDir, luceneIndexDirectory); + } + // check for old version, if null if (formatFile == null) { luceneIndexDirectory = column + V1Constants.Indexes.LUCENE_FST_INDEX_FILE_EXTENSION; formatFile = findFormatFile(indexDir, luceneIndexDirectory); @@ -120,8 +132,14 @@ public static File findTextIndexDocIdMappingFile(File indexDir, String column) { @Nullable @VisibleForTesting public static File findVectorIndexIndexFile(File segmentIndexDir, String column) { - String vectorIndexDirectory = column + V1Constants.Indexes.VECTOR_HNSW_INDEX_FILE_EXTENSION; - return findFormatFile(segmentIndexDir, vectorIndexDirectory); + String vectorIndexDirectory = column + V1Constants.Indexes.VECTOR_V99_HNSW_INDEX_FILE_EXTENSION; + File formatFile = findFormatFile(segmentIndexDir, 
vectorIndexDirectory); + + if (formatFile == null) { + vectorIndexDirectory = column + V1Constants.Indexes.VECTOR_HNSW_INDEX_FILE_EXTENSION; + formatFile = findFormatFile(segmentIndexDir, vectorIndexDirectory); + } + return formatFile; } /** diff --git a/pom.xml b/pom.xml index e3d1f26a9bfc..1d7ea3245651 100644 --- a/pom.xml +++ b/pom.xml @@ -153,7 +153,7 @@ 2.5.1 2.3.2 1.36.0 - 9.8.0 + 9.10.0 0.10.2 0.17.0 From 5d1dc73cc608f5fa8cfea2c72a5fb8a2d112c143 Mon Sep 17 00:00:00 2001 From: Johan Adami <4760722+jadami10@users.noreply.github.com> Date: Wed, 1 May 2024 16:56:50 -0400 Subject: [PATCH 102/102] log the log rate limiter rate for dropped broker logs (#13041) --- .../main/java/org/apache/pinot/broker/querylog/QueryLogger.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pinot-broker/src/main/java/org/apache/pinot/broker/querylog/QueryLogger.java b/pinot-broker/src/main/java/org/apache/pinot/broker/querylog/QueryLogger.java index a1ccd63e6c34..28564cc1c67a 100644 --- a/pinot-broker/src/main/java/org/apache/pinot/broker/querylog/QueryLogger.java +++ b/pinot-broker/src/main/java/org/apache/pinot/broker/querylog/QueryLogger.java @@ -98,7 +98,7 @@ public void log(QueryLogParams params) { long numDroppedLogsSinceLastLog = _numDroppedLogs.getAndSet(0); if (numDroppedLogsSinceLastLog > 0) { _logger.warn("{} logs were dropped. (log max rate per second: {})", numDroppedLogsSinceLastLog, - _droppedLogRateLimiter.getRate()); + _logRateLimiter.getRate()); } } }