From df38aa6a38f1e4712778ffd96ce9582c2670ca28 Mon Sep 17 00:00:00 2001
From: Chaitanya Deepthi <45308220+deepthi912@users.noreply.github.com>
Date: Fri, 12 Apr 2024 01:09:59 -0400
Subject: [PATCH 001/102] swagger-ui upgrade to 5.15.0 Fixes (#12908)
---
.../main/java/org/apache/pinot/spi/utils/CommonConstants.java | 2 +-
pom.xml | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/utils/CommonConstants.java b/pinot-spi/src/main/java/org/apache/pinot/spi/utils/CommonConstants.java
index a59948c5f985..bbf3b30342fa 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/utils/CommonConstants.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/utils/CommonConstants.java
@@ -51,7 +51,7 @@ private CommonConstants() {
"org.apache.pinot.spi.eventlistener.query.NoOpBrokerQueryEventListener";
public static final String SWAGGER_AUTHORIZATION_KEY = "oauth";
- public static final String CONFIG_OF_SWAGGER_RESOURCES_PATH = "META-INF/resources/webjars/swagger-ui/5.13.0/";
+ public static final String CONFIG_OF_SWAGGER_RESOURCES_PATH = "META-INF/resources/webjars/swagger-ui/5.15.0/";
public static final String CONFIG_OF_TIMEZONE = "pinot.timezone";
public static final String DATABASE = "database";
diff --git a/pom.xml b/pom.xml
index 255f13e13911..85d282e65e08 100644
--- a/pom.xml
+++ b/pom.xml
@@ -144,7 +144,7 @@
2.6.1
3.30.2-GA
1.6.14
- <swagger-ui.version>5.13.0</swagger-ui.version>
+ <swagger-ui.version>5.15.0</swagger-ui.version>
3.3.6
2.9.0
2.5.1
From 4c514725af9a6acc1ea92bc820d5f669b074103c Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Fri, 12 Apr 2024 13:53:34 -0700
Subject: [PATCH 002/102] Bump javax.servlet.jsp:javax.servlet.jsp-api from 2.2
to 2.3.3 (#12919)
---
pom.xml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/pom.xml b/pom.xml
index 85d282e65e08..01e08307015b 100644
--- a/pom.xml
+++ b/pom.xml
@@ -210,7 +210,7 @@
2.1.1
1.1.1
1.1.1
- 2.2
+ 2.3.3
4.5.14
From 2c88d09466a6e9c11d5c35be202691871534dca1 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Fri, 12 Apr 2024 13:53:54 -0700
Subject: [PATCH 003/102] Bump org.apache.maven.plugins:maven-gpg-plugin from
3.2.2 to 3.2.3 (#12918)
---
pinot-connectors/pinot-spark-2-connector/pom.xml | 2 +-
pinot-connectors/pinot-spark-3-connector/pom.xml | 2 +-
pinot-connectors/pinot-spark-common/pom.xml | 2 +-
3 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/pinot-connectors/pinot-spark-2-connector/pom.xml b/pinot-connectors/pinot-spark-2-connector/pom.xml
index 14904aad2884..c2a0ea3f4f5d 100644
--- a/pinot-connectors/pinot-spark-2-connector/pom.xml
+++ b/pinot-connectors/pinot-spark-2-connector/pom.xml
@@ -152,7 +152,7 @@
Thus, explicitly adding this plugin to a new profile to sign the files at the end all at once. -->
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-gpg-plugin</artifactId>
- <version>3.2.2</version>
+ <version>3.2.3</version>
diff --git a/pinot-connectors/pinot-spark-3-connector/pom.xml b/pinot-connectors/pinot-spark-3-connector/pom.xml
index a39548d22019..2cf4a3fe2e0f 100644
--- a/pinot-connectors/pinot-spark-3-connector/pom.xml
+++ b/pinot-connectors/pinot-spark-3-connector/pom.xml
@@ -148,7 +148,7 @@
Thus, explicitly adding this plugin to a new profile to sign the files at the end all at once. -->
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-gpg-plugin</artifactId>
- <version>3.2.2</version>
+ <version>3.2.3</version>
diff --git a/pinot-connectors/pinot-spark-common/pom.xml b/pinot-connectors/pinot-spark-common/pom.xml
index a4f72ed076e4..5c8f812cf31b 100644
--- a/pinot-connectors/pinot-spark-common/pom.xml
+++ b/pinot-connectors/pinot-spark-common/pom.xml
@@ -163,7 +163,7 @@
Thus, explicitly adding this plugin to a new profile to sign the files at the end all at once. -->
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-gpg-plugin</artifactId>
- <version>3.2.2</version>
+ <version>3.2.3</version>
From 21275580d40cc3f70eb53b2766397ef8577f3977 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Fri, 12 Apr 2024 13:54:12 -0700
Subject: [PATCH 004/102] Bump io.github.hakky54:sslcontext-kickstart-for-netty
(#12917)
---
pom.xml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/pom.xml b/pom.xml
index 01e08307015b..9abfd6067fd8 100644
--- a/pom.xml
+++ b/pom.xml
@@ -173,7 +173,7 @@
3.1.12
7.10.1
6.6.2
- 8.2.0
+ 8.3.4
3.14.0
From 2c22980c460bb677fb952d966acf06c713113d31 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Fri, 12 Apr 2024 13:54:30 -0700
Subject: [PATCH 005/102] Bump it.unimi.dsi:fastutil from 8.2.3 to 8.5.13
(#12916)
---
pom.xml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/pom.xml b/pom.xml
index 9abfd6067fd8..b01a3c870668 100644
--- a/pom.xml
+++ b/pom.xml
@@ -492,7 +492,7 @@
<groupId>it.unimi.dsi</groupId>
<artifactId>fastutil</artifactId>
- <version>8.2.3</version>
+ <version>8.5.13</version>
joda-time
From 94b2f3f07b5dc190956a1efb4bb129eb097b9ab3 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Fri, 12 Apr 2024 13:57:36 -0700
Subject: [PATCH 006/102] Bump aws.sdk.version from 2.25.29 to 2.25.30 (#12914)
---
pom.xml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/pom.xml b/pom.xml
index b01a3c870668..11961af564b7 100644
--- a/pom.xml
+++ b/pom.xml
@@ -168,7 +168,7 @@
0.15.0
0.4.4
4.2.2
- <aws.sdk.version>2.25.29</aws.sdk.version>
+ <aws.sdk.version>2.25.30</aws.sdk.version>
2.12.7
3.1.12
7.10.1
From f86928d74433c48807c72f8af601f4aa19bb9449 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Fri, 12 Apr 2024 13:57:52 -0700
Subject: [PATCH 007/102] Bump com.mycila:license-maven-plugin from 4.2 to 4.3
(#12912)
---
pom.xml | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/pom.xml b/pom.xml
index 11961af564b7..6c836c52c93a 100644
--- a/pom.xml
+++ b/pom.xml
@@ -1809,7 +1809,7 @@
<groupId>com.mycila</groupId>
<artifactId>license-maven-plugin</artifactId>
- <version>4.2</version>
+ <version>4.3</version>
org.apache.maven.plugins
@@ -2068,7 +2068,7 @@
<groupId>com.mycila</groupId>
<artifactId>license-maven-plugin</artifactId>
- <version>4.2</version>
+ <version>4.3</version>
From 159aca6bc9ed43fef1e8fe9a06e13529e6acdaa7 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Fri, 12 Apr 2024 13:58:21 -0700
Subject: [PATCH 008/102] Bump org.codehaus.mojo:exec-maven-plugin from 3.1.0
to 3.2.0 (#12911)
---
pinot-distribution/pom.xml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/pinot-distribution/pom.xml b/pinot-distribution/pom.xml
index 540420cc22e6..5a024f142d56 100644
--- a/pinot-distribution/pom.xml
+++ b/pinot-distribution/pom.xml
@@ -260,7 +260,7 @@
<groupId>org.codehaus.mojo</groupId>
<artifactId>exec-maven-plugin</artifactId>
- <version>3.1.0</version>
+ <version>3.2.0</version>
remove-build-directory
From 4040a19875a33d212485720e71f1f8278857750b Mon Sep 17 00:00:00 2001
From: Jayesh Choudhary
Date: Mon, 15 Apr 2024 11:27:31 +0530
Subject: [PATCH 009/102] fix(build): update node version to 16 (#12924)
---
.github/workflows/pinot_compatibility_tests.yml | 4 ++--
.github/workflows/pinot_tests.yml | 4 ++--
pinot-controller/pom.xml | 4 ++--
pinot-controller/src/main/resources/.nvmrc | 1 +
4 files changed, 7 insertions(+), 6 deletions(-)
create mode 100644 pinot-controller/src/main/resources/.nvmrc
diff --git a/.github/workflows/pinot_compatibility_tests.yml b/.github/workflows/pinot_compatibility_tests.yml
index e9d242fede91..ecbc2300e01e 100644
--- a/.github/workflows/pinot_compatibility_tests.yml
+++ b/.github/workflows/pinot_compatibility_tests.yml
@@ -46,12 +46,12 @@ jobs:
- name: Setup node
uses: actions/setup-node@v4
with:
- node-version: v10.16.1
+ node-version: v16.15.0
cache: 'npm'
cache-dependency-path: pinot-controller/src/main/resources/package-lock.json
- name: Install npm
run: |
- npm install -g npm@6.10.0
+ npm install -g npm@8.5.5
npm --version
- name: Pinot Compatibility Regression Testing
if : ${{github.event_name == 'workflow_dispatch'}}
diff --git a/.github/workflows/pinot_tests.yml b/.github/workflows/pinot_tests.yml
index 1d90331719c4..2b97445d2b9a 100644
--- a/.github/workflows/pinot_tests.yml
+++ b/.github/workflows/pinot_tests.yml
@@ -281,12 +281,12 @@ jobs:
- name: Setup node
uses: actions/setup-node@v4
with:
- node-version: v10.16.1
+ node-version: v16.15.0
cache: 'npm'
cache-dependency-path: pinot-controller/src/main/resources/package-lock.json
- name: Install npm
run: |
- npm install -g npm@6.10.0
+ npm install -g npm@8.5.5
npm --version
# Step that does the actual cache save and restore
- uses: actions/cache@v4
diff --git a/pinot-controller/pom.xml b/pinot-controller/pom.xml
index f70dde647129..0f49f928a11f 100644
--- a/pinot-controller/pom.xml
+++ b/pinot-controller/pom.xml
@@ -159,8 +159,8 @@
install-node-and-npm
- <nodeVersion>v10.16.1</nodeVersion>
- <npmVersion>6.10.0</npmVersion>
+ <nodeVersion>v16.15.0</nodeVersion>
+ <npmVersion>8.5.5</npmVersion>
diff --git a/pinot-controller/src/main/resources/.nvmrc b/pinot-controller/src/main/resources/.nvmrc
new file mode 100644
index 000000000000..7fd023741b20
--- /dev/null
+++ b/pinot-controller/src/main/resources/.nvmrc
@@ -0,0 +1 @@
+v16.15.0
From 848fe9c4ac18b9e670bf947aba21db59d629e9f9 Mon Sep 17 00:00:00 2001
From: Abhishek Sharma
Date: Mon, 15 Apr 2024 01:59:20 -0400
Subject: [PATCH 010/102] Added PR compatibility test against release 1.1.0
(#12921)
---
.github/workflows/pinot_tests.yml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.github/workflows/pinot_tests.yml b/.github/workflows/pinot_tests.yml
index 2b97445d2b9a..ce170f31900a 100644
--- a/.github/workflows/pinot_tests.yml
+++ b/.github/workflows/pinot_tests.yml
@@ -267,7 +267,7 @@ jobs:
matrix:
test_suite: [ "compatibility-verifier/sample-test-suite" ]
old_commit: [
- "release-0.12.1", "release-1.0.0", "master"
+ "release-0.12.1", "release-1.0.0", "release-1.1.0", "master"
]
name: Pinot Compatibility Regression Testing against ${{ matrix.old_commit }} on ${{ matrix.test_suite }}
steps:
From 2c6a84b8ec840ed679c0a98230c518522b6591dd Mon Sep 17 00:00:00 2001
From: Christopher Peck <27231838+itschrispeck@users.noreply.github.com>
Date: Mon, 15 Apr 2024 09:34:37 -0700
Subject: [PATCH 011/102] Improved segment build time for Lucene text index
realtime to offline conversion (#12744)
* reuse mutable lucene index during segment conversion
* realtime segment conversion only
* add RealtimeSegmentConverter test for index reuse path
* clarify naming
* fix missed renaming
* address comments, close all resources
---
.../mutable/MutableSegmentImpl.java | 13 ++
.../converter/RealtimeSegmentConverter.java | 3 +
.../RealtimeLuceneTextIndex.java | 13 +-
.../impl/SegmentColumnarIndexCreator.java | 5 +-
.../impl/SegmentIndexCreationDriverImpl.java | 45 +++++-
.../impl/text/LuceneTextIndexCreator.java | 153 +++++++++++++++++-
.../readers/text/LuceneTextIndexReader.java | 2 +-
.../RealtimeSegmentConverterTest.java | 153 +++++++++++++++++-
.../store/FilePerIndexDirectoryTest.java | 11 +-
.../store/SingleFileIndexDirectoryTest.java | 8 +-
.../spi/creator/IndexCreationContext.java | 48 +++++-
.../segment/spi/creator/SegmentCreator.java | 3 +-
.../spi/creator/SegmentGeneratorConfig.java | 9 ++
.../segment/spi/index/TextIndexConfig.java | 2 +-
.../spi/index/mutable/MutableIndex.java | 8 +
15 files changed, 442 insertions(+), 34 deletions(-)
diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/indexsegment/mutable/MutableSegmentImpl.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/indexsegment/mutable/MutableSegmentImpl.java
index 55e0aec072ae..b336b30f2016 100644
--- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/indexsegment/mutable/MutableSegmentImpl.java
+++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/indexsegment/mutable/MutableSegmentImpl.java
@@ -932,6 +932,19 @@ public Object getValue(int docId, String column) {
}
}
+ /**
+ * Calls commit() on all mutable indexes, in preparation for realtime segment conversion.
+ * commit() can be implemented per index to perform any required actions before using mutable segment
+ * artifacts to optimize the immutable segment build.
+ */
+ public void commit() {
+ for (IndexContainer indexContainer : _indexContainerMap.values()) {
+ for (MutableIndex mutableIndex : indexContainer._mutableIndexes.values()) {
+ mutableIndex.commit();
+ }
+ }
+ }
+
@Override
public void destroy() {
_logger.info("Trying to close RealtimeSegmentImpl : {}", _segmentName);
diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/converter/RealtimeSegmentConverter.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/converter/RealtimeSegmentConverter.java
index ffb9bfc23f99..0bf8fe571f18 100644
--- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/converter/RealtimeSegmentConverter.java
+++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/converter/RealtimeSegmentConverter.java
@@ -118,6 +118,9 @@ public void build(@Nullable SegmentVersion segmentVersion, ServerMetrics serverM
genConfig.setNullHandlingEnabled(_nullHandlingEnabled);
genConfig.setSegmentZKPropsConfig(_segmentZKPropsConfig);
+ // flush any artifacts to disk to improve mutable to immutable segment conversion
+ _realtimeSegmentImpl.commit();
+
SegmentIndexCreationDriverImpl driver = new SegmentIndexCreationDriverImpl();
try (PinotSegmentRecordReader recordReader = new PinotSegmentRecordReader()) {
int[] sortedDocIds = _columnIndicesForRealtimeTable.getSortedColumn() != null
diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/invertedindex/RealtimeLuceneTextIndex.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/invertedindex/RealtimeLuceneTextIndex.java
index a71d2663ed74..8d2e43c8a563 100644
--- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/invertedindex/RealtimeLuceneTextIndex.java
+++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/invertedindex/RealtimeLuceneTextIndex.java
@@ -78,7 +78,7 @@ public RealtimeLuceneTextIndex(String column, File segmentIndexDir, String segme
// for realtime
_indexCreator =
new LuceneTextIndexCreator(column, new File(segmentIndexDir.getAbsolutePath() + "/" + segmentName),
- false /* commitOnClose */, config);
+ false /* commitOnClose */, true, null, config);
IndexWriter indexWriter = _indexCreator.getIndexWriter();
_searcherManager = new SearcherManager(indexWriter, false, false, null);
_analyzer = _indexCreator.getIndexWriter().getConfig().getAnalyzer();
@@ -181,6 +181,17 @@ private MutableRoaringBitmap getPinotDocIds(IndexSearcher indexSearcher, Mutable
return actualDocIDs;
}
+ @Override
+ public void commit() {
+ try {
+ _indexCreator.getIndexWriter().commit();
+ } catch (Exception e) {
+ LOGGER.error("Failed to commit the realtime lucene text index for column {}, exception {}", _column,
+ e.getMessage());
+ throw new RuntimeException(e);
+ }
+ }
+
@Override
public void close() {
try {
diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java
index 2d7909b40729..168490635a44 100644
--- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java
+++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java
@@ -106,7 +106,8 @@ public class SegmentColumnarIndexCreator implements SegmentCreator {
@Override
public void init(SegmentGeneratorConfig segmentCreationSpec, SegmentIndexCreationInfo segmentIndexCreationInfo,
- TreeMap<String, ColumnIndexCreationInfo> indexCreationInfoMap, Schema schema, File outDir)
+ TreeMap<String, ColumnIndexCreationInfo> indexCreationInfoMap, Schema schema, File outDir,
+ @Nullable int[] immutableToMutableIdMap)
throws Exception {
_docIdCounter = 0;
_config = segmentCreationSpec;
@@ -158,6 +159,8 @@ public void init(SegmentGeneratorConfig segmentCreationSpec, SegmentIndexCreatio
.onHeap(segmentCreationSpec.isOnHeap())
.withForwardIndexDisabled(forwardIndexDisabled)
.withTextCommitOnClose(true)
+ .withImmutableToMutableIdMap(immutableToMutableIdMap)
+ .withRealtimeConversion(segmentCreationSpec.isRealtimeConversion())
.build();
//@formatter:on
diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentIndexCreationDriverImpl.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentIndexCreationDriverImpl.java
index e99d89c8b42c..ecfea58ca788 100644
--- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentIndexCreationDriverImpl.java
+++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentIndexCreationDriverImpl.java
@@ -36,6 +36,7 @@
import javax.annotation.Nullable;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.io.FileUtils;
+import org.apache.pinot.segment.local.realtime.converter.stats.RealtimeSegmentSegmentCreationDataSource;
import org.apache.pinot.segment.local.recordtransformer.ComplexTypeTransformer;
import org.apache.pinot.segment.local.recordtransformer.RecordTransformer;
import org.apache.pinot.segment.local.segment.creator.RecordReaderSegmentCreationDataSource;
@@ -191,6 +192,11 @@ public void init(SegmentGeneratorConfig config, SegmentCreationDataSource dataSo
((RecordReaderSegmentCreationDataSource) dataSource).setTransformPipeline(transformPipeline);
}
+ // Optimization for realtime segment conversion
+ if (dataSource instanceof RealtimeSegmentSegmentCreationDataSource) {
+ _config.setRealtimeConversion(true);
+ }
+
// Initialize stats collection
_segmentStats = dataSource.gatherStats(
new StatsCollectorConfig(config.getTableConfig(), _dataSchema, config.getSegmentPartitionConfig()));
@@ -218,6 +224,23 @@ public void init(SegmentGeneratorConfig config, SegmentCreationDataSource dataSo
LOGGER.debug("tempIndexDir:{}", _tempIndexDir);
}
+ /**
+ * Generate a mutable docId to immutable docId mapping from the sortedDocIds iteration order
+ *
+ * @param sortedDocIds used to map sortedDocIds[immutableId] = mutableId (based on RecordReader iteration order)
+ * @return int[] used to map output[mutableId] = immutableId, or null if sortedDocIds is null
+ */
+ private int[] getImmutableToMutableIdMap(@Nullable int[] sortedDocIds) {
+ if (sortedDocIds == null) {
+ return null;
+ }
+ int[] res = new int[sortedDocIds.length];
+ for (int i = 0; i < res.length; i++) {
+ res[sortedDocIds[i]] = i;
+ }
+ return res;
+ }
+
@Override
public void build()
throws Exception {
@@ -229,10 +252,19 @@ public void build()
int incompleteRowsFound = 0;
try {
+ // TODO: Eventually pull the doc Id sorting logic out of Record Reader so that all row oriented logic can be
+ // removed from this code.
+ int[] immutableToMutableIdMap = null;
+ if (_recordReader instanceof PinotSegmentRecordReader) {
+ immutableToMutableIdMap =
+ getImmutableToMutableIdMap(((PinotSegmentRecordReader) _recordReader).getSortedDocIds());
+ }
+
// Initialize the index creation using the per-column statistics information
// TODO: _indexCreationInfoMap holds the reference to all unique values on heap (ColumnIndexCreationInfo ->
// ColumnStatistics) throughout the segment creation. Find a way to release the memory early.
- _indexCreator.init(_config, _segmentIndexCreationInfo, _indexCreationInfoMap, _dataSchema, _tempIndexDir);
+ _indexCreator.init(_config, _segmentIndexCreationInfo, _indexCreationInfoMap, _dataSchema, _tempIndexDir,
+ immutableToMutableIdMap);
// Build the index
_recordReader.rewind();
@@ -299,19 +331,22 @@ public void buildByColumn(IndexSegment indexSegment)
LOGGER.info("Collected stats for {} documents", _totalDocs);
try {
+ // TODO: Eventually pull the doc Id sorting logic out of Record Reader so that all row oriented logic can be
+ // removed from this code.
+ int[] sortedDocIds = ((PinotSegmentRecordReader) _recordReader).getSortedDocIds();
+ int[] immutableToMutableIdMap = getImmutableToMutableIdMap(sortedDocIds);
+
// Initialize the index creation using the per-column statistics information
// TODO: _indexCreationInfoMap holds the reference to all unique values on heap (ColumnIndexCreationInfo ->
// ColumnStatistics) throughout the segment creation. Find a way to release the memory early.
- _indexCreator.init(_config, _segmentIndexCreationInfo, _indexCreationInfoMap, _dataSchema, _tempIndexDir);
+ _indexCreator.init(_config, _segmentIndexCreationInfo, _indexCreationInfoMap, _dataSchema, _tempIndexDir,
+ immutableToMutableIdMap);
// Build the indexes
LOGGER.info("Start building Index by column");
TreeSet<String> columns = _dataSchema.getPhysicalColumnNames();
- // TODO: Eventually pull the doc Id sorting logic out of Record Reader so that all row oriented logic can be
- // removed from this code.
- int[] sortedDocIds = ((PinotSegmentRecordReader) _recordReader).getSortedDocIds();
for (String col : columns) {
_indexCreator.indexColumn(col, sortedDocIds, indexSegment);
}
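
For illustration, here is a minimal standalone sketch of the inversion performed by getImmutableToMutableIdMap above, using hypothetical docIds: sortedDocIds[immutableId] = mutableId, and the returned array satisfies result[mutableId] = immutableId.

// Standalone sketch (hypothetical values) of the docId inversion above.
// sortedDocIds comes from PinotSegmentRecordReader: position = immutable docId,
// value = mutable docId; the inverse maps mutable docId -> immutable docId.
import java.util.Arrays;

public class DocIdInversionSketch {
  public static void main(String[] args) {
    int[] sortedDocIds = {2, 0, 3, 1}; // immutable docId i was read from mutable docId sortedDocIds[i]
    int[] immutableToMutableIdMap = new int[sortedDocIds.length];
    for (int i = 0; i < sortedDocIds.length; i++) {
      immutableToMutableIdMap[sortedDocIds[i]] = i;
    }
    // mutable docId 2 became immutable docId 0, mutable 0 became immutable 1, ...
    System.out.println(Arrays.toString(immutableToMutableIdMap)); // prints [1, 3, 0, 2]
  }
}
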
diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/text/LuceneTextIndexCreator.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/text/LuceneTextIndexCreator.java
index f14cf62bc63c..49306d9404af 100644
--- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/text/LuceneTextIndexCreator.java
+++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/text/LuceneTextIndexCreator.java
@@ -20,8 +20,11 @@
import java.io.File;
import java.io.IOException;
+import java.nio.ByteOrder;
import java.util.Arrays;
import java.util.HashSet;
+import javax.annotation.Nullable;
+import org.apache.commons.io.FileUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
@@ -29,8 +32,11 @@
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.pinot.segment.local.realtime.impl.invertedindex.RealtimeLuceneTextIndex;
@@ -41,6 +47,10 @@
import org.apache.pinot.segment.spi.creator.IndexCreationContext;
import org.apache.pinot.segment.spi.index.TextIndexConfig;
import org.apache.pinot.segment.spi.index.creator.DictionaryBasedInvertedIndexCreator;
+import org.apache.pinot.segment.spi.memory.PinotDataBuffer;
+import org.apache.pinot.segment.spi.store.SegmentDirectoryPaths;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
/**
@@ -49,12 +59,15 @@
* and realtime from {@link RealtimeLuceneTextIndex}
*/
public class LuceneTextIndexCreator extends AbstractTextIndexCreator {
+ private static final Logger LOGGER = LoggerFactory.getLogger(LuceneTextIndexCreator.class);
public static final String LUCENE_INDEX_DOC_ID_COLUMN_NAME = "DocID";
private final String _textColumn;
- private final Directory _indexDirectory;
- private final IndexWriter _indexWriter;
-
+ private final boolean _commitOnClose;
+ private final boolean _reuseMutableIndex;
+ private final File _indexFile;
+ private Directory _indexDirectory;
+ private IndexWriter _indexWriter;
private int _nextDocId = 0;
public static HashSet<String> getDefaultEnglishStopWordsSet() {
@@ -75,6 +88,7 @@ public static HashSet<String> getDefaultEnglishStopWordsSet() {
* @param segmentIndexDir segment index directory
* @param commit true if the index should be committed (at the end after all documents have
* been added), false if index should not be committed
+ * @param immutableToMutableIdMap immutableToMutableIdMap from segment conversion
* Note on commit:
* Once {@link SegmentColumnarIndexCreator}
* finishes indexing all documents/rows for the segment, we need to commit and close
@@ -90,14 +104,19 @@ public static HashSet<String> getDefaultEnglishStopWordsSet() {
* to offline), we close this lucene index writer to release resources but don't commit.
* @param config the text index config
*/
- public LuceneTextIndexCreator(String column, File segmentIndexDir, boolean commit, TextIndexConfig config) {
+ public LuceneTextIndexCreator(String column, File segmentIndexDir, boolean commit, boolean realtimeConversion,
+ @Nullable int[] immutableToMutableIdMap, TextIndexConfig config) {
_textColumn = column;
+ _commitOnClose = commit;
+
+ // to reuse the mutable index, it must be (1) not the realtime index, i.e. commit is set to true,
+ // and (2) happen during realtime segment conversion
+ _reuseMutableIndex = commit && realtimeConversion;
String luceneAnalyzerClass = config.getLuceneAnalyzerClass();
try {
// segment generation is always in V1 and later we convert (as part of post creation processing)
// to V3 if segmentVersion is set to V3 in SegmentGeneratorConfig.
- File indexFile = getV1TextIndexFile(segmentIndexDir);
- _indexDirectory = FSDirectory.open(indexFile.toPath());
+ _indexFile = getV1TextIndexFile(segmentIndexDir);
Analyzer luceneAnalyzer;
if (luceneAnalyzerClass.isEmpty() || luceneAnalyzerClass.equals(StandardAnalyzer.class.getName())) {
@@ -111,6 +130,15 @@ public LuceneTextIndexCreator(String column, File segmentIndexDir, boolean commi
indexWriterConfig.setRAMBufferSizeMB(config.getLuceneMaxBufferSizeMB());
indexWriterConfig.setCommitOnClose(commit);
indexWriterConfig.setUseCompoundFile(config.isLuceneUseCompoundFile());
+
+ if (_reuseMutableIndex) {
+ LOGGER.info("Reusing the realtime lucene index for segment {} and column {}", segmentIndexDir, column);
+ indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
+ convertMutableSegment(segmentIndexDir, immutableToMutableIdMap, indexWriterConfig);
+ return;
+ }
+
+ _indexDirectory = FSDirectory.open(_indexFile.toPath());
_indexWriter = new IndexWriter(_indexDirectory, indexWriterConfig);
} catch (ReflectiveOperationException e) {
throw new RuntimeException(
@@ -122,15 +150,102 @@ public LuceneTextIndexCreator(String column, File segmentIndexDir, boolean commi
}
public LuceneTextIndexCreator(IndexCreationContext context, TextIndexConfig indexConfig) {
- this(context.getFieldSpec().getName(), context.getIndexDir(), context.isTextCommitOnClose(), indexConfig);
+ this(context.getFieldSpec().getName(), context.getIndexDir(), context.isTextCommitOnClose(),
+ context.isRealtimeConversion(), context.getImmutableToMutableIdMap(), indexConfig);
}
public IndexWriter getIndexWriter() {
return _indexWriter;
}
+ /**
+ * Copy the mutable lucene index files to create an immutable lucene index
+ * @param segmentIndexDir segment index directory
+ * @param immutableToMutableIdMap immutableToMutableIdMap from segment conversion
+ * @param indexWriterConfig indexWriterConfig
+ */
+ private void convertMutableSegment(File segmentIndexDir, @Nullable int[] immutableToMutableIdMap,
+ IndexWriterConfig indexWriterConfig) {
+ try {
+ // Copy the mutable index to the v1 index location
+ File dest = getV1TextIndexFile(segmentIndexDir);
+ File mutableDir = getMutableIndexDir(segmentIndexDir);
+ FileUtils.copyDirectory(mutableDir, dest);
+
+ // Remove the copied write.lock file
+ File writeLock = new File(dest, "write.lock");
+ FileUtils.delete(writeLock);
+
+ // Call .forceMerge(1) on the copied index as the mutable index will likely contain many Lucene segments
+ try (Directory destDirectory = FSDirectory.open(dest.toPath());
+ IndexWriter indexWriter = new IndexWriter(destDirectory, indexWriterConfig)) {
+ indexWriter.forceMerge(1, true);
+ indexWriter.commit();
+
+ buildMappingFile(segmentIndexDir, _textColumn, destDirectory, immutableToMutableIdMap);
+ } catch (Exception e) {
+ throw new RuntimeException("Failed to build the mapping file during segment conversion: " + e);
+ }
+ } catch (IOException e) {
+ throw new RuntimeException("Failed to convert the mutable lucene index: " + e);
+ }
+ }
+
+ /**
+ * Generate the mapping file from mutable Pinot docId (stored within the Lucene index) to immutable Pinot docId using
+ * the immutableToMutableIdMap from segment conversion
+ * @param segmentIndexDir segment index directory
+ * @param column column name
+ * @param directory directory of the index
+ * @param immutableToMutableIdMap immutableToMutableIdMap from segment conversion
+ */
+ private void buildMappingFile(File segmentIndexDir, String column, Directory directory,
+ @Nullable int[] immutableToMutableIdMap)
+ throws IOException {
+ IndexReader indexReader = DirectoryReader.open(directory);
+ IndexSearcher indexSearcher = new IndexSearcher(indexReader);
+
+ int numDocs = indexSearcher.getIndexReader().numDocs();
+ int length = Integer.BYTES * numDocs;
+ File docIdMappingFile = new File(SegmentDirectoryPaths.findSegmentDirectory(segmentIndexDir),
+ column + V1Constants.Indexes.LUCENE_TEXT_INDEX_DOCID_MAPPING_FILE_EXTENSION);
+ String desc = "Text index docId mapping buffer: " + column;
+ try (PinotDataBuffer buffer = PinotDataBuffer.mapFile(docIdMappingFile, /* readOnly */ false, 0, length,
+ ByteOrder.LITTLE_ENDIAN, desc)) {
+ try {
+ // If immutableToMutableIdMap is null, then docIds should not change between the mutable and immutable segments.
+ // Therefore, the mapping file can be built without doing an additional docId conversion
+ if (immutableToMutableIdMap == null) {
+ for (int i = 0; i < numDocs; i++) {
+ Document document = indexSearcher.doc(i);
+ int pinotDocId = Integer.parseInt(document.get(LuceneTextIndexCreator.LUCENE_INDEX_DOC_ID_COLUMN_NAME));
+ buffer.putInt(i * Integer.BYTES, pinotDocId);
+ }
+ return;
+ }
+
+ for (int i = 0; i < numDocs; i++) {
+ Document document = indexSearcher.doc(i);
+ int mutablePinotDocId =
+ Integer.parseInt(document.get(LuceneTextIndexCreator.LUCENE_INDEX_DOC_ID_COLUMN_NAME));
+ int immutablePinotDocId = immutableToMutableIdMap[mutablePinotDocId];
+ buffer.putInt(i * Integer.BYTES, immutablePinotDocId);
+ }
+ } catch (Exception e) {
+ throw new RuntimeException(
+ "Caught exception while building mutable to immutable doc id mapping for text index column: " + column, e);
+ }
+ } finally {
+ indexReader.close();
+ }
+ }
+
@Override
public void add(String document) {
+ if (_reuseMutableIndex) {
+ return; // no-op
+ }
+
// text index on SV column
Document docToIndex = new Document();
docToIndex.add(new TextField(_textColumn, document, Field.Store.NO));
@@ -145,6 +260,10 @@ public void add(String document) {
@Override
public void add(String[] documents, int length) {
+ if (_reuseMutableIndex) {
+ return; // no-op
+ }
+
Document docToIndex = new Document();
// Whenever multiple fields with the same name appear in one document, both the
@@ -165,6 +284,9 @@ public void add(String[] documents, int length) {
@Override
public void seal() {
+ if (_reuseMutableIndex) {
+ return; // no-op
+ }
try {
// Do this one time operation of combining the multiple lucene index files (if any)
// into a single index file. Based on flush threshold and size of data, Lucene
@@ -190,12 +312,20 @@ public void seal() {
@Override
public void close()
throws IOException {
+ if (_reuseMutableIndex) {
+ return; // no-op
+ }
try {
// based on the commit flag set in IndexWriterConfig, this will decide to commit or not
_indexWriter.close();
_indexDirectory.close();
} catch (Exception e) {
throw new RuntimeException("Caught exception while closing the Lucene index for column: " + _textColumn, e);
+ } finally {
+ // remove leftover write.lock file, as well as artifacts from .commit() being called on the realtime index
+ if (!_commitOnClose) {
+ FileUtils.deleteQuietly(_indexFile);
+ }
}
}
@@ -203,4 +333,13 @@ private File getV1TextIndexFile(File indexDir) {
String luceneIndexDirectory = _textColumn + V1Constants.Indexes.LUCENE_V9_TEXT_INDEX_FILE_EXTENSION;
return new File(indexDir, luceneIndexDirectory);
}
+
+ private File getMutableIndexDir(File indexDir) {
+ // tmpSegmentName format: tmp-tableName__9__1__20240227T0254Z-1709002522086
+ String tmpSegmentName = indexDir.getParentFile().getName();
+ String segmentName = tmpSegmentName.substring(tmpSegmentName.indexOf("tmp-") + 4, tmpSegmentName.lastIndexOf('-'));
+ String mutableDir = indexDir.getParentFile().getParentFile().getParent() + "/consumers/" + segmentName + "/"
+ + _textColumn + V1Constants.Indexes.LUCENE_V9_TEXT_INDEX_FILE_EXTENSION;
+ return new File(mutableDir);
+ }
}
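
For reference, a hedged sketch of reading back the docId mapping file that buildMappingFile writes above: numDocs little-endian ints, where slot luceneDocId holds the corresponding Pinot docId. This uses plain java.nio rather than the PinotDataBuffer API, and the path handling is illustrative only.

// Illustrative reader for the mapping file written by buildMappingFile above:
// a flat array of little-endian ints indexed by Lucene docId. Not Pinot API.
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.channels.FileChannel;

public class DocIdMappingReaderSketch {
  public static int[] readLuceneToPinotDocIds(String mappingFilePath) throws IOException {
    try (RandomAccessFile raf = new RandomAccessFile(mappingFilePath, "r");
        FileChannel channel = raf.getChannel()) {
      ByteBuffer buffer = ByteBuffer.allocate((int) channel.size()).order(ByteOrder.LITTLE_ENDIAN);
      while (buffer.hasRemaining() && channel.read(buffer) >= 0) {
        // loop until the whole file is buffered
      }
      buffer.flip();
      int[] luceneToPinotDocId = new int[buffer.remaining() / Integer.BYTES];
      for (int luceneDocId = 0; luceneDocId < luceneToPinotDocId.length; luceneDocId++) {
        luceneToPinotDocId[luceneDocId] = buffer.getInt(luceneDocId * Integer.BYTES);
      }
      return luceneToPinotDocId;
    }
  }
}
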
diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/text/LuceneTextIndexReader.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/text/LuceneTextIndexReader.java
index 3a0efabe8c9a..07eb52f88b58 100644
--- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/text/LuceneTextIndexReader.java
+++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/text/LuceneTextIndexReader.java
@@ -171,7 +171,7 @@ public MutableRoaringBitmap getDocIds(String searchQuery) {
return docIds;
} catch (Exception e) {
String msg =
- "Caught excepttion while searching the text index for column:" + _column + " search query:" + searchQuery;
+ "Caught exception while searching the text index for column:" + _column + " search query:" + searchQuery;
throw new RuntimeException(msg, e);
}
}
diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/realtime/converter/RealtimeSegmentConverterTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/realtime/converter/RealtimeSegmentConverterTest.java
index ded9e85b692c..e4ed4bb396f9 100644
--- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/realtime/converter/RealtimeSegmentConverterTest.java
+++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/realtime/converter/RealtimeSegmentConverterTest.java
@@ -24,6 +24,8 @@
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
@@ -36,6 +38,7 @@
import org.apache.pinot.segment.local.io.writer.impl.DirectMemoryManager;
import org.apache.pinot.segment.local.realtime.impl.RealtimeSegmentConfig;
import org.apache.pinot.segment.local.realtime.impl.RealtimeSegmentStatsHistory;
+import org.apache.pinot.segment.local.realtime.impl.invertedindex.RealtimeLuceneTextIndexSearcherPool;
import org.apache.pinot.segment.local.segment.index.column.PhysicalColumnIndexContainer;
import org.apache.pinot.segment.local.segment.index.loader.IndexLoadingConfig;
import org.apache.pinot.segment.local.segment.store.SegmentLocalFSDirectory;
@@ -44,9 +47,12 @@
import org.apache.pinot.segment.spi.creator.SegmentVersion;
import org.apache.pinot.segment.spi.index.DictionaryIndexConfig;
import org.apache.pinot.segment.spi.index.StandardIndexes;
+import org.apache.pinot.segment.spi.index.TextIndexConfig;
import org.apache.pinot.segment.spi.index.column.ColumnIndexContainer;
import org.apache.pinot.segment.spi.index.metadata.SegmentMetadataImpl;
+import org.apache.pinot.segment.spi.index.reader.TextIndexReader;
import org.apache.pinot.segment.spi.store.SegmentDirectory;
+import org.apache.pinot.spi.config.table.FieldConfig;
import org.apache.pinot.spi.config.table.IndexConfig;
import org.apache.pinot.spi.config.table.IndexingConfig;
import org.apache.pinot.spi.config.table.SegmentZKPropsConfig;
@@ -58,6 +64,8 @@
import org.apache.pinot.spi.data.readers.GenericRow;
import org.apache.pinot.spi.utils.ReadMode;
import org.apache.pinot.spi.utils.builder.TableConfigBuilder;
+import org.roaringbitmap.buffer.ImmutableRoaringBitmap;
+import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import static org.testng.Assert.assertEquals;
@@ -101,7 +109,7 @@ public void testNoRecordsIndexedRowMajorSegmentBuilder()
throws Exception {
File tmpDir = new File(TMP_DIR, "tmp_" + System.currentTimeMillis());
TableConfig tableConfig =
- new TableConfigBuilder(TableType.OFFLINE).setTableName("testTable").setTimeColumnName(DATE_TIME_COLUMN)
+ new TableConfigBuilder(TableType.REALTIME).setTableName("testTable").setTimeColumnName(DATE_TIME_COLUMN)
.setInvertedIndexColumns(Lists.newArrayList(STRING_COLUMN1)).setSortedColumn(LONG_COLUMN1)
.setRangeIndexColumns(Lists.newArrayList(STRING_COLUMN2))
.setNoDictionaryColumns(Lists.newArrayList(LONG_COLUMN2))
@@ -167,7 +175,7 @@ public void test10RecordsIndexedRowMajorSegmentBuilder()
throws Exception {
File tmpDir = new File(TMP_DIR, "tmp_" + System.currentTimeMillis());
TableConfig tableConfig =
- new TableConfigBuilder(TableType.OFFLINE).setTableName("testTable")
+ new TableConfigBuilder(TableType.REALTIME).setTableName("testTable")
.setTimeColumnName(DATE_TIME_COLUMN)
.setInvertedIndexColumns(Lists.newArrayList(STRING_COLUMN1, LONG_COLUMN1))
.setSortedColumn(LONG_COLUMN1)
@@ -252,7 +260,7 @@ public void testNoRecordsIndexedColumnMajorSegmentBuilder()
throws Exception {
File tmpDir = new File(TMP_DIR, "tmp_" + System.currentTimeMillis());
TableConfig tableConfig =
- new TableConfigBuilder(TableType.OFFLINE).setTableName("testTable").setTimeColumnName(DATE_TIME_COLUMN)
+ new TableConfigBuilder(TableType.REALTIME).setTableName("testTable").setTimeColumnName(DATE_TIME_COLUMN)
.setInvertedIndexColumns(Lists.newArrayList(STRING_COLUMN1)).setSortedColumn(LONG_COLUMN1)
.setRangeIndexColumns(Lists.newArrayList(STRING_COLUMN2))
.setNoDictionaryColumns(Lists.newArrayList(LONG_COLUMN2))
@@ -319,7 +327,7 @@ public void test10RecordsIndexedColumnMajorSegmentBuilder()
throws Exception {
File tmpDir = new File(TMP_DIR, "tmp_" + System.currentTimeMillis());
TableConfig tableConfig =
- new TableConfigBuilder(TableType.OFFLINE).setTableName("testTable")
+ new TableConfigBuilder(TableType.REALTIME).setTableName("testTable")
.setTimeColumnName(DATE_TIME_COLUMN)
.setInvertedIndexColumns(Lists.newArrayList(STRING_COLUMN1, LONG_COLUMN1))
.setSortedColumn(LONG_COLUMN1)
@@ -433,6 +441,130 @@ private void testSegment(List rows, File indexDir,
}
}
+ @DataProvider
+ public static Object[][] reuseParams() {
+ List<Boolean> enabledColumnMajorSegmentBuildParams = Arrays.asList(false, true);
+ String[] sortedColumnParams = new String[]{null, STRING_COLUMN1};
+
+ return enabledColumnMajorSegmentBuildParams.stream().flatMap(
+ columnMajor -> Arrays.stream(sortedColumnParams).map(sortedColumn -> new Object[]{columnMajor,
+ sortedColumn}))
+ .toArray(Object[][]::new);
+ }
+
+ // Test the realtime segment conversion of a table with an index that reuses mutable index artifacts during conversion
+ @Test(dataProvider = "reuseParams")
+ public void testSegmentBuilderWithReuse(boolean columnMajorSegmentBuilder, String sortedColumn)
+ throws Exception {
+ File tmpDir = new File(TMP_DIR, "tmp_" + System.currentTimeMillis());
+ FieldConfig textIndexFieldConfig =
+ new FieldConfig.Builder(STRING_COLUMN1).withEncodingType(FieldConfig.EncodingType.RAW)
+ .withIndexTypes(Collections.singletonList(FieldConfig.IndexType.TEXT)).build();
+ List<FieldConfig> fieldConfigList = Collections.singletonList(textIndexFieldConfig);
+ TableConfig tableConfig =
+ new TableConfigBuilder(TableType.REALTIME).setTableName("testTable").setTimeColumnName(DATE_TIME_COLUMN)
+ .setInvertedIndexColumns(Lists.newArrayList(STRING_COLUMN1))
+ .setSortedColumn(sortedColumn).setColumnMajorSegmentBuilderEnabled(columnMajorSegmentBuilder)
+ .setFieldConfigList(fieldConfigList).build();
+ Schema schema = new Schema.SchemaBuilder().addSingleValueDimension(STRING_COLUMN1, FieldSpec.DataType.STRING)
+ .addDateTime(DATE_TIME_COLUMN, FieldSpec.DataType.LONG, "1:MILLISECONDS:EPOCH", "1:MILLISECONDS").build();
+
+ String tableNameWithType = tableConfig.getTableName();
+ String segmentName = "testTable__0__0__123456";
+ IndexingConfig indexingConfig = tableConfig.getIndexingConfig();
+ TextIndexConfig textIndexConfig =
+ new TextIndexConfig(false, null, null, false, false, Collections.emptyList(), Collections.emptyList(), false,
+ 500, null, false);
+
+ RealtimeSegmentConfig.Builder realtimeSegmentConfigBuilder =
+ new RealtimeSegmentConfig.Builder().setTableNameWithType(tableNameWithType).setSegmentName(segmentName)
+ .setStreamName(tableNameWithType).setSchema(schema).setTimeColumnName(DATE_TIME_COLUMN).setCapacity(1000)
+ .setIndex(Sets.newHashSet(STRING_COLUMN1), StandardIndexes.inverted(), IndexConfig.ENABLED)
+ .setIndex(Sets.newHashSet(STRING_COLUMN1), StandardIndexes.text(), textIndexConfig)
+ .setFieldConfigList(fieldConfigList).setSegmentZKMetadata(getSegmentZKMetadata(segmentName))
+ .setOffHeap(true).setMemoryManager(new DirectMemoryManager(segmentName))
+ .setStatsHistory(RealtimeSegmentStatsHistory.deserialzeFrom(new File(tmpDir, "stats")))
+ .setConsumerDir(new File(tmpDir, "consumers").getAbsolutePath());
+
+ // create mutable segment impl
+ RealtimeLuceneTextIndexSearcherPool.init(1);
+ MutableSegmentImpl mutableSegmentImpl = new MutableSegmentImpl(realtimeSegmentConfigBuilder.build(), null);
+ List<GenericRow> rows = generateTestDataForReusePath();
+
+ for (GenericRow row : rows) {
+ mutableSegmentImpl.index(row, null);
+ }
+
+ // build converted segment
+ File outputDir = new File(new File(tmpDir, segmentName), "tmp-" + segmentName + "-" + System.currentTimeMillis());
+ SegmentZKPropsConfig segmentZKPropsConfig = new SegmentZKPropsConfig();
+ segmentZKPropsConfig.setStartOffset("1");
+ segmentZKPropsConfig.setEndOffset("100");
+ ColumnIndicesForRealtimeTable cdc = new ColumnIndicesForRealtimeTable(sortedColumn,
+ indexingConfig.getInvertedIndexColumns(), Collections.singletonList(STRING_COLUMN1), null,
+ indexingConfig.getNoDictionaryColumns(), indexingConfig.getVarLengthDictionaryColumns());
+ RealtimeSegmentConverter converter =
+ new RealtimeSegmentConverter(mutableSegmentImpl, segmentZKPropsConfig, outputDir.getAbsolutePath(), schema,
+ tableNameWithType, tableConfig, segmentName, cdc, false);
+ converter.build(SegmentVersion.v3, null);
+
+ File indexDir = new File(outputDir, segmentName);
+ SegmentMetadataImpl segmentMetadata = new SegmentMetadataImpl(indexDir);
+ assertEquals(segmentMetadata.getVersion(), SegmentVersion.v3);
+ assertEquals(segmentMetadata.getTotalDocs(), rows.size());
+ assertEquals(segmentMetadata.getTimeColumn(), DATE_TIME_COLUMN);
+ assertEquals(segmentMetadata.getTimeUnit(), TimeUnit.MILLISECONDS);
+
+ long expectedStartTime = (long) rows.get(0).getValue(DATE_TIME_COLUMN);
+ assertEquals(segmentMetadata.getStartTime(), expectedStartTime);
+ long expectedEndTime = (long) rows.get(rows.size() - 1).getValue(DATE_TIME_COLUMN);
+ assertEquals(segmentMetadata.getEndTime(), expectedEndTime);
+
+ assertTrue(segmentMetadata.getAllColumns().containsAll(schema.getColumnNames()));
+ assertEquals(segmentMetadata.getStartOffset(), "1");
+ assertEquals(segmentMetadata.getEndOffset(), "100");
+
+ // read converted segment
+ SegmentLocalFSDirectory segmentDir = new SegmentLocalFSDirectory(indexDir, segmentMetadata, ReadMode.mmap);
+ SegmentDirectory.Reader segmentReader = segmentDir.createReader();
+
+ Map<String, ColumnIndexContainer> indexContainerMap = new HashMap<>();
+ Map<String, ColumnMetadata> columnMetadataMap = segmentMetadata.getColumnMetadataMap();
+ IndexLoadingConfig indexLoadingConfig = new IndexLoadingConfig(null, tableConfig);
+ for (Map.Entry<String, ColumnMetadata> entry : columnMetadataMap.entrySet()) {
+ indexContainerMap.put(entry.getKey(),
+ new PhysicalColumnIndexContainer(segmentReader, entry.getValue(), indexLoadingConfig));
+ }
+ ImmutableSegmentImpl segmentFile = new ImmutableSegmentImpl(segmentDir, segmentMetadata, indexContainerMap, null);
+
+ // test forward index contents
+ GenericRow readRow = new GenericRow();
+ int docId = 0;
+ for (int i = 0; i < rows.size(); i++) {
+ GenericRow row;
+ if (sortedColumn == null) {
+ row = rows.get(i);
+ } else {
+ row = rows.get(rows.size() - i - 1);
+ }
+
+ segmentFile.getRecord(docId, readRow);
+ assertEquals(readRow.getValue(STRING_COLUMN1), row.getValue(STRING_COLUMN1));
+ assertEquals(readRow.getValue(DATE_TIME_COLUMN), row.getValue(DATE_TIME_COLUMN));
+ docId += 1;
+ }
+
+ // test docId conversion
+ TextIndexReader textIndexReader = segmentFile.getIndex(STRING_COLUMN1, StandardIndexes.text());
+ if (sortedColumn == null) {
+ assertEquals(textIndexReader.getDocIds("str-8"), ImmutableRoaringBitmap.bitmapOf(0));
+ assertEquals(textIndexReader.getDocIds("str-4"), ImmutableRoaringBitmap.bitmapOf(4));
+ } else {
+ assertEquals(textIndexReader.getDocIds("str-8"), ImmutableRoaringBitmap.bitmapOf(7));
+ assertEquals(textIndexReader.getDocIds("str-4"), ImmutableRoaringBitmap.bitmapOf(3));
+ }
+ }
+
private List<GenericRow> generateTestData() {
LinkedList<GenericRow> rows = new LinkedList<>();
@@ -457,6 +589,19 @@ private List<GenericRow> generateTestData() {
return rows;
}
+ private List<GenericRow> generateTestDataForReusePath() {
+ List<GenericRow> rows = new LinkedList<>();
+
+ for (int i = 0; i < 8; i++) {
+ GenericRow row = new GenericRow();
+ row.putValue(STRING_COLUMN1, "str" + (i - 8));
+ row.putValue(DATE_TIME_COLUMN, 1697814309L + i);
+ rows.add(row);
+ }
+
+ return rows;
+ }
+
private SegmentZKMetadata getSegmentZKMetadata(String segmentName) {
SegmentZKMetadata segmentZKMetadata = new SegmentZKMetadata(segmentName);
segmentZKMetadata.setCreationTime(System.currentTimeMillis());
diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/FilePerIndexDirectoryTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/FilePerIndexDirectoryTest.java
index f60de6d12d22..38eae8436ec8 100644
--- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/FilePerIndexDirectoryTest.java
+++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/FilePerIndexDirectoryTest.java
@@ -204,8 +204,8 @@ public void testRemoveTextIndices()
TextIndexConfig config =
new TextIndexConfig(false, null, null, false, false, null, null, true, 500, null, false);
try (FilePerIndexDirectory fpi = new FilePerIndexDirectory(TEMP_DIR, _segmentMetadata, ReadMode.mmap);
- LuceneTextIndexCreator fooCreator = new LuceneTextIndexCreator("foo", TEMP_DIR, true, config);
- LuceneTextIndexCreator barCreator = new LuceneTextIndexCreator("bar", TEMP_DIR, true, config)) {
+ LuceneTextIndexCreator fooCreator = new LuceneTextIndexCreator("foo", TEMP_DIR, true, false, null, config);
+ LuceneTextIndexCreator barCreator = new LuceneTextIndexCreator("bar", TEMP_DIR, true, false, null, config)) {
PinotDataBuffer buf = fpi.newBuffer("col1", StandardIndexes.forward(), 1024);
buf.putInt(0, 1);
@@ -236,8 +236,7 @@ public void testRemoveTextIndices()
new File(TEMP_DIR, "foo" + V1Constants.Indexes.LUCENE_TEXT_INDEX_DOCID_MAPPING_FILE_EXTENSION).exists());
}
assertTrue(new File(TEMP_DIR, "bar" + V1Constants.Indexes.LUCENE_V9_TEXT_INDEX_FILE_EXTENSION).exists());
- assertTrue(
- new File(TEMP_DIR, "bar" + V1Constants.Indexes.LUCENE_TEXT_INDEX_DOCID_MAPPING_FILE_EXTENSION).exists());
+ assertTrue(new File(TEMP_DIR, "bar" + V1Constants.Indexes.LUCENE_TEXT_INDEX_DOCID_MAPPING_FILE_EXTENSION).exists());
// Read indices back and check the content.
try (FilePerIndexDirectory fpi = new FilePerIndexDirectory(TEMP_DIR, _segmentMetadata, ReadMode.mmap)) {
@@ -268,8 +267,8 @@ public void testGetColumnIndices()
new TextIndexConfig(false, null, null, false, false, null, null, true, 500, null, false);
// Write sth to buffers and flush them to index files on disk
try (FilePerIndexDirectory fpi = new FilePerIndexDirectory(TEMP_DIR, _segmentMetadata, ReadMode.mmap);
- LuceneTextIndexCreator fooCreator = new LuceneTextIndexCreator("foo", TEMP_DIR, true, config);
- LuceneTextIndexCreator barCreator = new LuceneTextIndexCreator("bar", TEMP_DIR, true, config)) {
+ LuceneTextIndexCreator fooCreator = new LuceneTextIndexCreator("foo", TEMP_DIR, true, false, null, config);
+ LuceneTextIndexCreator barCreator = new LuceneTextIndexCreator("bar", TEMP_DIR, true, false, null, config)) {
PinotDataBuffer buf = fpi.newBuffer("col1", StandardIndexes.forward(), 1024);
buf.putInt(0, 111);
buf = fpi.newBuffer("col2", StandardIndexes.dictionary(), 1024);
diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/SingleFileIndexDirectoryTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/SingleFileIndexDirectoryTest.java
index 28494666362b..7f0dcebb05f8 100644
--- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/SingleFileIndexDirectoryTest.java
+++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/SingleFileIndexDirectoryTest.java
@@ -237,8 +237,8 @@ public void testRemoveTextIndices()
TextIndexConfig config =
new TextIndexConfig(false, null, null, false, false, null, null, true, 500, null, false);
try (SingleFileIndexDirectory sfd = new SingleFileIndexDirectory(TEMP_DIR, _segmentMetadata, ReadMode.mmap);
- LuceneTextIndexCreator fooCreator = new LuceneTextIndexCreator("foo", TEMP_DIR, true, config);
- LuceneTextIndexCreator barCreator = new LuceneTextIndexCreator("bar", TEMP_DIR, true, config)) {
+ LuceneTextIndexCreator fooCreator = new LuceneTextIndexCreator("foo", TEMP_DIR, true, false, null, config);
+ LuceneTextIndexCreator barCreator = new LuceneTextIndexCreator("bar", TEMP_DIR, true, false, null, config)) {
PinotDataBuffer buf = sfd.newBuffer("col1", StandardIndexes.forward(), 1024);
buf.putInt(0, 1);
@@ -343,8 +343,8 @@ public void testGetColumnIndices()
TextIndexConfig config =
new TextIndexConfig(false, null, null, false, false, null, null, true, 500, null, false);
try (SingleFileIndexDirectory sfd = new SingleFileIndexDirectory(TEMP_DIR, _segmentMetadata, ReadMode.mmap);
- LuceneTextIndexCreator fooCreator = new LuceneTextIndexCreator("foo", TEMP_DIR, true, config);
- LuceneTextIndexCreator barCreator = new LuceneTextIndexCreator("bar", TEMP_DIR, true, config)) {
+ LuceneTextIndexCreator fooCreator = new LuceneTextIndexCreator("foo", TEMP_DIR, true, false, null, config);
+ LuceneTextIndexCreator barCreator = new LuceneTextIndexCreator("bar", TEMP_DIR, true, false, null, config)) {
PinotDataBuffer buf = sfd.newBuffer("col1", StandardIndexes.forward(), 1024);
buf.putInt(0, 111);
buf = sfd.newBuffer("col2", StandardIndexes.dictionary(), 1024);
diff --git a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/IndexCreationContext.java b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/IndexCreationContext.java
index 52df382efa2d..3ebe041e877e 100644
--- a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/IndexCreationContext.java
+++ b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/IndexCreationContext.java
@@ -91,6 +91,20 @@ public interface IndexCreationContext {
boolean isTextCommitOnClose();
ColumnStatistics getColumnStatistics();
+ /**
+ * Flags whether the index creation is done during realtime segment conversion
+ * @return true if the index is created as part of realtime segment conversion
+ */
+ boolean isRealtimeConversion();
+
+ /**
+ * Returns the immutableToMutableIdMap generated in {@link SegmentIndexCreationDriver}
+ *
+ * This allows index creation during realtime segment conversion to take advantage of the mutable to
+ * immutable docId mapping
+ * @return array where array[mutableDocId] = immutableDocId, or null if docIds are unchanged
+ */
+ int[] getImmutableToMutableIdMap();
final class Builder {
private ColumnStatistics _columnStatistics;
@@ -112,6 +126,8 @@ final class Builder {
private boolean _optimizedDictionary;
private boolean _fixedLength;
private boolean _textCommitOnClose;
+ private boolean _realtimeConversion = false;
+ private int[] _immutableToMutableIdMap;
public Builder withColumnIndexCreationInfo(ColumnIndexCreationInfo columnIndexCreationInfo) {
return withLengthOfLongestEntry(columnIndexCreationInfo.getLengthOfLongestEntry())
@@ -229,11 +245,22 @@ public Builder withTextCommitOnClose(boolean textCommitOnClose) {
return this;
}
+ public Builder withRealtimeConversion(boolean realtimeConversion) {
+ _realtimeConversion = realtimeConversion;
+ return this;
+ }
+
+ public Builder withImmutableToMutableIdMap(int[] immutableToMutableIdMap) {
+ _immutableToMutableIdMap = immutableToMutableIdMap;
+ return this;
+ }
+
public Common build() {
return new Common(Objects.requireNonNull(_indexDir), _lengthOfLongestEntry, _maxNumberOfMultiValueElements,
_maxRowLengthInBytes, _onHeap, Objects.requireNonNull(_fieldSpec), _sorted, _cardinality,
_totalNumberOfEntries, _totalDocs, _hasDictionary, _minValue, _maxValue, _forwardIndexDisabled,
- _sortedUniqueElementsArray, _optimizedDictionary, _fixedLength, _textCommitOnClose, _columnStatistics);
+ _sortedUniqueElementsArray, _optimizedDictionary, _fixedLength, _textCommitOnClose, _columnStatistics,
+ _realtimeConversion, _immutableToMutableIdMap);
}
public Builder withSortedUniqueElementsArray(Object sortedUniqueElementsArray) {
@@ -267,13 +294,16 @@ final class Common implements IndexCreationContext {
private final boolean _fixedLength;
private final boolean _textCommitOnClose;
private final ColumnStatistics _columnStatistics;
+ private final boolean _realtimeConversion;
+ private final int[] _immutableToMutableIdMap;
public Common(File indexDir, int lengthOfLongestEntry,
int maxNumberOfMultiValueElements, int maxRowLengthInBytes, boolean onHeap,
FieldSpec fieldSpec, boolean sorted, int cardinality, int totalNumberOfEntries,
int totalDocs, boolean hasDictionary, Comparable<?> minValue, Comparable<?> maxValue,
- boolean forwardIndexDisabled, Object sortedUniqueElementsArray, boolean optimizeDictionary,
- boolean fixedLength, boolean textCommitOnClose, ColumnStatistics columnStatistics) {
+ boolean forwardIndexDisabled, Object sortedUniqueElementsArray, boolean optimizeDictionary, boolean fixedLength,
+ boolean textCommitOnClose, ColumnStatistics columnStatistics, boolean realtimeConversion,
+ int[] immutableToMutableIdMap) {
_indexDir = indexDir;
_lengthOfLongestEntry = lengthOfLongestEntry;
_maxNumberOfMultiValueElements = maxNumberOfMultiValueElements;
@@ -293,6 +323,8 @@ public Common(File indexDir, int lengthOfLongestEntry,
_fixedLength = fixedLength;
_textCommitOnClose = textCommitOnClose;
_columnStatistics = columnStatistics;
+ _realtimeConversion = realtimeConversion;
+ _immutableToMutableIdMap = immutableToMutableIdMap;
}
public FieldSpec getFieldSpec() {
@@ -378,5 +410,15 @@ public boolean isTextCommitOnClose() {
public ColumnStatistics getColumnStatistics() {
return _columnStatistics;
}
+
+ @Override
+ public boolean isRealtimeConversion() {
+ return _realtimeConversion;
+ }
+
+ @Override
+ public int[] getImmutableToMutableIdMap() {
+ return _immutableToMutableIdMap;
+ }
}
}
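
To show how the two new context fields are meant to be populated, a hedged sketch mirroring the SegmentColumnarIndexCreator hunk earlier in this patch. The builder() factory, withIndexDir(), and withFieldSpec() are assumed from the pre-existing SPI; the column name and values are placeholders.

// Hypothetical wiring of the new builder flags (placeholder values; assumes the
// pre-existing IndexCreationContext.builder(), withIndexDir() and withFieldSpec()).
import java.io.File;
import org.apache.pinot.segment.spi.creator.IndexCreationContext;
import org.apache.pinot.spi.data.DimensionFieldSpec;
import org.apache.pinot.spi.data.FieldSpec;

public class IndexCreationContextSketch {
  public static IndexCreationContext buildContext(File tempIndexDir, int[] immutableToMutableIdMap) {
    FieldSpec fieldSpec = new DimensionFieldSpec("textCol", FieldSpec.DataType.STRING, true);
    return IndexCreationContext.builder()
        .withIndexDir(tempIndexDir)                            // build() null-checks this
        .withFieldSpec(fieldSpec)                              // build() null-checks this
        .withTextCommitOnClose(true)                           // immutable build commits on close
        .withRealtimeConversion(true)                          // set when converting a consuming segment
        .withImmutableToMutableIdMap(immutableToMutableIdMap)  // may be null if docIds are unchanged
        .build();
  }
}
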
diff --git a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/SegmentCreator.java b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/SegmentCreator.java
index 9adda03b728e..dce1d5b1d40d 100644
--- a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/SegmentCreator.java
+++ b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/SegmentCreator.java
@@ -46,7 +46,8 @@ public interface SegmentCreator extends Closeable, Serializable {
* @throws Exception
*/
void init(SegmentGeneratorConfig segmentCreationSpec, SegmentIndexCreationInfo segmentIndexCreationInfo,
- TreeMap<String, ColumnIndexCreationInfo> indexCreationInfoMap, Schema schema, File outDir)
+ TreeMap<String, ColumnIndexCreationInfo> indexCreationInfoMap, Schema schema, File outDir,
+ @Nullable int[] immutableToMutableIdMap)
throws Exception;
/**
diff --git a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/SegmentGeneratorConfig.java b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/SegmentGeneratorConfig.java
index 6305dcd8522d..5381bdc43082 100644
--- a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/SegmentGeneratorConfig.java
+++ b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/SegmentGeneratorConfig.java
@@ -120,6 +120,7 @@ public enum TimeColumnType {
private boolean _optimizeDictionary = false;
private boolean _optimizeDictionaryForMetrics = false;
private double _noDictionarySizeRatioThreshold = IndexingConfig.DEFAULT_NO_DICTIONARY_SIZE_RATIO_THRESHOLD;
+ private boolean _realtimeConversion = false;
private final Map _indexConfigsByColName;
// constructed from FieldConfig
@@ -723,6 +724,14 @@ public double getNoDictionarySizeRatioThreshold() {
return _noDictionarySizeRatioThreshold;
}
+ public boolean isRealtimeConversion() {
+ return _realtimeConversion;
+ }
+
+ public void setRealtimeConversion(boolean realtimeConversion) {
+ _realtimeConversion = realtimeConversion;
+ }
+
public void setNoDictionarySizeRatioThreshold(double noDictionarySizeRatioThreshold) {
_noDictionarySizeRatioThreshold = noDictionarySizeRatioThreshold;
}
diff --git a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/TextIndexConfig.java b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/TextIndexConfig.java
index afbf7eb876af..6c400a16db31 100644
--- a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/TextIndexConfig.java
+++ b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/TextIndexConfig.java
@@ -232,7 +232,7 @@ public boolean equals(Object o) {
&& _fstType == that._fstType && Objects.equals(_rawValueForTextIndex, that._rawValueForTextIndex)
&& Objects.equals(_stopWordsInclude, that._stopWordsInclude) && Objects.equals(_stopWordsExclude,
that._stopWordsExclude) && _luceneUseCompoundFile == that._luceneUseCompoundFile
- && _luceneMaxBufferSizeMB == that._luceneMaxBufferSizeMB;
+ && _luceneMaxBufferSizeMB == that._luceneMaxBufferSizeMB && Objects.equals(_luceneAnalyzerClass, that._luceneAnalyzerClass);
}
@Override
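Context for the equals() fix above: _luceneAnalyzerClass is a String, so reference comparison with == can report two configs carrying the same analyzer class as unequal. A standalone illustration:

import java.util.Objects;

public final class StringEqualityDemo {
  public static void main(String[] args) {
    String a = "org.apache.lucene.analysis.standard.StandardAnalyzer";
    String b = new String(a); // same value, different reference
    System.out.println(a == b);               // false: compares references
    System.out.println(Objects.equals(a, b)); // true: compares values, null-safe
  }
}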
diff --git a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/mutable/MutableIndex.java b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/mutable/MutableIndex.java
index 2a9c4d9a56c6..dc3bdc98691d 100644
--- a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/mutable/MutableIndex.java
+++ b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/mutable/MutableIndex.java
@@ -60,4 +60,12 @@ public interface MutableIndex extends IndexReader {
* @param docId The document id of the given row. A non-negative value.
*/
void add(@Nonnull Object[] values, @Nullable int[] dictIds, int docId);
+
+ /**
+ * Commits the mutable index artifacts to disk in preparation for realtime segment conversion.
+ * Implementations should perform whatever work is needed before their on-disk artifacts can be
+ * reused to optimize the conversion of the consuming segment.
+ */
+ default void commit() {
+ }
}
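A sketch of the commit() contract under stated assumptions: the class below is hypothetical and not part of Pinot; it only mirrors the intended behavior of flushing buffered in-memory state so on-disk artifacts can be reused during realtime segment conversion.

import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;

// Hypothetical mutable index that buffers rows in memory and flushes on commit().
public final class BufferedTextIndexSketch {
  private final Path _indexFile;
  private final List<String> _buffer = new ArrayList<>();

  public BufferedTextIndexSketch(Path indexFile) {
    _indexFile = indexFile;
  }

  public void add(String value) {
    _buffer.add(value);
  }

  // Mirrors MutableIndex.commit(): make the on-disk artifacts consistent so a
  // segment converter can reuse them instead of re-ingesting every row.
  public void commit() {
    try {
      Files.write(_indexFile, _buffer, StandardCharsets.UTF_8);
    } catch (IOException e) {
      throw new UncheckedIOException(e);
    }
  }
}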
From c823430f11bf61f3150f70ac4db9eb2581f89d5e Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 15 Apr 2024 14:08:48 -0700
Subject: [PATCH 012/102] Bump io.netty:netty-bom from 4.1.108.Final to
4.1.109.Final (#12929)
---
pom.xml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/pom.xml b/pom.xml
index 6c836c52c93a..7f1819250b9a 100644
--- a/pom.xml
+++ b/pom.xml
@@ -160,7 +160,7 @@
0.20.0
2.23.1
2.0.12
- 4.1.108.Final
+ 4.1.109.Final
1.0.4
1.19.0
4.1.1
From af8fd4089c33db3c11e09ab74df64746fa324f2f Mon Sep 17 00:00:00 2001
From: swaminathanmanish <126024920+swaminathanmanish@users.noreply.github.com>
Date: Mon, 15 Apr 2024 14:43:18 -0700
Subject: [PATCH 013/102] Using local copy of segment instead of downloading
from remote (#12863)
---
...aseMultipleSegmentsConversionExecutor.java | 26 +++++++++----------
1 file changed, 12 insertions(+), 14 deletions(-)
diff --git a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/BaseMultipleSegmentsConversionExecutor.java b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/BaseMultipleSegmentsConversionExecutor.java
index e7ef8a4eea66..9d9db049827b 100644
--- a/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/BaseMultipleSegmentsConversionExecutor.java
+++ b/pinot-plugins/pinot-minion-tasks/pinot-minion-builtin-tasks/src/main/java/org/apache/pinot/plugin/minion/tasks/BaseMultipleSegmentsConversionExecutor.java
@@ -54,6 +54,7 @@
import org.apache.pinot.segment.spi.index.metadata.SegmentMetadataImpl;
import org.apache.pinot.spi.auth.AuthProvider;
import org.apache.pinot.spi.config.table.TableType;
+import org.apache.pinot.spi.filesystem.LocalPinotFS;
import org.apache.pinot.spi.filesystem.PinotFS;
import org.apache.pinot.spi.ingestion.batch.BatchConfigProperties;
import org.apache.pinot.spi.ingestion.batch.spec.PinotClusterSpec;
@@ -78,6 +79,7 @@
public abstract class BaseMultipleSegmentsConversionExecutor extends BaseTaskExecutor {
private static final Logger LOGGER = LoggerFactory.getLogger(BaseMultipleSegmentsConversionExecutor.class);
private static final String CUSTOM_SEGMENT_UPLOAD_CONTEXT_LINEAGE_ENTRY_ID = "lineageEntryId";
+ private static final PinotFS LOCAL_PINOT_FS = new LocalPinotFS();
private static final int DEFAULT_PUSH_ATTEMPTS = 5;
private static final int DEFAULT_PUSH_PARALLELISM = 1;
@@ -285,14 +287,11 @@ public List<SegmentConversionResult> executeTask(PinotTaskConfig pinotTaskConfig)
String pushMode =
configs.getOrDefault(BatchConfigProperties.PUSH_MODE, BatchConfigProperties.SegmentPushType.TAR.name());
- URI outputSegmentTarURI;
if (BatchConfigProperties.SegmentPushType.valueOf(pushMode.toUpperCase())
!= BatchConfigProperties.SegmentPushType.TAR) {
- outputSegmentTarURI = moveSegmentToOutputPinotFS(configs, convertedTarredSegmentFile);
+ URI outputSegmentTarURI = moveSegmentToOutputPinotFS(configs, convertedTarredSegmentFile);
LOGGER.info("Moved generated segment from [{}] to location: [{}]", convertedTarredSegmentFile,
outputSegmentTarURI);
- } else {
- outputSegmentTarURI = convertedTarredSegmentFile.toURI();
}
List<Header> httpHeaders = new ArrayList<>();
@@ -316,7 +315,7 @@ public List<SegmentConversionResult> executeTask(PinotTaskConfig pinotTaskConfig)
List<NameValuePair> parameters = Arrays.asList(enableParallelPushProtectionParameter, tableNameParameter,
tableTypeParameter);
- pushSegment(tableNameParameter.getValue(), configs, outputSegmentTarURI, httpHeaders, parameters,
+ pushSegment(tableNameParameter.getValue(), configs, convertedTarredSegmentFile.toURI(), httpHeaders, parameters,
segmentConversionResult);
if (!FileUtils.deleteQuietly(convertedTarredSegmentFile)) {
LOGGER.warn("Failed to delete tarred converted segment: {}", convertedTarredSegmentFile.getAbsolutePath());
@@ -338,12 +337,12 @@ public List<SegmentConversionResult> executeTask(PinotTaskConfig pinotTaskConfig)
}
}
- private void pushSegment(String tableName, Map<String, String> taskConfigs, URI outputSegmentTarURI,
+ private void pushSegment(String tableName, Map<String, String> taskConfigs, URI localSegmentTarURI,
List<Header> headers, List<NameValuePair> parameters, SegmentConversionResult segmentConversionResult)
throws Exception {
String pushMode =
taskConfigs.getOrDefault(BatchConfigProperties.PUSH_MODE, BatchConfigProperties.SegmentPushType.TAR.name());
- LOGGER.info("Trying to push Pinot segment with push mode {} from {}", pushMode, outputSegmentTarURI);
+ LOGGER.info("Trying to push Pinot segment with push mode {} from {}", pushMode, localSegmentTarURI);
PushJobSpec pushJobSpec = new PushJobSpec();
pushJobSpec.setPushAttempts(DEFAULT_PUSH_ATTEMPTS);
@@ -356,7 +355,7 @@ private void pushSegment(String tableName, Map<String, String> taskConfigs, URI
switch (BatchConfigProperties.SegmentPushType.valueOf(pushMode.toUpperCase())) {
case TAR:
- File tarFile = new File(outputSegmentTarURI);
+ File tarFile = new File(localSegmentTarURI);
String segmentName = segmentConversionResult.getSegmentName();
String tableNameWithType = segmentConversionResult.getTableNameWithType();
String uploadURL = taskConfigs.get(MinionConstants.UPLOAD_URL_KEY);
@@ -366,12 +365,11 @@ private void pushSegment(String tableName, Map<String, String> taskConfigs, URI
case METADATA:
if (taskConfigs.containsKey(BatchConfigProperties.OUTPUT_SEGMENT_DIR_URI)) {
URI outputSegmentDirURI = URI.create(taskConfigs.get(BatchConfigProperties.OUTPUT_SEGMENT_DIR_URI));
- try (PinotFS outputFileFS = MinionTaskUtils.getOutputPinotFS(taskConfigs, outputSegmentDirURI)) {
- Map<String, String> segmentUriToTarPathMap =
- SegmentPushUtils.getSegmentUriToTarPathMap(outputSegmentDirURI, pushJobSpec,
- new String[]{outputSegmentTarURI.toString()});
- SegmentPushUtils.sendSegmentUriAndMetadata(spec, outputFileFS, segmentUriToTarPathMap, headers, parameters);
- }
+ Map<String, String> segmentUriToTarPathMap =
+ SegmentPushUtils.getSegmentUriToTarPathMap(outputSegmentDirURI, pushJobSpec,
+ new String[]{localSegmentTarURI.toString()});
+ // Use local FS to avoid copying segment from deep store.
+ SegmentPushUtils.sendSegmentUriAndMetadata(spec, LOCAL_PINOT_FS, segmentUriToTarPathMap, headers, parameters);
} else {
throw new RuntimeException("Output dir URI missing for metadata push");
}
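The net effect of this patch, condensed into a sketch (moveToOutputFs is a hypothetical stand-in for the executor's moveSegmentToOutputPinotFS): non-TAR push modes still stage a copy on the output FS, but the push itself always reads the local tar.

import java.io.File;
import java.net.URI;

public final class PushFlowSketch {
  private PushFlowSketch() {
  }

  // Returns the URI the push should read from: always the local tar file.
  // Non-TAR modes additionally stage a copy on the output Pinot FS.
  public static URI resolvePushSource(String pushMode, File localTarFile) {
    if (!"TAR".equalsIgnoreCase(pushMode)) {
      URI staged = moveToOutputFs(localTarFile); // hypothetical stand-in
      System.out.println("Staged segment at: " + staged);
    }
    return localTarFile.toURI();
  }

  private static URI moveToOutputFs(File file) {
    // Placeholder: the real executor copies via PinotFS to the configured output dir.
    return file.toURI();
  }
}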
From 2459cfce752aafd0ab6ff3d2f1eb98ac57b7efaf Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 15 Apr 2024 16:07:44 -0700
Subject: [PATCH 014/102] Bump slf4j.version from 2.0.12 to 2.0.13 (#12928)
---
pom.xml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/pom.xml b/pom.xml
index 7f1819250b9a..22daf3bb303b 100644
--- a/pom.xml
+++ b/pom.xml
@@ -159,7 +159,7 @@
1.8.0
0.20.0
2.23.1
- <slf4j.version>2.0.12</slf4j.version>
+ <slf4j.version>2.0.13</slf4j.version>
4.1.109.Final
1.0.4
1.19.0
From 684cece35eb790b971f87952c2f2e260b0539c2e Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 15 Apr 2024 16:07:59 -0700
Subject: [PATCH 015/102] Bump org.apache.maven.plugins:maven-jar-plugin from
3.3.0 to 3.4.0 (#12927)
---
pom.xml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/pom.xml b/pom.xml
index 22daf3bb303b..7f8bbbb2ab68 100644
--- a/pom.xml
+++ b/pom.xml
@@ -130,7 +130,7 @@
org.apache.pinot.shaded
- 3.3.0
+ 3.4.0
1.11.3
1.13.1
From 013435a6a1ae883d6911ff8133a6111cec5c5b12 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 15 Apr 2024 16:08:56 -0700
Subject: [PATCH 016/102] Bump aws.sdk.version from 2.25.30 to 2.25.31 (#12926)
---
pom.xml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/pom.xml b/pom.xml
index 7f8bbbb2ab68..0c51fca07432 100644
--- a/pom.xml
+++ b/pom.xml
@@ -168,7 +168,7 @@
0.15.0
0.4.4
4.2.2
- <aws.sdk.version>2.25.30</aws.sdk.version>
+ <aws.sdk.version>2.25.31</aws.sdk.version>
2.12.7
3.1.12
7.10.1
From edf9d535975426a106991e3f61cce29079b47539 Mon Sep 17 00:00:00 2001
From: "Xiaotian (Jackie) Jiang"
<17555551+Jackie-Jiang@users.noreply.github.com>
Date: Mon, 15 Apr 2024 16:58:11 -0700
Subject: [PATCH 017/102] Specify version for commons-validator (#12935)
---
pom.xml | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/pom.xml b/pom.xml
index 0c51fca07432..6636cdaa892c 100644
--- a/pom.xml
+++ b/pom.xml
@@ -187,6 +187,7 @@
1.16.1
1.6.0
3.10.0
+ <commons-validator.version>1.8.0</commons-validator.version>
2.6
3.2.2
@@ -728,6 +729,11 @@
  <artifactId>commons-net</artifactId>
  <version>${commons-net.version}</version>
</dependency>
+ <dependency>
+   <groupId>commons-validator</groupId>
+   <artifactId>commons-validator</artifactId>
+   <version>${commons-validator.version}</version>
+ </dependency>
<dependency>
  <groupId>commons-lang</groupId>
From c08ba2ccd52db1643dbf9ff89a5bcfe89edb6da0 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 15 Apr 2024 17:01:30 -0700
Subject: [PATCH 018/102] Bump org.apache.maven.scm:maven-scm-provider-gitexe
from 2.0.1 to 2.1.0 (#12925)
---
pom.xml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/pom.xml b/pom.xml
index 6636cdaa892c..0b27acf9478f 100644
--- a/pom.xml
+++ b/pom.xml
@@ -1825,7 +1825,7 @@
  <groupId>org.apache.maven.scm</groupId>
  <artifactId>maven-scm-provider-gitexe</artifactId>
- <version>2.0.1</version>
+ <version>2.1.0</version>
From ec452a49f3c885308613bc45dfa44b48a16076ba Mon Sep 17 00:00:00 2001
From: "Xiaotian (Jackie) Jiang"
<17555551+Jackie-Jiang@users.noreply.github.com>
Date: Mon, 15 Apr 2024 17:02:18 -0700
Subject: [PATCH 019/102] Refine PeerServerSegmentFinder (#12933)
---
.../utils/fetcher/BaseSegmentFetcher.java | 9 +-
.../utils/fetcher/HttpSegmentFetcher.java | 28 ++--
.../core/util/PeerServerSegmentFinder.java | 101 +++++-------
.../utils/fetcher/HttpSegmentFetcherTest.java | 152 ++++++------------
.../PinotLLCRealtimeSegmentManager.java | 3 +-
.../PinotLLCRealtimeSegmentManagerTest.java | 91 ++++-------
.../data/manager/BaseTableDataManager.java | 9 +-
.../realtime/RealtimeTableDataManager.java | 28 ++--
.../manager/BaseTableDataManagerTest.java | 4 +-
.../util/PeerServerSegmentFinderTest.java | 128 +++++++--------
.../retry/ExponentialBackoffRetryPolicy.java | 6 +-
11 files changed, 220 insertions(+), 339 deletions(-)
diff --git a/pinot-common/src/main/java/org/apache/pinot/common/utils/fetcher/BaseSegmentFetcher.java b/pinot-common/src/main/java/org/apache/pinot/common/utils/fetcher/BaseSegmentFetcher.java
index d33c7ead4363..5fb82388f2b4 100644
--- a/pinot-common/src/main/java/org/apache/pinot/common/utils/fetcher/BaseSegmentFetcher.java
+++ b/pinot-common/src/main/java/org/apache/pinot/common/utils/fetcher/BaseSegmentFetcher.java
@@ -42,13 +42,13 @@ public abstract class BaseSegmentFetcher implements SegmentFetcher {
public static final String RETRY_DELAY_SCALE_FACTOR_CONFIG_KEY = "retry.delay.scale.factor";
public static final int DEFAULT_RETRY_COUNT = 3;
public static final int DEFAULT_RETRY_WAIT_MS = 100;
- public static final int DEFAULT_RETRY_DELAY_SCALE_FACTOR = 5;
+ public static final double DEFAULT_RETRY_DELAY_SCALE_FACTOR = 5;
protected final Logger _logger = LoggerFactory.getLogger(getClass().getSimpleName());
protected int _retryCount;
protected int _retryWaitMs;
- protected int _retryDelayScaleFactor;
+ protected double _retryDelayScaleFactor;
protected AuthProvider _authProvider;
@Override
@@ -58,9 +58,8 @@ public void init(PinotConfiguration config) {
_retryDelayScaleFactor = config.getProperty(RETRY_DELAY_SCALE_FACTOR_CONFIG_KEY, DEFAULT_RETRY_DELAY_SCALE_FACTOR);
_authProvider = AuthProviderUtils.extractAuthProvider(config, CommonConstants.KEY_OF_AUTH);
doInit(config);
- _logger
- .info("Initialized with retryCount: {}, retryWaitMs: {}, retryDelayScaleFactor: {}", _retryCount, _retryWaitMs,
- _retryDelayScaleFactor);
+ _logger.info("Initialized with retryCount: {}, retryWaitMs: {}, retryDelayScaleFactor: {}", _retryCount,
+ _retryWaitMs, _retryDelayScaleFactor);
}
/**
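The scale factor is now a double so fractional backoff is possible. A minimal sketch of the retry helper the fetcher relies on; the flaky callable here is illustrative only.

import java.util.concurrent.atomic.AtomicInteger;
import org.apache.pinot.spi.utils.retry.RetryPolicies;

public final class RetryPolicySketch {
  public static void main(String[] args) throws Exception {
    AtomicInteger attempts = new AtomicInteger();
    // 3 attempts, 100ms initial delay, 1.1x scale factor (fractional factors are
    // the reason the field became a double).
    RetryPolicies.exponentialBackoffRetryPolicy(3, 100L, 1.1).attempt(() -> {
      // Return true to stop retrying, false to retry with a longer delay.
      return attempts.incrementAndGet() == 3;
    });
    System.out.println("Succeeded after " + attempts.get() + " attempts");
  }
}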
diff --git a/pinot-common/src/main/java/org/apache/pinot/common/utils/fetcher/HttpSegmentFetcher.java b/pinot-common/src/main/java/org/apache/pinot/common/utils/fetcher/HttpSegmentFetcher.java
index 170327dc5b14..6872ac771464 100644
--- a/pinot-common/src/main/java/org/apache/pinot/common/utils/fetcher/HttpSegmentFetcher.java
+++ b/pinot-common/src/main/java/org/apache/pinot/common/utils/fetcher/HttpSegmentFetcher.java
@@ -44,23 +44,16 @@
public class HttpSegmentFetcher extends BaseSegmentFetcher {
protected FileUploadDownloadClient _httpClient;
- @Override
- protected void doInit(PinotConfiguration config) {
- _httpClient = new FileUploadDownloadClient(HttpClientConfig.newBuilder(config).build());
- }
-
- public HttpSegmentFetcher() {
- }
-
@VisibleForTesting
- protected HttpSegmentFetcher(FileUploadDownloadClient httpClient, PinotConfiguration config) {
+ void setHttpClient(FileUploadDownloadClient httpClient) {
_httpClient = httpClient;
- _retryCount = config.getProperty(RETRY_COUNT_CONFIG_KEY, DEFAULT_RETRY_COUNT);
- _retryWaitMs = config.getProperty(RETRY_WAIT_MS_CONFIG_KEY, DEFAULT_RETRY_WAIT_MS);
- _retryDelayScaleFactor = config.getProperty(RETRY_DELAY_SCALE_FACTOR_CONFIG_KEY, DEFAULT_RETRY_DELAY_SCALE_FACTOR);
- _logger
- .info("Initialized with retryCount: {}, retryWaitMs: {}, retryDelayScaleFactor: {}", _retryCount, _retryWaitMs,
- _retryDelayScaleFactor);
+ }
+
+ @Override
+ protected void doInit(PinotConfiguration config) {
+ if (_httpClient == null) {
+ _httpClient = new FileUploadDownloadClient(HttpClientConfig.newBuilder(config).build());
+ }
}
@Override
@@ -87,9 +80,8 @@ public void fetchSegmentToLocal(URI downloadURI, File dest)
httpHeaders.add(new BasicHeader(HttpHeaders.HOST, hostName + ":" + port));
}
int statusCode = _httpClient.downloadFile(uri, dest, _authProvider, httpHeaders);
- _logger
- .info("Downloaded segment from: {} to: {} of size: {}; Response status code: {}", uri, dest, dest.length(),
- statusCode);
+ _logger.info("Downloaded segment from: {} to: {} of size: {}; Response status code: {}", uri, dest,
+ dest.length(), statusCode);
return true;
} catch (HttpErrorStatusException e) {
int statusCode = e.getStatusCode();
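The refactor replaces the test-only constructor with a package-private setter, so doInit() only builds a real client when none was injected. A sketch of the resulting test seam; since setHttpClient is package-private, a helper like this is assumed to live in the same package.

package org.apache.pinot.common.utils.fetcher;

import org.apache.pinot.common.utils.FileUploadDownloadClient;
import org.apache.pinot.spi.env.PinotConfiguration;

import static org.mockito.Mockito.mock;

public final class MockedFetcherFactory {
  private MockedFetcherFactory() {
  }

  // Builds a fetcher whose HTTP client is a Mockito mock; doInit() keeps it.
  public static HttpSegmentFetcher newMockedFetcher() {
    HttpSegmentFetcher fetcher = new HttpSegmentFetcher();
    fetcher.setHttpClient(mock(FileUploadDownloadClient.class));
    fetcher.init(new PinotConfiguration());
    return fetcher;
  }
}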
diff --git a/pinot-common/src/main/java/org/apache/pinot/core/util/PeerServerSegmentFinder.java b/pinot-common/src/main/java/org/apache/pinot/core/util/PeerServerSegmentFinder.java
index e2c9d509f60a..7f26d759352d 100644
--- a/pinot-common/src/main/java/org/apache/pinot/core/util/PeerServerSegmentFinder.java
+++ b/pinot-common/src/main/java/org/apache/pinot/core/util/PeerServerSegmentFinder.java
@@ -19,21 +19,19 @@
package org.apache.pinot.core.util;
import java.net.URI;
-import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
-import org.apache.commons.collections.ListUtils;
import org.apache.helix.HelixAdmin;
import org.apache.helix.HelixManager;
import org.apache.helix.model.ExternalView;
import org.apache.helix.model.InstanceConfig;
-import org.apache.pinot.common.utils.LLCSegmentName;
-import org.apache.pinot.common.utils.helix.HelixHelper;
-import org.apache.pinot.spi.config.table.TableType;
import org.apache.pinot.spi.utils.CommonConstants;
+import org.apache.pinot.spi.utils.CommonConstants.Helix.Instance;
+import org.apache.pinot.spi.utils.CommonConstants.Helix.StateModel.SegmentStateModel;
+import org.apache.pinot.spi.utils.CommonConstants.Server;
import org.apache.pinot.spi.utils.StringUtil;
-import org.apache.pinot.spi.utils.builder.TableNameBuilder;
+import org.apache.pinot.spi.utils.retry.AttemptsExceededException;
import org.apache.pinot.spi.utils.retry.RetryPolicies;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -47,93 +45,74 @@ public class PeerServerSegmentFinder {
private PeerServerSegmentFinder() {
}
- private static final Logger _logger = LoggerFactory.getLogger(PeerServerSegmentFinder.class);
+ private static final Logger LOGGER = LoggerFactory.getLogger(PeerServerSegmentFinder.class);
private static final int MAX_NUM_ATTEMPTS = 5;
private static final int INITIAL_DELAY_MS = 500;
private static final double DELAY_SCALE_FACTOR = 2;
/**
- *
- * @param segmentName
- * @param downloadScheme Can be either http or https.
- * @param helixManager
- * @return a list of uri strings of the form http(s)://hostname:port/segments/tablenameWithType/segmentName
- * for the servers hosting ONLINE segments; empty list if no such server found.
+ * Returns a list of URIs of the form 'http(s)://hostname:port/segments/tableNameWithType/segmentName' for the
+ * servers hosting ONLINE replicas, or an empty list if no such server is found. The download scheme can be either 'http' or 'https'.
*/
- public static List<URI> getPeerServerURIs(String segmentName, String downloadScheme, HelixManager helixManager) {
- LLCSegmentName llcSegmentName = new LLCSegmentName(segmentName);
- String tableNameWithType =
- TableNameBuilder.forType(TableType.REALTIME).tableNameWithType(llcSegmentName.getTableName());
- return getPeerServerURIs(segmentName, downloadScheme, helixManager, tableNameWithType);
- }
-
- public static List<URI> getPeerServerURIs(String segmentName, String downloadScheme,
- HelixManager helixManager, String tableNameWithType) {
+ public static List<URI> getPeerServerURIs(HelixManager helixManager, String tableNameWithType, String segmentName,
+ String downloadScheme) {
HelixAdmin helixAdmin = helixManager.getClusterManagmentTool();
String clusterName = helixManager.getClusterName();
- if (clusterName == null) {
- _logger.error("ClusterName not found");
- return ListUtils.EMPTY_LIST;
- }
- final List<URI> onlineServerURIs = new ArrayList<>();
+ List<URI> onlineServerURIs = new ArrayList<>();
try {
RetryPolicies.exponentialBackoffRetryPolicy(MAX_NUM_ATTEMPTS, INITIAL_DELAY_MS, DELAY_SCALE_FACTOR)
.attempt(() -> {
- getOnlineServersFromExternalView(segmentName, downloadScheme, tableNameWithType, helixAdmin, clusterName,
+ getOnlineServersFromExternalView(helixAdmin, clusterName, tableNameWithType, segmentName, downloadScheme,
onlineServerURIs);
return !onlineServerURIs.isEmpty();
});
+ } catch (AttemptsExceededException e) {
+ LOGGER.error("Failed to find ONLINE servers for segment: {} in table: {} after {} attempts", segmentName,
+ tableNameWithType, MAX_NUM_ATTEMPTS);
} catch (Exception e) {
- _logger.error("Failure in getting online servers for segment {}", segmentName, e);
+ LOGGER.error("Caught exception while getting peer server URIs for segment: {} in table: {}", segmentName,
+ tableNameWithType, e);
}
return onlineServerURIs;
}
- private static void getOnlineServersFromExternalView(String segmentName, String downloadScheme,
- String tableNameWithType, HelixAdmin helixAdmin, String clusterName, List<URI> onlineServerURIs) {
- ExternalView externalViewForResource =
- HelixHelper.getExternalViewForResource(helixAdmin, clusterName, tableNameWithType);
- if (externalViewForResource == null) {
- _logger.warn("External View not found for table {}", tableNameWithType);
+ private static void getOnlineServersFromExternalView(HelixAdmin helixAdmin, String clusterName,
+ String tableNameWithType, String segmentName, String downloadScheme, List<URI> onlineServerURIs)
+ throws Exception {
+ ExternalView externalView = helixAdmin.getResourceExternalView(clusterName, tableNameWithType);
+ if (externalView == null) {
+ LOGGER.warn("Failed to find external view for table: {}", tableNameWithType);
return;
}
// Find out the ONLINE servers serving the segment.
- Map<String, String> instanceToStateMap = externalViewForResource.getStateMap(segmentName);
- for (Map.Entry<String, String> instanceState : instanceToStateMap.entrySet()) {
- if ("ONLINE".equals(instanceState.getValue())) {
+ Map<String, String> instanceStateMap = externalView.getStateMap(segmentName);
+ if (instanceStateMap == null) {
+ LOGGER.warn("Failed to find segment: {} in table: {}", segmentName, tableNameWithType);
+ return;
+ }
+ for (Map.Entry<String, String> instanceState : instanceStateMap.entrySet()) {
+ if (SegmentStateModel.ONLINE.equals(instanceState.getValue())) {
String instanceId = instanceState.getKey();
- _logger.info("Found ONLINE server {} for segment {}.", instanceId, segmentName);
+ LOGGER.info("Found ONLINE server: {} for segment: {} in table: {}", instanceId, segmentName, tableNameWithType);
InstanceConfig instanceConfig = helixAdmin.getInstanceConfig(clusterName, instanceId);
String hostName = instanceConfig.getHostName();
- int port = getServerAdminPort(helixAdmin, clusterName, instanceId, downloadScheme);
- try {
- onlineServerURIs.add(new URI(StringUtil
- .join("/", downloadScheme + "://" + hostName + ":" + port, "segments", tableNameWithType, segmentName)));
- } catch (URISyntaxException e) {
- _logger.warn("Error in uri syntax: ", e);
- }
+ String adminPortKey = getAdminPortKey(downloadScheme);
+ int port = instanceConfig.getRecord().getIntField(adminPortKey, Server.DEFAULT_ADMIN_API_PORT);
+ onlineServerURIs.add(new URI(
+ StringUtil.join("/", downloadScheme + "://" + hostName + ":" + port, "segments", tableNameWithType,
+ segmentName)));
}
}
}
- private static int getServerAdminPort(HelixAdmin helixAdmin, String clusterName, String instanceId,
- String downloadScheme) {
- try {
- return Integer.parseInt(HelixHelper.getInstanceConfigsMapFor(instanceId, clusterName, helixAdmin)
- .get(getServerAdminPortKey(downloadScheme)));
- } catch (Exception e) {
- _logger.warn("Failed to retrieve ADMIN PORT for instanceId {} in the cluster {} ", instanceId, clusterName, e);
- return CommonConstants.Helix.DEFAULT_SERVER_NETTY_PORT;
- }
- }
-
- private static String getServerAdminPortKey(String downloadScheme) {
+ private static String getAdminPortKey(String downloadScheme) {
switch (downloadScheme) {
- case CommonConstants.HTTPS_PROTOCOL:
- return CommonConstants.Helix.Instance.ADMIN_HTTPS_PORT_KEY;
case CommonConstants.HTTP_PROTOCOL:
+ return Instance.ADMIN_PORT_KEY;
+ case CommonConstants.HTTPS_PROTOCOL:
+ return Instance.ADMIN_HTTPS_PORT_KEY;
default:
- return CommonConstants.Helix.Instance.ADMIN_PORT_KEY;
+ throw new IllegalArgumentException("Unsupported download scheme: " + downloadScheme);
}
}
}
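Usage of the reworked lookup, with the table name now passed in explicitly rather than derived from an LLC segment name. A minimal sketch assuming the caller already holds a HelixManager:

import java.net.URI;
import java.util.List;
import org.apache.helix.HelixManager;
import org.apache.pinot.core.util.PeerServerSegmentFinder;
import org.apache.pinot.spi.utils.CommonConstants;

public final class PeerLookupSketch {
  private PeerLookupSketch() {
  }

  // Returns download URIs for ONLINE replicas, or an empty list after retries.
  public static List<URI> findPeers(HelixManager helixManager, String tableNameWithType, String segmentName) {
    return PeerServerSegmentFinder.getPeerServerURIs(helixManager, tableNameWithType, segmentName,
        CommonConstants.HTTP_PROTOCOL);
  }
}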
diff --git a/pinot-common/src/test/java/org/apache/pinot/common/utils/fetcher/HttpSegmentFetcherTest.java b/pinot-common/src/test/java/org/apache/pinot/common/utils/fetcher/HttpSegmentFetcherTest.java
index 3159168dab95..1a567901b947 100644
--- a/pinot-common/src/test/java/org/apache/pinot/common/utils/fetcher/HttpSegmentFetcherTest.java
+++ b/pinot-common/src/test/java/org/apache/pinot/common/utils/fetcher/HttpSegmentFetcherTest.java
@@ -19,153 +19,97 @@
package org.apache.pinot.common.utils.fetcher;
import java.io.File;
-import java.io.IOException;
import java.net.URI;
-import java.net.URISyntaxException;
-import java.util.ArrayList;
import java.util.List;
-import org.apache.helix.HelixManager;
-import org.apache.pinot.common.exception.HttpErrorStatusException;
+import java.util.function.Supplier;
+import org.apache.commons.io.FileUtils;
import org.apache.pinot.common.utils.FileUploadDownloadClient;
-import org.apache.pinot.core.util.PeerServerSegmentFinder;
import org.apache.pinot.spi.env.PinotConfiguration;
import org.apache.pinot.spi.utils.retry.AttemptsExceededException;
-import org.mockito.MockedStatic;
-import org.testng.Assert;
-import org.testng.annotations.BeforeSuite;
+import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.Mockito.mock;
-import static org.mockito.Mockito.mockStatic;
import static org.mockito.Mockito.when;
public class HttpSegmentFetcherTest {
- private MockedStatic<PeerServerSegmentFinder> _peerServerSegmentFinder = mockStatic(PeerServerSegmentFinder.class);
+ private static final String SEGMENT_NAME = "testSegment";
+ private static final File SEGMENT_FILE = new File(FileUtils.getTempDirectory(), SEGMENT_NAME);
+
private PinotConfiguration _fetcherConfig;
- @BeforeSuite
- public void initTest() {
+ @BeforeClass
+ public void setUp() {
_fetcherConfig = new PinotConfiguration();
_fetcherConfig.setProperty(BaseSegmentFetcher.RETRY_COUNT_CONFIG_KEY, 3);
+ _fetcherConfig.setProperty(BaseSegmentFetcher.RETRY_WAIT_MS_CONFIG_KEY, 10);
+ _fetcherConfig.setProperty(BaseSegmentFetcher.RETRY_DELAY_SCALE_FACTOR_CONFIG_KEY, 1.1);
+ }
+
+ private HttpSegmentFetcher getSegmentFetcher(FileUploadDownloadClient client) {
+ HttpSegmentFetcher segmentFetcher = new HttpSegmentFetcher();
+ segmentFetcher.setHttpClient(client);
+ segmentFetcher.init(_fetcherConfig);
+ return segmentFetcher;
}
@Test
public void testFetchSegmentToLocalSucceedAtFirstAttempt()
- throws URISyntaxException, IOException, HttpErrorStatusException {
+ throws Exception {
FileUploadDownloadClient client = mock(FileUploadDownloadClient.class);
when(client.downloadFile(any(), any(), any())).thenReturn(200);
- HttpSegmentFetcher httpSegmentFetcher = new HttpSegmentFetcher(client, _fetcherConfig);
- HelixManager helixManager = mock(HelixManager.class);
-
- List<URI> uris = new ArrayList<>();
- uris.add(new URI("http://h1:8080"));
- uris.add(new URI("http://h2:8080"));
- _peerServerSegmentFinder.when(() -> PeerServerSegmentFinder.getPeerServerURIs(any(), any(), any()))
- .thenReturn(uris);
- try {
- httpSegmentFetcher.fetchSegmentToLocal("seg",
- () -> PeerServerSegmentFinder.getPeerServerURIs("seg", "http", helixManager), new File("/file"));
- } catch (Exception e) {
- // If we reach here, the download fails.
- Assert.assertTrue(false, "Download segment failed");
- Assert.assertTrue(e instanceof AttemptsExceededException);
- }
- _peerServerSegmentFinder.reset();
+ HttpSegmentFetcher segmentFetcher = getSegmentFetcher(client);
+ List<URI> uris = List.of(new URI("http://h1:8080"), new URI("http://h2:8080"));
+ segmentFetcher.fetchSegmentToLocal(SEGMENT_NAME, () -> uris, SEGMENT_FILE);
}
- @Test
+ @Test(expectedExceptions = AttemptsExceededException.class)
public void testFetchSegmentToLocalAllDownloadAttemptsFailed()
- throws URISyntaxException, IOException, HttpErrorStatusException {
+ throws Exception {
FileUploadDownloadClient client = mock(FileUploadDownloadClient.class);
- // All three attempts fails.
- when(client.downloadFile(any(), any(), any())).thenReturn(300).thenReturn(300).thenReturn(300);
- HttpSegmentFetcher httpSegmentFetcher = new HttpSegmentFetcher(client, _fetcherConfig);
- HelixManager helixManager = mock(HelixManager.class);
- List<URI> uris = new ArrayList<>();
- uris.add(new URI("http://h1:8080"));
- uris.add(new URI("http://h2:8080"));
-
- _peerServerSegmentFinder.when(() -> PeerServerSegmentFinder.getPeerServerURIs(any(), any(), any()))
- .thenReturn(uris);
- try {
- httpSegmentFetcher.fetchSegmentToLocal("seg",
- () -> PeerServerSegmentFinder.getPeerServerURIs("seg", "http", helixManager), new File("/file"));
- // The test should not reach here because the fetch will throw exception.
- Assert.assertTrue(false, "Download segment failed");
- } catch (Exception e) {
- // If we reach here, the download fails.
- Assert.assertTrue(true, "Download segment failed");
- }
+ // All attempts failed
+ when(client.downloadFile(any(), any(), any())).thenReturn(300);
+ HttpSegmentFetcher segmentFetcher = getSegmentFetcher(client);
+ List<URI> uris = List.of(new URI("http://h1:8080"), new URI("http://h2:8080"));
+ segmentFetcher.fetchSegmentToLocal(SEGMENT_NAME, () -> uris, SEGMENT_FILE);
}
@Test
public void testFetchSegmentToLocalSuccessAfterRetry()
- throws URISyntaxException, IOException, HttpErrorStatusException {
+ throws Exception {
FileUploadDownloadClient client = mock(FileUploadDownloadClient.class);
- // the first two attempts failed until the last attempt succeeds
+ // The first two attempts failed and the last attempt succeeded
when(client.downloadFile(any(), any(), any())).thenReturn(300).thenReturn(300).thenReturn(200);
- HttpSegmentFetcher httpSegmentFetcher = new HttpSegmentFetcher(client, _fetcherConfig);
- HelixManager helixManager = mock(HelixManager.class);
- List<URI> uris = new ArrayList<>();
- uris.add(new URI("http://h1:8080"));
- uris.add(new URI("http://h2:8080"));
-
- _peerServerSegmentFinder.when(() -> PeerServerSegmentFinder.getPeerServerURIs(any(), any(), any()))
- .thenReturn(uris);
- try {
- httpSegmentFetcher.fetchSegmentToLocal("seg",
- () -> PeerServerSegmentFinder.getPeerServerURIs("seg", "http", helixManager), new File("/file"));
- } catch (Exception e) {
- // If we reach here, the download fails.
- Assert.assertTrue(false, "Download segment failed");
- }
+ HttpSegmentFetcher segmentFetcher = getSegmentFetcher(client);
+ List<URI> uris = List.of(new URI("http://h1:8080"), new URI("http://h2:8080"));
+ segmentFetcher.fetchSegmentToLocal(SEGMENT_NAME, () -> uris, SEGMENT_FILE);
}
@Test
public void testFetchSegmentToLocalSuccessAfterFirstTwoAttemptsFoundNoPeerServers()
- throws URISyntaxException, IOException, HttpErrorStatusException {
+ throws Exception {
FileUploadDownloadClient client = mock(FileUploadDownloadClient.class);
- // The download always succeeds.
+ // The download always succeeds
when(client.downloadFile(any(), any(), any())).thenReturn(200);
- HttpSegmentFetcher httpSegmentFetcher = new HttpSegmentFetcher(client, _fetcherConfig);
- HelixManager helixManager = mock(HelixManager.class);
- List<URI> uris = new ArrayList<>();
- uris.add(new URI("http://h1:8080"));
- uris.add(new URI("http://h2:8080"));
-
- // The first two attempts find NO peers hosting the segment but the last one found two servers.
- _peerServerSegmentFinder.when(() -> PeerServerSegmentFinder.getPeerServerURIs(any(), any(), any()))
- .thenReturn(List.of()).thenReturn(List.of()).thenReturn(uris);
- try {
- httpSegmentFetcher.fetchSegmentToLocal("seg",
- () -> PeerServerSegmentFinder.getPeerServerURIs("seg", "http", helixManager), new File("/file"));
- } catch (Exception e) {
- // If we reach here, the download fails.
- Assert.assertTrue(false, "Download segment failed");
- }
+ HttpSegmentFetcher segmentFetcher = getSegmentFetcher(client);
+ List<URI> uris = List.of(new URI("http://h1:8080"), new URI("http://h2:8080"));
+ // The first two attempts found NO peers hosting the segment, and the last one found two servers
+ //noinspection unchecked
+ Supplier<List<URI>> uriSupplier = mock(Supplier.class);
+ when(uriSupplier.get()).thenReturn(List.of()).thenReturn(List.of()).thenReturn(uris);
+ segmentFetcher.fetchSegmentToLocal(SEGMENT_NAME, uriSupplier, SEGMENT_FILE);
}
- @Test
+ @Test(expectedExceptions = AttemptsExceededException.class)
public void testFetchSegmentToLocalFailureWithNoPeerServers()
- throws IOException, HttpErrorStatusException {
+ throws Exception {
FileUploadDownloadClient client = mock(FileUploadDownloadClient.class);
- // the download always succeeds.
+ // The download always succeeds
when(client.downloadFile(any(), any(), any())).thenReturn(200);
- HttpSegmentFetcher httpSegmentFetcher = new HttpSegmentFetcher(client, _fetcherConfig);
- HelixManager helixManager = mock(HelixManager.class);
-
- _peerServerSegmentFinder.when(() -> PeerServerSegmentFinder.getPeerServerURIs(any(), any(), any()))
- .thenReturn(List.of()).thenReturn(List.of()).thenReturn(List.of());
- try {
- httpSegmentFetcher.fetchSegmentToLocal("seg",
- () -> PeerServerSegmentFinder.getPeerServerURIs("seg", "http", helixManager), new File("/file"));
- // The test should not reach here because the fetch will throw exception.
- Assert.assertTrue(false, "Download segment failed");
- } catch (Exception e) {
- Assert.assertTrue(true, "Download segment failed");
- Assert.assertTrue(e instanceof AttemptsExceededException);
- }
+ HttpSegmentFetcher segmentFetcher = getSegmentFetcher(client);
+ List<URI> uris = List.of();
+ segmentFetcher.fetchSegmentToLocal(SEGMENT_NAME, () -> uris, SEGMENT_FILE);
}
}
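These tests depend on fetchSegmentToLocal re-invoking the URI supplier on every retry, which is how an initially empty peer list can still succeed. A caller sketch; the segment name is illustrative.

import java.io.File;
import java.net.URI;
import java.util.List;
import java.util.function.Supplier;
import org.apache.pinot.common.utils.fetcher.HttpSegmentFetcher;

public final class PeerFetchSketch {
  private PeerFetchSketch() {
  }

  // The supplier is evaluated on each attempt, so an initially empty peer list
  // can still succeed once a replica comes ONLINE.
  public static void fetch(HttpSegmentFetcher fetcher, Supplier<List<URI>> peerFinder, File dest)
      throws Exception {
    fetcher.fetchSegmentToLocal("mySegment", peerFinder, dest);
  }
}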
diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java
index 25e40084ab39..838a03a268a4 100644
--- a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java
+++ b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManager.java
@@ -1483,7 +1483,8 @@ public void uploadToDeepStoreIfMissing(TableConfig tableConfig, List<SegmentZKMetadata>
List<URI> peerSegmentURIs =
- PeerServerSegmentFinder.getPeerServerURIs(segmentName, CommonConstants.HTTP_PROTOCOL, _helixManager);
+ PeerServerSegmentFinder.getPeerServerURIs(_helixManager, realtimeTableName, segmentName,
+ CommonConstants.HTTP_PROTOCOL);
if (peerSegmentURIs.isEmpty()) {
throw new IllegalStateException(
String.format("Failed to upload segment %s to deep store because no online replica is found",
diff --git a/pinot-controller/src/test/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManagerTest.java b/pinot-controller/src/test/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManagerTest.java
index 60b83ba24ae1..f0496a8ee7e2 100644
--- a/pinot-controller/src/test/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManagerTest.java
+++ b/pinot-controller/src/test/java/org/apache/pinot/controller/helix/core/realtime/PinotLLCRealtimeSegmentManagerTest.java
@@ -43,7 +43,6 @@
import org.apache.helix.HelixAdmin;
import org.apache.helix.HelixManager;
import org.apache.helix.model.ExternalView;
-import org.apache.helix.model.HelixConfigScope;
import org.apache.helix.model.IdealState;
import org.apache.helix.model.InstanceConfig;
import org.apache.helix.store.zk.ZkHelixPropertyStore;
@@ -75,10 +74,10 @@
import org.apache.pinot.spi.stream.StreamConfig;
import org.apache.pinot.spi.utils.CommonConstants;
import org.apache.pinot.spi.utils.CommonConstants.Helix;
+import org.apache.pinot.spi.utils.CommonConstants.Helix.Instance;
import org.apache.pinot.spi.utils.CommonConstants.Helix.StateModel.SegmentStateModel;
import org.apache.pinot.spi.utils.CommonConstants.Segment.Realtime.Status;
import org.apache.pinot.spi.utils.IngestionConfigUtils;
-import org.apache.pinot.spi.utils.StringUtil;
import org.apache.pinot.spi.utils.builder.TableConfigBuilder;
import org.apache.pinot.spi.utils.builder.TableNameBuilder;
import org.apache.pinot.util.TestUtils;
@@ -91,8 +90,6 @@
import static org.apache.pinot.controller.ControllerConf.ControllerPeriodicTasksConf.ENABLE_TMP_SEGMENT_ASYNC_DELETION;
import static org.apache.pinot.controller.ControllerConf.ControllerPeriodicTasksConf.TMP_SEGMENT_RETENTION_IN_SECONDS;
import static org.apache.pinot.spi.utils.CommonConstants.Segment.METADATA_URI_FOR_PEER_DOWNLOAD;
-import static org.mockito.ArgumentMatchers.any;
-import static org.mockito.ArgumentMatchers.eq;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
import static org.testng.Assert.*;
@@ -101,6 +98,7 @@
public class PinotLLCRealtimeSegmentManagerTest {
private static final File TEMP_DIR = new File(FileUtils.getTempDirectory(), "PinotLLCRealtimeSegmentManagerTest");
private static final String SCHEME = "file:";
+ private static final String CLUSTER_NAME = "testCluster";
private static final String RAW_TABLE_NAME = "testTable";
private static final String REALTIME_TABLE_NAME = TableNameBuilder.REALTIME.tableNameWithType(RAW_TABLE_NAME);
@@ -927,13 +925,13 @@ public void testUploadToSegmentStore()
ZkHelixPropertyStore<ZNRecord> zkHelixPropertyStore = (ZkHelixPropertyStore<ZNRecord>) mock(ZkHelixPropertyStore.class);
when(pinotHelixResourceManager.getHelixZkManager()).thenReturn(helixManager);
when(helixManager.getClusterManagmentTool()).thenReturn(helixAdmin);
- when(helixManager.getClusterName()).thenReturn("cluster_name");
+ when(helixManager.getClusterName()).thenReturn(CLUSTER_NAME);
when(pinotHelixResourceManager.getPropertyStore()).thenReturn(zkHelixPropertyStore);
// init fake PinotLLCRealtimeSegmentManager
ControllerConf controllerConfig = new ControllerConf();
- controllerConfig.setProperty(
- ControllerConf.ControllerPeriodicTasksConf.ENABLE_DEEP_STORE_RETRY_UPLOAD_LLC_SEGMENT, true);
+ controllerConfig.setProperty(ControllerConf.ControllerPeriodicTasksConf.ENABLE_DEEP_STORE_RETRY_UPLOAD_LLC_SEGMENT,
+ true);
controllerConfig.setDataDir(TEMP_DIR.toString());
FakePinotLLCRealtimeSegmentManager segmentManager =
new FakePinotLLCRealtimeSegmentManager(pinotHelixResourceManager, controllerConfig);
@@ -946,19 +944,12 @@ public void testUploadToSegmentStore()
segmentsValidationAndRetentionConfig.setRetentionTimeUnit(TimeUnit.DAYS.toString());
segmentsValidationAndRetentionConfig.setRetentionTimeValue("3");
segmentManager._tableConfig.setValidationConfig(segmentsValidationAndRetentionConfig);
- List<SegmentZKMetadata> segmentsZKMetadata =
- new ArrayList<>(segmentManager._segmentZKMetadataMap.values());
+ List<SegmentZKMetadata> segmentsZKMetadata = new ArrayList<>(segmentManager._segmentZKMetadataMap.values());
Assert.assertEquals(segmentsZKMetadata.size(), 5);
// Set up external view for this table
ExternalView externalView = new ExternalView(REALTIME_TABLE_NAME);
- when(helixAdmin.getResourceExternalView("cluster_name", REALTIME_TABLE_NAME))
- .thenReturn(externalView);
- when(helixAdmin.getConfigKeys(any(HelixConfigScope.class))).thenReturn(new ArrayList<>());
- String adminPort = "2077";
- Map<String, String> instanceConfigMap = new HashMap<>();
- instanceConfigMap.put(CommonConstants.Helix.Instance.ADMIN_PORT_KEY, adminPort);
- when(helixAdmin.getConfig(any(HelixConfigScope.class), any(List.class))).thenReturn(instanceConfigMap);
+ when(helixAdmin.getResourceExternalView(CLUSTER_NAME, REALTIME_TABLE_NAME)).thenReturn(externalView);
// Change 1st segment status to be DONE, but with default peer download url.
// Verify later the download url is fixed after upload success.
@@ -966,28 +957,26 @@ public void testUploadToSegmentStore()
segmentsZKMetadata.get(0).setDownloadUrl(METADATA_URI_FOR_PEER_DOWNLOAD);
// set up the external view for 1st segment
String instance0 = "instance0";
+ int adminPort = 2077;
externalView.setState(segmentsZKMetadata.get(0).getSegmentName(), instance0, "ONLINE");
InstanceConfig instanceConfig0 = new InstanceConfig(instance0);
instanceConfig0.setHostName(instance0);
- when(helixAdmin.getInstanceConfig(any(String.class), eq(instance0))).thenReturn(instanceConfig0);
+ instanceConfig0.getRecord().setIntField(Instance.ADMIN_PORT_KEY, adminPort);
+ when(helixAdmin.getInstanceConfig(CLUSTER_NAME, instance0)).thenReturn(instanceConfig0);
// mock the request/response for 1st segment upload
- String serverUploadRequestUrl0 = StringUtil
- .join("/",
- CommonConstants.HTTP_PROTOCOL + "://" + instance0 + ":" + adminPort,
- "segments",
- REALTIME_TABLE_NAME,
- segmentsZKMetadata.get(0).getSegmentName(),
- "upload") + "?uploadTimeoutMs=-1";
+ String serverUploadRequestUrl0 =
+ String.format("http://%s:%d/segments/%s/%s/upload?uploadTimeoutMs=-1", instance0, adminPort,
+ REALTIME_TABLE_NAME, segmentsZKMetadata.get(0).getSegmentName());
// tempSegmentFileLocation is the location where the segment uploader will upload the segment. This usually ends
// with a random UUID
File tempSegmentFileLocation = new File(TEMP_DIR, segmentsZKMetadata.get(0).getSegmentName() + UUID.randomUUID());
FileUtils.write(tempSegmentFileLocation, "test");
// After the deep-store retry task gets the segment location returned by Pinot server, it will move the segment to
// its final location. This is the expected segment location.
- String expectedSegmentLocation = segmentManager.createSegmentPath(RAW_TABLE_NAME,
- segmentsZKMetadata.get(0).getSegmentName()).toString();
- when(segmentManager._mockedFileUploadDownloadClient
- .uploadToSegmentStore(serverUploadRequestUrl0)).thenReturn(tempSegmentFileLocation.getPath());
+ String expectedSegmentLocation =
+ segmentManager.createSegmentPath(RAW_TABLE_NAME, segmentsZKMetadata.get(0).getSegmentName()).toString();
+ when(segmentManager._mockedFileUploadDownloadClient.uploadToSegmentStore(serverUploadRequestUrl0)).thenReturn(
+ tempSegmentFileLocation.getPath());
// Change 2nd segment status to be DONE, but with default peer download url.
// Verify later the download url isn't fixed after upload failure.
@@ -998,25 +987,20 @@ public void testUploadToSegmentStore()
externalView.setState(segmentsZKMetadata.get(1).getSegmentName(), instance1, "ONLINE");
InstanceConfig instanceConfig1 = new InstanceConfig(instance1);
instanceConfig1.setHostName(instance1);
- when(helixAdmin.getInstanceConfig(any(String.class), eq(instance1))).thenReturn(instanceConfig1);
+ instanceConfig1.getRecord().setIntField(Instance.ADMIN_PORT_KEY, adminPort);
+ when(helixAdmin.getInstanceConfig(CLUSTER_NAME, instance1)).thenReturn(instanceConfig1);
// mock the request/response for 2nd segment upload
- String serverUploadRequestUrl1 = StringUtil
- .join("/",
- CommonConstants.HTTP_PROTOCOL + "://" + instance1 + ":" + adminPort,
- "segments",
- REALTIME_TABLE_NAME,
- segmentsZKMetadata.get(1).getSegmentName(),
- "upload") + "?uploadTimeoutMs=-1";
- when(segmentManager._mockedFileUploadDownloadClient
- .uploadToSegmentStore(serverUploadRequestUrl1))
- .thenThrow(new HttpErrorStatusException(
- "failed to upload segment", Response.Status.INTERNAL_SERVER_ERROR.getStatusCode()));
+ String serverUploadRequestUrl1 =
+ String.format("http://%s:%d/segments/%s/%s/upload?uploadTimeoutMs=-1", instance1, adminPort,
+ REALTIME_TABLE_NAME, segmentsZKMetadata.get(1).getSegmentName());
+ when(segmentManager._mockedFileUploadDownloadClient.uploadToSegmentStore(serverUploadRequestUrl1)).thenThrow(
+ new HttpErrorStatusException("failed to upload segment",
+ Response.Status.INTERNAL_SERVER_ERROR.getStatusCode()));
// Change 3rd segment status to be DONE, but with default peer download url.
// Verify later the download url isn't fixed because no ONLINE replica found in any server.
segmentsZKMetadata.get(2).setStatus(Status.DONE);
- segmentsZKMetadata.get(2).setDownloadUrl(
- METADATA_URI_FOR_PEER_DOWNLOAD);
+ segmentsZKMetadata.get(2).setDownloadUrl(METADATA_URI_FOR_PEER_DOWNLOAD);
// set up the external view for 3rd segment
String instance2 = "instance2";
externalView.setState(segmentsZKMetadata.get(2).getSegmentName(), instance2, "OFFLINE");
@@ -1029,11 +1013,9 @@ public void testUploadToSegmentStore()
// Keep 5th segment status as IN_PROGRESS.
- List<String> segmentNames = segmentsZKMetadata.stream()
- .map(SegmentZKMetadata::getSegmentName).collect(Collectors.toList());
- when(pinotHelixResourceManager.getTableConfig(REALTIME_TABLE_NAME))
- .thenReturn(segmentManager._tableConfig);
-
+ List<String> segmentNames =
+ segmentsZKMetadata.stream().map(SegmentZKMetadata::getSegmentName).collect(Collectors.toList());
+ when(pinotHelixResourceManager.getTableConfig(REALTIME_TABLE_NAME)).thenReturn(segmentManager._tableConfig);
// Verify the result
segmentManager.uploadToDeepStoreIfMissing(segmentManager._tableConfig, segmentsZKMetadata);
@@ -1042,23 +1024,18 @@ public void testUploadToSegmentStore()
TestUtils.waitForCondition(aVoid -> segmentManager.deepStoreUploadExecutorPendingSegmentsIsEmpty(), 30_000L,
"Timed out waiting for upload retry tasks to finish");
- assertEquals(
- segmentManager.getSegmentZKMetadata(REALTIME_TABLE_NAME, segmentNames.get(0), null).getDownloadUrl(),
+ assertEquals(segmentManager.getSegmentZKMetadata(REALTIME_TABLE_NAME, segmentNames.get(0), null).getDownloadUrl(),
expectedSegmentLocation);
assertFalse(tempSegmentFileLocation.exists(),
"Deep-store retry task should move the file from temp location to permanent location");
- assertEquals(
- segmentManager.getSegmentZKMetadata(REALTIME_TABLE_NAME, segmentNames.get(1), null).getDownloadUrl(),
+ assertEquals(segmentManager.getSegmentZKMetadata(REALTIME_TABLE_NAME, segmentNames.get(1), null).getDownloadUrl(),
METADATA_URI_FOR_PEER_DOWNLOAD);
- assertEquals(
- segmentManager.getSegmentZKMetadata(REALTIME_TABLE_NAME, segmentNames.get(2), null).getDownloadUrl(),
+ assertEquals(segmentManager.getSegmentZKMetadata(REALTIME_TABLE_NAME, segmentNames.get(2), null).getDownloadUrl(),
METADATA_URI_FOR_PEER_DOWNLOAD);
- assertEquals(
- segmentManager.getSegmentZKMetadata(REALTIME_TABLE_NAME, segmentNames.get(3), null).getDownloadUrl(),
+ assertEquals(segmentManager.getSegmentZKMetadata(REALTIME_TABLE_NAME, segmentNames.get(3), null).getDownloadUrl(),
defaultDownloadUrl);
- assertNull(
- segmentManager.getSegmentZKMetadata(REALTIME_TABLE_NAME, segmentNames.get(4), null).getDownloadUrl());
+ assertNull(segmentManager.getSegmentZKMetadata(REALTIME_TABLE_NAME, segmentNames.get(4), null).getDownloadUrl());
}
@Test
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/BaseTableDataManager.java b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/BaseTableDataManager.java
index c46a85690dc5..1237db547a08 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/BaseTableDataManager.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/BaseTableDataManager.java
@@ -153,6 +153,13 @@ public void init(InstanceDataManagerConfig instanceDataManagerConfig, TableConfi
if (_peerDownloadScheme == null) {
_peerDownloadScheme = instanceDataManagerConfig.getSegmentPeerDownloadScheme();
}
+ if (_peerDownloadScheme != null) {
+ _peerDownloadScheme = _peerDownloadScheme.toLowerCase();
+ Preconditions.checkState(
+ CommonConstants.HTTP_PROTOCOL.equals(_peerDownloadScheme) || CommonConstants.HTTPS_PROTOCOL.equals(
+ _peerDownloadScheme), "Unsupported peer download scheme: %s for table: %s", _peerDownloadScheme,
+ _tableNameWithType);
+ }
_streamSegmentDownloadUntarRateLimitBytesPerSec =
instanceDataManagerConfig.getStreamSegmentDownloadUntarRateLimit();
@@ -691,7 +698,7 @@ protected void downloadFromPeersWithoutStreaming(String segmentName, SegmentZKMe
throws Exception {
Preconditions.checkState(_peerDownloadScheme != null, "Download peers require non null peer download scheme");
List<URI> peerSegmentURIs =
- PeerServerSegmentFinder.getPeerServerURIs(segmentName, _peerDownloadScheme, _helixManager, _tableNameWithType);
+ PeerServerSegmentFinder.getPeerServerURIs(_helixManager, _tableNameWithType, segmentName, _peerDownloadScheme);
if (peerSegmentURIs.isEmpty()) {
String msg = String.format("segment %s doesn't have any peers", segmentName);
LOGGER.warn(msg);
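The new init-time check normalizes and validates the scheme once instead of re-reading the table config per download. Equivalent standalone logic as a sketch:

import com.google.common.base.Preconditions;

public final class PeerSchemeCheckSketch {
  private PeerSchemeCheckSketch() {
  }

  // Lower-cases the configured scheme and rejects anything but http/https.
  public static String normalizeScheme(String scheme, String tableNameWithType) {
    String normalized = scheme.toLowerCase();
    Preconditions.checkState("http".equals(normalized) || "https".equals(normalized),
        "Unsupported peer download scheme: %s for table: %s", normalized, tableNameWithType);
    return normalized;
  }
}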
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/RealtimeTableDataManager.java b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/RealtimeTableDataManager.java
index 8e50049028e9..b120867d6bc3 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/RealtimeTableDataManager.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/RealtimeTableDataManager.java
@@ -635,17 +635,15 @@ void downloadAndReplaceSegment(String segmentName, SegmentZKMetadata segmentZKMe
} catch (Exception e) {
_logger.warn("Download segment {} from deepstore uri {} failed.", segmentName, uri, e);
// Download from deep store failed; try to download from peer if peer download is setup for the table.
- if (isPeerSegmentDownloadEnabled(tableConfig)) {
- downloadSegmentFromPeer(segmentName, tableConfig.getValidationConfig().getPeerSegmentDownloadScheme(),
- indexLoadingConfig);
+ if (_peerDownloadScheme != null) {
+ downloadSegmentFromPeer(segmentName, indexLoadingConfig);
} else {
throw e;
}
}
} else {
- if (isPeerSegmentDownloadEnabled(tableConfig)) {
- downloadSegmentFromPeer(segmentName, tableConfig.getValidationConfig().getPeerSegmentDownloadScheme(),
- indexLoadingConfig);
+ if (_peerDownloadScheme != null) {
+ downloadSegmentFromPeer(segmentName, indexLoadingConfig);
} else {
throw new RuntimeException("Peer segment download not enabled for segment " + segmentName);
}
@@ -687,23 +685,16 @@ private void untarAndMoveSegment(String segmentName, IndexLoadingConfig indexLoa
replaceLLSegment(segmentName, indexLoadingConfig);
}
- private boolean isPeerSegmentDownloadEnabled(TableConfig tableConfig) {
- return
- CommonConstants.HTTP_PROTOCOL.equalsIgnoreCase(tableConfig.getValidationConfig().getPeerSegmentDownloadScheme())
- || CommonConstants.HTTPS_PROTOCOL.equalsIgnoreCase(
- tableConfig.getValidationConfig().getPeerSegmentDownloadScheme());
- }
-
- private void downloadSegmentFromPeer(String segmentName, String downloadScheme,
- IndexLoadingConfig indexLoadingConfig) {
+ private void downloadSegmentFromPeer(String segmentName, IndexLoadingConfig indexLoadingConfig) {
File tempRootDir = null;
try {
tempRootDir = getTmpSegmentDataDir("tmp-" + segmentName + "." + System.currentTimeMillis());
File segmentTarFile = new File(tempRootDir, segmentName + TarGzCompressionUtils.TAR_GZ_FILE_EXTENSION);
// Next download the segment from a randomly chosen server using configured download scheme (http or https).
- SegmentFetcherFactory.getSegmentFetcher(downloadScheme).fetchSegmentToLocal(segmentName, () -> {
+ SegmentFetcherFactory.getSegmentFetcher(_peerDownloadScheme).fetchSegmentToLocal(segmentName, () -> {
- List<URI> peerServerURIs =
- PeerServerSegmentFinder.getPeerServerURIs(segmentName, downloadScheme, _helixManager);
+ PeerServerSegmentFinder.getPeerServerURIs(_helixManager, _tableNameWithType, segmentName,
+ _peerDownloadScheme);
Collections.shuffle(peerServerURIs);
return peerServerURIs;
}, segmentTarFile);
@@ -711,7 +702,8 @@ private void downloadSegmentFromPeer(String segmentName, String downloadScheme,
segmentTarFile.length());
untarAndMoveSegment(segmentName, indexLoadingConfig, segmentTarFile, tempRootDir);
} catch (Exception e) {
- _logger.warn("Download and move segment {} from peer with scheme {} failed.", segmentName, downloadScheme, e);
+ _logger.warn("Download and move segment {} from peer with scheme {} failed.", segmentName, _peerDownloadScheme,
+ e);
throw new RuntimeException(e);
} finally {
FileUtils.deleteQuietly(tempRootDir);
diff --git a/pinot-core/src/test/java/org/apache/pinot/core/data/manager/BaseTableDataManagerTest.java b/pinot-core/src/test/java/org/apache/pinot/core/data/manager/BaseTableDataManagerTest.java
index d4c5f4fc298c..261fe0f23885 100644
--- a/pinot-core/src/test/java/org/apache/pinot/core/data/manager/BaseTableDataManagerTest.java
+++ b/pinot-core/src/test/java/org/apache/pinot/core/data/manager/BaseTableDataManagerTest.java
@@ -660,8 +660,8 @@ public void testDownloadFromPeersWithoutStreaming()
File destFile = new File(tempRootDir, "seg01" + TarGzCompressionUtils.TAR_GZ_FILE_EXTENSION);
try (MockedStatic<PeerServerSegmentFinder> mockPeerSegFinder = mockStatic(PeerServerSegmentFinder.class)) {
mockPeerSegFinder.when(
- () -> PeerServerSegmentFinder.getPeerServerURIs("seg01", "http", helixManager, TABLE_NAME_WITH_TYPE))
- .thenReturn(Collections.singletonList(uri));
+ () -> PeerServerSegmentFinder.getPeerServerURIs(helixManager, TABLE_NAME_WITH_TYPE, "seg01",
+ CommonConstants.HTTP_PROTOCOL)).thenReturn(List.of(uri));
tmgr.downloadFromPeersWithoutStreaming("seg01", mock(SegmentZKMetadata.class), destFile);
}
assertEquals(FileUtils.readFileToString(destFile), "this is from somewhere remote");
diff --git a/pinot-core/src/test/java/org/apache/pinot/core/util/PeerServerSegmentFinderTest.java b/pinot-core/src/test/java/org/apache/pinot/core/util/PeerServerSegmentFinderTest.java
index 4b6c6fb910c3..2af972695ffa 100644
--- a/pinot-core/src/test/java/org/apache/pinot/core/util/PeerServerSegmentFinderTest.java
+++ b/pinot-core/src/test/java/org/apache/pinot/core/util/PeerServerSegmentFinderTest.java
@@ -19,103 +19,93 @@
package org.apache.pinot.core.util;
import java.net.URI;
-import java.util.ArrayList;
-import java.util.HashMap;
import java.util.List;
-import java.util.Map;
import org.apache.helix.HelixAdmin;
import org.apache.helix.HelixManager;
import org.apache.helix.model.ExternalView;
-import org.apache.helix.model.HelixConfigScope;
import org.apache.helix.model.InstanceConfig;
import org.apache.pinot.spi.utils.CommonConstants;
-import org.apache.pinot.spi.utils.StringUtil;
-import org.testng.Assert;
+import org.apache.pinot.spi.utils.CommonConstants.Helix.Instance;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
-import static org.mockito.ArgumentMatchers.any;
-import static org.mockito.ArgumentMatchers.eq;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertTrue;
public class PeerServerSegmentFinderTest {
- private static final String TABLE_NAME_WITH_TYPE = "testTable_REALTIME";
- private static final String SEGMENT_1 = "testTable__0__0__t11";
- private static final String SEGMENT_2 = "testTable__0__1__t11";
- private static final String CLUSTER_NAME = "dummyCluster";
- private static final String INSTANCE_ID1 = "Server_localhost_1000";
- private static final String INSTANCE_ID2 = "Server_localhost_1001";
- private static final String INSTANCE_ID3 = "Server_localhost_1003";
- public static final String ADMIN_PORT = "1008";
- public static final String HOST_1_NAME = "s1";
- public static final String HOST_2_NAME = "s2";
- public static final String HOST_3_NAME = "s3";
+ private static final String CLUSTER_NAME = "testCluster";
+ private static final String REALTIME_TABLE_NAME = "testTable_REALTIME";
+ private static final String SEGMENT_1 = "testSegment1";
+ private static final String SEGMENT_2 = "testSegment2";
+ private static final String INSTANCE_ID_1 = "Server_s1_1007";
+ private static final String INSTANCE_ID_2 = "Server_s2_1007";
+ private static final String INSTANCE_ID_3 = "Server_s3_1007";
+ private static final String HOSTNAME_1 = "s1";
+ private static final String HOSTNAME_2 = "s2";
+ private static final String HOSTNAME_3 = "s3";
+ private static final int HELIX_PORT = 1007;
+ private static final int HTTP_ADMIN_PORT = 1008;
+ private static final int HTTPS_ADMIN_PORT = 1009;
+
private HelixManager _helixManager;
@BeforeClass
- public void initSegmentFetcherFactoryWithPeerServerSegmentFetcher()
- throws Exception {
- HelixAdmin helixAdmin;
- {
- ExternalView ev = new ExternalView(TABLE_NAME_WITH_TYPE);
- ev.setState(SEGMENT_1, INSTANCE_ID1, "ONLINE");
- ev.setState(SEGMENT_1, INSTANCE_ID2, "OFFLINE");
- ev.setState(SEGMENT_1, INSTANCE_ID3, "ONLINE");
- ev.setState(SEGMENT_2, INSTANCE_ID1, "OFFLINE");
- ev.setState(SEGMENT_2, INSTANCE_ID2, "OFFLINE");
- _helixManager = mock(HelixManager.class);
- helixAdmin = mock(HelixAdmin.class);
- when(_helixManager.getClusterManagmentTool()).thenReturn(helixAdmin);
- when(_helixManager.getClusterName()).thenReturn(CLUSTER_NAME);
- when(helixAdmin.getResourceExternalView(CLUSTER_NAME, TABLE_NAME_WITH_TYPE)).thenReturn(ev);
- when(helixAdmin.getConfigKeys(any(HelixConfigScope.class))).thenReturn(new ArrayList<>());
- Map<String, String> instanceConfigMap = new HashMap<>();
- instanceConfigMap.put(CommonConstants.Helix.Instance.ADMIN_PORT_KEY, ADMIN_PORT);
- when(helixAdmin.getConfig(any(HelixConfigScope.class), any(List.class))).thenReturn(instanceConfigMap);
- InstanceConfig instanceConfig1 = new InstanceConfig(INSTANCE_ID1);
- instanceConfig1.setHostName(HOST_1_NAME);
- instanceConfig1.setPort("1000");
- when(helixAdmin.getInstanceConfig(any(String.class), eq(INSTANCE_ID1))).thenReturn(instanceConfig1);
+ public void initSegmentFetcherFactoryWithPeerServerSegmentFetcher() {
+ ExternalView externalView = new ExternalView(REALTIME_TABLE_NAME);
+ externalView.setState(SEGMENT_1, INSTANCE_ID_1, "ONLINE");
+ externalView.setState(SEGMENT_1, INSTANCE_ID_2, "OFFLINE");
+ externalView.setState(SEGMENT_1, INSTANCE_ID_3, "ONLINE");
+ externalView.setState(SEGMENT_2, INSTANCE_ID_1, "OFFLINE");
+ externalView.setState(SEGMENT_2, INSTANCE_ID_2, "OFFLINE");
- InstanceConfig instanceConfig2 = new InstanceConfig(INSTANCE_ID2);
- instanceConfig2.setHostName(HOST_2_NAME);
- instanceConfig2.setPort("1000");
- when(helixAdmin.getInstanceConfig(any(String.class), eq(INSTANCE_ID2))).thenReturn(instanceConfig2);
+ _helixManager = mock(HelixManager.class);
+ HelixAdmin helixAdmin = mock(HelixAdmin.class);
+ when(_helixManager.getClusterManagmentTool()).thenReturn(helixAdmin);
+ when(_helixManager.getClusterName()).thenReturn(CLUSTER_NAME);
+ when(helixAdmin.getResourceExternalView(CLUSTER_NAME, REALTIME_TABLE_NAME)).thenReturn(externalView);
+ when(helixAdmin.getInstanceConfig(CLUSTER_NAME, INSTANCE_ID_1)).thenReturn(
+ getInstanceConfig(INSTANCE_ID_1, HOSTNAME_1));
+ when(helixAdmin.getInstanceConfig(CLUSTER_NAME, INSTANCE_ID_2)).thenReturn(
+ getInstanceConfig(INSTANCE_ID_2, HOSTNAME_2));
+ when(helixAdmin.getInstanceConfig(CLUSTER_NAME, INSTANCE_ID_3)).thenReturn(
+ getInstanceConfig(INSTANCE_ID_3, HOSTNAME_3));
+ }
- InstanceConfig instanceConfig3 = new InstanceConfig(INSTANCE_ID3);
- instanceConfig3.setHostName(HOST_3_NAME);
- instanceConfig3.setPort("1000");
- when(helixAdmin.getInstanceConfig(any(String.class), eq(INSTANCE_ID3))).thenReturn(instanceConfig3);
- }
+ private static InstanceConfig getInstanceConfig(String instanceId, String hostName) {
+ InstanceConfig instanceConfig = new InstanceConfig(instanceId);
+ instanceConfig.setHostName(hostName);
+ instanceConfig.setPort(Integer.toString(HELIX_PORT));
+ instanceConfig.getRecord().setIntField(Instance.ADMIN_PORT_KEY, HTTP_ADMIN_PORT);
+ instanceConfig.getRecord().setIntField(Instance.ADMIN_HTTPS_PORT_KEY, HTTPS_ADMIN_PORT);
+ return instanceConfig;
}
@Test
public void testSegmentFoundSuccessfully()
throws Exception {
// SEGMENT_1 has only 2 online replicas.
-    List<URI> httpServerURIs =
- PeerServerSegmentFinder.getPeerServerURIs(SEGMENT_1, CommonConstants.HTTP_PROTOCOL, _helixManager);
- assertEquals(2, httpServerURIs.size());
- httpServerURIs.contains(new URI(
- StringUtil.join("/", "http://" + HOST_1_NAME + ":" + ADMIN_PORT, "segments", TABLE_NAME_WITH_TYPE, SEGMENT_1)));
- httpServerURIs.contains(new URI(
- StringUtil.join("/", "http://" + HOST_3_NAME + ":" + ADMIN_PORT, "segments", TABLE_NAME_WITH_TYPE, SEGMENT_1)));
-    List<URI> httpsServerURIs =
- PeerServerSegmentFinder.getPeerServerURIs(SEGMENT_1, CommonConstants.HTTPS_PROTOCOL, _helixManager);
- assertEquals(2, httpsServerURIs.size());
- httpServerURIs.contains(new URI(StringUtil
- .join("/", "https://" + HOST_1_NAME + ":" + ADMIN_PORT, "segments", TABLE_NAME_WITH_TYPE, SEGMENT_1)));
- httpServerURIs.contains(new URI(StringUtil
- .join("/", "https://" + HOST_3_NAME + ":" + ADMIN_PORT, "segments", TABLE_NAME_WITH_TYPE, SEGMENT_1)));
+    List<URI> httpServerURIs = PeerServerSegmentFinder.getPeerServerURIs(_helixManager, REALTIME_TABLE_NAME, SEGMENT_1,
+ CommonConstants.HTTP_PROTOCOL);
+ assertEquals(httpServerURIs.size(), 2);
+ assertTrue(httpServerURIs.contains(new URI(
+ String.format("http://%s:%d/segments/%s/%s", HOSTNAME_1, HTTP_ADMIN_PORT, REALTIME_TABLE_NAME, SEGMENT_1))));
+ assertTrue(httpServerURIs.contains(new URI(
+ String.format("http://%s:%d/segments/%s/%s", HOSTNAME_3, HTTP_ADMIN_PORT, REALTIME_TABLE_NAME, SEGMENT_1))));
+    List<URI> httpsServerURIs = PeerServerSegmentFinder.getPeerServerURIs(_helixManager, REALTIME_TABLE_NAME, SEGMENT_1,
+ CommonConstants.HTTPS_PROTOCOL);
+ assertEquals(httpsServerURIs.size(), 2);
+ assertTrue(httpsServerURIs.contains(new URI(
+ String.format("https://%s:%d/segments/%s/%s", HOSTNAME_1, HTTPS_ADMIN_PORT, REALTIME_TABLE_NAME, SEGMENT_1))));
+ assertTrue(httpsServerURIs.contains(new URI(
+ String.format("https://%s:%d/segments/%s/%s", HOSTNAME_3, HTTPS_ADMIN_PORT, REALTIME_TABLE_NAME, SEGMENT_1))));
}
@Test
- public void testSegmentNotFound()
- throws Exception {
- Assert.assertEquals(0,
- PeerServerSegmentFinder.getPeerServerURIs(SEGMENT_2, CommonConstants.HTTP_PROTOCOL, _helixManager).size());
+ public void testSegmentNotFound() {
+ assertTrue(PeerServerSegmentFinder.getPeerServerURIs(_helixManager, REALTIME_TABLE_NAME, SEGMENT_2,
+ CommonConstants.HTTP_PROTOCOL).isEmpty());
}
}
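[Editor's note] For context, a minimal, self-contained sketch of the download-URI layout the rewritten assertions above expect. The constants mirror the test; the /segments/{tableNameWithType}/{segmentName} path is inferred from the assertions themselves, not from PeerServerSegmentFinder's implementation.

import java.net.URI;

// Illustrative only: reconstructs the peer download URI the test asserts,
// from an instance's hostname and its admin (or HTTPS admin) port.
public class PeerSegmentUriExample {
  public static void main(String[] args) throws Exception {
    String protocol = "http";                        // or "https"
    String hostname = "s1";                          // from the server's InstanceConfig
    int adminPort = 1008;                            // ADMIN_PORT_KEY (1009 for the HTTPS admin port)
    String tableNameWithType = "testTable_REALTIME";
    String segmentName = "testSegment1";
    URI uri = new URI(String.format("%s://%s:%d/segments/%s/%s",
        protocol, hostname, adminPort, tableNameWithType, segmentName));
    System.out.println(uri);  // http://s1:1008/segments/testTable_REALTIME/testSegment1
  }
}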
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/utils/retry/ExponentialBackoffRetryPolicy.java b/pinot-spi/src/main/java/org/apache/pinot/spi/utils/retry/ExponentialBackoffRetryPolicy.java
index 6151aab06f96..e5b9b7dc1a52 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/utils/retry/ExponentialBackoffRetryPolicy.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/utils/retry/ExponentialBackoffRetryPolicy.java
@@ -39,8 +39,8 @@ public ExponentialBackoffRetryPolicy(int maxNumAttempts, long initialDelayMs, do
@Override
protected long getDelayMs(int currentAttempt) {
- double minDelayMs = _initialDelayMs * Math.pow(_delayScaleFactor, currentAttempt);
- double maxDelayMs = minDelayMs * _delayScaleFactor;
- return _random.nextLong((long) minDelayMs, (long) maxDelayMs);
+ long minDelayMs = (long) (_initialDelayMs * Math.pow(_delayScaleFactor, currentAttempt));
+ long maxDelayMs = (long) (minDelayMs * _delayScaleFactor);
+ return minDelayMs < maxDelayMs ? _random.nextLong(minDelayMs, maxDelayMs) : minDelayMs;
}
}
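[Editor's note] The change above does more than switch to long arithmetic: ThreadLocalRandom.nextLong(origin, bound) throws IllegalArgumentException unless origin < bound, and with a delay scale factor of 1.0 (or after truncation of nearly equal doubles) the two bounds can collide. A standalone sketch of the fixed computation, assuming _random is a ThreadLocalRandom:

import java.util.concurrent.ThreadLocalRandom;

public class BackoffDelayExample {
  // Mirrors the patched getDelayMs: exponential growth with uniform jitter,
  // falling back to the un-jittered delay when the jitter range is empty.
  static long getDelayMs(long initialDelayMs, double delayScaleFactor, int currentAttempt) {
    long minDelayMs = (long) (initialDelayMs * Math.pow(delayScaleFactor, currentAttempt));
    long maxDelayMs = (long) (minDelayMs * delayScaleFactor);
    return minDelayMs < maxDelayMs ? ThreadLocalRandom.current().nextLong(minDelayMs, maxDelayMs) : minDelayMs;
  }

  public static void main(String[] args) {
    System.out.println(getDelayMs(100, 1.0, 3)); // always 100; the old code would have thrown here
    System.out.println(getDelayMs(100, 2.0, 3)); // uniform in [800, 1600)
  }
}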
From 3c45469c3962f726b3c12cc8c9bbd2bd15838798 Mon Sep 17 00:00:00 2001
From: Xiang Fu
Date: Tue, 16 Apr 2024 12:47:19 +0800
Subject: [PATCH 020/102] Update superset docker build script (#12385)
---
.../.superset_docker_image_build_and_push.sh | 2 +-
..._multi_arch_docker_image_build_and_push.sh | 59 +++++++++++++++++++
2 files changed, 60 insertions(+), 1 deletion(-)
create mode 100755 .github/workflows/scripts/docker/.superset_multi_arch_docker_image_build_and_push.sh
diff --git a/.github/workflows/scripts/docker/.superset_docker_image_build_and_push.sh b/.github/workflows/scripts/docker/.superset_docker_image_build_and_push.sh
index 700b8e01a1be..a82997e2d842 100755
--- a/.github/workflows/scripts/docker/.superset_docker_image_build_and_push.sh
+++ b/.github/workflows/scripts/docker/.superset_docker_image_build_and_push.sh
@@ -49,7 +49,7 @@ done
cd ${DOCKER_FILE_BASE_DIR}
-docker buildx build \
+docker build \
--no-cache \
--platform=${BUILD_PLATFORM} \
--file Dockerfile \
diff --git a/.github/workflows/scripts/docker/.superset_multi_arch_docker_image_build_and_push.sh b/.github/workflows/scripts/docker/.superset_multi_arch_docker_image_build_and_push.sh
new file mode 100755
index 000000000000..a82997e2d842
--- /dev/null
+++ b/.github/workflows/scripts/docker/.superset_multi_arch_docker_image_build_and_push.sh
@@ -0,0 +1,59 @@
+#!/bin/bash -x
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+if [ -z "${DOCKER_IMAGE_NAME}" ]; then
+ DOCKER_IMAGE_NAME="apachepinot/pinot-superset"
+fi
+if [ -z "${SUPERSET_IMAGE_TAG}" ]; then
+ SUPERSET_IMAGE_TAG="latest"
+fi
+if [ -z "${BUILD_PLATFORM}" ]; then
+ BUILD_PLATFORM="linux/amd64"
+fi
+
+DATE=`date +%Y%m%d`
+docker pull apache/superset:${SUPERSET_IMAGE_TAG}
+COMMIT_ID=`docker images apache/superset:${SUPERSET_IMAGE_TAG} --format "{{.ID}}"`
+
+tags=()
+if [ -z "${TAGS}" ]; then
+ tags=("${COMMIT_ID}-${DATE}")
+ tags+=("latest")
+else
+ declare -a tags=($(echo ${TAGS} | tr "," " "))
+fi
+
+DOCKER_BUILD_TAGS=""
+for tag in "${tags[@]}"
+do
+ echo "Plan to build and push docker images for: ${DOCKER_IMAGE_NAME}:${tag}"
+ DOCKER_BUILD_TAGS+=" --tag ${DOCKER_IMAGE_NAME}:${tag} "
+done
+
+cd ${DOCKER_FILE_BASE_DIR}
+
+docker build \
+ --no-cache \
+ --platform=${BUILD_PLATFORM} \
+ --file Dockerfile \
+ --build-arg SUPERSET_IMAGE_TAG=${SUPERSET_IMAGE_TAG} \
+ ${DOCKER_BUILD_TAGS} \
+ --push \
+ .
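[Editor's note] For reference, a hypothetical invocation of the new multi-arch script (shown in bash, since the patched artifact is itself a bash script). The environment variables are exactly the ones the script reads; the values and the DOCKER_FILE_BASE_DIR path are placeholders:

# Hypothetical usage; values are placeholders. DOCKER_FILE_BASE_DIR must point
# at the directory containing the superset Dockerfile, and the active docker
# builder must support --platform and --push (BuildKit).
export DOCKER_IMAGE_NAME="apachepinot/pinot-superset"
export SUPERSET_IMAGE_TAG="latest"
export BUILD_PLATFORM="linux/amd64,linux/arm64"
export TAGS="latest,custom-tag"
export DOCKER_FILE_BASE_DIR="docker/images/pinot-superset"   # placeholder path
bash .github/workflows/scripts/docker/.superset_multi_arch_docker_image_build_and_push.sh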
From 1393750622462b8a80dfc0550de3d3723f2bba26 Mon Sep 17 00:00:00 2001
From: Chaitanya Deepthi <45308220+deepthi912@users.noreply.github.com>
Date: Tue, 16 Apr 2024 02:39:15 -0400
Subject: [PATCH 021/102] Allow Server throttling just before executing queries
on server to allow max CPU and disk utilization (#12930)
Co-authored-by: deepthi912
---
.../pinot/server/starter/helix/BaseServerStarter.java | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/BaseServerStarter.java b/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/BaseServerStarter.java
index f16b32659165..02c7b81ea5eb 100644
--- a/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/BaseServerStarter.java
+++ b/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/BaseServerStarter.java
@@ -578,9 +578,6 @@ public void start()
_serverInstance = new ServerInstance(serverConf, _helixManager, accessControlFactory);
ServerMetrics serverMetrics = _serverInstance.getServerMetrics();
- // Enable Server level realtime ingestion rate limier
- RealtimeConsumptionRateManager.getInstance().createServerRateLimiter(_serverConf, serverMetrics);
-
InstanceDataManager instanceDataManager = _serverInstance.getInstanceDataManager();
instanceDataManager.setSupplierOfIsServerReadyToServeQueries(() -> _isServerReadyToServeQueries);
// initialize the thread accountant for query killing
@@ -639,6 +636,9 @@ public void start()
preServeQueries();
+    // Enable server-level realtime ingestion rate limiter
+ RealtimeConsumptionRateManager.getInstance().createServerRateLimiter(_serverConf, serverMetrics);
+
// Start the query server after finishing the service status check. If the query server is started before all the
// segments are loaded, broker might not have finished processing the callback of routing table update, and start
    // querying the server prematurely.
From 9b4ec33d685ef7d357b57095505b76df21e65293 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 16 Apr 2024 10:06:16 -0700
Subject: [PATCH 022/102] Bump org.scala-lang.modules:scala-xml_2.12 from 1.3.0
to 2.3.0 (#12939)
---
pinot-connectors/pinot-spark-2-connector/pom.xml | 2 +-
pinot-connectors/pinot-spark-common/pom.xml | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/pinot-connectors/pinot-spark-2-connector/pom.xml b/pinot-connectors/pinot-spark-2-connector/pom.xml
index c2a0ea3f4f5d..58e307221b71 100644
--- a/pinot-connectors/pinot-spark-2-connector/pom.xml
+++ b/pinot-connectors/pinot-spark-2-connector/pom.xml
@@ -35,7 +35,7 @@
${basedir}/../..
2.4.8
2.8
- 1.3.0
+ 2.3.0
3.2.18
org.apache.pinot.\$internal
diff --git a/pinot-connectors/pinot-spark-common/pom.xml b/pinot-connectors/pinot-spark-common/pom.xml
index 5c8f812cf31b..ec708e5d5c5e 100644
--- a/pinot-connectors/pinot-spark-common/pom.xml
+++ b/pinot-connectors/pinot-spark-common/pom.xml
@@ -35,7 +35,7 @@
${basedir}/../..
0.14.6
2.8
- 1.3.0
+ 2.3.0
3.2.18
From b264512c0ca335ae83afe15d004c48010264cfda Mon Sep 17 00:00:00 2001
From: Xiang Fu
Date: Wed, 17 Apr 2024 01:06:39 +0800
Subject: [PATCH 023/102] Bump org.codehaus.mojo:buildnumber-maven-plugin from
1.3 to 3.2.0 (#12937)
---
pom.xml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/pom.xml b/pom.xml
index 0b27acf9478f..bcf5614a39f0 100644
--- a/pom.xml
+++ b/pom.xml
@@ -2282,7 +2282,7 @@
org.codehaus.mojo
buildnumber-maven-plugin
- 1.3
+ 3.2.0
validate
From d4cb93dbda1e268e0c064b1b3c62e8b81599db32 Mon Sep 17 00:00:00 2001
From: Shounak kulkarni
Date: Tue, 16 Apr 2024 23:27:40 +0500
Subject: [PATCH 024/102] Fix metric rule pattern regex (#12856)
---
.../configs/broker.yml | 68 +++---
.../configs/controller.yml | 80 +++----
.../configs/minion.yml | 10 +-
.../configs/pinot.yml | 220 +++++++++---------
.../configs/server.yml | 60 ++---
5 files changed, 219 insertions(+), 219 deletions(-)
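[Editor's note] The bug this patch fixes, in one line: in java.util.regex an unescaped '.' matches any character, so the old exporter patterns accepted far more than the literal org.apache.pinot.common.metrics domain. A minimal Java demonstration:

import java.util.regex.Pattern;

// Why the dots need escaping: '.' in a regex matches any character,
// so the unescaped pattern over-matches arbitrary domains.
public class RegexEscapeExample {
  public static void main(String[] args) {
    String unescaped = "org.apache.pinot.common.metrics";
    String escaped   = "org\\.apache\\.pinot\\.common\\.metrics";
    String bogus     = "orgXapacheXpinotXcommonXmetrics";
    System.out.println(Pattern.matches(unescaped, bogus)); // true  (over-matches)
    System.out.println(Pattern.matches(escaped, bogus));   // false (literal match only)
    System.out.println(Pattern.matches(escaped, "org.apache.pinot.common.metrics")); // true
  }
}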
diff --git a/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/broker.yml b/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/broker.yml
index ce5e5df7b9b0..5c7a6cc8a900 100644
--- a/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/broker.yml
+++ b/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/broker.yml
@@ -1,173 +1,173 @@
rules:
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_broker_authorization_$4"
cache: true
labels:
database: "$2"
table: "$1$3"
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_broker_documentsScanned_$4"
cache: true
labels:
database: "$2"
table: "$1$3"
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_broker_entriesScannedInFilter_$4"
cache: true
labels:
database: "$2"
table: "$1$3"
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_broker_entriesScannedPostFilter_$4"
cache: true
labels:
database: "$2"
table: "$1$3"
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_broker_freshnessLagMs_$4"
cache: true
labels:
database: "$2"
table: "$1$3"
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_broker_queries_$4"
cache: true
labels:
database: "$2"
table: "$1$3"
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_broker_queryExecution_$4"
cache: true
labels:
database: "$2"
table: "$1$3"
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_broker_queryRouting_$4"
cache: true
labels:
database: "$2"
table: "$1$3"
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_broker_reduce_$4"
cache: true
labels:
database: "$2"
table: "$1$3"
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_broker_requestCompilation_$4"
cache: true
labels:
database: "$2"
table: "$1$3"
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_broker_scatterGather_$4"
cache: true
labels:
database: "$2"
table: "$1$3"
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_broker_totalServerResponseSize_$4"
cache: true
labels:
database: "$2"
table: "$1$3"
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_broker_groupBySize_$5"
cache: true
labels:
database: "$2"
table: "$1$3"
tableType: "$4"
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_broker_noServingHostForSegment_$5"
cache: true
labels:
database: "$2"
table: "$1$3"
tableType: "$4"
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_broker_healthcheck_$1_$2"
cache: true
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_broker_helix_$1_$2"
cache: true
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_broker_helix_zookeeper_$1_$2"
cache: true
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_broker_nettyConnection_$1_$2"
cache: true
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_broker_unhealthyServers_$1"
cache: true
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_broker_clusterChangeCheck_$1"
cache: true
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_broker_proactiveClusterChangeCheck_$1"
cache: true
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_broker_exceptions_$1_$2"
cache: true
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_broker_routingTableUpdateTime_$1"
cache: true
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_broker_brokerResponsesWithPartialServersResponded_$4"
cache: true
labels:
database: "$2"
table: "$1$3"
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_broker_brokerResponsesWithTimeouts_$4"
cache: true
labels:
database: "$2"
table: "$1$3"
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_broker_noServerFoundExceptions_$4"
cache: true
labels:
database: "$2"
table: "$1$3"
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_broker_brokerResponsesWithProcessingExceptions_$4"
cache: true
labels:
database: "$2"
table: "$1$3"
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_broker_brokerResponsesWithNumGroupsLimitReached_$4"
cache: true
labels:
database: "$2"
table: "$1$3"
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_broker_queryQuotaExceeded_$4"
cache: true
labels:
database: "$2"
table: "$1$3"
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_broker_queryTotalTimeMs_$4"
cache: true
labels:
database: "$2"
table: "$1$3"
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_broker_serverMissingForRouting_$5"
cache: true
labels:
database: "$2"
table: "$1$3"
tableType: "$4"
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_broker_deserialization_$4"
cache: true
labels:
database: "$2"
table: "$1$3"
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_broker_requestConnectionWait_$4"
cache: true
labels:
@@ -207,7 +207,7 @@ rules:
table: "$3$5"
tableType: "$6"
#This is a catch-all pattern for pinot table metrics with offline/realtime suffix that also contain kafka topic
-- pattern: "\"?org\\.apache\\.pinot\\.common\\.metrics\"?<>(\\w+)"
+- pattern: "\"?org\\.apache\\.pinot\\.common\\.metrics\"?<>(\\w+)"
name: "pinot_$1_$2_$9"
cache: true
labels:
diff --git a/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/controller.yml b/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/controller.yml
index 44b07d1718ef..a441f714e4a6 100644
--- a/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/controller.yml
+++ b/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/controller.yml
@@ -1,138 +1,138 @@
rules:
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_controller_$1_$2"
cache: true
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_controller_helix_$1_$2"
cache: true
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_controller_helix_ZookeeperReconnects_$1"
cache: true
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_controller_idealstateZnodeSize_$5"
cache: true
labels:
database: "$2"
table: "$1$3"
tableType: "$4"
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_controller_idealstateZnodeByteSize_$5"
cache: true
labels:
database: "$2"
table: "$1$3"
tableType: "$4"
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_controller_replicationFromConfig_$5"
cache: true
labels:
database: "$2"
table: "$1$3"
tableType: "$4"
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_controller_numberOfReplicas_$5"
cache: true
labels:
database: "$2"
table: "$1$3"
tableType: "$4"
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_controller_percentOfReplicas_$5"
cache: true
labels:
database: "$2"
table: "$1$3"
tableType: "$4"
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_controller_percentSegmentsAvailable_$5"
cache: true
labels:
database: "$2"
table: "$1$3"
tableType: "$4"
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_controller_segmentCount_$5"
cache: true
labels:
database: "$2"
table: "$1$3"
tableType: "$4"
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_controller_segmentsInErrorState_$5"
cache: true
labels:
database: "$2"
table: "$1$3"
tableType: "$4"
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_controller_numberSegmentUploadTimeoutExceeded_$1"
cache: true
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_controller_numberTimesScheduleTasksCalled_$1"
cache: true
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_controller_periodicTaskNumTablesProcessed_$1_$2"
cache: true
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_controller_pinotControllerLeader_$1"
cache: true
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_controller_partitionLeader_$1_$2"
cache: true
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_controller_realtimeTableCount_$1"
cache: true
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_controller_offlineTableCount_$1"
cache: true
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_controller_validateion_$4_$5"
cache: true
labels:
database: "$2"
table: "$1$3"
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_controller_cronSchedulerJobScheduled_$5"
cache: true
labels:
database: "$2"
table: "$1$3"
taskType: "$4"
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_controller_cronSchedulerJobTriggered_$5"
cache: true
labels:
database: "$2"
table: "$1$3"
taskType: "$4"
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_controller_cronSchedulerJobSkipped_$5"
cache: true
labels:
database: "$2"
table: "$1$3"
taskType: "$4"
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_controller_cronSchedulerJobExecutionTimeMs_$5"
cache: true
labels:
database: "$2"
table: "$1$3"
taskType: "$4"
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_controller_tableRebalanceExecutionTimeMs_$5"
cache: true
labels:
database: "$2"
table: "$1$3"
result: "$4"
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_controller_taskStatus_$3"
cache: true
labels:
taskType: "$1"
status: "$2"
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_controller_timeMsSinceLastMinionTaskMetadataUpdate_$6"
cache: true
labels:
@@ -140,7 +140,7 @@ rules:
table: "$1$3"
tableType: "$4"
taskType: "$5"
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_controller_$1_$7"
cache: true
labels:
@@ -148,12 +148,12 @@ rules:
table: "$2$4"
tableType: "$5"
taskType: "$6"
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_controller_$1_$3"
cache: true
labels:
taskType: "$2"
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_controller_timeMsSinceLastSuccessfulMinionTaskGeneration_$6"
cache: true
labels:
@@ -161,7 +161,7 @@ rules:
table: "$1$3"
tableType: "$4"
taskType: "$5"
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_controller_lastMinionTaskGenerationEncountersError_$6"
cache: true
labels:
@@ -169,23 +169,23 @@ rules:
table: "$1$3"
tableType: "$4"
taskType: "$5"
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_controller_pinotLeadControllerResourceEnabled_$1"
cache: true
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_controller_offlineTableEstimatedSize_$4"
cache: true
labels:
database: "$2"
table: "$1$3"
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_controller_tableQuota_$5"
cache: true
labels:
database: "$2"
table: "$1$3"
tableType: "$4"
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_controller_periodicTaskError_$6"
cache: true
labels:
@@ -193,33 +193,33 @@ rules:
table: "$1$3"
tableType: "$4"
periodicTask: "$5"
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_controller_tableStorageQuotaUtilization_$5"
cache: true
labels:
database: "$2"
table: "$1$3"
tableType: "$4"
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_controller_tableStorageEstMissingSegmentPercent_$5"
cache: true
labels:
database: "$2"
table: "$1$3"
tableType: "$4"
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_controller_tableTotalSizeOnServer_$5"
labels:
database: "$2"
table: "$1$3"
tableType: "$4"
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_controller_tableSizePerReplicaOnServer_$5"
labels:
database: "$2"
table: "$1$3"
tableType: "$4"
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_controller_tableCompressedSize_$5"
labels:
database: "$2"
@@ -251,7 +251,7 @@ rules:
table: "$2$4"
tableType: "$5"
#This is a catch-all pattern for pinot table metrics with offline/realtime suffix that also contain kafka topic
-- pattern: "\"?org\\.apache\\.pinot\\.common\\.metrics\"?<>(\\w+)"
+- pattern: "\"?org\\.apache\\.pinot\\.common\\.metrics\"?<>(\\w+)"
name: "pinot_$1_$2_$9"
cache: true
labels:
diff --git a/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/minion.yml b/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/minion.yml
index 3541cc8f3669..4d21107a1b9f 100644
--- a/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/minion.yml
+++ b/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/minion.yml
@@ -1,17 +1,17 @@
rules:
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_minion_version"
cache: true
labels:
version: "$1"
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_minion_numberOfTasks_$5"
cache: true
labels:
database: "$2"
table: "$1$3"
tableType: "$4"
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_minion_$6_$7"
cache: true
labels:
@@ -19,10 +19,10 @@ rules:
table: "$1$3"
tableType: "$4"
taskType: "$5"
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_minion_$1_$2"
cache: true
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_minion_$2_$3"
cache: true
labels:
diff --git a/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/pinot.yml b/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/pinot.yml
index 2739fb557d27..b530a79b363b 100644
--- a/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/pinot.yml
+++ b/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/pinot.yml
@@ -5,140 +5,140 @@
rules:
# Pinot Controller
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_controller_$1_$2"
cache: true
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_controller_helix_$1_$2"
cache: true
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_controller_helix_ZookeeperReconnects_$1"
cache: true
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_controller_idealstateZnodeSize_$5"
cache: true
labels:
database: "$2"
table: "$1$3"
tableType: "$4"
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_controller_idealstateZnodeByteSize_$5"
cache: true
labels:
database: "$2"
table: "$1$3"
tableType: "$4"
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_controller_replicationFromConfig_$5"
cache: true
labels:
database: "$2"
table: "$1$3"
tableType: "$4"
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_controller_numberOfReplicas_$5"
cache: true
labels:
database: "$2"
table: "$1$3"
tableType: "$4"
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_controller_percentOfReplicas_$5"
cache: true
labels:
database: "$2"
table: "$1$3"
tableType: "$4"
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_controller_percentSegmentsAvailable_$5"
cache: true
labels:
database: "$2"
table: "$1$3"
tableType: "$4"
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_controller_segmentCount_$5"
cache: true
labels:
database: "$2"
table: "$1$3"
tableType: "$4"
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_controller_segmentsInErrorState_$5"
cache: true
labels:
database: "$2"
table: "$1$3"
tableType: "$4"
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_controller_numberSegmentUploadTimeoutExceeded_$1"
cache: true
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_controller_numberTimesScheduleTasksCalled_$1"
cache: true
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_controller_periodicTaskNumTablesProcessed_$1_$2"
cache: true
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_controller_pinotControllerLeader_$1"
cache: true
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_controller_partitionLeader_$1_$2"
cache: true
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_controller_realtimeTableCount_$1"
cache: true
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_controller_offlineTableCount_$1"
cache: true
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_controller_validateion_$4_$5"
cache: true
labels:
database: "$2"
table: "$1$3"
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_controller_cronSchedulerJobScheduled_$5"
cache: true
labels:
database: "$2"
table: "$1$3"
taskType: "$4"
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_controller_cronSchedulerJobTriggered_$5"
cache: true
labels:
database: "$2"
table: "$1$3"
taskType: "$4"
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_controller_cronSchedulerJobSkipped_$5"
cache: true
labels:
database: "$2"
table: "$1$3"
taskType: "$4"
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_controller_cronSchedulerJobExecutionTimeMs_$5"
cache: true
labels:
database: "$2"
table: "$1$3"
taskType: "$4"
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_controller_tableRebalanceExecutionTimeMs_$5"
cache: true
labels:
database: "$2"
table: "$1$3"
result: "$4"
-- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)"
+- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)"
name: "pinot_controller_taskStatus_$3"
cache: true
labels:
taskType: "$1"
status: "$2"
-- pattern: "\"org.apache.pinot.common.metrics\"