From dbbf44ce153bbe72fb3161f1977d0af1b403e06e Mon Sep 17 00:00:00 2001
From: deemoliu
Date: Thu, 18 Apr 2024 15:54:59 -0700
Subject: [PATCH 01/58] Add splitPartWithLimit and splitPartFromEnd UDFs
 (#12437)

---
 .../function/scalar/StringFunctions.java      | 26 ++++++++--
 .../function/scalar/StringFunctionsTest.java  | 48 +++++++++++++++++++
 2 files changed, 71 insertions(+), 3 deletions(-)

diff --git a/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/StringFunctions.java b/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/StringFunctions.java
index 8ce77e8ccb6d..374917ec9939 100644
--- a/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/StringFunctions.java
+++ b/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/StringFunctions.java
@@ -49,7 +49,6 @@ private StringFunctions() {
   private final static Pattern LTRIM = Pattern.compile("^\\s+");
   private final static Pattern RTRIM = Pattern.compile("\\s+$");
 
-
   /**
    * @see StringUtils#reverse(String)
    * @param input
@@ -585,14 +584,35 @@ public static String[] split(String input, String delimiter, int limit) {
    * TODO: Revisit if index should be one-based (both Presto and Postgres use one-based index, which starts with 1)
    * @param input
    * @param delimiter
-   * @param index
+   * @param index the index of the part to return; a negative index counts from the end.
    * @return splits string on specified delimiter and returns String at specified index from the split.
    */
   @ScalarFunction(names = {"splitPart", "split_part"})
   public static String splitPart(String input, String delimiter, int index) {
     String[] splitString = StringUtils.splitByWholeSeparator(input, delimiter);
-    if (index < splitString.length) {
+    if (index >= 0 && index < splitString.length) {
+      return splitString[index];
+    } else if (index < 0 && index >= -splitString.length) {
+      return splitString[splitString.length + index];
+    } else {
+      return "null";
+    }
+  }
+
+  /**
+   * @param input the input String to be split into parts.
+   * @param delimiter the specified delimiter to split the input string.
+   * @param limit the max count of parts that the input string can be split into.
+   * @param index the index of the split part to return; a negative index counts from the end.
+   * @return splits string on the delimiter with the limit count and returns String at specified index from the split.
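+   *         Illustrative examples, derived from the implementation below: splitPart("a.b.c", ".", 2, 0) returns
+   *         "a"; splitPart("a.b.c", ".", 2, -1) returns "b.c"; an out-of-range index returns the literal "null".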
+   */
+  @ScalarFunction
+  public static String splitPart(String input, String delimiter, int limit, int index) {
+    String[] splitString = StringUtils.splitByWholeSeparator(input, delimiter, limit);
+    if (index >= 0 && index < splitString.length) {
       return splitString[index];
+    } else if (index < 0 && index >= -splitString.length) {
+      return splitString[splitString.length + index];
     } else {
       return "null";
     }
   }

diff --git a/pinot-common/src/test/java/org/apache/pinot/common/function/scalar/StringFunctionsTest.java b/pinot-common/src/test/java/org/apache/pinot/common/function/scalar/StringFunctionsTest.java
index 9129ccdc3769..d75b8ada435d 100644
--- a/pinot-common/src/test/java/org/apache/pinot/common/function/scalar/StringFunctionsTest.java
+++ b/pinot-common/src/test/java/org/apache/pinot/common/function/scalar/StringFunctionsTest.java
@@ -26,6 +26,47 @@
 public class StringFunctionsTest {
 
+  @DataProvider(name = "splitPartTestCases")
+  public static Object[][] splitPartTestCases() {
+    return new Object[][]{
+        {"org.apache.pinot.common.function", ".", 0, 100, "org", "org"},
+        {"org.apache.pinot.common.function", ".", 10, 100, "null", "null"},
+        {"org.apache.pinot.common.function", ".", 1, 0, "apache", "apache"},
+        {"org.apache.pinot.common.function", ".", 1, 1, "apache", "null"},
+        {"org.apache.pinot.common.function", ".", 0, 1, "org", "org.apache.pinot.common.function"},
+        {"org.apache.pinot.common.function", ".", 1, 2, "apache", "apache.pinot.common.function"},
+        {"org.apache.pinot.common.function", ".", 2, 3, "pinot", "pinot.common.function"},
+        {"org.apache.pinot.common.function", ".", 3, 4, "common", "common.function"},
+        {"org.apache.pinot.common.function", ".", 4, 5, "function", "function"},
+        {"org.apache.pinot.common.function", ".", 5, 6, "null", "null"},
+        {"org.apache.pinot.common.function", ".", 3, 3, "common", "null"},
+        {"+++++", "+", 0, 100, "", ""},
+        {"+++++", "+", 1, 100, "null", "null"},
+        // note that splitPart will split with limit first, then look up by index from START or END.
+        {"org.apache.pinot.common.function", ".", -1, 100, "function", "function"},
+        {"org.apache.pinot.common.function", ".", -10, 100, "null", "null"},
+        {"org.apache.pinot.common.function", ".", -2, 0, "common", "common"},  // Case: limit=0 does not take effect.
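+        // e.g. with limit=2 the parts are ["org", "apache.pinot.common.function"], so index 1 and index -1
+        // both return "apache.pinot.common.function".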
+ {"org.apache.pinot.common.function", ".", -1, 1, "function", "org.apache.pinot.common.function"}, + {"org.apache.pinot.common.function", ".", -2, 1, "common", "null"}, + {"org.apache.pinot.common.function", ".", -1, 2, "function", "apache.pinot.common.function"}, + {"org.apache.pinot.common.function", ".", -2, 2, "common", "org"}, + {"org.apache.pinot.common.function", ".", -1, 3, "function", "pinot.common.function"}, + {"org.apache.pinot.common.function", ".", -3, 3, "pinot", "org"}, + {"org.apache.pinot.common.function", ".", -4, 3, "apache", "null"}, + {"org.apache.pinot.common.function", ".", -1, 4, "function", "common.function"}, + {"org.apache.pinot.common.function", ".", -3, 4, "pinot", "apache"}, + {"org.apache.pinot.common.function", ".", -4, 4, "apache", "org"}, + {"org.apache.pinot.common.function", ".", -1, 5, "function", "function"}, + {"org.apache.pinot.common.function", ".", -5, 5, "org", "org"}, + {"org.apache.pinot.common.function", ".", -6, 5, "null", "null"}, + {"org.apache.pinot.common.function", ".", -1, 6, "function", "function"}, + {"org.apache.pinot.common.function", ".", -5, 6, "org", "org"}, + {"org.apache.pinot.common.function", ".", -6, 6, "null", "null"}, + {"+++++", "+", -1, 100, "", ""}, + {"+++++", "+", -2, 100, "null", "null"}, + }; + } + @DataProvider(name = "isJson") public static Object[][] isJsonTestCases() { return new Object[][]{ @@ -40,4 +81,11 @@ public static Object[][] isJsonTestCases() { public void testIsJson(String input, boolean expectedValue) { assertEquals(StringFunctions.isJson(input), expectedValue); } + + @Test(dataProvider = "splitPartTestCases") + public void testSplitPart(String input, String delimiter, int index, int limit, String expectedToken, + String expectedTokenWithLimitCounts) { + assertEquals(StringFunctions.splitPart(input, delimiter, index), expectedToken); + assertEquals(StringFunctions.splitPart(input, delimiter, limit, index), expectedTokenWithLimitCounts); + } } From ea60408debb2dbc3b82c93dee6df8d30db5bf65d Mon Sep 17 00:00:00 2001 From: Abhishek Sharma Date: Fri, 19 Apr 2024 00:28:11 -0400 Subject: [PATCH 02/58] hash4j version upgrade to 0.17.0 (#12968) --- pom.xml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 67c437975f57..24006bcea490 100644 --- a/pom.xml +++ b/pom.xml @@ -155,6 +155,7 @@ 1.36.0 9.8.0 0.10.2 + 0.17.0 4.2.25 1.1.10.5 @@ -1242,7 +1243,7 @@ com.dynatrace.hash4j hash4j - 0.13.0 + ${dynatrace.hash4j.version} com.tdunning From 5b90c6564571721f480a0f4ed63937ce3a79f255 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 18 Apr 2024 21:31:02 -0700 Subject: [PATCH 03/58] Bump moment in /pinot-controller/src/main/resources (#9030) --- .../src/main/resources/package-lock.json | 14 +++++++------- pinot-controller/src/main/resources/package.json | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/pinot-controller/src/main/resources/package-lock.json b/pinot-controller/src/main/resources/package-lock.json index d41e32c9eaa7..be7e77f7bab9 100644 --- a/pinot-controller/src/main/resources/package-lock.json +++ b/pinot-controller/src/main/resources/package-lock.json @@ -30,7 +30,7 @@ "jsonlint": "1.6.3", "jwt-decode": "^3.1.2", "lodash": "4.17.21", - "moment": "2.29.3", + "moment": "2.29.4", "prop-types": "15.8.1", "re-resizable": "6.9.9", "react": "16.13.1", @@ -7786,9 +7786,9 @@ } }, "node_modules/moment": { - "version": "2.29.3", - "resolved": 
"https://registry.npmjs.org/moment/-/moment-2.29.3.tgz", - "integrity": "sha512-c6YRvhEo//6T2Jz/vVtYzqBzwvPT95JBQ+smCytzf7c50oMZRsR/a4w88aD34I+/QVSfnoAnSBFPJHItlOMJVw==", + "version": "2.29.4", + "resolved": "https://registry.npmjs.org/moment/-/moment-2.29.4.tgz", + "integrity": "sha512-5LC9SOxjSc2HF6vO2CyuTDNivEdoz2IvyJJGj6X8DJ0eFyfszE0QiEd+iXmBvUP3WHxSjFH/vIsA0EN00cgr8w==", "engines": { "node": "*" } @@ -20324,9 +20324,9 @@ } }, "moment": { - "version": "2.29.3", - "resolved": "https://registry.npmjs.org/moment/-/moment-2.29.3.tgz", - "integrity": "sha512-c6YRvhEo//6T2Jz/vVtYzqBzwvPT95JBQ+smCytzf7c50oMZRsR/a4w88aD34I+/QVSfnoAnSBFPJHItlOMJVw==" + "version": "2.29.4", + "resolved": "https://registry.npmjs.org/moment/-/moment-2.29.4.tgz", + "integrity": "sha512-5LC9SOxjSc2HF6vO2CyuTDNivEdoz2IvyJJGj6X8DJ0eFyfszE0QiEd+iXmBvUP3WHxSjFH/vIsA0EN00cgr8w==" }, "move-concurrently": { "version": "1.0.1", diff --git a/pinot-controller/src/main/resources/package.json b/pinot-controller/src/main/resources/package.json index 08638569491e..865af39bd42f 100644 --- a/pinot-controller/src/main/resources/package.json +++ b/pinot-controller/src/main/resources/package.json @@ -81,7 +81,7 @@ "jsonlint": "1.6.3", "jwt-decode": "^3.1.2", "lodash": "4.17.21", - "moment": "2.29.3", + "moment": "2.29.4", "prop-types": "15.8.1", "re-resizable": "6.9.9", "react": "16.13.1", From 5e8428c497e1003f84c8fe2acb899b3471acfd70 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 18 Apr 2024 21:31:30 -0700 Subject: [PATCH 04/58] Bump terser from 4.8.0 to 4.8.1 in /pinot-controller/src/main/resources (#9085) --- .../src/main/resources/package-lock.json | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pinot-controller/src/main/resources/package-lock.json b/pinot-controller/src/main/resources/package-lock.json index be7e77f7bab9..551f2274b2a6 100644 --- a/pinot-controller/src/main/resources/package-lock.json +++ b/pinot-controller/src/main/resources/package-lock.json @@ -11793,9 +11793,9 @@ } }, "node_modules/terser": { - "version": "4.8.0", - "resolved": "https://registry.npmjs.org/terser/-/terser-4.8.0.tgz", - "integrity": "sha512-EAPipTNeWsb/3wLPeup1tVPaXfIaU68xMnVdPafIL1TV05OhASArYyIfFvnvJCNrR2NIOvDVNNTFRa+Re2MWyw==", + "version": "4.8.1", + "resolved": "https://registry.npmjs.org/terser/-/terser-4.8.1.tgz", + "integrity": "sha512-4GnLC0x667eJG0ewJTa6z/yXrbLGv80D9Ru6HIpCQmO+Q4PfEtBFi0ObSckqwL6VyQv/7ENJieXHo2ANmdQwgw==", "dev": true, "dependencies": { "commander": "^2.20.0", @@ -23531,9 +23531,9 @@ "dev": true }, "terser": { - "version": "4.8.0", - "resolved": "https://registry.npmjs.org/terser/-/terser-4.8.0.tgz", - "integrity": "sha512-EAPipTNeWsb/3wLPeup1tVPaXfIaU68xMnVdPafIL1TV05OhASArYyIfFvnvJCNrR2NIOvDVNNTFRa+Re2MWyw==", + "version": "4.8.1", + "resolved": "https://registry.npmjs.org/terser/-/terser-4.8.1.tgz", + "integrity": "sha512-4GnLC0x667eJG0ewJTa6z/yXrbLGv80D9Ru6HIpCQmO+Q4PfEtBFi0ObSckqwL6VyQv/7ENJieXHo2ANmdQwgw==", "dev": true, "requires": { "commander": "^2.20.0", From 31d2ee8de19458020d24951b7046fdc1a225a5af Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 18 Apr 2024 21:31:59 -0700 Subject: [PATCH 05/58] Bump json5 from 1.0.1 to 1.0.2 in /pinot-controller/src/main/resources (#10067) --- .../src/main/resources/package-lock.json | 60 +++++++++---------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git 
a/pinot-controller/src/main/resources/package-lock.json b/pinot-controller/src/main/resources/package-lock.json index 551f2274b2a6..f56f0aaeff8a 100644 --- a/pinot-controller/src/main/resources/package-lock.json +++ b/pinot-controller/src/main/resources/package-lock.json @@ -3902,9 +3902,9 @@ } }, "node_modules/eslint-loader/node_modules/json5": { - "version": "2.2.1", - "resolved": "https://registry.npmjs.org/json5/-/json5-2.2.1.tgz", - "integrity": "sha512-1hqLFMSrGHRHxav9q9gNjJ5EXznIxGVO09xQRrwplcS8qs28pZ8s8hupZAmqDwZUmVZ2Qb2jnyPOWcDH8m8dlA==", + "version": "2.2.3", + "resolved": "https://registry.npmjs.org/json5/-/json5-2.2.3.tgz", + "integrity": "sha512-XmOWe7eyHYH14cLdVPoyg+GOH3rYX++KpzrylJwSW98t3Nk+U8XOl8FWKOgwtzdb8lXGf6zYwDUzeHMWfxasyg==", "dev": true, "bin": { "json5": "lib/cli.js" @@ -4925,9 +4925,9 @@ } }, "node_modules/file-loader/node_modules/json5": { - "version": "2.2.1", - "resolved": "https://registry.npmjs.org/json5/-/json5-2.2.1.tgz", - "integrity": "sha512-1hqLFMSrGHRHxav9q9gNjJ5EXznIxGVO09xQRrwplcS8qs28pZ8s8hupZAmqDwZUmVZ2Qb2jnyPOWcDH8m8dlA==", + "version": "2.2.3", + "resolved": "https://registry.npmjs.org/json5/-/json5-2.2.3.tgz", + "integrity": "sha512-XmOWe7eyHYH14cLdVPoyg+GOH3rYX++KpzrylJwSW98t3Nk+U8XOl8FWKOgwtzdb8lXGf6zYwDUzeHMWfxasyg==", "dev": true, "bin": { "json5": "lib/cli.js" @@ -7034,9 +7034,9 @@ "dev": true }, "node_modules/json5": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/json5/-/json5-1.0.1.tgz", - "integrity": "sha512-aKS4WQjPenRxiQsC93MNfjx+nbF4PAdYzmd/1JIj8HYzqfbu86beTuNgXDzPknWk0n0uARlyewZo4s++ES36Ow==", + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/json5/-/json5-1.0.2.tgz", + "integrity": "sha512-g1MWMLBiz8FKi1e4w0UyVL3w+iJceWAFBAaBnnGKOpNa5f8TLktkbre1+s6oICydWAm+HRUGTmI+//xv2hvXYA==", "dev": true, "dependencies": { "minimist": "^1.2.0" @@ -11644,9 +11644,9 @@ } }, "node_modules/style-loader/node_modules/json5": { - "version": "2.2.1", - "resolved": "https://registry.npmjs.org/json5/-/json5-2.2.1.tgz", - "integrity": "sha512-1hqLFMSrGHRHxav9q9gNjJ5EXznIxGVO09xQRrwplcS8qs28pZ8s8hupZAmqDwZUmVZ2Qb2jnyPOWcDH8m8dlA==", + "version": "2.2.3", + "resolved": "https://registry.npmjs.org/json5/-/json5-2.2.3.tgz", + "integrity": "sha512-XmOWe7eyHYH14cLdVPoyg+GOH3rYX++KpzrylJwSW98t3Nk+U8XOl8FWKOgwtzdb8lXGf6zYwDUzeHMWfxasyg==", "dev": true, "bin": { "json5": "lib/cli.js" @@ -12373,9 +12373,9 @@ } }, "node_modules/url-loader/node_modules/json5": { - "version": "2.2.1", - "resolved": "https://registry.npmjs.org/json5/-/json5-2.2.1.tgz", - "integrity": "sha512-1hqLFMSrGHRHxav9q9gNjJ5EXznIxGVO09xQRrwplcS8qs28pZ8s8hupZAmqDwZUmVZ2Qb2jnyPOWcDH8m8dlA==", + "version": "2.2.3", + "resolved": "https://registry.npmjs.org/json5/-/json5-2.2.3.tgz", + "integrity": "sha512-XmOWe7eyHYH14cLdVPoyg+GOH3rYX++KpzrylJwSW98t3Nk+U8XOl8FWKOgwtzdb8lXGf6zYwDUzeHMWfxasyg==", "dev": true, "bin": { "json5": "lib/cli.js" @@ -17342,9 +17342,9 @@ } }, "json5": { - "version": "2.2.1", - "resolved": "https://registry.npmjs.org/json5/-/json5-2.2.1.tgz", - "integrity": "sha512-1hqLFMSrGHRHxav9q9gNjJ5EXznIxGVO09xQRrwplcS8qs28pZ8s8hupZAmqDwZUmVZ2Qb2jnyPOWcDH8m8dlA==", + "version": "2.2.3", + "resolved": "https://registry.npmjs.org/json5/-/json5-2.2.3.tgz", + "integrity": "sha512-XmOWe7eyHYH14cLdVPoyg+GOH3rYX++KpzrylJwSW98t3Nk+U8XOl8FWKOgwtzdb8lXGf6zYwDUzeHMWfxasyg==", "dev": true }, "loader-utils": { @@ -18070,9 +18070,9 @@ }, "dependencies": { "json5": { - "version": "2.2.1", - "resolved": "https://registry.npmjs.org/json5/-/json5-2.2.1.tgz", - "integrity": 
"sha512-1hqLFMSrGHRHxav9q9gNjJ5EXznIxGVO09xQRrwplcS8qs28pZ8s8hupZAmqDwZUmVZ2Qb2jnyPOWcDH8m8dlA==", + "version": "2.2.3", + "resolved": "https://registry.npmjs.org/json5/-/json5-2.2.3.tgz", + "integrity": "sha512-XmOWe7eyHYH14cLdVPoyg+GOH3rYX++KpzrylJwSW98t3Nk+U8XOl8FWKOgwtzdb8lXGf6zYwDUzeHMWfxasyg==", "dev": true }, "loader-utils": { @@ -19712,9 +19712,9 @@ "dev": true }, "json5": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/json5/-/json5-1.0.1.tgz", - "integrity": "sha512-aKS4WQjPenRxiQsC93MNfjx+nbF4PAdYzmd/1JIj8HYzqfbu86beTuNgXDzPknWk0n0uARlyewZo4s++ES36Ow==", + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/json5/-/json5-1.0.2.tgz", + "integrity": "sha512-g1MWMLBiz8FKi1e4w0UyVL3w+iJceWAFBAaBnnGKOpNa5f8TLktkbre1+s6oICydWAm+HRUGTmI+//xv2hvXYA==", "dev": true, "requires": { "minimist": "^1.2.0" @@ -23424,9 +23424,9 @@ }, "dependencies": { "json5": { - "version": "2.2.1", - "resolved": "https://registry.npmjs.org/json5/-/json5-2.2.1.tgz", - "integrity": "sha512-1hqLFMSrGHRHxav9q9gNjJ5EXznIxGVO09xQRrwplcS8qs28pZ8s8hupZAmqDwZUmVZ2Qb2jnyPOWcDH8m8dlA==", + "version": "2.2.3", + "resolved": "https://registry.npmjs.org/json5/-/json5-2.2.3.tgz", + "integrity": "sha512-XmOWe7eyHYH14cLdVPoyg+GOH3rYX++KpzrylJwSW98t3Nk+U8XOl8FWKOgwtzdb8lXGf6zYwDUzeHMWfxasyg==", "dev": true }, "loader-utils": { @@ -24003,9 +24003,9 @@ }, "dependencies": { "json5": { - "version": "2.2.1", - "resolved": "https://registry.npmjs.org/json5/-/json5-2.2.1.tgz", - "integrity": "sha512-1hqLFMSrGHRHxav9q9gNjJ5EXznIxGVO09xQRrwplcS8qs28pZ8s8hupZAmqDwZUmVZ2Qb2jnyPOWcDH8m8dlA==", + "version": "2.2.3", + "resolved": "https://registry.npmjs.org/json5/-/json5-2.2.3.tgz", + "integrity": "sha512-XmOWe7eyHYH14cLdVPoyg+GOH3rYX++KpzrylJwSW98t3Nk+U8XOl8FWKOgwtzdb8lXGf6zYwDUzeHMWfxasyg==", "dev": true }, "loader-utils": { From da6823600b5bf13a2b9fdbfb60f5e29525496bbc Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 18 Apr 2024 21:34:34 -0700 Subject: [PATCH 06/58] Bump net.openhft:posix from 2.23.2 to 2.25ea0 (#12828) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 24006bcea490..5c5c654befc4 100644 --- a/pom.xml +++ b/pom.xml @@ -1445,7 +1445,7 @@ net.openhft posix - 2.23.2 + 2.25ea0 net.openhft From 76eebc24fc53b2f78491b9ed2e63f85d98f3990e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 19 Apr 2024 10:10:58 -0700 Subject: [PATCH 07/58] Bump net.openhft:chronicle-core from 2.25ea13 to 2.25ea14 (#12971) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 5c5c654befc4..1aa4d37006de 100644 --- a/pom.xml +++ b/pom.xml @@ -1450,7 +1450,7 @@ net.openhft chronicle-core - 2.25ea13 + 2.25ea14 org.ow2.asm From fe63a026ab24478570f751dc62dd097ca29aa1ba Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 19 Apr 2024 10:11:12 -0700 Subject: [PATCH 08/58] Bump org.apache.maven.plugins:maven-gpg-plugin from 3.2.3 to 3.2.4 (#12972) --- pinot-connectors/pinot-spark-2-connector/pom.xml | 2 +- pinot-connectors/pinot-spark-3-connector/pom.xml | 2 +- pinot-connectors/pinot-spark-common/pom.xml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pinot-connectors/pinot-spark-2-connector/pom.xml b/pinot-connectors/pinot-spark-2-connector/pom.xml index 58e307221b71..ec5564a52c49 100644 --- 
a/pinot-connectors/pinot-spark-2-connector/pom.xml +++ b/pinot-connectors/pinot-spark-2-connector/pom.xml @@ -152,7 +152,7 @@ Thus, explicitly adding this plugin to a new profile to sign the files at the end all at once. --> org.apache.maven.plugins maven-gpg-plugin - 3.2.3 + 3.2.4 diff --git a/pinot-connectors/pinot-spark-3-connector/pom.xml b/pinot-connectors/pinot-spark-3-connector/pom.xml index 2cf4a3fe2e0f..1f43254fb4bb 100644 --- a/pinot-connectors/pinot-spark-3-connector/pom.xml +++ b/pinot-connectors/pinot-spark-3-connector/pom.xml @@ -148,7 +148,7 @@ Thus, explicitly adding this plugin to a new profile to sign the files at the end all at once. --> org.apache.maven.plugins maven-gpg-plugin - 3.2.3 + 3.2.4 diff --git a/pinot-connectors/pinot-spark-common/pom.xml b/pinot-connectors/pinot-spark-common/pom.xml index ec708e5d5c5e..4da9dd3d5571 100644 --- a/pinot-connectors/pinot-spark-common/pom.xml +++ b/pinot-connectors/pinot-spark-common/pom.xml @@ -163,7 +163,7 @@ Thus, explicitly adding this plugin to a new profile to sign the files at the end all at once. --> org.apache.maven.plugins maven-gpg-plugin - 3.2.3 + 3.2.4 From bebb491ddad96ea83fb977baa70e3ed2ed125109 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 19 Apr 2024 10:12:18 -0700 Subject: [PATCH 09/58] Bump aws.sdk.version from 2.25.33 to 2.25.34 (#12975) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 1aa4d37006de..932dcdf04d85 100644 --- a/pom.xml +++ b/pom.xml @@ -172,7 +172,7 @@ 0.15.0 0.4.4 4.2.2 - 2.25.33 + 2.25.34 2.12.7 3.1.12 7.10.1 From d840413432e9b3b887d239a1293a0de692934ee6 Mon Sep 17 00:00:00 2001 From: aishikbh Date: Fri, 19 Apr 2024 22:46:17 +0530 Subject: [PATCH 10/58] reduce logging for SpecialValueTransformer (#12970) --- .../local/recordtransformer/SpecialValueTransformer.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/SpecialValueTransformer.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/SpecialValueTransformer.java index d019384ed7cf..1075ff349722 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/SpecialValueTransformer.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/SpecialValueTransformer.java @@ -111,7 +111,7 @@ public GenericRow transform(GenericRow record) { } } if (_negativeZeroConversionCount > 0 || _nanConversionCount > 0) { - LOGGER.info("Converted {} -0.0s to 0.0 and {} NaNs to null", _negativeZeroConversionCount, _nanConversionCount); + LOGGER.debug("Converted {} -0.0s to 0.0 and {} NaNs to null", _negativeZeroConversionCount, _nanConversionCount); } return record; } From e1b0e5357ebfcecffcc6cce3997a3edcdac1aa2c Mon Sep 17 00:00:00 2001 From: Pratik Tibrewal Date: Sat, 20 Apr 2024 03:35:01 +0530 Subject: [PATCH 11/58] Refactor PinotTaskManager class (#12964) --- .../resources/PinotTaskRestletResource.java | 14 +- .../helix/core/minion/CronJobScheduleJob.java | 2 +- .../helix/core/minion/PinotTaskManager.java | 162 ++++++++---------- ...rgeRollupMinionClusterIntegrationTest.java | 140 +++++++-------- .../PurgeMinionClusterIntegrationTest.java | 40 ++--- ...eSegmentsMinionClusterIntegrationTest.java | 55 +++--- .../SimpleMinionClusterIntegrationTest.java | 45 ++--- .../integration/tests/TlsIntegrationTest.java | 2 +- 
.../tests/UpsertTableIntegrationTest.java | 14 +- .../tests/UrlAuthRealtimeIntegrationTest.java | 2 +- 10 files changed, 217 insertions(+), 259 deletions(-) diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/api/resources/PinotTaskRestletResource.java b/pinot-controller/src/main/java/org/apache/pinot/controller/api/resources/PinotTaskRestletResource.java index e09bde84668a..0d9d3a05c123 100644 --- a/pinot-controller/src/main/java/org/apache/pinot/controller/api/resources/PinotTaskRestletResource.java +++ b/pinot-controller/src/main/java/org/apache/pinot/controller/api/resources/PinotTaskRestletResource.java @@ -618,22 +618,20 @@ public Map getCronSchedulerJobDetails( @ApiOperation("Schedule tasks and return a map from task type to task name scheduled") public Map scheduleTasks(@ApiParam(value = "Task type") @QueryParam("taskType") String taskType, @ApiParam(value = "Table name (with type suffix)") @QueryParam("tableName") String tableName, - @ApiParam(value = "Minion Instance tag to schedule the task explicitly on") - @QueryParam("minionInstanceTag") @Nullable String minionInstanceTag, - @Context HttpHeaders headers) { + @ApiParam(value = "Minion Instance tag to schedule the task explicitly on") @QueryParam("minionInstanceTag") + @Nullable String minionInstanceTag, @Context HttpHeaders headers) { String database = headers != null ? headers.getHeaderString(DATABASE) : DEFAULT_DATABASE; if (taskType != null) { // Schedule task for the given task type - List taskNames = tableName != null - ? _pinotTaskManager.scheduleTask(taskType, + List taskNames = tableName != null ? _pinotTaskManager.scheduleTaskForTable(taskType, DatabaseUtils.translateTableName(tableName, headers), minionInstanceTag) : _pinotTaskManager.scheduleTaskForDatabase(taskType, database, minionInstanceTag); return Collections.singletonMap(taskType, taskNames == null ? null : StringUtils.join(taskNames, ',')); } else { // Schedule tasks for all task types - Map> allTaskNames = tableName != null - ? _pinotTaskManager.scheduleTasks(DatabaseUtils.translateTableName(tableName, headers), minionInstanceTag) - : _pinotTaskManager.scheduleTasksForDatabase(database, minionInstanceTag); + Map> allTaskNames = tableName != null ? 
_pinotTaskManager.scheduleAllTasksForTable( + DatabaseUtils.translateTableName(tableName, headers), minionInstanceTag) + : _pinotTaskManager.scheduleAllTasksForDatabase(database, minionInstanceTag); return allTaskNames.entrySet().stream() .collect(Collectors.toMap(Map.Entry::getKey, entry -> String.join(",", entry.getValue()))); } diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/minion/CronJobScheduleJob.java b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/minion/CronJobScheduleJob.java index 8c0433854f0c..f9b250b2bcd4 100644 --- a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/minion/CronJobScheduleJob.java +++ b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/minion/CronJobScheduleJob.java @@ -65,7 +65,7 @@ public void execute(JobExecutionContext jobExecutionContext) return; } long jobStartTime = System.currentTimeMillis(); - pinotTaskManager.scheduleTask(taskType, table); + pinotTaskManager.scheduleTaskForTable(taskType, table, null); LOGGER.info("Finished CronJob: table - {}, task - {}, next runtime is {}", table, taskType, jobExecutionContext.getNextFireTime()); pinotTaskManager.getControllerMetrics().addTimedTableValue(PinotTaskManager.getCronJobName(table, taskType), diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/minion/PinotTaskManager.java b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/minion/PinotTaskManager.java index 40299441390a..97417d6bea94 100644 --- a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/minion/PinotTaskManager.java +++ b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/minion/PinotTaskManager.java @@ -22,7 +22,6 @@ import java.io.PrintWriter; import java.io.StringWriter; import java.util.ArrayList; -import java.util.Collections; import java.util.Date; import java.util.HashMap; import java.util.HashSet; @@ -480,30 +479,72 @@ public void registerTaskGenerator(PinotTaskGenerator taskGenerator) { } /** - * Public API to schedule tasks (all task types) for all tables in all databases. + * Schedules tasks (all task types) for all tables. * It might be called from the non-leader controller. * Returns a map from the task type to the list of tasks scheduled. */ - public synchronized Map> scheduleTasks() { - return scheduleTasks(_pinotHelixResourceManager.getAllTables(), false, null); + public synchronized Map> scheduleAllTasksForAllTables(@Nullable String minionInstanceTag) { + return scheduleTasks(_pinotHelixResourceManager.getAllTables(), false, minionInstanceTag); } /** - * Public API to schedule tasks (all task types) for all tables in given database. + * Schedules tasks (all task types) for all tables in the given database. * It might be called from the non-leader controller. * Returns a map from the task type to the list of tasks scheduled. */ - public synchronized Map> scheduleTasksForDatabase(@Nullable String database, + public synchronized Map> scheduleAllTasksForDatabase(@Nullable String database, @Nullable String minionInstanceTag) { return scheduleTasks(_pinotHelixResourceManager.getAllTables(database), false, minionInstanceTag); } + /** + * Schedules tasks (all task types) for the given table. + * It might be called from the non-leader controller. + * Returns a map from the task type to the list of tasks scheduled. 
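+   * For example, scheduleAllTasksForTable("myTable_OFFLINE", null) returns a map like
+   * {"MergeRollupTask": ["Task_MergeRollupTask_1713500000000"]} (illustrative task name only).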
+   */
+  public synchronized Map<String, List<String>> scheduleAllTasksForTable(String tableNameWithType,
+      @Nullable String minionInstanceTag) {
+    return scheduleTasks(List.of(tableNameWithType), false, minionInstanceTag);
+  }
+
+  /**
+   * Schedules task for the given task type for all tables.
+   * It might be called from the non-leader controller.
+   * Returns a list of tasks scheduled, or {@code null} if no task is scheduled.
+   */
+  @Nullable
+  public synchronized List<String> scheduleTaskForAllTables(String taskType, @Nullable String minionInstanceTag) {
+    return scheduleTask(taskType, _pinotHelixResourceManager.getAllTables(), minionInstanceTag);
+  }
+
+  /**
+   * Schedules task for the given task type for all tables in the given database.
+   * It might be called from the non-leader controller.
+   * Returns a list of tasks scheduled, or {@code null} if no task is scheduled.
+   */
+  @Nullable
+  public synchronized List<String> scheduleTaskForDatabase(String taskType, @Nullable String database,
+      @Nullable String minionInstanceTag) {
+    return scheduleTask(taskType, _pinotHelixResourceManager.getAllTables(database), minionInstanceTag);
+  }
+
+  /**
+   * Schedules task for the given task type for the given table.
+   * It might be called from the non-leader controller.
+   * Returns a list of tasks scheduled, or {@code null} if no task is scheduled.
+   */
+  @Nullable
+  public synchronized List<String> scheduleTaskForTable(String taskType, String tableNameWithType,
+      @Nullable String minionInstanceTag) {
+    return scheduleTask(taskType, List.of(tableNameWithType), minionInstanceTag);
+  }
+
   /**
    * Helper method to schedule tasks (all task types) for the given tables that have the tasks enabled. Returns a map
    * from the task type to the list of the tasks scheduled.
    */
-  private synchronized Map<String, List<String>> scheduleTasks(List<String> tableNamesWithType,
-      boolean isLeader, @Nullable String minionInstanceTag) {
+  private synchronized Map<String, List<String>> scheduleTasks(List<String> tableNamesWithType, boolean isLeader,
+      @Nullable String minionInstanceTag) {
     _controllerMetrics.addMeteredGlobalValue(ControllerMeter.NUMBER_TIMES_SCHEDULE_TASKS_CALLED, 1L);
 
     // Scan all table configs to get the tables with tasks enabled
@@ -541,6 +582,27 @@ private synchronized Map<String, List<String>> scheduleTasks(List<String> tableN
     return tasksScheduled;
   }
 
+  @Nullable
+  private synchronized List<String> scheduleTask(String taskType, List<String> tables,
+      @Nullable String minionInstanceTag) {
+    PinotTaskGenerator taskGenerator = _taskGeneratorRegistry.getTaskGenerator(taskType);
+    Preconditions.checkState(taskGenerator != null, "Task type: %s is not registered", taskType);
+
+    // Scan all table configs to get the tables with task enabled
+    List<TableConfig> enabledTableConfigs = new ArrayList<>();
+    for (String tableNameWithType : tables) {
+      TableConfig tableConfig = _pinotHelixResourceManager.getTableConfig(tableNameWithType);
+      if (tableConfig != null && tableConfig.getTaskConfig() != null && tableConfig.getTaskConfig()
+          .isTaskTypeEnabled(taskType)) {
+        enabledTableConfigs.add(tableConfig);
+      }
+    }
+
+    _helixTaskResourceManager.ensureTaskQueueExists(taskType);
+    addTaskTypeMetricsUpdaterIfNeeded(taskType);
+    return scheduleTask(taskGenerator, enabledTableConfigs, false, minionInstanceTag);
+  }
+
   /**
    * Helper method to schedule task with the given task generator for the given tables that have the task enabled.
    * Returns the list of task names, or {@code null} if no task is scheduled.
@@ -554,8 +616,8 @@ private List scheduleTask(PinotTaskGenerator taskGenerator, List presentTaskConfig = minionInstanceTagToTaskConfigs.computeIfAbsent(minionInstanceTag, k -> new ArrayList<>()); taskGenerator.generateTasks(List.of(tableConfig), presentTaskConfig); @@ -622,86 +684,6 @@ private List scheduleTask(PinotTaskGenerator taskGenerator, List> scheduleTasks(String tableNameWithType) { - return scheduleTasks(Collections.singletonList(tableNameWithType), false, null); - } - - /** - * Public API to schedule tasks (all task types) for the given table on a specific instance tag. - * It might be called from the non-leader controller. Returns a map from the task type to the list of tasks scheduled. - */ - public synchronized Map> scheduleTasks(String tableNameWithType, - @Nullable String minionInstanceTag) { - return scheduleTasks(Collections.singletonList(tableNameWithType), false, minionInstanceTag); - } - - /** - * Public API to schedule task for the given task type in all databases. - * It might be called from the non-leader controller. - * Returns the list of task names, or {@code null} if no task is scheduled. - */ - @Nullable - public synchronized List scheduleTask(String taskType, @Nullable String minionInstanceTag) { - return scheduleTask(taskType, _pinotHelixResourceManager.getAllTables(), minionInstanceTag); - } - - /** - * Public API to schedule task for the given task type in given database. - * It might be called from the non-leader controller. - * Returns the list of task name, or {@code null} if no task is scheduled. - */ - @Nullable - public synchronized List scheduleTaskForDatabase(String taskType, @Nullable String database, - @Nullable String minionInstanceTag) { - return scheduleTask(taskType, _pinotHelixResourceManager.getAllTables(database), minionInstanceTag); - } - - @Nullable - private List scheduleTask(String taskType, List tables, @Nullable String minionInstanceTag) { - PinotTaskGenerator taskGenerator = _taskGeneratorRegistry.getTaskGenerator(taskType); - Preconditions.checkState(taskGenerator != null, "Task type: %s is not registered", taskType); - - // Scan all table configs to get the tables with task enabled - List enabledTableConfigs = new ArrayList<>(); - for (String tableNameWithType : tables) { - TableConfig tableConfig = _pinotHelixResourceManager.getTableConfig(tableNameWithType); - if (tableConfig != null && tableConfig.getTaskConfig() != null && tableConfig.getTaskConfig() - .isTaskTypeEnabled(taskType)) { - enabledTableConfigs.add(tableConfig); - } - } - - _helixTaskResourceManager.ensureTaskQueueExists(taskType); - addTaskTypeMetricsUpdaterIfNeeded(taskType); - return scheduleTask(taskGenerator, enabledTableConfigs, false, minionInstanceTag); - } - - /** - * Public API to schedule task for the given task type on the given table. It might be called from the non-leader - * controller. Returns the list of task names, or {@code null} if no task is scheduled. 
- */ - @Nullable - public synchronized List scheduleTask(String taskType, String tableNameWithType, - @Nullable String minionInstanceTag) { - PinotTaskGenerator taskGenerator = _taskGeneratorRegistry.getTaskGenerator(taskType); - Preconditions.checkState(taskGenerator != null, "Task type: %s is not registered", taskType); - - TableConfig tableConfig = _pinotHelixResourceManager.getTableConfig(tableNameWithType); - Preconditions.checkState(tableConfig != null, "Failed to find table config for table: %s", tableNameWithType); - - Preconditions.checkState( - tableConfig.getTaskConfig() != null && tableConfig.getTaskConfig().isTaskTypeEnabled(taskType), - "Table: %s does not have task type: %s enabled", tableNameWithType, taskType); - - _helixTaskResourceManager.ensureTaskQueueExists(taskType); - addTaskTypeMetricsUpdaterIfNeeded(taskType); - return scheduleTask(taskGenerator, Collections.singletonList(tableConfig), false, minionInstanceTag); - } - @Override protected void processTables(List tableNamesWithType, Properties taskProperties) { scheduleTasks(tableNamesWithType, true, null); diff --git a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/MergeRollupMinionClusterIntegrationTest.java b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/MergeRollupMinionClusterIntegrationTest.java index b655416c878c..c5be600661f3 100644 --- a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/MergeRollupMinionClusterIntegrationTest.java +++ b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/MergeRollupMinionClusterIntegrationTest.java @@ -139,14 +139,14 @@ public void setUp() List avroFiles = unpackAvroData(_tempDir); // Create and upload segments - ClusterIntegrationTestUtils - .buildSegmentsFromAvro(avroFiles, singleLevelConcatTableConfig, schema, 0, _segmentDir1, _tarDir1); + ClusterIntegrationTestUtils.buildSegmentsFromAvro(avroFiles, singleLevelConcatTableConfig, schema, 0, _segmentDir1, + _tarDir1); buildSegmentsFromAvroWithPostfix(avroFiles, singleLevelRollupTableConfig, schema, 0, _segmentDir2, _tarDir2, "1"); buildSegmentsFromAvroWithPostfix(avroFiles, singleLevelRollupTableConfig, schema, 0, _segmentDir2, _tarDir2, "2"); - ClusterIntegrationTestUtils - .buildSegmentsFromAvro(avroFiles, multiLevelConcatTableConfig, schema, 0, _segmentDir3, _tarDir3); - ClusterIntegrationTestUtils - .buildSegmentsFromAvro(avroFiles, singleLevelConcatMetadataTableConfig, schema, 0, _segmentDir4, _tarDir4); + ClusterIntegrationTestUtils.buildSegmentsFromAvro(avroFiles, multiLevelConcatTableConfig, schema, 0, _segmentDir3, + _tarDir3); + ClusterIntegrationTestUtils.buildSegmentsFromAvro(avroFiles, singleLevelConcatMetadataTableConfig, schema, 0, + _segmentDir4, _tarDir4); uploadSegments(SINGLE_LEVEL_CONCAT_TEST_TABLE, _tarDir1); uploadSegments(SINGLE_LEVEL_ROLLUP_TEST_TABLE, _tarDir2); uploadSegments(MULTI_LEVEL_CONCAT_TEST_TABLE, _tarDir3); @@ -160,8 +160,8 @@ public void setUp() schema.setSchemaName(MULTI_LEVEL_CONCAT_PROCESS_ALL_REALTIME_TABLE); addSchema(schema); TableConfig singleLevelConcatProcessAllRealtimeTableConfig = - createRealtimeTableConfigWithProcessAllMode(avroFiles.get(0), - MULTI_LEVEL_CONCAT_PROCESS_ALL_REALTIME_TABLE, PROCESS_ALL_MODE_KAFKA_TOPIC); + createRealtimeTableConfigWithProcessAllMode(avroFiles.get(0), MULTI_LEVEL_CONCAT_PROCESS_ALL_REALTIME_TABLE, + PROCESS_ALL_MODE_KAFKA_TOPIC); addTableConfig(singleLevelConcatProcessAllRealtimeTableConfig); // Push data into Kafka @@ -172,9 +172,8 @@ 
PROCESS_ALL_MODE_KAFKA_TOPIC, getMaxNumKafkaMessagesPerBatch(), getKafkaMessageH ClusterIntegrationTestUtils.pushAvroIntoKafka(avroFiles.subList(0, 3), "localhost:" + getKafkaPort(), PROCESS_ALL_MODE_KAFKA_TOPIC, getMaxNumKafkaMessagesPerBatch(), getKafkaMessageHeader(), getPartitionColumn(), injectTombstones()); - ClusterIntegrationTestUtils - .buildSegmentsFromAvro(avroFiles.subList(3, 9), singleLevelConcatProcessAllRealtimeTableConfig, schema, 0, - _segmentDir5, _tarDir5); + ClusterIntegrationTestUtils.buildSegmentsFromAvro(avroFiles.subList(3, 9), + singleLevelConcatProcessAllRealtimeTableConfig, schema, 0, _segmentDir5, _tarDir5); // Wait for all documents loaded waitForAllDocsLoaded(600_000L); @@ -216,14 +215,14 @@ private TableConfig createOfflineTableConfig(String tableName, TableTaskConfig t private TableConfig createOfflineTableConfig(String tableName, TableTaskConfig taskConfig, @Nullable SegmentPartitionConfig partitionConfig) { - return new TableConfigBuilder(TableType.OFFLINE).setTableName(tableName) - .setTimeColumnName(getTimeColumnName()).setSortedColumn(getSortedColumn()) - .setInvertedIndexColumns(getInvertedIndexColumns()).setNoDictionaryColumns(getNoDictionaryColumns()) - .setRangeIndexColumns(getRangeIndexColumns()).setBloomFilterColumns(getBloomFilterColumns()) - .setFieldConfigList(getFieldConfigs()).setNumReplicas(getNumReplicas()).setSegmentVersion(getSegmentVersion()) - .setLoadMode(getLoadMode()).setTaskConfig(taskConfig).setBrokerTenant(getBrokerTenant()) - .setServerTenant(getServerTenant()).setIngestionConfig(getIngestionConfig()) - .setNullHandlingEnabled(getNullHandlingEnabled()).setSegmentPartitionConfig(partitionConfig).build(); + return new TableConfigBuilder(TableType.OFFLINE).setTableName(tableName).setTimeColumnName(getTimeColumnName()) + .setSortedColumn(getSortedColumn()).setInvertedIndexColumns(getInvertedIndexColumns()) + .setNoDictionaryColumns(getNoDictionaryColumns()).setRangeIndexColumns(getRangeIndexColumns()) + .setBloomFilterColumns(getBloomFilterColumns()).setFieldConfigList(getFieldConfigs()) + .setNumReplicas(getNumReplicas()).setSegmentVersion(getSegmentVersion()).setLoadMode(getLoadMode()) + .setTaskConfig(taskConfig).setBrokerTenant(getBrokerTenant()).setServerTenant(getServerTenant()) + .setIngestionConfig(getIngestionConfig()).setNullHandlingEnabled(getNullHandlingEnabled()) + .setSegmentPartitionConfig(partitionConfig).build(); } protected TableConfig createRealtimeTableConfigWithProcessAllMode(File sampleAvroFile, String tableName, @@ -246,12 +245,12 @@ protected TableConfig createRealtimeTableConfigWithProcessAllMode(File sampleAvr tableTaskConfigs.put("ActualElapsedTime.aggregationType", "min"); tableTaskConfigs.put("WeatherDelay.aggregationType", "sum"); tableTaskConfigs.put("mode", "processAll"); - return new TableConfigBuilder(TableType.REALTIME).setTableName(tableName) - .setTimeColumnName(getTimeColumnName()).setSortedColumn(getSortedColumn()) - .setInvertedIndexColumns(getInvertedIndexColumns()).setNoDictionaryColumns(getNoDictionaryColumns()) - .setRangeIndexColumns(getRangeIndexColumns()).setBloomFilterColumns(getBloomFilterColumns()) - .setFieldConfigList(getFieldConfigs()).setNumReplicas(getNumReplicas()).setSegmentVersion(getSegmentVersion()) - .setLoadMode(getLoadMode()).setTaskConfig( + return new TableConfigBuilder(TableType.REALTIME).setTableName(tableName).setTimeColumnName(getTimeColumnName()) + .setSortedColumn(getSortedColumn()).setInvertedIndexColumns(getInvertedIndexColumns()) + 
.setNoDictionaryColumns(getNoDictionaryColumns()).setRangeIndexColumns(getRangeIndexColumns()) + .setBloomFilterColumns(getBloomFilterColumns()).setFieldConfigList(getFieldConfigs()) + .setNumReplicas(getNumReplicas()).setSegmentVersion(getSegmentVersion()).setLoadMode(getLoadMode()) + .setTaskConfig( new TableTaskConfig(Collections.singletonMap(MinionConstants.MergeRollupTask.TASK_TYPE, tableTaskConfigs))) .setBrokerTenant(getBrokerTenant()).setServerTenant(getServerTenant()).setIngestionConfig(getIngestionConfig()) .setQueryConfig(getQueryConfig()).setStreamConfigs(streamConfigs) @@ -411,17 +410,16 @@ public void testOfflineTableSingleLevelConcat() int numTasks = 0; List taskList; for (String tasks = - _taskManager.scheduleTasks(offlineTableName).get(MinionConstants.MergeRollupTask.TASK_TYPE).get(0); - tasks != null; - taskList = _taskManager.scheduleTasks(offlineTableName).get(MinionConstants.MergeRollupTask.TASK_TYPE), - tasks = taskList != null ? taskList.get(0) : null, - numTasks++) { + _taskManager.scheduleAllTasksForTable(offlineTableName, null).get(MinionConstants.MergeRollupTask.TASK_TYPE) + .get(0); tasks != null; taskList = + _taskManager.scheduleAllTasksForTable(offlineTableName, null).get(MinionConstants.MergeRollupTask.TASK_TYPE), + tasks = taskList != null ? taskList.get(0) : null, numTasks++) { assertEquals(_helixTaskResourceManager.getSubtaskConfigs(tasks).size(), expectedNumSubTasks[numTasks]); assertTrue(_helixTaskResourceManager.getTaskQueues() .contains(PinotHelixTaskResourceManager.getHelixJobQueueName(MinionConstants.MergeRollupTask.TASK_TYPE))); // Will not schedule task if there's incomplete task - assertNull( - _taskManager.scheduleTasks(offlineTableName).get(MinionConstants.RealtimeToOfflineSegmentsTask.TASK_TYPE)); + assertNull(_taskManager.scheduleAllTasksForTable(offlineTableName, null) + .get(MinionConstants.RealtimeToOfflineSegmentsTask.TASK_TYPE)); waitForTaskToComplete(); // Check watermark @@ -527,17 +525,16 @@ public void testOfflineTableSingleLevelConcatWithMetadataPush() int numTasks = 0; List taskList; for (String tasks = - _taskManager.scheduleTasks(offlineTableName).get(MinionConstants.MergeRollupTask.TASK_TYPE).get(0); - tasks != null; - taskList = _taskManager.scheduleTasks(offlineTableName).get(MinionConstants.MergeRollupTask.TASK_TYPE), - tasks = taskList != null ? taskList.get(0) : null, - numTasks++) { + _taskManager.scheduleAllTasksForTable(offlineTableName, null).get(MinionConstants.MergeRollupTask.TASK_TYPE) + .get(0); tasks != null; taskList = + _taskManager.scheduleAllTasksForTable(offlineTableName, null).get(MinionConstants.MergeRollupTask.TASK_TYPE), + tasks = taskList != null ? 
taskList.get(0) : null, numTasks++) { assertEquals(_helixTaskResourceManager.getSubtaskConfigs(tasks).size(), expectedNumSubTasks[numTasks]); assertTrue(_helixTaskResourceManager.getTaskQueues() .contains(PinotHelixTaskResourceManager.getHelixJobQueueName(MinionConstants.MergeRollupTask.TASK_TYPE))); // Will not schedule task if there's incomplete task - assertNull( - _taskManager.scheduleTasks(offlineTableName).get(MinionConstants.RealtimeToOfflineSegmentsTask.TASK_TYPE)); + assertNull(_taskManager.scheduleAllTasksForTable(offlineTableName, null) + .get(MinionConstants.RealtimeToOfflineSegmentsTask.TASK_TYPE)); waitForTaskToComplete(); // Check watermark @@ -636,17 +633,16 @@ public void testOfflineTableSingleLevelRollup() int numTasks = 0; List taskList; for (String tasks = - _taskManager.scheduleTasks(offlineTableName).get(MinionConstants.MergeRollupTask.TASK_TYPE).get(0); - tasks != null; - taskList = _taskManager.scheduleTasks(offlineTableName).get(MinionConstants.MergeRollupTask.TASK_TYPE), - tasks = taskList != null ? taskList.get(0) : null, - numTasks++) { + _taskManager.scheduleAllTasksForTable(offlineTableName, null).get(MinionConstants.MergeRollupTask.TASK_TYPE) + .get(0); tasks != null; taskList = + _taskManager.scheduleAllTasksForTable(offlineTableName, null).get(MinionConstants.MergeRollupTask.TASK_TYPE), + tasks = taskList != null ? taskList.get(0) : null, numTasks++) { assertEquals(_helixTaskResourceManager.getSubtaskConfigs(tasks).size(), 1); assertTrue(_helixTaskResourceManager.getTaskQueues() .contains(PinotHelixTaskResourceManager.getHelixJobQueueName(MinionConstants.MergeRollupTask.TASK_TYPE))); // Will not schedule task if there's incomplete task - assertNull( - _taskManager.scheduleTasks(offlineTableName).get(MinionConstants.RealtimeToOfflineSegmentsTask.TASK_TYPE)); + assertNull(_taskManager.scheduleAllTasksForTable(offlineTableName, null) + .get(MinionConstants.RealtimeToOfflineSegmentsTask.TASK_TYPE)); waitForTaskToComplete(); // Check watermark @@ -788,17 +784,16 @@ public void testOfflineTableMultiLevelConcat() int numTasks = 0; List taskList; for (String tasks = - _taskManager.scheduleTasks(offlineTableName).get(MinionConstants.MergeRollupTask.TASK_TYPE).get(0); - tasks != null; - taskList = _taskManager.scheduleTasks(offlineTableName).get(MinionConstants.MergeRollupTask.TASK_TYPE), - tasks = taskList != null ? taskList.get(0) : null, - numTasks++) { + _taskManager.scheduleAllTasksForTable(offlineTableName, null).get(MinionConstants.MergeRollupTask.TASK_TYPE) + .get(0); tasks != null; taskList = + _taskManager.scheduleAllTasksForTable(offlineTableName, null).get(MinionConstants.MergeRollupTask.TASK_TYPE), + tasks = taskList != null ? 
taskList.get(0) : null, numTasks++) { assertEquals(_helixTaskResourceManager.getSubtaskConfigs(tasks).size(), expectedNumSubTasks[numTasks]); assertTrue(_helixTaskResourceManager.getTaskQueues() .contains(PinotHelixTaskResourceManager.getHelixJobQueueName(MinionConstants.MergeRollupTask.TASK_TYPE))); // Will not schedule task if there's incomplete task - assertNull( - _taskManager.scheduleTasks(offlineTableName).get(MinionConstants.RealtimeToOfflineSegmentsTask.TASK_TYPE)); + assertNull(_taskManager.scheduleAllTasksForTable(offlineTableName, null) + .get(MinionConstants.RealtimeToOfflineSegmentsTask.TASK_TYPE)); waitForTaskToComplete(); // Check watermark @@ -859,8 +854,8 @@ protected void verifyTableDelete(String tableNameWithType) { return false; } // Check if the task metadata is cleaned up - if (MinionTaskMetadataUtils - .fetchTaskMetadata(_propertyStore, MinionConstants.MergeRollupTask.TASK_TYPE, tableNameWithType) != null) { + if (MinionTaskMetadataUtils.fetchTaskMetadata(_propertyStore, MinionConstants.MergeRollupTask.TASK_TYPE, + tableNameWithType) != null) { return false; } return true; @@ -921,18 +916,17 @@ public void testRealtimeTableSingleLevelConcat() int numTasks = 0; List taskList; for (String tasks = - taskManager.scheduleTasks(realtimeTableName).get(MinionConstants.MergeRollupTask.TASK_TYPE).get(0); - tasks != null; - taskList = taskManager.scheduleTasks(realtimeTableName).get(MinionConstants.MergeRollupTask.TASK_TYPE), - tasks = taskList != null ? taskList.get(0) : null, - numTasks++) { + taskManager.scheduleAllTasksForTable(realtimeTableName, null).get(MinionConstants.MergeRollupTask.TASK_TYPE) + .get(0); tasks != null; taskList = + taskManager.scheduleAllTasksForTable(realtimeTableName, null).get(MinionConstants.MergeRollupTask.TASK_TYPE), + tasks = taskList != null ? taskList.get(0) : null, numTasks++) { // assertEquals(helixTaskResourceManager.getSubtaskConfigs(tasks).size(), expectedNumSubTasks[numTasks]); assertTrue(helixTaskResourceManager.getTaskQueues() .contains(PinotHelixTaskResourceManager.getHelixJobQueueName(MinionConstants.MergeRollupTask.TASK_TYPE))); // Will not schedule task if there's incomplete task - assertNull( - taskManager.scheduleTasks(realtimeTableName).get(MinionConstants.RealtimeToOfflineSegmentsTask.TASK_TYPE)); + assertNull(taskManager.scheduleAllTasksForTable(realtimeTableName, null) + .get(MinionConstants.RealtimeToOfflineSegmentsTask.TASK_TYPE)); waitForTaskToComplete(); // Check watermark @@ -1027,17 +1021,16 @@ public void testRealtimeTableProcessAllModeMultiLevelConcat() int numTasks = 0; List taskList; for (String tasks = - taskManager.scheduleTasks(realtimeTableName).get(MinionConstants.MergeRollupTask.TASK_TYPE).get(0); - tasks != null; taskList = - taskManager.scheduleTasks(realtimeTableName).get(MinionConstants.MergeRollupTask.TASK_TYPE), - tasks = taskList != null ? taskList.get(0) : null, - numTasks++) { + taskManager.scheduleAllTasksForTable(realtimeTableName, null).get(MinionConstants.MergeRollupTask.TASK_TYPE) + .get(0); tasks != null; taskList = + taskManager.scheduleAllTasksForTable(realtimeTableName, null).get(MinionConstants.MergeRollupTask.TASK_TYPE), + tasks = taskList != null ? 
taskList.get(0) : null, numTasks++) { assertTrue(helixTaskResourceManager.getTaskQueues() .contains(PinotHelixTaskResourceManager.getHelixJobQueueName(MinionConstants.MergeRollupTask.TASK_TYPE))); // Will not schedule task if there's incomplete task - assertNull( - taskManager.scheduleTasks(realtimeTableName).get(MinionConstants.RealtimeToOfflineSegmentsTask.TASK_TYPE)); + assertNull(taskManager.scheduleAllTasksForTable(realtimeTableName, null) + .get(MinionConstants.RealtimeToOfflineSegmentsTask.TASK_TYPE)); waitForTaskToComplete(); // Check not using watermarks @@ -1069,11 +1062,10 @@ public void testRealtimeTableProcessAllModeMultiLevelConcat() waitForAllDocsLoaded(600_000L); for (String tasks = - taskManager.scheduleTasks(realtimeTableName).get(MinionConstants.MergeRollupTask.TASK_TYPE).get(0); - tasks != null; taskList = - taskManager.scheduleTasks(realtimeTableName).get(MinionConstants.MergeRollupTask.TASK_TYPE), - tasks = taskList != null ? taskList.get(0) : null, - numTasks++) { + taskManager.scheduleAllTasksForTable(realtimeTableName, null).get(MinionConstants.MergeRollupTask.TASK_TYPE) + .get(0); tasks != null; taskList = + taskManager.scheduleAllTasksForTable(realtimeTableName, null).get(MinionConstants.MergeRollupTask.TASK_TYPE), + tasks = taskList != null ? taskList.get(0) : null, numTasks++) { waitForTaskToComplete(); // Check metrics long numBucketsToProcess = MetricValueUtils.getGaugeValue(_controllerStarter.getControllerMetrics(), diff --git a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/PurgeMinionClusterIntegrationTest.java b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/PurgeMinionClusterIntegrationTest.java index c4ba131f6de3..da4e85696c7f 100644 --- a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/PurgeMinionClusterIntegrationTest.java +++ b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/PurgeMinionClusterIntegrationTest.java @@ -18,7 +18,6 @@ */ package org.apache.pinot.integration.tests; -import com.google.common.collect.ImmutableList; import java.io.File; import java.util.ArrayList; import java.util.Arrays; @@ -63,7 +62,6 @@ public class PurgeMinionClusterIntegrationTest extends BaseClusterIntegrationTes private static final String PURGE_DELTA_NOT_PASSED_TABLE = "myTable3"; private static final String PURGE_OLD_SEGMENTS_WITH_NEW_INDICES_TABLE = "myTable4"; - protected PinotHelixTaskResourceManager _helixTaskResourceManager; protected PinotTaskManager _taskManager; protected PinotHelixResourceManager _pinotHelixResourceManager; @@ -83,12 +81,8 @@ public void setUp() startBrokers(1); startServers(1); - List allTables = ImmutableList.of( - PURGE_FIRST_RUN_TABLE, - PURGE_DELTA_PASSED_TABLE, - PURGE_DELTA_NOT_PASSED_TABLE, - PURGE_OLD_SEGMENTS_WITH_NEW_INDICES_TABLE - ); + List allTables = List.of(PURGE_FIRST_RUN_TABLE, PURGE_DELTA_PASSED_TABLE, PURGE_DELTA_NOT_PASSED_TABLE, + PURGE_OLD_SEGMENTS_WITH_NEW_INDICES_TABLE); Schema schema = null; TableConfig tableConfig = null; for (String tableName : allTables) { @@ -152,12 +146,9 @@ public void setUp() private void setRecordPurger() { MinionContext minionContext = MinionContext.getInstance(); minionContext.setRecordPurgerFactory(rawTableName -> { - List tableNames = Arrays.asList( - PURGE_FIRST_RUN_TABLE, - PURGE_DELTA_PASSED_TABLE, - PURGE_DELTA_NOT_PASSED_TABLE, - PURGE_OLD_SEGMENTS_WITH_NEW_INDICES_TABLE - ); + List tableNames = + Arrays.asList(PURGE_FIRST_RUN_TABLE, PURGE_DELTA_PASSED_TABLE, 
PURGE_DELTA_NOT_PASSED_TABLE, + PURGE_OLD_SEGMENTS_WITH_NEW_INDICES_TABLE); if (tableNames.contains(rawTableName)) { return row -> row.getValue("ArrTime").equals(1); } else { @@ -195,11 +186,12 @@ public void testFirstRunPurge() // 5. Check the purge process itself by setting an expecting number of rows String offlineTableName = TableNameBuilder.OFFLINE.tableNameWithType(PURGE_FIRST_RUN_TABLE); - assertNotNull(_taskManager.scheduleTasks(offlineTableName).get(MinionConstants.PurgeTask.TASK_TYPE)); + assertNotNull( + _taskManager.scheduleAllTasksForTable(offlineTableName, null).get(MinionConstants.PurgeTask.TASK_TYPE)); assertTrue(_helixTaskResourceManager.getTaskQueues() .contains(PinotHelixTaskResourceManager.getHelixJobQueueName(MinionConstants.PurgeTask.TASK_TYPE))); // Will not schedule task if there's incomplete task - assertNull(_taskManager.scheduleTasks(offlineTableName).get(MinionConstants.PurgeTask.TASK_TYPE)); + assertNull(_taskManager.scheduleAllTasksForTable(offlineTableName, null).get(MinionConstants.PurgeTask.TASK_TYPE)); waitForTaskToComplete(); // Check that metadata contains expected values @@ -209,7 +201,7 @@ public void testFirstRunPurge() metadata.getCustomMap().containsKey(MinionConstants.PurgeTask.TASK_TYPE + MinionConstants.TASK_TIME_SUFFIX)); } // Should not generate new purge task as the last time purge is not greater than last + 1day (default purge delay) - assertNull(_taskManager.scheduleTasks(offlineTableName).get(MinionConstants.PurgeTask.TASK_TYPE)); + assertNull(_taskManager.scheduleAllTasksForTable(offlineTableName, null).get(MinionConstants.PurgeTask.TASK_TYPE)); // 52 rows with ArrTime = 1 // 115545 totals rows @@ -239,11 +231,12 @@ public void testPassedDelayTimePurge() // 5. Check the purge process itself by setting an expecting number of rows String offlineTableName = TableNameBuilder.OFFLINE.tableNameWithType(PURGE_DELTA_PASSED_TABLE); - assertNotNull(_taskManager.scheduleTasks(offlineTableName).get(MinionConstants.PurgeTask.TASK_TYPE)); + assertNotNull( + _taskManager.scheduleAllTasksForTable(offlineTableName, null).get(MinionConstants.PurgeTask.TASK_TYPE)); assertTrue(_helixTaskResourceManager.getTaskQueues() .contains(PinotHelixTaskResourceManager.getHelixJobQueueName(MinionConstants.PurgeTask.TASK_TYPE))); // Will not schedule task if there's incomplete task - assertNull(_taskManager.scheduleTasks(offlineTableName).get(MinionConstants.PurgeTask.TASK_TYPE)); + assertNull(_taskManager.scheduleAllTasksForTable(offlineTableName, null).get(MinionConstants.PurgeTask.TASK_TYPE)); waitForTaskToComplete(); // Check that metadata contains expected values @@ -255,7 +248,7 @@ public void testPassedDelayTimePurge() assertTrue(System.currentTimeMillis() - Long.parseLong(purgeTime) < 86400000); } // Should not generate new purge task as the last time purge is not greater than last + 1day (default purge delay) - assertNull(_taskManager.scheduleTasks(offlineTableName).get(MinionConstants.PurgeTask.TASK_TYPE)); + assertNull(_taskManager.scheduleAllTasksForTable(offlineTableName, null).get(MinionConstants.PurgeTask.TASK_TYPE)); // 52 rows with ArrTime = 1 // 115545 totals rows @@ -287,7 +280,7 @@ public void testNotPassedDelayTimePurge() String offlineTableName = TableNameBuilder.OFFLINE.tableNameWithType(PURGE_DELTA_NOT_PASSED_TABLE); // No task should be schedule as the delay is not passed - assertNull(_taskManager.scheduleTasks(offlineTableName).get(MinionConstants.PurgeTask.TASK_TYPE)); + assertNull(_taskManager.scheduleAllTasksForTable(offlineTableName, 
null).get(MinionConstants.PurgeTask.TASK_TYPE)); for (SegmentZKMetadata metadata : _pinotHelixResourceManager.getSegmentsZKMetadata(offlineTableName)) { // Check purge time String purgeTime = @@ -338,10 +331,11 @@ public void testPurgeOnOldSegmentsWithIndicesOnNewColumns() // schedule purge tasks String offlineTableName = TableNameBuilder.OFFLINE.tableNameWithType(PURGE_OLD_SEGMENTS_WITH_NEW_INDICES_TABLE); - assertNotNull(_taskManager.scheduleTasks(offlineTableName).get(MinionConstants.PurgeTask.TASK_TYPE)); + assertNotNull( + _taskManager.scheduleAllTasksForTable(offlineTableName, null).get(MinionConstants.PurgeTask.TASK_TYPE)); assertTrue(_helixTaskResourceManager.getTaskQueues() .contains(PinotHelixTaskResourceManager.getHelixJobQueueName(MinionConstants.PurgeTask.TASK_TYPE))); - assertNull(_taskManager.scheduleTasks(offlineTableName).get(MinionConstants.PurgeTask.TASK_TYPE)); + assertNull(_taskManager.scheduleAllTasksForTable(offlineTableName, null).get(MinionConstants.PurgeTask.TASK_TYPE)); waitForTaskToComplete(); // Check that metadata contains expected values diff --git a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/RealtimeToOfflineSegmentsMinionClusterIntegrationTest.java b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/RealtimeToOfflineSegmentsMinionClusterIntegrationTest.java index 043c654ef77a..e6c8ce270030 100644 --- a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/RealtimeToOfflineSegmentsMinionClusterIntegrationTest.java +++ b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/RealtimeToOfflineSegmentsMinionClusterIntegrationTest.java @@ -134,14 +134,14 @@ public void setUp() Map taskConfigsWithMetadata = new HashMap<>(); taskConfigsWithMetadata.put(BatchConfigProperties.OVERWRITE_OUTPUT, "true"); - taskConfigsWithMetadata.put( - BatchConfigProperties.PUSH_MODE, BatchConfigProperties.SegmentPushType.METADATA.toString()); + taskConfigsWithMetadata.put(BatchConfigProperties.PUSH_MODE, + BatchConfigProperties.SegmentPushType.METADATA.toString()); String tableWithMetadataPush = "myTable2"; schema.setSchemaName(tableWithMetadataPush); addSchema(schema); TableConfig realtimeMetadataTableConfig = createRealtimeTableConfig(avroFiles.get(0), tableWithMetadataPush, - new TableTaskConfig(Collections.singletonMap( - MinionConstants.RealtimeToOfflineSegmentsTask.TASK_TYPE, taskConfigsWithMetadata))); + new TableTaskConfig(Collections.singletonMap(MinionConstants.RealtimeToOfflineSegmentsTask.TASK_TYPE, + taskConfigsWithMetadata))); realtimeMetadataTableConfig.setIngestionConfig(ingestionConfig); realtimeMetadataTableConfig.setFieldConfigList(Collections.singletonList(tsFieldConfig)); addTableConfig(realtimeMetadataTableConfig); @@ -151,7 +151,6 @@ public void setUp() offlineMetadataTableConfig.setFieldConfigList(Collections.singletonList(tsFieldConfig)); addTableConfig(offlineMetadataTableConfig); - // Push data into Kafka pushAvroIntoKafka(avroFiles); @@ -163,7 +162,6 @@ public void setUp() waitForDocsLoaded(600_000L, true, tableWithMetadataPush); - _taskResourceManager = _controllerStarter.getHelixTaskResourceManager(); _taskManager = _controllerStarter.getTaskManager(); _realtimeTableName = TableNameBuilder.REALTIME.tableNameWithType(getTableName()); @@ -181,8 +179,8 @@ public void setUp() } _dataSmallestTimeMs = minSegmentTimeMs; - segmentsZKMetadata = _helixResourceManager.getSegmentsZKMetadata(_realtimeMetadataTableName); - minSegmentTimeMs = Long.MAX_VALUE; + segmentsZKMetadata 
= _helixResourceManager.getSegmentsZKMetadata(_realtimeMetadataTableName); + minSegmentTimeMs = Long.MAX_VALUE; for (SegmentZKMetadata segmentZKMetadata : segmentsZKMetadata) { if (segmentZKMetadata.getStatus() == CommonConstants.Segment.Realtime.Status.DONE) { minSegmentTimeMs = Math.min(minSegmentTimeMs, segmentZKMetadata.getStartTimeMs()); @@ -193,29 +191,28 @@ public void setUp() private TableConfig createOfflineTableConfig(String tableName, @Nullable TableTaskConfig taskConfig, @Nullable SegmentPartitionConfig partitionConfig) { - return new TableConfigBuilder(TableType.OFFLINE).setTableName(tableName) - .setTimeColumnName(getTimeColumnName()).setSortedColumn(getSortedColumn()) - .setInvertedIndexColumns(getInvertedIndexColumns()).setNoDictionaryColumns(getNoDictionaryColumns()) - .setRangeIndexColumns(getRangeIndexColumns()).setBloomFilterColumns(getBloomFilterColumns()) - .setFieldConfigList(getFieldConfigs()).setNumReplicas(getNumReplicas()).setSegmentVersion(getSegmentVersion()) - .setLoadMode(getLoadMode()).setTaskConfig(taskConfig).setBrokerTenant(getBrokerTenant()) - .setServerTenant(getServerTenant()).setIngestionConfig(getIngestionConfig()) - .setNullHandlingEnabled(getNullHandlingEnabled()).setSegmentPartitionConfig(partitionConfig).build(); + return new TableConfigBuilder(TableType.OFFLINE).setTableName(tableName).setTimeColumnName(getTimeColumnName()) + .setSortedColumn(getSortedColumn()).setInvertedIndexColumns(getInvertedIndexColumns()) + .setNoDictionaryColumns(getNoDictionaryColumns()).setRangeIndexColumns(getRangeIndexColumns()) + .setBloomFilterColumns(getBloomFilterColumns()).setFieldConfigList(getFieldConfigs()) + .setNumReplicas(getNumReplicas()).setSegmentVersion(getSegmentVersion()).setLoadMode(getLoadMode()) + .setTaskConfig(taskConfig).setBrokerTenant(getBrokerTenant()).setServerTenant(getServerTenant()) + .setIngestionConfig(getIngestionConfig()).setNullHandlingEnabled(getNullHandlingEnabled()) + .setSegmentPartitionConfig(partitionConfig).build(); } protected TableConfig createRealtimeTableConfig(File sampleAvroFile, String tableName, TableTaskConfig taskConfig) { AvroFileSchemaKafkaAvroMessageDecoder._avroFile = sampleAvroFile; - return new TableConfigBuilder(TableType.REALTIME).setTableName(tableName) - .setTimeColumnName(getTimeColumnName()).setSortedColumn(getSortedColumn()) - .setInvertedIndexColumns(getInvertedIndexColumns()).setNoDictionaryColumns(getNoDictionaryColumns()) - .setRangeIndexColumns(getRangeIndexColumns()).setBloomFilterColumns(getBloomFilterColumns()) - .setFieldConfigList(getFieldConfigs()).setNumReplicas(getNumReplicas()).setSegmentVersion(getSegmentVersion()) - .setLoadMode(getLoadMode()).setTaskConfig(taskConfig).setBrokerTenant(getBrokerTenant()) - .setServerTenant(getServerTenant()).setIngestionConfig(getIngestionConfig()).setQueryConfig(getQueryConfig()) - .setStreamConfigs(getStreamConfigs()).setNullHandlingEnabled(getNullHandlingEnabled()).build(); + return new TableConfigBuilder(TableType.REALTIME).setTableName(tableName).setTimeColumnName(getTimeColumnName()) + .setSortedColumn(getSortedColumn()).setInvertedIndexColumns(getInvertedIndexColumns()) + .setNoDictionaryColumns(getNoDictionaryColumns()).setRangeIndexColumns(getRangeIndexColumns()) + .setBloomFilterColumns(getBloomFilterColumns()).setFieldConfigList(getFieldConfigs()) + .setNumReplicas(getNumReplicas()).setSegmentVersion(getSegmentVersion()).setLoadMode(getLoadMode()) + .setTaskConfig(taskConfig).setBrokerTenant(getBrokerTenant()).setServerTenant(getServerTenant()) 
+ .setIngestionConfig(getIngestionConfig()).setQueryConfig(getQueryConfig()).setStreamConfigs(getStreamConfigs()) + .setNullHandlingEnabled(getNullHandlingEnabled()).build(); } - @Test public void testRealtimeToOfflineSegmentsTask() throws Exception { @@ -234,12 +231,12 @@ public void testRealtimeToOfflineSegmentsTask() long expectedWatermark = _dataSmallestTimeMs + 86400000; for (int i = 0; i < 3; i++) { // Schedule task - assertNotNull(_taskManager.scheduleTasks(_realtimeTableName) + assertNotNull(_taskManager.scheduleAllTasksForTable(_realtimeTableName, null) .get(MinionConstants.RealtimeToOfflineSegmentsTask.TASK_TYPE)); assertTrue(_taskResourceManager.getTaskQueues().contains( PinotHelixTaskResourceManager.getHelixJobQueueName(MinionConstants.RealtimeToOfflineSegmentsTask.TASK_TYPE))); // Should not generate more tasks - assertNull(_taskManager.scheduleTasks(_realtimeTableName) + assertNull(_taskManager.scheduleAllTasksForTable(_realtimeTableName, null) .get(MinionConstants.RealtimeToOfflineSegmentsTask.TASK_TYPE)); // Wait at most 600 seconds for all tasks COMPLETED @@ -286,12 +283,12 @@ public void testRealtimeToOfflineSegmentsMetadataPushTask() _taskManager.cleanUpTask(); for (int i = 0; i < 3; i++) { // Schedule task - assertNotNull(_taskManager.scheduleTasks(_realtimeMetadataTableName) + assertNotNull(_taskManager.scheduleAllTasksForTable(_realtimeMetadataTableName, null) .get(MinionConstants.RealtimeToOfflineSegmentsTask.TASK_TYPE)); assertTrue(_taskResourceManager.getTaskQueues().contains( PinotHelixTaskResourceManager.getHelixJobQueueName(MinionConstants.RealtimeToOfflineSegmentsTask.TASK_TYPE))); // Should not generate more tasks - assertNull(_taskManager.scheduleTasks(_realtimeMetadataTableName) + assertNull(_taskManager.scheduleAllTasksForTable(_realtimeMetadataTableName, null) .get(MinionConstants.RealtimeToOfflineSegmentsTask.TASK_TYPE)); // Wait at most 600 seconds for all tasks COMPLETED diff --git a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/SimpleMinionClusterIntegrationTest.java b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/SimpleMinionClusterIntegrationTest.java index 241c1c0876ff..78aa4d1c2470 100644 --- a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/SimpleMinionClusterIntegrationTest.java +++ b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/SimpleMinionClusterIntegrationTest.java @@ -87,8 +87,8 @@ public void setUp() properties.put(TASK_TYPE + MinionConstants.MAX_ATTEMPTS_PER_TASK_KEY_SUFFIX, "2"); helixResourceManager.getHelixAdmin().setConfig( - new HelixConfigScopeBuilder(HelixConfigScope.ConfigScopeProperty.CLUSTER) - .forCluster(helixResourceManager.getHelixClusterName()).build(), properties); + new HelixConfigScopeBuilder(HelixConfigScope.ConfigScopeProperty.CLUSTER).forCluster( + helixResourceManager.getHelixClusterName()).build(), properties); // Add 3 offline tables, where 2 of them have TestTask enabled addDummySchema(TABLE_NAME_1); @@ -136,7 +136,7 @@ public void testStopResumeDeleteTaskQueue() { assertEquals(_helixTaskResourceManager.getTasksInProgress(TASK_TYPE).size(), 0); // Should create the task queues and generate a task in the same minion instance - List task1 = _taskManager.scheduleTasks().get(TASK_TYPE); + List task1 = _taskManager.scheduleAllTasksForAllTables(null).get(TASK_TYPE); assertNotNull(task1); assertEquals(task1.size(), 1); assertTrue(_helixTaskResourceManager.getTaskQueues() @@ -150,7 +150,7 @@ public void 
testStopResumeDeleteTaskQueue() { verifyTaskCount(task1.get(0), 0, 1, 1, 2); // Should generate one more task, with two sub-tasks. Both of these sub-tasks will wait // since we have one minion instance that is still running one of the sub-tasks. - List task2 = _taskManager.scheduleTask(TASK_TYPE, null); + List task2 = _taskManager.scheduleTaskForAllTables(TASK_TYPE, null); assertNotNull(task2); assertEquals(task2.size(), 1); assertTrue(_helixTaskResourceManager.getTasksInProgress(TASK_TYPE).contains(task2.get(0))); @@ -159,8 +159,8 @@ public void testStopResumeDeleteTaskQueue() { // Should not generate more tasks since SimpleMinionClusterIntegrationTests.NUM_TASKS is 2. // Our test task generator does not generate if there are already this many sub-tasks in the // running+waiting count already. - assertNull(_taskManager.scheduleTasks().get(TASK_TYPE)); - assertNull(_taskManager.scheduleTask(TASK_TYPE, null)); + assertNull(_taskManager.scheduleAllTasksForAllTables(null).get(TASK_TYPE)); + assertNull(_taskManager.scheduleTaskForAllTables(TASK_TYPE, null)); // Wait at most 60 seconds for all tasks IN_PROGRESS TestUtils.waitForCondition(input -> { @@ -183,13 +183,12 @@ public void testStopResumeDeleteTaskQueue() { String inProgressGauge = TASK_TYPE + "." + TaskState.IN_PROGRESS; String stoppedGauge = TASK_TYPE + "." + TaskState.STOPPED; String completedGauge = TASK_TYPE + "." + TaskState.COMPLETED; - TestUtils.waitForCondition( - input -> MetricValueUtils.getGlobalGaugeValue(controllerMetrics, inProgressGauge, ControllerGauge.TASK_STATUS) + TestUtils.waitForCondition(input -> + MetricValueUtils.getGlobalGaugeValue(controllerMetrics, inProgressGauge, ControllerGauge.TASK_STATUS) == NUM_TASKS && MetricValueUtils.getGlobalGaugeValue(controllerMetrics, stoppedGauge, ControllerGauge.TASK_STATUS) == 0 && MetricValueUtils.getGlobalGaugeValue(controllerMetrics, completedGauge, ControllerGauge.TASK_STATUS) - == 0, - ZK_CALLBACK_TIMEOUT_MS, "Failed to update the controller gauges"); + == 0, ZK_CALLBACK_TIMEOUT_MS, "Failed to update the controller gauges"); // Stop the task queue _helixTaskResourceManager.stopTaskQueue(TASK_TYPE); @@ -211,14 +210,12 @@ public void testStopResumeDeleteTaskQueue() { }, STATE_TRANSITION_TIMEOUT_MS, "Failed to get all tasks STOPPED"); // Wait at most 30 seconds for ZK callback to update the controller gauges - TestUtils.waitForCondition( - input -> MetricValueUtils.getGlobalGaugeValue(controllerMetrics, inProgressGauge, ControllerGauge.TASK_STATUS) - == 0 + TestUtils.waitForCondition(input -> + MetricValueUtils.getGlobalGaugeValue(controllerMetrics, inProgressGauge, ControllerGauge.TASK_STATUS) == 0 && MetricValueUtils.getGlobalGaugeValue(controllerMetrics, stoppedGauge, ControllerGauge.TASK_STATUS) == NUM_TASKS && MetricValueUtils.getGlobalGaugeValue(controllerMetrics, completedGauge, ControllerGauge.TASK_STATUS) - == 0, - ZK_CALLBACK_TIMEOUT_MS, "Failed to update the controller gauges"); + == 0, ZK_CALLBACK_TIMEOUT_MS, "Failed to update the controller gauges"); // Task deletion requires the task queue to be stopped, // so deleting task1 here before resuming the task queue. 
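A quick aside on the two scheduling entry points this test exercises — a minimal sketch, assuming the returned Map is keyed by task type and holds generated task names, and that the null argument is the optional minion-instance-tag filter (both assumptions; only the call sites are visible in this patch):

    // hypothetical test snippet; TASK_TYPE and _taskManager as in the test above
    Map<String, List<String>> byType = _taskManager.scheduleAllTasksForAllTables(null);  // all task types
    List<String> tasks = byType.get(TASK_TYPE);                                // null when nothing was generated
    List<String> more = _taskManager.scheduleTaskForAllTables(TASK_TYPE, null);  // a single task type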
@@ -247,13 +244,11 @@ public void testStopResumeDeleteTaskQueue() { }, STATE_TRANSITION_TIMEOUT_MS, "Failed to get all tasks COMPLETED"); // Wait at most 30 seconds for ZK callback to update the controller gauges - TestUtils.waitForCondition( - input -> MetricValueUtils.getGlobalGaugeValue(controllerMetrics, inProgressGauge, ControllerGauge.TASK_STATUS) - == 0 + TestUtils.waitForCondition(input -> + MetricValueUtils.getGlobalGaugeValue(controllerMetrics, inProgressGauge, ControllerGauge.TASK_STATUS) == 0 && MetricValueUtils.getGlobalGaugeValue(controllerMetrics, stoppedGauge, ControllerGauge.TASK_STATUS) == 0 - && MetricValueUtils.getGlobalGaugeValue(controllerMetrics, completedGauge, ControllerGauge.TASK_STATUS) - == (NUM_TASKS - 1), - ZK_CALLBACK_TIMEOUT_MS, "Failed to update the controller gauges"); + && MetricValueUtils.getGlobalGaugeValue(controllerMetrics, completedGauge, ControllerGauge.TASK_STATUS) == ( + NUM_TASKS - 1), ZK_CALLBACK_TIMEOUT_MS, "Failed to update the controller gauges"); // Delete the task queue _helixTaskResourceManager.deleteTaskQueue(TASK_TYPE, false); @@ -263,13 +258,11 @@ public void testStopResumeDeleteTaskQueue() { STATE_TRANSITION_TIMEOUT_MS, "Failed to delete the task queue"); // Wait at most 30 seconds for ZK callback to update the controller gauges - TestUtils.waitForCondition( - input -> MetricValueUtils.getGlobalGaugeValue(controllerMetrics, inProgressGauge, ControllerGauge.TASK_STATUS) - == 0 + TestUtils.waitForCondition(input -> + MetricValueUtils.getGlobalGaugeValue(controllerMetrics, inProgressGauge, ControllerGauge.TASK_STATUS) == 0 && MetricValueUtils.getGlobalGaugeValue(controllerMetrics, stoppedGauge, ControllerGauge.TASK_STATUS) == 0 && MetricValueUtils.getGlobalGaugeValue(controllerMetrics, completedGauge, ControllerGauge.TASK_STATUS) - == 0, - ZK_CALLBACK_TIMEOUT_MS, "Failed to update the controller gauges"); + == 0, ZK_CALLBACK_TIMEOUT_MS, "Failed to update the controller gauges"); } @AfterClass diff --git a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/TlsIntegrationTest.java b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/TlsIntegrationTest.java index d292ef4c9bd8..5058fd4b759f 100644 --- a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/TlsIntegrationTest.java +++ b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/TlsIntegrationTest.java @@ -489,7 +489,7 @@ public void testRealtimeSegmentUploadDownload() Assert.assertTrue(resultBeforeOffline.getResultSet(0).getLong(0) > 0); // schedule offline segment generation - Assert.assertNotNull(_controllerStarter.getTaskManager().scheduleTasks()); + Assert.assertNotNull(_controllerStarter.getTaskManager().scheduleAllTasksForAllTables(null)); // wait for offline segments JsonNode offlineSegments = TestUtils.waitForResult(() -> { diff --git a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/UpsertTableIntegrationTest.java b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/UpsertTableIntegrationTest.java index 238d515b54b8..19c3ac61ff9d 100644 --- a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/UpsertTableIntegrationTest.java +++ b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/UpsertTableIntegrationTest.java @@ -471,8 +471,8 @@ public void testUpsertCompaction() waitForAllDocsLoaded(tableName, 600_000L, 1000); assertEquals(getScore(tableName), 3692); waitForNumQueriedSegmentsToConverge(tableName, 
10_000L, 3); - - assertNotNull(_taskManager.scheduleTasks(TableNameBuilder.forType(TableType.REALTIME).tableNameWithType(tableName)) + String realtimeTableName = TableNameBuilder.forType(TableType.REALTIME).tableNameWithType(tableName); + assertNotNull(_taskManager.scheduleAllTasksForTable(realtimeTableName, null) .get(MinionConstants.UpsertCompactionTask.TASK_TYPE)); waitForTaskToComplete(); waitForAllDocsLoaded(tableName, 600_000L, 3); @@ -501,8 +501,8 @@ public void testUpsertCompactionDeletesSegments() waitForAllDocsLoaded(tableName, 600_000L, 2000); assertEquals(getScore(tableName), 3692); waitForNumQueriedSegmentsToConverge(tableName, 10_000L, 5); - - assertNotNull(_taskManager.scheduleTasks(TableNameBuilder.forType(TableType.REALTIME).tableNameWithType(tableName)) + String realtimeTableName = TableNameBuilder.forType(TableType.REALTIME).tableNameWithType(tableName); + assertNotNull(_taskManager.scheduleAllTasksForTable(realtimeTableName, null) .get(MinionConstants.UpsertCompactionTask.TASK_TYPE)); waitForTaskToComplete(); waitForAllDocsLoaded(tableName, 600_000L, 3); @@ -546,7 +546,8 @@ public void testUpsertCompactionWithSoftDelete() // Run segment compaction. This time, we expect that the deleting rows are still there because they are // as part of the consuming segment - assertNotNull(_taskManager.scheduleTasks(TableNameBuilder.forType(TableType.REALTIME).tableNameWithType(tableName)) + String realtimeTableName = TableNameBuilder.forType(TableType.REALTIME).tableNameWithType(tableName); + assertNotNull(_taskManager.scheduleAllTasksForTable(realtimeTableName, null) .get(MinionConstants.UpsertCompactionTask.TASK_TYPE)); waitForTaskToComplete(); waitForAllDocsLoaded(tableName, 600_000L, 3); @@ -563,7 +564,8 @@ public void testUpsertCompactionWithSoftDelete() assertEquals(getNumDeletedRows(tableName), 2); // Run segment compaction. 
     // This time, we expect that the deleting rows are cleaned up
-    assertNotNull(_taskManager.scheduleTasks(TableNameBuilder.forType(TableType.REALTIME).tableNameWithType(tableName))
+    realtimeTableName = TableNameBuilder.forType(TableType.REALTIME).tableNameWithType(tableName);
+    assertNotNull(_taskManager.scheduleAllTasksForTable(realtimeTableName, null)
         .get(MinionConstants.UpsertCompactionTask.TASK_TYPE));
     waitForTaskToComplete();
     waitForAllDocsLoaded(tableName, 600_000L, 3);
diff --git a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/UrlAuthRealtimeIntegrationTest.java b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/UrlAuthRealtimeIntegrationTest.java
index e8389b377f59..08aa9aee6afc 100644
--- a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/UrlAuthRealtimeIntegrationTest.java
+++ b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/UrlAuthRealtimeIntegrationTest.java
@@ -203,7 +203,7 @@ public void testSegmentUploadDownload()
     Assert.assertTrue(resultBeforeOffline.getResultSet(0).getLong(0) > 0);

     // schedule offline segment generation
-    Assert.assertNotNull(_controllerStarter.getTaskManager().scheduleTasks());
+    Assert.assertNotNull(_controllerStarter.getTaskManager().scheduleAllTasksForAllTables(null));

     // wait for offline segments
     JsonNode offlineSegments = TestUtils.waitForResult(() -> {
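Before the dependency bumps that follow, a compact recap of the migration applied throughout the integration tests above — a hedged sketch rather than a verbatim excerpt (the second argument is assumed to be the same optional filter that every call site above passes as null):

    // old: _taskManager.scheduleTasks(offlineTableName).get(MinionConstants.PurgeTask.TASK_TYPE)
    String offlineTableName = TableNameBuilder.OFFLINE.tableNameWithType(PURGE_FIRST_RUN_TABLE);
    List<String> purgeTasks = _taskManager.scheduleAllTasksForTable(offlineTableName, null)
        .get(MinionConstants.PurgeTask.TASK_TYPE);  // a null result means no task was scheduled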
From f83e466c4205844f87e4b4c77e063c901d69f94f Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 22 Apr 2024 09:12:13 -0700
Subject: [PATCH 12/58] Bump org.roaringbitmap:RoaringBitmap from 1.0.5 to 1.0.6 (#12985)

---
 pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pom.xml b/pom.xml
index 932dcdf04d85..1ba3aba56844 100644
--- a/pom.xml
+++ b/pom.xml
@@ -457,7 +457,7 @@
       org.roaringbitmap
       RoaringBitmap
-      1.0.5
+      1.0.6
       com.101tec
From 7b68aa369d9b8304860fd2b7679111a41eeadb46 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 22 Apr 2024 09:12:30 -0700
Subject: [PATCH 13/58] Bump aws.sdk.version from 2.25.34 to 2.25.35 (#12984)

---
 pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pom.xml b/pom.xml
index 1ba3aba56844..79df63429c83 100644
--- a/pom.xml
+++ b/pom.xml
@@ -172,7 +172,7 @@
     0.15.0
     0.4.4
     4.2.2
-    2.25.34
+    2.25.35
     2.12.7
     3.1.12
     7.10.1
From c9d513aef03774b5ea92020cf05b37f7ee7c72f2 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 22 Apr 2024 10:19:50 -0700
Subject: [PATCH 14/58] Bump org.apache.maven.plugins:maven-jar-plugin from 3.4.0 to 3.4.1 (#12983)

---
 pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pom.xml b/pom.xml
index 79df63429c83..a4a83c8b287a 100644
--- a/pom.xml
+++ b/pom.xml
@@ -130,7 +130,7 @@
     org.apache.pinot.shaded
-    3.4.0
+    3.4.1
     3.5.2
     none
From a852c8a42446cb2d798f67cc6bd133b603b7b99f Mon Sep 17 00:00:00 2001
From: Yash Mayya
Date: Mon, 22 Apr 2024 22:51:11 +0530
Subject: [PATCH 15/58] Update ORC and Hive dependency versions in the license binary file (#12986)

---
 LICENSE-binary | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/LICENSE-binary b/LICENSE-binary
index 9ffb5b19eb2c..5944cc2bba50 100644
--- a/LICENSE-binary
+++ b/LICENSE-binary
@@ -420,7 +420,7 @@ org.apache.helix:helix-core:1.3.1
 org.apache.helix:metadata-store-directory-common:1.3.1
 org.apache.helix:metrics-common:1.3.1
 org.apache.helix:zookeeper-api:1.3.1
-org.apache.hive:hive-storage-api:2.7.1
+org.apache.hive:hive-storage-api:2.8.1
 org.apache.httpcomponents:httpclient:4.5.14
 org.apache.httpcomponents:httpcore:4.4.13
 org.apache.httpcomponents:httpmime:4.5.13
@@ -438,8 +438,8 @@ org.apache.lucene:lucene-core:9.8.0
 org.apache.lucene:lucene-queries:9.8.0
 org.apache.lucene:lucene-queryparser:9.8.0
 org.apache.lucene:lucene-sandbox:9.8.0
-org.apache.orc:orc-core:1.5.9
-org.apache.orc:orc-shims:1.5.9
+org.apache.orc:orc-core:1.9.3
+org.apache.orc:orc-shims:1.9.3
 org.apache.parquet:parquet-avro:1.13.1
 org.apache.parquet:parquet-column:1.13.1
 org.apache.parquet:parquet-common:1.13.1
From a5c728f549fe1be5560a88080caaa2063def3d87 Mon Sep 17 00:00:00 2001
From: Xiang Fu
Date: Tue, 23 Apr 2024 02:58:07 +0800
Subject: [PATCH 16/58] Add back profile for shade (#12979)

---
 pinot-clients/pinot-jdbc-client/pom.xml          | 15 ++++++++++++++-
 pinot-common/pom.xml                             | 11 ++++++++++-
 pinot-core/pom.xml                               | 11 +++++++++++
 pinot-plugins/pinot-file-system/pinot-s3/pom.xml | 16 +++++++++++++++-
 .../pinot-stream-ingestion/pinot-kinesis/pom.xml | 16 +++++++++++++++-
 pinot-spi/pom.xml                                | 11 +++++++++++
 6 files changed, 76 insertions(+), 4 deletions(-)

diff --git a/pinot-clients/pinot-jdbc-client/pom.xml b/pinot-clients/pinot-jdbc-client/pom.xml
index 08c3880a483f..6ffc2fa19ae3 100644
--- a/pinot-clients/pinot-jdbc-client/pom.xml
+++ b/pinot-clients/pinot-jdbc-client/pom.xml
@@ -33,7 +33,6 @@
   https://pinot.apache.org/
   ${basedir}/../..
-  package
@@ -82,4 +81,18 @@
   jsr305
+
+
+      build-shaded-jar
+
+
+          skipShade
+          !true
+
+
+
+      package
+
+
+
diff --git a/pinot-common/pom.xml b/pinot-common/pom.xml
index 16e5b6d41bb9..2381f024d46b 100644
--- a/pinot-common/pom.xml
+++ b/pinot-common/pom.xml
@@ -33,7 +33,6 @@
   https://pinot.apache.org/
   ${basedir}/..
-  package
@@ -414,5 +413,15 @@
+
+
+      build-shaded-jar
+
+      true
+
+
+      package
+
+
diff --git a/pinot-core/pom.xml b/pinot-core/pom.xml
index 6aa29f7d8e2d..6d00a98d75ab 100644
--- a/pinot-core/pom.xml
+++ b/pinot-core/pom.xml
@@ -179,4 +179,15 @@
+
+
+      build-shaded-jar
+
+      false
+
+
+      package
+
+
+
diff --git a/pinot-plugins/pinot-file-system/pinot-s3/pom.xml b/pinot-plugins/pinot-file-system/pinot-s3/pom.xml
index bd650eadf229..0f4d2eea78f0 100644
--- a/pinot-plugins/pinot-file-system/pinot-s3/pom.xml
+++ b/pinot-plugins/pinot-file-system/pinot-s3/pom.xml
@@ -36,7 +36,6 @@
   ${basedir}/../../..
   2.12.2
-  package
@@ -65,4 +64,19 @@
   test
+
+
+
+      build-shaded-jar
+
+
+          skipShade
+          !true
+
+
+
+      package
+
+
+
diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/pom.xml b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/pom.xml
index 916a617504af..d58e3313ecaa 100644
--- a/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/pom.xml
+++ b/pinot-plugins/pinot-stream-ingestion/pinot-kinesis/pom.xml
@@ -35,7 +35,6 @@
   https://pinot.apache.org/
- package 1.0.2 0.2.23 @@ -133,4 +132,19 @@ + + + + build-shaded-jar + + + skipShade + !true + + + + package + + + diff --git a/pinot-spi/pom.xml b/pinot-spi/pom.xml index 43ae753a6d8d..baa63f0edc08 100644 --- a/pinot-spi/pom.xml +++ b/pinot-spi/pom.xml @@ -179,4 +179,15 @@ reflections + + + build-shaded-jar + + false + + + package + + + From 8e103205955e8af4fe286ebd6e97b30605724be2 Mon Sep 17 00:00:00 2001 From: Xiaobing <61892277+klsince@users.noreply.github.com> Date: Mon, 22 Apr 2024 16:41:46 -0700 Subject: [PATCH 17/58] handle absent segments so that catchup checker doesn't get stuck on them (#12883) * skip missing segments while checking freshness during server startup * get new consuming segments again if current consuming segments are committed by other servers --- .../starter/helix/BaseServerStarter.java | 71 ++++++---- ...reshnessBasedConsumptionStatusChecker.java | 7 +- ...ngestionBasedConsumptionStatusChecker.java | 128 ++++++++++++------ .../OffsetBasedConsumptionStatusChecker.java | 7 +- .../ConsumptionStatusCheckerTestUtils.java | 38 ++++++ ...nessBasedConsumptionStatusCheckerTest.java | 103 ++++++++++++-- ...fsetBasedConsumptionStatusCheckerTest.java | 32 +++-- 7 files changed, 288 insertions(+), 98 deletions(-) create mode 100644 pinot-server/src/test/java/org/apache/pinot/server/starter/helix/ConsumptionStatusCheckerTestUtils.java diff --git a/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/BaseServerStarter.java b/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/BaseServerStarter.java index 02c7b81ea5eb..78cd1a14e77d 100644 --- a/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/BaseServerStarter.java +++ b/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/BaseServerStarter.java @@ -25,6 +25,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; +import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; @@ -153,8 +154,8 @@ public void init(PinotConfiguration serverConf) _helixClusterName = _serverConf.getProperty(CommonConstants.Helix.CONFIG_OF_CLUSTER_NAME); ServiceStartableUtils.applyClusterConfig(_serverConf, _zkAddress, _helixClusterName, ServiceRole.SERVER); - PinotInsecureMode.setPinotInInsecureMode( - Boolean.valueOf(_serverConf.getProperty(CommonConstants.CONFIG_OF_PINOT_INSECURE_MODE, + PinotInsecureMode.setPinotInInsecureMode(Boolean.parseBoolean( + _serverConf.getProperty(CommonConstants.CONFIG_OF_PINOT_INSECURE_MODE, CommonConstants.DEFAULT_PINOT_INSECURE_MODE))); setupHelixSystemProperties(); @@ -275,8 +276,7 @@ private void registerServiceStatusHandler() { // collect all resources which have this instance in the ideal state List resourcesToMonitor = new ArrayList<>(); - - Set consumingSegments = new HashSet<>(); + Map> consumingSegments = new HashMap<>(); boolean checkRealtime = realtimeConsumptionCatchupWaitMs > 0; if (isFreshnessStatusCheckerEnabled && realtimeMinFreshnessMs <= 0) { LOGGER.warn("Realtime min freshness {} must be > 0. 
From 8e103205955e8af4fe286ebd6e97b30605724be2 Mon Sep 17 00:00:00 2001
From: Xiaobing <61892277+klsince@users.noreply.github.com>
Date: Mon, 22 Apr 2024 16:41:46 -0700
Subject: [PATCH 17/58] handle absent segments so that catchup checker doesn't get stuck on them (#12883)

* skip missing segments while checking freshness during server startup
* get new consuming segments again if current consuming segments are committed by other servers
---
 .../starter/helix/BaseServerStarter.java      |  71 ++++++----
 ...reshnessBasedConsumptionStatusChecker.java |   7 +-
 ...ngestionBasedConsumptionStatusChecker.java | 128 ++++++++++++------
 .../OffsetBasedConsumptionStatusChecker.java  |   7 +-
 .../ConsumptionStatusCheckerTestUtils.java    |  38 ++++++
 ...nessBasedConsumptionStatusCheckerTest.java | 103 ++++++++++++--
 ...fsetBasedConsumptionStatusCheckerTest.java |  32 +++--
 7 files changed, 288 insertions(+), 98 deletions(-)
 create mode 100644 pinot-server/src/test/java/org/apache/pinot/server/starter/helix/ConsumptionStatusCheckerTestUtils.java

diff --git a/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/BaseServerStarter.java b/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/BaseServerStarter.java
index 02c7b81ea5eb..78cd1a14e77d 100644
--- a/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/BaseServerStarter.java
+++ b/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/BaseServerStarter.java
@@ -25,6 +25,7 @@
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
+import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Iterator;
 import java.util.List;
@@ -153,8 +154,8 @@ public void init(PinotConfiguration serverConf)
     _helixClusterName = _serverConf.getProperty(CommonConstants.Helix.CONFIG_OF_CLUSTER_NAME);
     ServiceStartableUtils.applyClusterConfig(_serverConf, _zkAddress, _helixClusterName, ServiceRole.SERVER);

-    PinotInsecureMode.setPinotInInsecureMode(
-        Boolean.valueOf(_serverConf.getProperty(CommonConstants.CONFIG_OF_PINOT_INSECURE_MODE,
+    PinotInsecureMode.setPinotInInsecureMode(Boolean.parseBoolean(
+        _serverConf.getProperty(CommonConstants.CONFIG_OF_PINOT_INSECURE_MODE,
             CommonConstants.DEFAULT_PINOT_INSECURE_MODE)));

     setupHelixSystemProperties();
@@ -275,8 +276,7 @@ private void registerServiceStatusHandler() {
     // collect all resources which have this instance in the ideal state
     List<String> resourcesToMonitor = new ArrayList<>();
-
-    Set<String> consumingSegments = new HashSet<>();
+    Map<String, Set<String>> consumingSegments = new HashMap<>();
     boolean checkRealtime = realtimeConsumptionCatchupWaitMs > 0;
     if (isFreshnessStatusCheckerEnabled && realtimeMinFreshnessMs <= 0) {
       LOGGER.warn("Realtime min freshness {} must be > 0. Setting realtime min freshness to default {}.",
@@ -289,23 +289,22 @@ private void registerServiceStatusHandler() {
       if (!TableNameBuilder.isTableResource(resourceName)) {
         continue;
       }
-      // Only monitor enabled resources
       IdealState idealState = _helixAdmin.getResourceIdealState(_helixClusterName, resourceName);
-      if (idealState.isEnabled()) {
-
-        for (String partitionName : idealState.getPartitionSet()) {
-          if (idealState.getInstanceSet(partitionName).contains(_instanceId)) {
-            resourcesToMonitor.add(resourceName);
-            break;
-          }
+      if (idealState == null || !idealState.isEnabled()) {
+        continue;
+      }
+      for (String partitionName : idealState.getPartitionSet()) {
+        if (idealState.getInstanceSet(partitionName).contains(_instanceId)) {
+          resourcesToMonitor.add(resourceName);
+          break;
         }
-        if (checkRealtime && TableNameBuilder.isRealtimeTableResource(resourceName)) {
-          for (String partitionName : idealState.getPartitionSet()) {
-            if (StateModel.SegmentStateModel.CONSUMING.equals(
-                idealState.getInstanceStateMap(partitionName).get(_instanceId))) {
-              consumingSegments.add(partitionName);
-            }
+      }
+      if (checkRealtime && TableNameBuilder.isRealtimeTableResource(resourceName)) {
+        for (String partitionName : idealState.getPartitionSet()) {
+          if (StateModel.SegmentStateModel.CONSUMING.equals(
+              idealState.getInstanceStateMap(partitionName).get(_instanceId))) {
+            consumingSegments.computeIfAbsent(resourceName, k -> new HashSet<>()).add(partitionName);
           }
         }
       }
@@ -332,7 +331,7 @@
           realtimeMinFreshnessMs, idleTimeoutMs);
       FreshnessBasedConsumptionStatusChecker freshnessStatusChecker =
           new FreshnessBasedConsumptionStatusChecker(_serverInstance.getInstanceDataManager(), consumingSegments,
-              realtimeMinFreshnessMs, idleTimeoutMs);
+              this::getConsumingSegments, realtimeMinFreshnessMs, idleTimeoutMs);
       Supplier<Integer> getNumConsumingSegmentsNotReachedMinFreshness =
           freshnessStatusChecker::getNumConsumingSegmentsNotReachedIngestionCriteria;
       serviceStatusCallbackListBuilder.add(
@@ -341,7 +340,8 @@
     } else if (isOffsetBasedConsumptionStatusCheckerEnabled) {
       LOGGER.info("Setting up offset based status checker");
       OffsetBasedConsumptionStatusChecker consumptionStatusChecker =
-          new OffsetBasedConsumptionStatusChecker(_serverInstance.getInstanceDataManager(), consumingSegments);
+          new OffsetBasedConsumptionStatusChecker(_serverInstance.getInstanceDataManager(), consumingSegments,
+              this::getConsumingSegments);
       Supplier<Integer> getNumConsumingSegmentsNotReachedTheirLatestOffset =
           consumptionStatusChecker::getNumConsumingSegmentsNotReachedIngestionCriteria;
       serviceStatusCallbackListBuilder.add(
@@ -359,6 +359,22 @@
         new ServiceStatus.MultipleCallbackServiceStatusCallback(serviceStatusCallbackListBuilder.build()));
   }

+  @Nullable
+  private Set<String> getConsumingSegments(String realtimeTableName) {
+    IdealState idealState = _helixAdmin.getResourceIdealState(_helixClusterName, realtimeTableName);
+    if (idealState == null || !idealState.isEnabled()) {
+      return null;
+    }
+    Set<String> consumingSegments = new HashSet<>();
+    for (String partitionName : idealState.getPartitionSet()) {
+      if (StateModel.SegmentStateModel.CONSUMING.equals(
+          idealState.getInstanceStateMap(partitionName).get(_instanceId))) {
+        consumingSegments.add(partitionName);
+      }
+    }
+    return consumingSegments;
+  }
+
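To make the new wiring concrete — a minimal sketch, assuming only the constructor parameters visible in the hunks above (instance data manager, the per-table map of CONSUMING segments, the supplier, and the two timing knobs):

    // this::getConsumingSegments re-reads the ideal state, so segments that were
    // deleted or committed by another server cannot pin startup forever
    Map<String, Set<String>> consumingSegments = new HashMap<>();  // tableNameWithType -> segments
    FreshnessBasedConsumptionStatusChecker checker = new FreshnessBasedConsumptionStatusChecker(
        _serverInstance.getInstanceDataManager(), consumingSegments,
        this::getConsumingSegments, realtimeMinFreshnessMs, idleTimeoutMs);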
   private void updateInstanceConfigIfNeeded(ServerConf serverConf) {
     InstanceConfig instanceConfig = HelixHelper.getInstanceConfig(_helixManager, _instanceId);
@@ -518,12 +534,13 @@ private void startupServiceStatusCheck(long endTimeMs) {
       }
     }

-    boolean exitServerOnIncompleteStartup = _serverConf.getProperty(
-        Server.CONFIG_OF_EXIT_ON_SERVICE_STATUS_CHECK_FAILURE,
-        Server.DEFAULT_EXIT_ON_SERVICE_STATUS_CHECK_FAILURE);
+    boolean exitServerOnIncompleteStartup =
+        _serverConf.getProperty(Server.CONFIG_OF_EXIT_ON_SERVICE_STATUS_CHECK_FAILURE,
+            Server.DEFAULT_EXIT_ON_SERVICE_STATUS_CHECK_FAILURE);
     if (exitServerOnIncompleteStartup) {
-      String errorMessage = String.format("Service status %s has not turned GOOD within %dms: %s. Exiting server.",
-          serviceStatus, System.currentTimeMillis() - startTimeMs, ServiceStatus.getStatusDescription());
+      String errorMessage =
+          String.format("Service status %s has not turned GOOD within %dms: %s. Exiting server.", serviceStatus,
+              System.currentTimeMillis() - startTimeMs, ServiceStatus.getStatusDescription());
       throw new IllegalStateException(errorMessage);
     }
     LOGGER.warn("Service status has not turned GOOD within {}ms: {}", System.currentTimeMillis() - startTimeMs,
@@ -581,8 +598,8 @@ public void start()
     InstanceDataManager instanceDataManager = _serverInstance.getInstanceDataManager();
     instanceDataManager.setSupplierOfIsServerReadyToServeQueries(() -> _isServerReadyToServeQueries);
     // initialize the thread accountant for query killing
-    Tracing.ThreadAccountantOps
-        .initializeThreadAccountant(_serverConf.subset(CommonConstants.PINOT_QUERY_SCHEDULER_PREFIX), _instanceId);
+    Tracing.ThreadAccountantOps.initializeThreadAccountant(
+        _serverConf.subset(CommonConstants.PINOT_QUERY_SCHEDULER_PREFIX), _instanceId);
     initSegmentFetcher(_serverConf);
     StateModelFactory stateModelFactory =
         new SegmentOnlineOfflineStateModelFactory(_instanceId, instanceDataManager);
diff --git a/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/FreshnessBasedConsumptionStatusChecker.java b/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/FreshnessBasedConsumptionStatusChecker.java
index 6f3610e59623..77eac3832ed5 100644
--- a/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/FreshnessBasedConsumptionStatusChecker.java
+++ b/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/FreshnessBasedConsumptionStatusChecker.java
@@ -19,7 +19,9 @@

 package org.apache.pinot.server.starter.helix;

+import java.util.Map;
 import java.util.Set;
+import java.util.function.Function;
 import org.apache.pinot.core.data.manager.InstanceDataManager;
 import org.apache.pinot.core.data.manager.realtime.RealtimeSegmentDataManager;
 import org.apache.pinot.spi.stream.StreamPartitionMsgOffset;
@@ -37,9 +39,10 @@ public class FreshnessBasedConsumptionStatusChecker extends IngestionBasedConsum
   private final long _minFreshnessMs;
   private final long _idleTimeoutMs;

-  public FreshnessBasedConsumptionStatusChecker(InstanceDataManager instanceDataManager, Set<String> consumingSegments,
+  public FreshnessBasedConsumptionStatusChecker(InstanceDataManager instanceDataManager,
+      Map<String, Set<String>> consumingSegments, Function<String, Set<String>> consumingSegmentsSupplier,
       long minFreshnessMs, long idleTimeoutMs) {
-    super(instanceDataManager, consumingSegments);
+    super(instanceDataManager, consumingSegments, consumingSegmentsSupplier);
     _minFreshnessMs = minFreshnessMs;
     _idleTimeoutMs = idleTimeoutMs;
   }
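The freshness criterion itself lives in this subclass; as a rough sketch of the per-segment decision, assembled from the tests later in this patch (the method name and exact shape here are assumptions, not the implementation):

    // caught up if rows were ingested within _minFreshnessMs, or if the segment
    // has already reached the latest stream offset
    boolean caughtUp(RealtimeSegmentDataManager mgr, long latestIngestionMs, long nowMs) {
      if (latestIngestionMs > 0 && nowMs - latestIngestionMs <= _minFreshnessMs) {
        return true;
      }
      StreamPartitionMsgOffset latest = mgr.fetchLatestStreamOffset(5000);
      StreamPartitionMsgOffset current = mgr.getCurrentOffset();
      return latest != null && current != null && current.compareTo(latest) >= 0;
    }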
diff --git a/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/IngestionBasedConsumptionStatusChecker.java b/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/IngestionBasedConsumptionStatusChecker.java
index 83de35a63c9d..c6fe0d16d6dc 100644
--- a/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/IngestionBasedConsumptionStatusChecker.java
+++ b/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/IngestionBasedConsumptionStatusChecker.java
@@ -19,15 +19,16 @@

 package org.apache.pinot.server.starter.helix;

+import java.util.HashMap;
 import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Map;
 import java.util.Set;
-import org.apache.pinot.common.utils.LLCSegmentName;
+import java.util.function.Function;
 import org.apache.pinot.core.data.manager.InstanceDataManager;
 import org.apache.pinot.core.data.manager.realtime.RealtimeSegmentDataManager;
 import org.apache.pinot.segment.local.data.manager.SegmentDataManager;
 import org.apache.pinot.segment.local.data.manager.TableDataManager;
-import org.apache.pinot.spi.config.table.TableType;
-import org.apache.pinot.spi.utils.builder.TableNameBuilder;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

@@ -35,64 +36,103 @@ public abstract class IngestionBasedConsumptionStatusChecker {
   protected final Logger _logger = LoggerFactory.getLogger(getClass());

-  // constructor parameters
-  protected final InstanceDataManager _instanceDataManager;
-  protected final Set<String> _consumingSegments;
-
-  // helper variable
-  private final Set<String> _caughtUpSegments = new HashSet<>();
+  private final InstanceDataManager _instanceDataManager;
+  private final Map<String, Set<String>> _consumingSegmentsByTable;
+  private final Map<String, Set<String>> _caughtUpSegmentsByTable = new HashMap<>();
+  private final Function<String, Set<String>> _consumingSegmentsSupplier;

+  /**
+   * Both consumingSegmentsByTable and consumingSegmentsSupplier are provided as it can be costly to get
+   * consumingSegmentsByTable via the supplier, so only use it when any missing segment is detected.
+   */
   public IngestionBasedConsumptionStatusChecker(InstanceDataManager instanceDataManager,
-      Set<String> consumingSegments) {
+      Map<String, Set<String>> consumingSegmentsByTable, Function<String, Set<String>> consumingSegmentsSupplier) {
     _instanceDataManager = instanceDataManager;
-    _consumingSegments = consumingSegments;
+    _consumingSegmentsByTable = consumingSegmentsByTable;
+    _consumingSegmentsSupplier = consumingSegmentsSupplier;
   }

-  public int getNumConsumingSegmentsNotReachedIngestionCriteria() {
-    for (String segName : _consumingSegments) {
-      if (_caughtUpSegments.contains(segName)) {
-        continue;
-      }
-      TableDataManager tableDataManager = getTableDataManager(segName);
+  // This might be called by multiple threads, thus synchronized to be correct.
+  public synchronized int getNumConsumingSegmentsNotReachedIngestionCriteria() {
+    // If the checker found any consuming segments are missing or committed for a table, it should reset the set of
+    // consuming segments for the table to continue to monitor the freshness, otherwise the checker might get stuck
+    // on deleted segments or tables, or miss new consuming segments created in the table and get ready prematurely.
+    Set<String> tablesToRefresh = new HashSet<>();
+    Iterator<Map.Entry<String, Set<String>>> itr = _consumingSegmentsByTable.entrySet().iterator();
+    while (itr.hasNext()) {
+      Map.Entry<String, Set<String>> tableSegments = itr.next();
+      String tableNameWithType = tableSegments.getKey();
+      TableDataManager tableDataManager = _instanceDataManager.getTableDataManager(tableNameWithType);
       if (tableDataManager == null) {
-        _logger.info("TableDataManager is not yet setup for segment {}. Will check consumption status later", segName);
+        _logger.info("No tableDataManager for table: {}. Refresh table's consuming segments", tableNameWithType);
+        tablesToRefresh.add(tableNameWithType);
         continue;
       }
-      SegmentDataManager segmentDataManager = null;
-      try {
-        segmentDataManager = tableDataManager.acquireSegment(segName);
-        if (segmentDataManager == null) {
-          _logger.info("SegmentDataManager is not yet setup for segment {}. Will check consumption status later",
-              segName);
+      Set<String> consumingSegments = tableSegments.getValue();
+      Set<String> caughtUpSegments = _caughtUpSegmentsByTable.computeIfAbsent(tableNameWithType, k -> new HashSet<>());
+      for (String segName : consumingSegments) {
+        if (caughtUpSegments.contains(segName)) {
           continue;
         }
-        if (!(segmentDataManager instanceof RealtimeSegmentDataManager)) {
-          // There's a possibility that a consuming segment has converted to a committed segment. If that's the case,
-          // segment data manager will not be of type RealtimeSegmentDataManager.
-          _logger.info("Segment {} is already committed and is considered caught up.", segName);
-          _caughtUpSegments.add(segName);
+        SegmentDataManager segmentDataManager = tableDataManager.acquireSegment(segName);
+        if (segmentDataManager == null) {
+          _logger.info("No segmentDataManager for segment: {} from table: {}. Refresh table's consuming segments",
+              segName, tableNameWithType);
+          tablesToRefresh.add(tableNameWithType);
           continue;
        }
-
-        RealtimeSegmentDataManager rtSegmentDataManager = (RealtimeSegmentDataManager) segmentDataManager;
-        if (isSegmentCaughtUp(segName, rtSegmentDataManager)) {
-          _caughtUpSegments.add(segName);
-        }
-      } finally {
-        if (segmentDataManager != null) {
+        try {
+          if (!(segmentDataManager instanceof RealtimeSegmentDataManager)) {
+            // It's possible that the consuming segment has been committed by another server. In this case, we should
+            // get the new consuming segments for the table and continue to monitor their consumption status, until the
+            // current server catches up the consuming segments.
+            _logger.info("Segment: {} from table: {} is already committed. Refresh table's consuming segments.",
+                segName, tableNameWithType);
+            tablesToRefresh.add(tableNameWithType);
+            continue;
+          }
+          RealtimeSegmentDataManager rtSegmentDataManager = (RealtimeSegmentDataManager) segmentDataManager;
+          if (isSegmentCaughtUp(segName, rtSegmentDataManager)) {
+            caughtUpSegments.add(segName);
+          }
+        } finally {
           tableDataManager.releaseSegment(segmentDataManager);
         }
       }
+      int numLaggingSegments = consumingSegments.size() - caughtUpSegments.size();
+      if (numLaggingSegments == 0) {
+        _logger.info("Consuming segments from table: {} have all caught up", tableNameWithType);
+        itr.remove();
+        _caughtUpSegmentsByTable.remove(tableNameWithType);
+      }
+    }
+    if (!tablesToRefresh.isEmpty()) {
+      for (String tableNameWithType : tablesToRefresh) {
+        Set<String> updatedConsumingSegments = _consumingSegmentsSupplier.apply(tableNameWithType);
+        if (updatedConsumingSegments == null || updatedConsumingSegments.isEmpty()) {
+          _consumingSegmentsByTable.remove(tableNameWithType);
+          _caughtUpSegmentsByTable.remove(tableNameWithType);
+          _logger.info("Found no consuming segments from table: {}, which is probably removed", tableNameWithType);
+        } else {
+          _consumingSegmentsByTable.put(tableNameWithType, updatedConsumingSegments);
+          _caughtUpSegmentsByTable.computeIfAbsent(tableNameWithType, k -> new HashSet<>())
+              .retainAll(updatedConsumingSegments);
+          _logger.info(
+              "Updated consumingSegments: {} and caughtUpSegments: {} for table: {}, as consuming segments were "
+                  + "missing or committed", updatedConsumingSegments, _caughtUpSegmentsByTable.get(tableNameWithType),
+              tableNameWithType);
+        }
+      }
+    }
-    return _consumingSegments.size() - _caughtUpSegments.size();
+    int numLaggingSegments = 0;
+    for (Map.Entry<String, Set<String>> tableSegments : _consumingSegmentsByTable.entrySet()) {
+      String tableNameWithType = tableSegments.getKey();
+      Set<String> consumingSegments = tableSegments.getValue();
+      Set<String> caughtUpSegments = _caughtUpSegmentsByTable.computeIfAbsent(tableNameWithType, k -> new HashSet<>());
+      numLaggingSegments += consumingSegments.size() - caughtUpSegments.size();
+    }
+    return numLaggingSegments;
   }

   protected abstract boolean isSegmentCaughtUp(String segmentName, RealtimeSegmentDataManager rtSegmentDataManager);
-
-  private TableDataManager getTableDataManager(String segmentName) {
-    LLCSegmentName llcSegmentName = new LLCSegmentName(segmentName);
-    String tableName = llcSegmentName.getTableName();
-    String tableNameWithType = TableNameBuilder.forType(TableType.REALTIME).tableNameWithType(tableName);
-    return _instanceDataManager.getTableDataManager(tableNameWithType);
-  }
 }
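The supplier contract the refresh path relies on, in one small usage sketch (the table and segment names here are hypothetical): returning null or an empty set means the table is gone and tracking stops, while a non-empty set replaces the tracked segments and the caught-up bookkeeping is intersected with it via retainAll.

    Function<String, Set<String>> supplier = table -> Set.of("tableA__0__1__20240422T1200Z");
    Set<String> updated = supplier.apply("tableA_REALTIME");
    // null/empty -> drop the table from _consumingSegmentsByTable and _caughtUpSegmentsByTable;
    // otherwise  -> replace the tracked set and retain only still-consuming caught-up segments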
diff --git a/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/OffsetBasedConsumptionStatusChecker.java b/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/OffsetBasedConsumptionStatusChecker.java
index 6b597e3fa2ac..ad7d2905baa1 100644
--- a/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/OffsetBasedConsumptionStatusChecker.java
+++ b/pinot-server/src/main/java/org/apache/pinot/server/starter/helix/OffsetBasedConsumptionStatusChecker.java
@@ -19,7 +19,9 @@

 package org.apache.pinot.server.starter.helix;

+import java.util.Map;
 import java.util.Set;
+import java.util.function.Function;
 import org.apache.pinot.core.data.manager.InstanceDataManager;
 import org.apache.pinot.core.data.manager.realtime.RealtimeSegmentDataManager;
 import org.apache.pinot.spi.stream.StreamPartitionMsgOffset;
@@ -34,8 +36,9 @@
  */
 public class OffsetBasedConsumptionStatusChecker extends IngestionBasedConsumptionStatusChecker {
-  
public OffsetBasedConsumptionStatusChecker(InstanceDataManager instanceDataManager, Set consumingSegments) { - super(instanceDataManager, consumingSegments); + public OffsetBasedConsumptionStatusChecker(InstanceDataManager instanceDataManager, + Map> consumingSegments, Function> consumingSegmentsSupplier) { + super(instanceDataManager, consumingSegments, consumingSegmentsSupplier); } @Override diff --git a/pinot-server/src/test/java/org/apache/pinot/server/starter/helix/ConsumptionStatusCheckerTestUtils.java b/pinot-server/src/test/java/org/apache/pinot/server/starter/helix/ConsumptionStatusCheckerTestUtils.java new file mode 100644 index 000000000000..ccd8f6f8558e --- /dev/null +++ b/pinot-server/src/test/java/org/apache/pinot/server/starter/helix/ConsumptionStatusCheckerTestUtils.java @@ -0,0 +1,38 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.server.starter.helix; + +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import java.util.function.Function; + + +class ConsumptionStatusCheckerTestUtils { + private ConsumptionStatusCheckerTestUtils() { + } + + public static Function> getConsumingSegments(Map> consumingSegments) { + // Create a new Set instance to keep updates separated from the consumingSegments. + return (tableName) -> { + Set updated = consumingSegments.get(tableName); + return updated == null ? 
null : new HashSet<>(updated); + }; + } +} diff --git a/pinot-server/src/test/java/org/apache/pinot/server/starter/helix/FreshnessBasedConsumptionStatusCheckerTest.java b/pinot-server/src/test/java/org/apache/pinot/server/starter/helix/FreshnessBasedConsumptionStatusCheckerTest.java index 6301b54d0441..e619ba7d707a 100644 --- a/pinot-server/src/test/java/org/apache/pinot/server/starter/helix/FreshnessBasedConsumptionStatusCheckerTest.java +++ b/pinot-server/src/test/java/org/apache/pinot/server/starter/helix/FreshnessBasedConsumptionStatusCheckerTest.java @@ -20,7 +20,11 @@ package org.apache.pinot.server.starter.helix; import com.google.common.collect.ImmutableSet; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; import java.util.Set; +import java.util.function.Function; import org.apache.pinot.core.data.manager.InstanceDataManager; import org.apache.pinot.core.data.manager.offline.ImmutableSegmentDataManager; import org.apache.pinot.core.data.manager.realtime.RealtimeSegmentDataManager; @@ -42,8 +46,9 @@ private class FakeFreshnessBasedConsumptionStatusChecker extends FreshnessBasedC private final long _now; public FakeFreshnessBasedConsumptionStatusChecker(InstanceDataManager instanceDataManager, - Set consumingSegments, long minFreshnessMs, long idleTimeoutMs, long now) { - super(instanceDataManager, consumingSegments, minFreshnessMs, idleTimeoutMs); + Map> consumingSegments, Function> consumingSegmentsSupplier, + long minFreshnessMs, long idleTimeoutMs, long now) { + super(instanceDataManager, consumingSegments, consumingSegmentsSupplier, minFreshnessMs, idleTimeoutMs); _now = now; } @@ -58,10 +63,13 @@ public void regularCaseWithOffsetCatchup() { String segA0 = "tableA__0__0__123Z"; String segA1 = "tableA__1__0__123Z"; String segB0 = "tableB__0__0__123Z"; - Set consumingSegments = ImmutableSet.of(segA0, segA1, segB0); + Map> consumingSegments = new HashMap<>(); + consumingSegments.put("tableA_REALTIME", ImmutableSet.of(segA0, segA1)); + consumingSegments.put("tableB_REALTIME", ImmutableSet.of(segB0)); InstanceDataManager instanceDataManager = mock(InstanceDataManager.class); FreshnessBasedConsumptionStatusChecker statusChecker = - new FreshnessBasedConsumptionStatusChecker(instanceDataManager, consumingSegments, 10000L, 0L); + new FreshnessBasedConsumptionStatusChecker(instanceDataManager, consumingSegments, + ConsumptionStatusCheckerTestUtils.getConsumingSegments(consumingSegments), 10000L, 0L); // TableDataManager is not set up yet assertEquals(statusChecker.getNumConsumingSegmentsNotReachedIngestionCriteria(), 3); @@ -119,6 +127,55 @@ public void regularCaseWithOffsetCatchup() { assertEquals(statusChecker.getNumConsumingSegmentsNotReachedIngestionCriteria(), 0); } + @Test + public void testWithDroppedTableAndSegment() + throws InterruptedException { + String segA0 = "tableA__0__0__123Z"; + String segA1 = "tableA__1__0__123Z"; + String segB0 = "tableB__0__0__123Z"; + Map> consumingSegments = new HashMap<>(); + consumingSegments.computeIfAbsent("tableA_REALTIME", k -> new HashSet<>()).add(segA0); + consumingSegments.computeIfAbsent("tableA_REALTIME", k -> new HashSet<>()).add(segA1); + consumingSegments.computeIfAbsent("tableB_REALTIME", k -> new HashSet<>()).add(segB0); + InstanceDataManager instanceDataManager = mock(InstanceDataManager.class); + FreshnessBasedConsumptionStatusChecker statusChecker = + new FreshnessBasedConsumptionStatusChecker(instanceDataManager, consumingSegments, + 
ConsumptionStatusCheckerTestUtils.getConsumingSegments(consumingSegments), 10L, 0L); + + // TableDataManager is not set up yet + assertEquals(statusChecker.getNumConsumingSegmentsNotReachedIngestionCriteria(), 3); + + // setup TableDataMangers + TableDataManager tableDataManagerA = mock(TableDataManager.class); + when(instanceDataManager.getTableDataManager("tableA_REALTIME")).thenReturn(tableDataManagerA); + when(instanceDataManager.getTableDataManager("tableB_REALTIME")).thenReturn(null); + + // setup SegmentDataManagers + RealtimeSegmentDataManager segMngrA0 = mock(RealtimeSegmentDataManager.class); + when(tableDataManagerA.acquireSegment(segA0)).thenReturn(segMngrA0); + when(tableDataManagerA.acquireSegment(segA1)).thenReturn(null); + + when(segMngrA0.fetchLatestStreamOffset(5000)).thenReturn(new LongMsgOffset(20)); + when(segMngrA0.getCurrentOffset()).thenReturn(new LongMsgOffset(0)); + // ensure negative values are ignored + setupLatestIngestionTimestamp(segMngrA0, Long.MIN_VALUE); + + // current offset latest stream offset current time last ingestion time + // segA0 0 20 100 Long.MIN_VALUE + // segA1 (segment is absent) + // segB0 (table is absent) + assertEquals(statusChecker.getNumConsumingSegmentsNotReachedIngestionCriteria(), 3); + + // updatedConsumingSegments still provide 3 segments to checker but one has caught up. + when(segMngrA0.getCurrentOffset()).thenReturn(new LongMsgOffset(20)); + assertEquals(statusChecker.getNumConsumingSegmentsNotReachedIngestionCriteria(), 2); + // Remove the missing segments and check again. + consumingSegments.get("tableA_REALTIME").remove(segA1); + assertEquals(statusChecker.getNumConsumingSegmentsNotReachedIngestionCriteria(), 1); + consumingSegments.remove("tableB_REALTIME"); + assertEquals(statusChecker.getNumConsumingSegmentsNotReachedIngestionCriteria(), 0); + } + private void setupLatestIngestionTimestamp(RealtimeSegmentDataManager segmentDataManager, long latestIngestionTimestamp) { MutableSegment mockSegment = mock(MutableSegment.class); @@ -133,10 +190,13 @@ public void regularCaseWithFreshnessCatchup() { String segA0 = "tableA__0__0__123Z"; String segA1 = "tableA__1__0__123Z"; String segB0 = "tableB__0__0__123Z"; - Set consumingSegments = ImmutableSet.of(segA0, segA1, segB0); + Map> consumingSegments = new HashMap<>(); + consumingSegments.put("tableA_REALTIME", ImmutableSet.of(segA0, segA1)); + consumingSegments.put("tableB_REALTIME", ImmutableSet.of(segB0)); InstanceDataManager instanceDataManager = mock(InstanceDataManager.class); FreshnessBasedConsumptionStatusChecker statusChecker = - new FakeFreshnessBasedConsumptionStatusChecker(instanceDataManager, consumingSegments, 10L, 0L, 100L); + new FakeFreshnessBasedConsumptionStatusChecker(instanceDataManager, consumingSegments, + ConsumptionStatusCheckerTestUtils.getConsumingSegments(consumingSegments), 10L, 0L, 100L); // TableDataManager is not set up yet assertEquals(statusChecker.getNumConsumingSegmentsNotReachedIngestionCriteria(), 3); @@ -195,12 +255,14 @@ public void regularCaseWithIdleTimeout() { String segA0 = "tableA__0__0__123Z"; String segA1 = "tableA__1__0__123Z"; String segB0 = "tableB__0__0__123Z"; - Set consumingSegments = ImmutableSet.of(segA0, segA1, segB0); + Map> consumingSegments = new HashMap<>(); + consumingSegments.put("tableA_REALTIME", ImmutableSet.of(segA0, segA1)); + consumingSegments.put("tableB_REALTIME", ImmutableSet.of(segB0)); InstanceDataManager instanceDataManager = mock(InstanceDataManager.class); long idleTimeoutMs = 10L; 
FreshnessBasedConsumptionStatusChecker statusChecker = - new FakeFreshnessBasedConsumptionStatusChecker(instanceDataManager, consumingSegments, 10L, idleTimeoutMs, - 100L); + new FakeFreshnessBasedConsumptionStatusChecker(instanceDataManager, consumingSegments, + ConsumptionStatusCheckerTestUtils.getConsumingSegments(consumingSegments), 10L, idleTimeoutMs, 100L); // TableDataManager is not set up yet assertEquals(statusChecker.getNumConsumingSegmentsNotReachedIngestionCriteria(), 3); @@ -270,10 +332,13 @@ public void testSegmentsNeverHealthyWhenIdleTimeoutZeroAndNoOtherCriteriaMet() { String segA0 = "tableA__0__0__123Z"; String segA1 = "tableA__1__0__123Z"; String segB0 = "tableB__0__0__123Z"; - Set consumingSegments = ImmutableSet.of(segA0, segA1, segB0); + Map> consumingSegments = new HashMap<>(); + consumingSegments.put("tableA_REALTIME", ImmutableSet.of(segA0, segA1)); + consumingSegments.put("tableB_REALTIME", ImmutableSet.of(segB0)); InstanceDataManager instanceDataManager = mock(InstanceDataManager.class); FreshnessBasedConsumptionStatusChecker statusChecker = - new FakeFreshnessBasedConsumptionStatusChecker(instanceDataManager, consumingSegments, 10L, 0L, 100L); + new FakeFreshnessBasedConsumptionStatusChecker(instanceDataManager, consumingSegments, + ConsumptionStatusCheckerTestUtils.getConsumingSegments(consumingSegments), 10L, 0L, 100L); // TableDataManager is not set up yet assertEquals(statusChecker.getNumConsumingSegmentsNotReachedIngestionCriteria(), 3); @@ -319,10 +384,13 @@ public void segmentBeingCommmitted() { String segA0 = "tableA__0__0__123Z"; String segA1 = "tableA__1__0__123Z"; String segB0 = "tableB__0__0__123Z"; - Set consumingSegments = ImmutableSet.of(segA0, segA1, segB0); + Map> consumingSegments = new HashMap<>(); + consumingSegments.put("tableA_REALTIME", ImmutableSet.of(segA0, segA1)); + consumingSegments.put("tableB_REALTIME", ImmutableSet.of(segB0)); InstanceDataManager instanceDataManager = mock(InstanceDataManager.class); FreshnessBasedConsumptionStatusChecker statusChecker = - new FakeFreshnessBasedConsumptionStatusChecker(instanceDataManager, consumingSegments, 10L, 0L, 100L); + new FakeFreshnessBasedConsumptionStatusChecker(instanceDataManager, consumingSegments, + ConsumptionStatusCheckerTestUtils.getConsumingSegments(consumingSegments), 10L, 0L, 100L); // TableDataManager is not set up yet assertEquals(statusChecker.getNumConsumingSegmentsNotReachedIngestionCriteria(), 3); @@ -369,6 +437,8 @@ public void segmentBeingCommmitted() { setupLatestIngestionTimestamp(segMngrA0, 90L); // Unexpected case where latest ingested is somehow after current time setupLatestIngestionTimestamp(segMngrA1, 101L); + assertEquals(statusChecker.getNumConsumingSegmentsNotReachedIngestionCriteria(), 1); + consumingSegments.get("tableB_REALTIME").remove(segB0); assertEquals(statusChecker.getNumConsumingSegmentsNotReachedIngestionCriteria(), 0); } @@ -377,10 +447,13 @@ public void testCannotGetOffsetsOrFreshness() { String segA0 = "tableA__0__0__123Z"; String segA1 = "tableA__1__0__123Z"; String segB0 = "tableB__0__0__123Z"; - Set consumingSegments = ImmutableSet.of(segA0, segA1, segB0); + Map> consumingSegments = new HashMap<>(); + consumingSegments.put("tableA_REALTIME", ImmutableSet.of(segA0, segA1)); + consumingSegments.put("tableB_REALTIME", ImmutableSet.of(segB0)); InstanceDataManager instanceDataManager = mock(InstanceDataManager.class); FreshnessBasedConsumptionStatusChecker statusChecker = - new FakeFreshnessBasedConsumptionStatusChecker(instanceDataManager, 
consumingSegments, 10L, 0L, 100L); + new FakeFreshnessBasedConsumptionStatusChecker(instanceDataManager, consumingSegments, + ConsumptionStatusCheckerTestUtils.getConsumingSegments(consumingSegments), 10L, 0L, 100L); // TableDataManager is not set up yet assertEquals(statusChecker.getNumConsumingSegmentsNotReachedIngestionCriteria(), 3); diff --git a/pinot-server/src/test/java/org/apache/pinot/server/starter/helix/OffsetBasedConsumptionStatusCheckerTest.java b/pinot-server/src/test/java/org/apache/pinot/server/starter/helix/OffsetBasedConsumptionStatusCheckerTest.java index 88b05b8ff003..2248f731d2d7 100644 --- a/pinot-server/src/test/java/org/apache/pinot/server/starter/helix/OffsetBasedConsumptionStatusCheckerTest.java +++ b/pinot-server/src/test/java/org/apache/pinot/server/starter/helix/OffsetBasedConsumptionStatusCheckerTest.java @@ -20,6 +20,8 @@ package org.apache.pinot.server.starter.helix; import com.google.common.collect.ImmutableSet; +import java.util.HashMap; +import java.util.Map; import java.util.Set; import org.apache.pinot.core.data.manager.InstanceDataManager; import org.apache.pinot.core.data.manager.offline.ImmutableSegmentDataManager; @@ -41,10 +43,13 @@ public void regularCase() { String segA0 = "tableA__0__0__123Z"; String segA1 = "tableA__1__0__123Z"; String segB0 = "tableB__0__0__123Z"; - Set consumingSegments = ImmutableSet.of(segA0, segA1, segB0); + Map> consumingSegments = new HashMap<>(); + consumingSegments.put("tableA_REALTIME", ImmutableSet.of(segA0, segA1)); + consumingSegments.put("tableB_REALTIME", ImmutableSet.of(segB0)); InstanceDataManager instanceDataManager = mock(InstanceDataManager.class); OffsetBasedConsumptionStatusChecker statusChecker = - new OffsetBasedConsumptionStatusChecker(instanceDataManager, consumingSegments); + new OffsetBasedConsumptionStatusChecker(instanceDataManager, consumingSegments, + ConsumptionStatusCheckerTestUtils.getConsumingSegments(consumingSegments)); // setup TableDataMangers TableDataManager tableDataManagerA = mock(TableDataManager.class); @@ -88,11 +93,14 @@ public void dataMangersBeingSetup() { String segA0 = "tableA__0__0__123Z"; String segA1 = "tableA__1__0__123Z"; String segB0 = "tableB__0__0__123Z"; - Set consumingSegments = ImmutableSet.of(segA0, segA1, segB0); + Map> consumingSegments = new HashMap<>(); + consumingSegments.put("tableA_REALTIME", ImmutableSet.of(segA0, segA1)); + consumingSegments.put("tableB_REALTIME", ImmutableSet.of(segB0)); InstanceDataManager instanceDataManager = mock(InstanceDataManager.class); OffsetBasedConsumptionStatusChecker statusChecker = - new OffsetBasedConsumptionStatusChecker(instanceDataManager, consumingSegments); + new OffsetBasedConsumptionStatusChecker(instanceDataManager, consumingSegments, + ConsumptionStatusCheckerTestUtils.getConsumingSegments(consumingSegments)); // TableDataManager is not set up yet assertEquals(statusChecker.getNumConsumingSegmentsNotReachedIngestionCriteria(), 3); @@ -149,10 +157,13 @@ public void segmentsBeingCommitted() { String segA0 = "tableA__0__0__123Z"; String segA1 = "tableA__1__0__123Z"; String segB0 = "tableB__0__0__123Z"; - Set consumingSegments = ImmutableSet.of(segA0, segA1, segB0); + Map> consumingSegments = new HashMap<>(); + consumingSegments.put("tableA_REALTIME", ImmutableSet.of(segA0, segA1)); + consumingSegments.put("tableB_REALTIME", ImmutableSet.of(segB0)); InstanceDataManager instanceDataManager = mock(InstanceDataManager.class); OffsetBasedConsumptionStatusChecker statusChecker = - new 
OffsetBasedConsumptionStatusChecker(instanceDataManager, consumingSegments); + new OffsetBasedConsumptionStatusChecker(instanceDataManager, consumingSegments, + ConsumptionStatusCheckerTestUtils.getConsumingSegments(consumingSegments)); // setup TableDataMangers TableDataManager tableDataManagerA = mock(TableDataManager.class); @@ -190,6 +201,8 @@ public void segmentsBeingCommitted() { // segB0 committed at 1200 1500 when(segMngrA0.getCurrentOffset()).thenReturn(new LongMsgOffset(20)); when(segMngrA1.getCurrentOffset()).thenReturn(new LongMsgOffset(200)); + assertEquals(statusChecker.getNumConsumingSegmentsNotReachedIngestionCriteria(), 1); + consumingSegments.get("tableB_REALTIME").remove(segB0); assertEquals(statusChecker.getNumConsumingSegmentsNotReachedIngestionCriteria(), 0); } @@ -199,10 +212,13 @@ public void cannotGetLatestStreamOffset() { String segA0 = "tableA__0__0__123Z"; String segA1 = "tableA__1__0__123Z"; String segB0 = "tableB__0__0__123Z"; - Set consumingSegments = ImmutableSet.of(segA0, segA1, segB0); + Map> consumingSegments = new HashMap<>(); + consumingSegments.put("tableA_REALTIME", ImmutableSet.of(segA0, segA1)); + consumingSegments.put("tableB_REALTIME", ImmutableSet.of(segB0)); InstanceDataManager instanceDataManager = mock(InstanceDataManager.class); OffsetBasedConsumptionStatusChecker statusChecker = - new OffsetBasedConsumptionStatusChecker(instanceDataManager, consumingSegments); + new OffsetBasedConsumptionStatusChecker(instanceDataManager, consumingSegments, + ConsumptionStatusCheckerTestUtils.getConsumingSegments(consumingSegments)); // setup TableDataMangers TableDataManager tableDataManagerA = mock(TableDataManager.class); From dd4f0acabd73a752f4fa09986bd8c933c9cd00e0 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 23 Apr 2024 10:43:51 -0700 Subject: [PATCH 18/58] Bump org.jline:jline from 3.24.1 to 3.26.0 (#12991) --- pom.xml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index a4a83c8b287a..9f6838ec78d1 100644 --- a/pom.xml +++ b/pom.xml @@ -243,8 +243,7 @@ 1.9.23 3.9.0 2.0.3 - - 3.24.1 + 3.26.0 1.7.0.Final 1.5.4 9.4.54.v20240208 From 40cf5a7ba82fcc4fc10d8b8efe800d3f732c0655 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 23 Apr 2024 10:50:24 -0700 Subject: [PATCH 19/58] Bump aws.sdk.version from 2.25.35 to 2.25.36 (#12990) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 9f6838ec78d1..f9d4f7f328cc 100644 --- a/pom.xml +++ b/pom.xml @@ -172,7 +172,7 @@ 0.15.0 0.4.4 4.2.2 - 2.25.35 + 2.25.36 2.12.7 3.1.12 7.10.1 From 0caeccfc1399087885205e7e796d1ee8037f7867 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 23 Apr 2024 15:12:07 -0700 Subject: [PATCH 20/58] Bump org.webjars:swagger-ui from 5.15.0 to 5.17.0 (#12989) --- .../main/java/org/apache/pinot/spi/utils/CommonConstants.java | 2 +- pom.xml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/utils/CommonConstants.java b/pinot-spi/src/main/java/org/apache/pinot/spi/utils/CommonConstants.java index bbf3b30342fa..befd5b57633e 100644 --- a/pinot-spi/src/main/java/org/apache/pinot/spi/utils/CommonConstants.java +++ b/pinot-spi/src/main/java/org/apache/pinot/spi/utils/CommonConstants.java @@ -51,7 +51,7 @@ private CommonConstants() { 
"org.apache.pinot.spi.eventlistener.query.NoOpBrokerQueryEventListener"; public static final String SWAGGER_AUTHORIZATION_KEY = "oauth"; - public static final String CONFIG_OF_SWAGGER_RESOURCES_PATH = "META-INF/resources/webjars/swagger-ui/5.15.0/"; + public static final String CONFIG_OF_SWAGGER_RESOURCES_PATH = "META-INF/resources/webjars/swagger-ui/5.17.0/"; public static final String CONFIG_OF_TIMEZONE = "pinot.timezone"; public static final String DATABASE = "database"; diff --git a/pom.xml b/pom.xml index f9d4f7f328cc..b52922f01e7b 100644 --- a/pom.xml +++ b/pom.xml @@ -147,7 +147,7 @@ 2.6.1 3.30.2-GA 1.6.14 - 5.15.0 + 5.17.0 3.3.6 2.9.0 2.5.1 From 36c4b9a86fcab77e96cb1e90b1900efca0e1ce7c Mon Sep 17 00:00:00 2001 From: deemoliu Date: Tue, 23 Apr 2024 15:20:35 -0700 Subject: [PATCH 21/58] Add Prefix, Suffix and Ngram UDFs (#12392) --- .../function/scalar/StringFunctions.java | 108 ++++++++++++++++++ .../function/scalar/StringFunctionsTest.java | 50 ++++++++ 2 files changed, 158 insertions(+) diff --git a/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/StringFunctions.java b/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/StringFunctions.java index 374917ec9939..31baeb5d2d44 100644 --- a/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/StringFunctions.java +++ b/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/StringFunctions.java @@ -18,6 +18,8 @@ */ package org.apache.pinot.common.function.scalar; +import it.unimi.dsi.fastutil.objects.ObjectLinkedOpenHashSet; +import it.unimi.dsi.fastutil.objects.ObjectSet; import java.io.UnsupportedEncodingException; import java.net.URLDecoder; import java.net.URLEncoder; @@ -28,6 +30,7 @@ import java.util.UUID; import java.util.regex.Matcher; import java.util.regex.Pattern; +import javax.annotation.Nullable; import org.apache.commons.lang3.StringUtils; import org.apache.pinot.common.utils.RegexpPatternConverterUtils; import org.apache.pinot.spi.annotations.ScalarFunction; @@ -580,6 +583,111 @@ public static String[] split(String input, String delimiter, int limit) { return StringUtils.splitByWholeSeparator(input, delimiter, limit); } + /** + * @param input an input string for prefix strings generations. + * @param maxlength the max length of the prefix strings for the string. + * @return generate an array of prefix strings of the string that are shorter than the specified length. + */ + @ScalarFunction + public static String[] prefixes(String input, int maxlength) { + int arrLength = Math.min(maxlength, input.length()); + String[] prefixArr = new String[arrLength]; + for (int prefixIdx = 1; prefixIdx <= arrLength; prefixIdx++) { + prefixArr[prefixIdx - 1] = input.substring(0, prefixIdx); + } + return prefixArr; + } + + /** + * @param input an input string for prefix strings generations. + * @param maxlength the max length of the prefix strings for the string. + * @param prefix the prefix to be prepended to prefix strings generated. e.g. '^' for regex matching + * @return generate an array of prefix matchers of the string that are shorter than the specified length. 
+ */ + @ScalarFunction(nullableParameters = true, names = {"prefixesWithPrefix", "prefixes_with_prefix"}) + public static String[] prefixesWithPrefix(String input, int maxlength, @Nullable String prefix) { + if (prefix == null) { + return prefixes(input, maxlength); + } + int arrLength = Math.min(maxlength, input.length()); + String[] prefixArr = new String[arrLength]; + for (int prefixIdx = 1; prefixIdx <= arrLength; prefixIdx++) { + prefixArr[prefixIdx - 1] = prefix + input.substring(0, prefixIdx); + } + return prefixArr; + } + + /** + * @param input an input string for suffix strings generations. + * @param maxlength the max length of the suffix strings for the string. + * @return generate an array of suffix strings of the string that are shorter than the specified length. + */ + @ScalarFunction + public static String[] suffixes(String input, int maxlength) { + int arrLength = Math.min(maxlength, input.length()); + String[] suffixArr = new String[arrLength]; + for (int suffixIdx = 1; suffixIdx <= arrLength; suffixIdx++) { + suffixArr[suffixIdx - 1] = input.substring(input.length() - suffixIdx); + } + return suffixArr; + } + + /** + * @param input an input string for suffix strings generations. + * @param maxlength the max length of the suffix strings for the string. + * @param suffix the suffix string to be appended for suffix strings generated. e.g. '$' for regex matching. + * @return generate an array of suffix matchers of the string that are shorter than the specified length. + */ + @ScalarFunction(nullableParameters = true, names = {"suffixesWithSuffix", "suffixes_with_suffix"}) + public static String[] suffixesWithSuffix(String input, int maxlength, @Nullable String suffix) { + if (suffix == null) { + return suffixes(input, maxlength); + } + int arrLength = Math.min(maxlength, input.length()); + String[] suffixArr = new String[arrLength]; + for (int suffixIdx = 1; suffixIdx <= arrLength; suffixIdx++) { + suffixArr[suffixIdx - 1] = input.substring(input.length() - suffixIdx) + suffix; + } + return suffixArr; + } + + /** + * @param input an input string for ngram generations. + * @param length the max length of the ngram for the string. + * @return generate an array of unique ngram of the string that length are exactly matching the specified length. + */ + @ScalarFunction + public static String[] uniqueNgrams(String input, int length) { + if (length == 0 || length > input.length()) { + return new String[0]; + } + ObjectSet ngramSet = new ObjectLinkedOpenHashSet<>(); + for (int i = 0; i < input.length() - length + 1; i++) { + ngramSet.add(input.substring(i, i + length)); + } + return ngramSet.toArray(new String[0]); + } + + /** + * @param input an input string for ngram generations. + * @param minGram the min length of the ngram for the string. + * @param maxGram the max length of the ngram for the string. + * @return generate an array of ngram of the string that length are within the specified range [minGram, maxGram]. 
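+   * e.g. uniqueNgrams("abcd", 0, 3) returns ["a", "b", "c", "d", "ab", "bc", "cd", "abc", "bcd"]
+   * (editor-added example, taken from the test cases added below).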
+ */ + @ScalarFunction + public static String[] uniqueNgrams(String input, int minGram, int maxGram) { + ObjectSet ngramSet = new ObjectLinkedOpenHashSet<>(); + for (int n = minGram; n <= maxGram && n <= input.length(); n++) { + if (n == 0) { + continue; + } + for (int i = 0; i < input.length() - n + 1; i++) { + ngramSet.add(input.substring(i, i + n)); + } + } + return ngramSet.toArray(new String[0]); + } + /** * TODO: Revisit if index should be one-based (both Presto and Postgres use one-based index, which starts with 1) * @param input diff --git a/pinot-common/src/test/java/org/apache/pinot/common/function/scalar/StringFunctionsTest.java b/pinot-common/src/test/java/org/apache/pinot/common/function/scalar/StringFunctionsTest.java index d75b8ada435d..6c9fa465f54d 100644 --- a/pinot-common/src/test/java/org/apache/pinot/common/function/scalar/StringFunctionsTest.java +++ b/pinot-common/src/test/java/org/apache/pinot/common/function/scalar/StringFunctionsTest.java @@ -77,6 +77,41 @@ public static Object[][] isJsonTestCases() { }; } + @DataProvider(name = "prefixAndSuffixTestCases") + public static Object[][] prefixAndSuffixTestCases() { + return new Object[][]{ + {"abcde", 3, new String[]{"a", "ab", "abc"}, new String[]{"e", "de", "cde"}, new String[]{ + "^a", "^ab", "^abc"}, new String[]{"e$", "de$", "cde$"}}, + {"abcde", 0, new String[]{}, new String[]{}, new String[]{}, new String[]{}}, + {"abcde", 9, new String[]{"a", "ab", "abc", "abcd", "abcde"}, new String[]{"e", "de", "cde", "bcde", "abcde"}, + new String[]{"^a", "^ab", "^abc", "^abcd", "^abcde"}, new String[]{"e$", "de$", "cde$", "bcde$", "abcde$"}}, + {"a", 3, new String[]{"a"}, new String[]{"a"}, new String[]{"^a"}, new String[]{"a$"}}, + {"a", 0, new String[]{}, new String[]{}, new String[]{}, new String[]{}}, + {"a", 9, new String[]{"a"}, new String[]{"a"}, new String[]{"^a"}, new String[]{"a$"}}, + {"", 3, new String[]{}, new String[]{}, new String[]{}, new String[]{}}, + {"", 0, new String[]{}, new String[]{}, new String[]{}, new String[]{}}, + {"", 9, new String[]{}, new String[]{}, new String[]{}, new String[]{}} + }; + } + + @DataProvider(name = "ngramTestCases") + public static Object[][] ngramTestCases() { + return new Object[][]{ + {"abcd", 0, 3, new String[]{"abc", "bcd"}, new String[]{"a", "b", "c", "d", "ab", "bc", "cd", "abc", "bcd"}}, + {"abcd", 2, 2, new String[]{"ab", "bc", "cd"}, new String[]{"ab", "bc", "cd"}}, + {"abcd", 3, 0, new String[]{}, new String[]{}}, + {"abc", 0, 3, new String[]{"abc"}, new String[]{"a", "b", "c", "ab", "bc", "abc"}}, + {"abc", 3, 0, new String[]{}, new String[]{}}, + {"abc", 3, 3, new String[]{"abc"}, new String[]{"abc"}}, + {"a", 0, 3, new String[]{}, new String[]{"a"}}, + {"a", 2, 3, new String[]{}, new String[]{}}, + {"a", 3, 3, new String[]{}, new String[]{}}, + {"", 3, 0, new String[]{}, new String[]{}}, + {"", 3, 3, new String[]{}, new String[]{}}, + {"", 0, 3, new String[]{}, new String[]{}} + }; + } + @Test(dataProvider = "isJson") public void testIsJson(String input, boolean expectedValue) { assertEquals(StringFunctions.isJson(input), expectedValue); @@ -88,4 +123,19 @@ public void testSplitPart(String input, String delimiter, int index, int limit, assertEquals(StringFunctions.splitPart(input, delimiter, index), expectedToken); assertEquals(StringFunctions.splitPart(input, delimiter, limit, index), expectedTokenWithLimitCounts); } + + @Test(dataProvider = "prefixAndSuffixTestCases") + public void testPrefixAndSuffix(String input, int length, String[] expectedPrefix, String[] 
expectedSuffix, + String[] expectedPrefixWithRegexChar, String[] expectedSuffixWithRegexChar) { + assertEquals(StringFunctions.prefixes(input, length), expectedPrefix); + assertEquals(StringFunctions.suffixes(input, length), expectedSuffix); + assertEquals(StringFunctions.prefixesWithPrefix(input, length, "^"), expectedPrefixWithRegexChar); + assertEquals(StringFunctions.suffixesWithSuffix(input, length, "$"), expectedSuffixWithRegexChar); + } + + @Test(dataProvider = "ngramTestCases") + public void testNGram(String input, int minGram, int maxGram, String[] expectedExactNGram, String[] expectedNGram) { + assertEquals(StringFunctions.uniqueNgrams(input, maxGram), expectedExactNGram); + assertEquals(StringFunctions.uniqueNgrams(input, minGram, maxGram), expectedNGram); + } } From bc9e8ee5413c8611fe2be3ed6c3d7073e750d608 Mon Sep 17 00:00:00 2001 From: "Xiaotian (Jackie) Jiang" <17555551+Jackie-Jiang@users.noreply.github.com> Date: Tue, 23 Apr 2024 21:43:12 -0700 Subject: [PATCH 22/58] Upgrade Pulsar to 3.2.2 (#12967) --- .../pinot-pulsar/pom.xml | 134 ++---------------- .../plugin/stream/pulsar/PulsarUtils.java | 27 ++-- .../stream/pulsar/PulsarConsumerTest.java | 2 +- pinot-tools/pom.xml | 38 ----- pom.xml | 31 ++-- 5 files changed, 37 insertions(+), 195 deletions(-) diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-pulsar/pom.xml b/pinot-plugins/pinot-stream-ingestion/pinot-pulsar/pom.xml index f742f52f45a8..cb13fb9bba87 100644 --- a/pinot-plugins/pinot-stream-ingestion/pinot-pulsar/pom.xml +++ b/pinot-plugins/pinot-stream-ingestion/pinot-pulsar/pom.xml @@ -37,137 +37,33 @@ package ${basedir}/../../.. - 0.16.0 - 1.63.0 - 1.63.0 - 2.6.2 - 1.17 + 3.2.2 + 1.19.7 - - org.testcontainers - pulsar - 1.19.7 - test - - - org.mockito - mockito-core - test - org.apache.pulsar - pulsar-client-original - - - commons-configuration - commons-configuration - - - org.bouncycastle - bcpkix-jdk15on - - - org.bouncycastle - bcprov-ext-jdk15on - - - org.eclipse.jetty - jetty-util - - + pulsar-client + ${pulsar.version} + org.apache.pulsar - pulsar-client-admin-original - - - org.glassfish.jersey.core - jersey-server - - - org.glassfish.jersey.containers - jersey-container-grizzly2-http - - - org.glassfish.jersey.containers - jersey-container-servlet-core - - - io.netty - netty-resolver - - - io.prometheus - simpleclient_common - ${simpleclient_common.version} - - - com.google.api.grpc - proto-google-common-protos - - - io.grpc - grpc-context - ${grpc-context.version} - - - io.grpc - grpc-protobuf-lite - ${grpc-protobuf-lite.version} - - - io.prometheus - simpleclient - ${simpleclient_common.version} - - - org.eclipse.jetty - jetty-server - - - org.eclipse.jetty - jetty-servlet - - - com.squareup.okio - okio - - - io.prometheus - simpleclient_hotspot - ${simpleclient_common.version} - - - org.codehaus.mojo - animal-sniffer-annotations - ${codehaus-annotations.version} - - - com.github.ben-manes.caffeine - caffeine - ${caffeine.version} - - - io.netty - netty-codec-socks - - - org.bouncycastle - bcpkix-jdk15to18 - - - org.bouncycastle - bcprov-ext-jdk15to18 + pulsar-client-admin + ${pulsar.version} + test - org.bouncycastle - bcprov-jdk15to18 + org.testcontainers + pulsar + ${testcontainers.pulsar.version} + test - org.apache.pinot - pinot-spi + org.mockito + mockito-core + test diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-pulsar/src/main/java/org/apache/pinot/plugin/stream/pulsar/PulsarUtils.java 
b/pinot-plugins/pinot-stream-ingestion/pinot-pulsar/src/main/java/org/apache/pinot/plugin/stream/pulsar/PulsarUtils.java index 0ccacc304704..e1b7b50c21a4 100644 --- a/pinot-plugins/pinot-stream-ingestion/pinot-pulsar/src/main/java/org/apache/pinot/plugin/stream/pulsar/PulsarUtils.java +++ b/pinot-plugins/pinot-stream-ingestion/pinot-pulsar/src/main/java/org/apache/pinot/plugin/stream/pulsar/PulsarUtils.java @@ -22,6 +22,7 @@ import com.google.common.collect.Maps; import java.nio.ByteBuffer; import java.util.Base64; +import java.util.BitSet; import java.util.Map; import java.util.Set; import org.apache.commons.lang3.StringUtils; @@ -31,12 +32,11 @@ import org.apache.pinot.spi.stream.StreamMessageMetadata; import org.apache.pulsar.client.api.Message; import org.apache.pulsar.client.api.MessageId; +import org.apache.pulsar.client.api.MessageIdAdv; import org.apache.pulsar.client.api.Reader; import org.apache.pulsar.client.api.SubscriptionInitialPosition; -import org.apache.pulsar.client.impl.BatchMessageAcker; import org.apache.pulsar.client.impl.BatchMessageIdImpl; import org.apache.pulsar.client.impl.MessageIdImpl; -import org.apache.pulsar.client.internal.DefaultImplementation; public class PulsarUtils { @@ -119,22 +119,21 @@ static StreamMessageMetadata extractMessageMetadata(Message message, Pul * record in the new ledger. */ public static MessageId getNextMessageId(MessageId messageId) { - MessageIdImpl messageIdImpl = MessageIdImpl.convertToMessageIdImpl(messageId); - long ledgerId = messageIdImpl.getLedgerId(); - long entryId = messageIdImpl.getEntryId(); - int partitionIndex = messageIdImpl.getPartitionIndex(); - if (messageIdImpl instanceof BatchMessageIdImpl) { - BatchMessageIdImpl batchMessageIdImpl = (BatchMessageIdImpl) messageIdImpl; - int batchIndex = batchMessageIdImpl.getBatchIndex(); - int batchSize = batchMessageIdImpl.getBatchSize(); - BatchMessageAcker acker = batchMessageIdImpl.getAcker(); + MessageIdAdv messageIdAdv = (MessageIdAdv) messageId; + long ledgerId = messageIdAdv.getLedgerId(); + long entryId = messageIdAdv.getEntryId(); + int partitionIndex = messageIdAdv.getPartitionIndex(); + int batchSize = messageIdAdv.getBatchSize(); + if (batchSize > 0) { + int batchIndex = messageIdAdv.getBatchIndex(); + BitSet ackSet = messageIdAdv.getAckSet(); if (batchIndex < batchSize - 1) { - return new BatchMessageIdImpl(ledgerId, entryId, partitionIndex, batchIndex + 1, batchSize, acker); + return new BatchMessageIdImpl(ledgerId, entryId, partitionIndex, batchIndex + 1, batchSize, ackSet); } else { - return new BatchMessageIdImpl(ledgerId, entryId + 1, partitionIndex, 0, batchSize, acker); + return new BatchMessageIdImpl(ledgerId, entryId + 1, partitionIndex, 0, batchSize, ackSet); } } else { - return DefaultImplementation.getDefaultImplementation().newMessageId(ledgerId, entryId + 1, partitionIndex); + return new MessageIdImpl(ledgerId, entryId + 1, partitionIndex); } } diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-pulsar/src/test/java/org/apache/pinot/plugin/stream/pulsar/PulsarConsumerTest.java b/pinot-plugins/pinot-stream-ingestion/pinot-pulsar/src/test/java/org/apache/pinot/plugin/stream/pulsar/PulsarConsumerTest.java index 1baf212f170e..0ee9eb062332 100644 --- a/pinot-plugins/pinot-stream-ingestion/pinot-pulsar/src/test/java/org/apache/pinot/plugin/stream/pulsar/PulsarConsumerTest.java +++ b/pinot-plugins/pinot-stream-ingestion/pinot-pulsar/src/test/java/org/apache/pinot/plugin/stream/pulsar/PulsarConsumerTest.java @@ -54,7 +54,7 @@ public class 
PulsarConsumerTest { - private static final DockerImageName PULSAR_IMAGE = DockerImageName.parse("apachepulsar/pulsar:2.11.4"); + private static final DockerImageName PULSAR_IMAGE = DockerImageName.parse("apachepulsar/pulsar:3.2.2"); public static final String TABLE_NAME_WITH_TYPE = "tableName_REALTIME"; public static final String TEST_TOPIC = "test-topic"; public static final String TEST_TOPIC_BATCH = "test-topic-batch"; diff --git a/pinot-tools/pom.xml b/pinot-tools/pom.xml index 26c466ee0c5c..2e9dcf3b87b8 100644 --- a/pinot-tools/pom.xml +++ b/pinot-tools/pom.xml @@ -146,40 +146,6 @@ pinot-pulsar ${project.version} runtime - - - com.google.errorprone - error_prone_annotations - - - org.codehaus.mojo - animal-sniffer-annotations - - - com.google.api.grpc - proto-google-common-protos - - - org.glassfish.jersey.containers - jersey-container-servlet-core - - - io.grpc - grpc-protobuf-lite - - - io.grpc - grpc-context - - - com.typesafe.netty - netty-reactive-streams - - - com.beust - jcommander - - org.apache.pinot @@ -208,10 +174,6 @@ info.picocli picocli - - io.airlift - aircompressor - org.testng testng diff --git a/pom.xml b/pom.xml index b52922f01e7b..6a75bfd73ecc 100644 --- a/pom.xml +++ b/pom.xml @@ -240,6 +240,7 @@ 1.14.6 + 1.9.23 3.9.0 2.0.3 @@ -249,8 +250,7 @@ 9.4.54.v20240208 9.37.3 1.78 - 0.26 - 2.11.4 + 0.26 @@ -1561,19 +1561,7 @@ ${sslcontext.kickstart.version} - - - org.apache.pulsar - pulsar-client-original - ${pulsar.version} - - - org.apache.pulsar - pulsar-client-admin-original - ${pulsar.version} - - - + org.bouncycastle bcpkix-jdk18on @@ -1586,27 +1574,24 @@ org.bouncycastle - bcpkix-jdk15to18 + bcutil-jdk18on ${bouncycastle.version} org.bouncycastle - bcprov-ext-jdk15to18 - ${bouncycastle.version} - - - org.bouncycastle - bcprov-jdk15to18 + bcprov-ext-jdk18on ${bouncycastle.version} + io.airlift aircompressor - ${airlift.version} + ${aircompressor.version} + clean install From 33b8c88d2976e5be599630873349413bd832299c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 24 Apr 2024 10:19:11 -0700 Subject: [PATCH 23/58] Bump org.apache.maven.plugins:maven-shade-plugin from 3.5.2 to 3.5.3 (#12996) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 6a75bfd73ecc..98dc157f8adc 100644 --- a/pom.xml +++ b/pom.xml @@ -131,7 +131,7 @@ org.apache.pinot.shaded 3.4.1 - 3.5.2 + 3.5.3 none 1.11.3 From cb16cd7c415afb0a26391c406a98aed5124875a1 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 24 Apr 2024 10:19:37 -0700 Subject: [PATCH 24/58] Bump com.github.luben:zstd-jni from 1.5.6-2 to 1.5.6-3 (#12999) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 98dc157f8adc..adc9d1f26eff 100644 --- a/pom.xml +++ b/pom.xml @@ -159,7 +159,7 @@ 4.2.25 1.1.10.5 - 1.5.6-2 + 1.5.6-3 1.8.0 0.20.0 2.23.1 From d602ffdad6402ad44bcd198a8aebff381b28a79b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 24 Apr 2024 10:22:37 -0700 Subject: [PATCH 25/58] Bump aws.sdk.version from 2.25.36 to 2.25.37 (#12994) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index adc9d1f26eff..21b221ef2095 100644 --- a/pom.xml +++ b/pom.xml @@ -172,7 +172,7 @@ 0.15.0 0.4.4 4.2.2 - 2.25.36 + 2.25.37 2.12.7 3.1.12 7.10.1 From 5adb02fc33aff219e9a677785fceed5733b28174 Mon Sep 17 00:00:00 2001 From: 
"dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 24 Apr 2024 10:23:03 -0700 Subject: [PATCH 26/58] Bump com.azure:azure-storage-file-datalake from 12.18.3 to 12.18.4 (#12995) --- pinot-plugins/pinot-file-system/pinot-adls/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pinot-plugins/pinot-file-system/pinot-adls/pom.xml b/pinot-plugins/pinot-file-system/pinot-adls/pom.xml index bd5219be623e..f2f31cb65f6a 100644 --- a/pinot-plugins/pinot-file-system/pinot-adls/pom.xml +++ b/pinot-plugins/pinot-file-system/pinot-adls/pom.xml @@ -39,7 +39,7 @@ com.azure azure-storage-file-datalake - 12.18.3 + 12.18.4 com.azure From 73f162005648142031cbfbc7843bb1f53e73d987 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 24 Apr 2024 10:23:25 -0700 Subject: [PATCH 27/58] Bump org.jline:jline from 3.26.0 to 3.26.1 (#12997) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 21b221ef2095..b3367df70bf4 100644 --- a/pom.xml +++ b/pom.xml @@ -244,7 +244,7 @@ 1.9.23 3.9.0 2.0.3 - 3.26.0 + 3.26.1 1.7.0.Final 1.5.4 9.4.54.v20240208 From 2ca6666b7e6951cb9f95eac6ec9f7bae144701f1 Mon Sep 17 00:00:00 2001 From: "Xiaotian (Jackie) Jiang" <17555551+Jackie-Jiang@users.noreply.github.com> Date: Wed, 24 Apr 2024 12:20:50 -0700 Subject: [PATCH 28/58] Pull pulsar version definitaion into root POM (#13002) --- pinot-plugins/pinot-stream-ingestion/pinot-pulsar/pom.xml | 1 - pom.xml | 8 ++++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/pinot-plugins/pinot-stream-ingestion/pinot-pulsar/pom.xml b/pinot-plugins/pinot-stream-ingestion/pinot-pulsar/pom.xml index cb13fb9bba87..aff1bd4da918 100644 --- a/pinot-plugins/pinot-stream-ingestion/pinot-pulsar/pom.xml +++ b/pinot-plugins/pinot-stream-ingestion/pinot-pulsar/pom.xml @@ -37,7 +37,6 @@ package ${basedir}/../../.. - 3.2.2 1.19.7 diff --git a/pom.xml b/pom.xml index b3367df70bf4..13f99490d1ce 100644 --- a/pom.xml +++ b/pom.xml @@ -179,6 +179,10 @@ 6.6.2 8.3.4 + 7.6.1 + 3.2.2 + 1.14.6 + 3.14.0 4.4 @@ -232,14 +236,10 @@ 3.25.2 1.61.1 - 7.6.1 - 2.12.18 2.12 - 1.14.6 - 1.9.23 3.9.0 From 099a86cff0ad16a4d1a798efaf1b2118cf8e0cfb Mon Sep 17 00:00:00 2001 From: Rekha Seethamraju Date: Wed, 24 Apr 2024 12:30:36 -0700 Subject: [PATCH 29/58] Add schema as input to the decoder. 
(#12981) --- .../realtime/RealtimeSegmentDataManager.java | 24 ++++++++- .../spi/stream/StreamDecoderProvider.java | 52 ------------------- .../spi/stream/StreamMessageDecoder.java | 21 ++++++-- 3 files changed, 40 insertions(+), 57 deletions(-) delete mode 100644 pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamDecoderProvider.java diff --git a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/RealtimeSegmentDataManager.java b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/RealtimeSegmentDataManager.java index 6771e038d14d..3290c5e4f3c0 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/RealtimeSegmentDataManager.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/RealtimeSegmentDataManager.java @@ -75,6 +75,7 @@ import org.apache.pinot.spi.data.Schema; import org.apache.pinot.spi.data.readers.GenericRow; import org.apache.pinot.spi.metrics.PinotMeter; +import org.apache.pinot.spi.plugin.PluginManager; import org.apache.pinot.spi.recordenricher.RecordEnricherPipeline; import org.apache.pinot.spi.stream.ConsumerPartitionState; import org.apache.pinot.spi.stream.LongMsgOffset; @@ -91,7 +92,6 @@ import org.apache.pinot.spi.stream.StreamDataDecoder; import org.apache.pinot.spi.stream.StreamDataDecoderImpl; import org.apache.pinot.spi.stream.StreamDataDecoderResult; -import org.apache.pinot.spi.stream.StreamDecoderProvider; import org.apache.pinot.spi.stream.StreamMessage; import org.apache.pinot.spi.stream.StreamMessageDecoder; import org.apache.pinot.spi.stream.StreamMessageMetadata; @@ -1505,7 +1505,7 @@ public RealtimeSegmentDataManager(SegmentZKMetadata segmentZKMetadata, TableConf // Create message decoder Set fieldsToRead = IngestionUtils.getFieldsForRecordExtractor(_tableConfig.getIngestionConfig(), _schema); try { - StreamMessageDecoder streamMessageDecoder = StreamDecoderProvider.create(_streamConfig, fieldsToRead); + StreamMessageDecoder streamMessageDecoder = createMessageDecoder(fieldsToRead); _streamDataDecoder = new StreamDataDecoderImpl(streamMessageDecoder); } catch (Exception e) { _realtimeTableDataManager.addSegmentError(_segmentNameStr, @@ -1780,6 +1780,26 @@ private void updateCurrentDocumentCountMetrics() { } } + /** + * Creates a {@link StreamMessageDecoder} using properties in {@link StreamConfig}. 
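+   * (Editor-added note: only {@code fieldsToRead} is passed in; the stream config, table config and
+   * schema come from this segment data manager's own fields.)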
+ * + * @param streamConfig The stream config from the table config + * @param fieldsToRead The fields to read from the source stream + * @return The initialized StreamMessageDecoder + */ + private StreamMessageDecoder createMessageDecoder(Set fieldsToRead) { + String decoderClass = _streamConfig.getDecoderClass(); + try { + Map decoderProperties = _streamConfig.getDecoderProperties(); + StreamMessageDecoder decoder = PluginManager.get().createInstance(decoderClass); + decoder.init(fieldsToRead, _streamConfig, _tableConfig, _schema); + return decoder; + } catch (Exception e) { + throw new RuntimeException( + "Caught exception while creating StreamMessageDecoder from stream config: " + _streamConfig, e); + } + } + @Override public MutableSegment getSegment() { return _realtimeSegment; diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamDecoderProvider.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamDecoderProvider.java deleted file mode 100644 index fdb97093de87..000000000000 --- a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamDecoderProvider.java +++ /dev/null @@ -1,52 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.pinot.spi.stream; - -import java.util.Map; -import java.util.Set; -import org.apache.pinot.spi.plugin.PluginManager; - - -/** - * Provider for {@link StreamMessageDecoder} - */ -public class StreamDecoderProvider { - private StreamDecoderProvider() { - } - - /** - * Creates a {@link StreamMessageDecoder} using properties in {@link StreamConfig}. 
- * - * @param streamConfig The stream config from the table config - * @param fieldsToRead The fields to read from the source stream - * @return The initialized StreamMessageDecoder - */ - public static StreamMessageDecoder create(StreamConfig streamConfig, Set fieldsToRead) { - String decoderClass = streamConfig.getDecoderClass(); - Map decoderProperties = streamConfig.getDecoderProperties(); - try { - StreamMessageDecoder decoder = PluginManager.get().createInstance(decoderClass); - decoder.init(decoderProperties, fieldsToRead, streamConfig.getTopicName()); - return decoder; - } catch (Exception e) { - throw new RuntimeException( - "Caught exception while creating StreamMessageDecoder from stream config: " + streamConfig, e); - } - } -} diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamMessageDecoder.java b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamMessageDecoder.java index 89312f06b613..b736e975d1b2 100644 --- a/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamMessageDecoder.java +++ b/pinot-spi/src/main/java/org/apache/pinot/spi/stream/StreamMessageDecoder.java @@ -23,9 +23,10 @@ import javax.annotation.Nullable; import org.apache.pinot.spi.annotations.InterfaceAudience; import org.apache.pinot.spi.annotations.InterfaceStability; +import org.apache.pinot.spi.config.table.TableConfig; +import org.apache.pinot.spi.data.Schema; import org.apache.pinot.spi.data.readers.GenericRow; - /** * Interface for a decoder of messages fetched from the stream * @param @@ -46,8 +47,22 @@ public interface StreamMessageDecoder { * @param topicName Topic name of the stream * @throws Exception If an error occurs */ - void init(Map props, Set fieldsToRead, String topicName) - throws Exception; + default void init(Map props, Set fieldsToRead, String topicName) + throws Exception { + throw new UnsupportedOperationException("init method not implemented"); + } + + /** + * Initializes the decoder. + * @param streamConfig Can be derived from tableConfig but is passed explicitly to avoid redundant computation + * @param tableConfig Table Config of the table + * @param schema Schema of the table + * @throws Exception + */ + default void init(Set fieldsToRead, StreamConfig streamConfig, TableConfig tableConfig, Schema schema) + throws Exception { + init(streamConfig.getDecoderProperties(), fieldsToRead, streamConfig.getTopicName()); + } /** * Decodes a row. 
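[Editor's note, not part of the patch series: a minimal sketch of a decoder that opts into the
schema-aware init hook added above. The class name, fields and stub bodies are illustrative
assumptions; only the StreamMessageDecoder interface, the new init overload and the decode
signatures come from the Pinot SPI.]

    import java.util.Arrays;
    import java.util.Set;
    import org.apache.pinot.spi.config.table.TableConfig;
    import org.apache.pinot.spi.data.Schema;
    import org.apache.pinot.spi.data.readers.GenericRow;
    import org.apache.pinot.spi.stream.StreamConfig;
    import org.apache.pinot.spi.stream.StreamMessageDecoder;

    public class SchemaAwareByteDecoder implements StreamMessageDecoder<byte[]> {
      private Schema _schema;
      private Set<String> _fieldsToRead;

      // Only the new hook needs to be overridden; the legacy
      // init(props, fieldsToRead, topicName) is now a default method.
      @Override
      public void init(Set<String> fieldsToRead, StreamConfig streamConfig, TableConfig tableConfig,
          Schema schema) {
        _schema = schema; // decoders can now see the table schema at init time
        _fieldsToRead = fieldsToRead;
      }

      @Override
      public GenericRow decode(byte[] payload, GenericRow destination) {
        // Illustrative stub: a real decoder parses the payload here, reading only
        // _fieldsToRead and coercing values to the types declared in _schema.
        return destination;
      }

      @Override
      public GenericRow decode(byte[] payload, int offset, int length, GenericRow destination) {
        return decode(Arrays.copyOfRange(payload, offset, offset + length), destination);
      }
    }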
From 99a41803305a39887805e1106f90d07e4b6af978 Mon Sep 17 00:00:00 2001 From: Xuanyi Li Date: Wed, 24 Apr 2024 17:03:25 -0700 Subject: [PATCH 30/58] avoid useless intermediate byte array allocation for VarChunkV4Reader's getStringMV (#12978) --- .../VarByteChunkForwardIndexReaderV4.java | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/forward/VarByteChunkForwardIndexReaderV4.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/forward/VarByteChunkForwardIndexReaderV4.java index f0a3658cb3bf..a7fadab8c356 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/forward/VarByteChunkForwardIndexReaderV4.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/forward/VarByteChunkForwardIndexReaderV4.java @@ -207,29 +207,29 @@ public double[] getDoubleMV(int docId, VarByteChunkForwardIndexReaderV4.ReaderCo @Override public int getStringMV(int docId, String[] valueBuffer, VarByteChunkForwardIndexReaderV4.ReaderContext context) { - ByteBuffer byteBuffer = ByteBuffer.wrap(context.getValue(docId)); + byte[] bytes = context.getValue(docId); + ByteBuffer byteBuffer = ByteBuffer.wrap(bytes); int numValues = byteBuffer.getInt(); - byteBuffer.position((numValues + 1) * Integer.BYTES); + int offset = (numValues + 1) * Integer.BYTES; for (int i = 0; i < numValues; i++) { int length = byteBuffer.getInt((i + 1) * Integer.BYTES); - byte[] bytes = new byte[length]; - byteBuffer.get(bytes); - valueBuffer[i] = new String(bytes, StandardCharsets.UTF_8); + valueBuffer[i] = new String(bytes, offset, length, StandardCharsets.UTF_8); + offset += length; } return numValues; } @Override public String[] getStringMV(int docId, VarByteChunkForwardIndexReaderV4.ReaderContext context) { - ByteBuffer byteBuffer = ByteBuffer.wrap(context.getValue(docId)); + byte[] bytes = context.getValue(docId); + ByteBuffer byteBuffer = ByteBuffer.wrap(bytes); int numValues = byteBuffer.getInt(); - byteBuffer.position((numValues + 1) * Integer.BYTES); + int offset = (numValues + 1) * Integer.BYTES; String[] valueBuffer = new String[numValues]; for (int i = 0; i < numValues; i++) { int length = byteBuffer.getInt((i + 1) * Integer.BYTES); - byte[] bytes = new byte[length]; - byteBuffer.get(bytes); - valueBuffer[i] = new String(bytes, StandardCharsets.UTF_8); + valueBuffer[i] = new String(bytes, offset, length, StandardCharsets.UTF_8); + offset += length; } return valueBuffer; } From 49da7985806d31d7cdf63a76b888c74cd0bc816b Mon Sep 17 00:00:00 2001 From: Abhishek Sharma Date: Thu, 25 Apr 2024 12:41:19 -0400 Subject: [PATCH 31/58] Upgrade scala maven plugin to 4.9.0 (#13007) --- pom.xml | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/pom.xml b/pom.xml index 13f99490d1ce..f353f4712a79 100644 --- a/pom.xml +++ b/pom.xml @@ -1884,7 +1884,7 @@ net.alchim31.maven scala-maven-plugin - 3.2.2 + 4.9.0 add-source @@ -1920,6 +1920,9 @@ ${scala.version} + ${jdk.version} + ${jdk.version} + ${jdk.version} -unchecked -deprecation @@ -1930,10 +1933,6 @@ -Xmx1024m - -source - ${jdk.version} - -target - ${jdk.version} -Xlint:all,-serial,-path From 3f0b748e140c1f178c9fef16de4d9bf6f64d6b74 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 25 Apr 2024 09:44:47 -0700 Subject: [PATCH 32/58] Bump aws.sdk.version from 2.25.37 to 
2.25.38 (#13006) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index f353f4712a79..d40fe84eb0e3 100644 --- a/pom.xml +++ b/pom.xml @@ -172,7 +172,7 @@ 0.15.0 0.4.4 4.2.2 - 2.25.37 + 2.25.38 2.12.7 3.1.12 7.10.1 From 84a4c70327fcdb7078662d9e697610809d29df1c Mon Sep 17 00:00:00 2001 From: Yash Mayya Date: Fri, 26 Apr 2024 03:46:59 +0530 Subject: [PATCH 33/58] Re-enable the Spotless plugin for Java 21 (#12992) --- pinot-common/pom.xml | 38 +++++++++++++------------------------- pom.xml | 19 ++++--------------- 2 files changed, 17 insertions(+), 40 deletions(-) diff --git a/pinot-common/pom.xml b/pinot-common/pom.xml index 2381f024d46b..32cd2eb9dbce 100644 --- a/pinot-common/pom.xml +++ b/pinot-common/pom.xml @@ -62,6 +62,19 @@ protobuf-maven-plugin + + com.diffplug.spotless + spotless-maven-plugin + + + + src/main/java/org/apache/pinot/common/request/*.java + src/main/java/org/apache/pinot/common/response/ProcessingException.java + + + + + @@ -129,31 +142,6 @@ - - - - - com.diffplug.spotless - spotless-maven-plugin - - - - src/main/java/**/*.java - src/test/java/**/*.java - - - src/main/java/org/apache/pinot/common/request/*.java - src/main/java/org/apache/pinot/common/response/ProcessingException.java - - - ,\# - - - - - - - diff --git a/pom.xml b/pom.xml index d40fe84eb0e3..978320c58331 100644 --- a/pom.xml +++ b/pom.xml @@ -254,20 +254,6 @@ - - not-java-21 - - !21 - - - - - com.diffplug.spotless - spotless-maven-plugin - - - - github-actions @@ -1609,7 +1595,6 @@ 2.43.0 - verify check @@ -2063,6 +2048,10 @@ sonar-maven-plugin 2.7.1 + + com.diffplug.spotless + spotless-maven-plugin + com.mycila license-maven-plugin From fc98ce1d53710d333ca652304cd0f1c1f1fa8e1b Mon Sep 17 00:00:00 2001 From: Gonzalo Ortiz Jaureguizar Date: Fri, 26 Apr 2024 00:20:02 +0200 Subject: [PATCH 34/58] Use ArrayList instead of LinkedList in SortOperator (#12783) --- .../query/runtime/operator/SortOperator.java | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/SortOperator.java b/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/SortOperator.java index ce4ddf130f91..b0a1923c808b 100644 --- a/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/SortOperator.java +++ b/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/operator/SortOperator.java @@ -21,7 +21,7 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableList; import java.util.ArrayList; -import java.util.LinkedList; +import java.util.Arrays; import java.util.List; import java.util.PriorityQueue; import javax.annotation.Nullable; @@ -129,16 +129,16 @@ private TransferableBlock produceSortedBlock() { return TransferableBlockUtils.getEndOfStreamTransferableBlock(); } } else { - LinkedList rows = new LinkedList<>(); - while (_priorityQueue.size() > _offset) { - Object[] row = _priorityQueue.poll(); - rows.addFirst(row); - } - if (rows.size() == 0) { + int resultSize = _priorityQueue.size() - _offset; + if (resultSize <= 0) { return TransferableBlockUtils.getEndOfStreamTransferableBlock(); - } else { - return new TransferableBlock(rows, _dataSchema, DataBlock.Type.ROW); } + Object[][] rowsArr = new Object[resultSize][]; + for (int i = resultSize - 1; i >= 0; i--) { + Object[] row = _priorityQueue.poll(); + rowsArr[i] = row; + } + return new TransferableBlock(Arrays.asList(rowsArr), _dataSchema, 
DataBlock.Type.ROW); } } From 97a2e6d95eb87c67405cd661371a51acc478adec Mon Sep 17 00:00:00 2001 From: Christopher Peck <27231838+itschrispeck@users.noreply.github.com> Date: Thu, 25 Apr 2024 16:14:26 -0700 Subject: [PATCH 35/58] fix TextMatchFilterOperator boolean grouping (#13009) --- .../filter/TextMatchFilterOptimizer.java | 2 +- .../query/optimizer/QueryOptimizerTest.java | 29 ++++++++++--------- 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/filter/TextMatchFilterOptimizer.java b/pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/filter/TextMatchFilterOptimizer.java index aca4e2d5ccf0..8c742cfc98a4 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/filter/TextMatchFilterOptimizer.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/filter/TextMatchFilterOptimizer.java @@ -162,7 +162,7 @@ private Expression getNewFilter(String operator, List newChildren, mergedTextMatchFilter = String.join(SPACE + operator + SPACE, literals); } Expression mergedTextMatchExpression = RequestUtils.getFunctionExpression(FilterKind.TEXT_MATCH.name()); - Expression mergedTextMatchFilterExpression = RequestUtils.getLiteralExpression(mergedTextMatchFilter); + Expression mergedTextMatchFilterExpression = RequestUtils.getLiteralExpression("(" + mergedTextMatchFilter + ")"); mergedTextMatchExpression.getFunctionCall() .setOperands(Arrays.asList(entry.getKey(), mergedTextMatchFilterExpression)); diff --git a/pinot-core/src/test/java/org/apache/pinot/core/query/optimizer/QueryOptimizerTest.java b/pinot-core/src/test/java/org/apache/pinot/core/query/optimizer/QueryOptimizerTest.java index 337075f9746c..848f458742d2 100644 --- a/pinot-core/src/test/java/org/apache/pinot/core/query/optimizer/QueryOptimizerTest.java +++ b/pinot-core/src/test/java/org/apache/pinot/core/query/optimizer/QueryOptimizerTest.java @@ -178,7 +178,7 @@ public void testMergeTextMatchFilter() { List operands = filterFunction.getOperands(); assertEquals(operands.size(), 2); assertEquals(operands.get(0), RequestUtils.getIdentifierExpression("string")); - assertEquals(operands.get(1), RequestUtils.getLiteralExpression("foo AND bar OR baz")); + assertEquals(operands.get(1), RequestUtils.getLiteralExpression("((foo AND bar) OR baz)")); } private static Expression getRangeFilterExpression(String column, String rangeString) { @@ -268,32 +268,35 @@ public void testQueries() { // TextMatchFilterOptimizer testQuery("SELECT * FROM testTable WHERE TEXT_MATCH(string, 'foo') AND TEXT_MATCH(string, 'bar')", - "SELECT * FROM testTable WHERE TEXT_MATCH(string, 'foo AND bar')"); + "SELECT * FROM testTable WHERE TEXT_MATCH(string, '(foo AND bar)')"); testQuery("SELECT * FROM testTable WHERE TEXT_MATCH(string, '\"foo bar\"') AND TEXT_MATCH(string, 'baz')", - "SELECT * FROM testTable WHERE TEXT_MATCH(string, '\"foo bar\" AND baz')"); + "SELECT * FROM testTable WHERE TEXT_MATCH(string, '(\"foo bar\" AND baz)')"); testQuery("SELECT * FROM testTable WHERE TEXT_MATCH(string, '\"foo bar\"') AND TEXT_MATCH(string, '/.*ooba.*/')", - "SELECT * FROM testTable WHERE TEXT_MATCH(string, '\"foo bar\" AND /.*ooba.*/')"); + "SELECT * FROM testTable WHERE TEXT_MATCH(string, '(\"foo bar\" AND /.*ooba.*/)')"); testQuery("SELECT * FROM testTable WHERE int = 1 AND TEXT_MATCH(string, 'foo') AND TEXT_MATCH(string, 'bar')", - "SELECT * FROM testTable WHERE int = 1 AND TEXT_MATCH(string, 'foo AND bar')"); + "SELECT * FROM testTable WHERE int = 1 AND 
TEXT_MATCH(string, '(foo AND bar)')"); testQuery("SELECT * FROM testTable WHERE int = 1 OR TEXT_MATCH(string, 'foo') AND TEXT_MATCH(string, 'bar')", - "SELECT * FROM testTable WHERE int = 1 OR TEXT_MATCH(string, 'foo AND bar')"); + "SELECT * FROM testTable WHERE int = 1 OR TEXT_MATCH(string, '(foo AND bar)')"); testQuery("SELECT * FROM testTable WHERE TEXT_MATCH(string, 'foo') AND NOT TEXT_MATCH(string, 'bar')", - "SELECT * FROM testTable WHERE TEXT_MATCH(string, 'foo AND NOT bar')"); + "SELECT * FROM testTable WHERE TEXT_MATCH(string, '(foo AND NOT bar)')"); testQuery("SELECT * FROM testTable WHERE NOT TEXT_MATCH(string, 'foo') AND TEXT_MATCH(string, 'bar')", - "SELECT * FROM testTable WHERE TEXT_MATCH(string, 'NOT foo AND bar')"); + "SELECT * FROM testTable WHERE TEXT_MATCH(string, '(NOT foo AND bar)')"); testQuery("SELECT * FROM testTable WHERE NOT TEXT_MATCH(string, 'foo') AND NOT TEXT_MATCH(string, 'bar')", - "SELECT * FROM testTable WHERE NOT TEXT_MATCH(string, 'foo OR bar')"); + "SELECT * FROM testTable WHERE NOT TEXT_MATCH(string, '(foo OR bar)')"); testQuery("SELECT * FROM testTable WHERE NOT TEXT_MATCH(string, 'foo') OR NOT TEXT_MATCH(string, 'bar')", - "SELECT * FROM testTable WHERE NOT TEXT_MATCH(string, 'foo AND bar')"); + "SELECT * FROM testTable WHERE NOT TEXT_MATCH(string, '(foo AND bar)')"); testQuery("SELECT * FROM testTable WHERE TEXT_MATCH(string, 'foo') AND TEXT_MATCH(string, 'bar') OR " - + "TEXT_MATCH(string, 'baz')", "SELECT * FROM testTable WHERE TEXT_MATCH(string, 'foo AND bar OR baz')"); + + "TEXT_MATCH(string, 'baz')", "SELECT * FROM testTable WHERE TEXT_MATCH(string, '((foo AND bar) OR baz)')"); + testQuery("SELECT * FROM testTable WHERE TEXT_MATCH(string, 'foo') AND (TEXT_MATCH(string, 'bar') OR " + + "TEXT_MATCH(string, 'baz'))", "SELECT * FROM testTable WHERE TEXT_MATCH(string, '(foo AND (bar OR baz))')"); testQuery("SELECT * FROM testTable WHERE TEXT_MATCH(string1, 'foo1') AND TEXT_MATCH(string1, 'bar1') OR " + "TEXT_MATCH(string1, 'baz1') AND TEXT_MATCH(string2, 'foo')", - "SELECT * FROM testTable WHERE TEXT_MATCH(string1, 'foo1 AND bar1') OR TEXT_MATCH(string1, 'baz1') AND " + "SELECT * FROM testTable WHERE TEXT_MATCH(string1, '(foo1 AND bar1)') OR TEXT_MATCH(string1, 'baz1') AND " + "TEXT_MATCH(string2, 'foo')"); testQuery("SELECT * FROM testTable WHERE TEXT_MATCH(string1, 'foo1') AND TEXT_MATCH(string1, 'bar1')" + "AND TEXT_MATCH(string2, 'foo2') AND TEXT_MATCH(string2, 'bar2')", - "SELECT * FROM testTable WHERE TEXT_MATCH(string1, 'foo1 AND bar1') AND TEXT_MATCH(string2, 'foo2 AND bar2')"); + "SELECT * FROM testTable WHERE TEXT_MATCH(string1, '(foo1 AND bar1)') AND TEXT_MATCH(string2, '(foo2 AND " + + "bar2)')"); testCannotOptimizeQuery("SELECT * FROM testTable WHERE TEXT_MATCH(string1, 'foo') OR TEXT_MATCH(string2, 'bar')"); testCannotOptimizeQuery( "SELECT * FROM testTable WHERE int = 1 AND TEXT_MATCH(string, 'foo') OR TEXT_MATCH(string, 'bar')"); From 2fb30c0c0f74b3a7108f2a5749b1b332156b4fc3 Mon Sep 17 00:00:00 2001 From: Gonzalo Ortiz Jaureguizar Date: Fri, 26 Apr 2024 01:15:42 +0200 Subject: [PATCH 36/58] Add some multi-stage metrics (#12982) --- .../MultiStageBrokerRequestHandler.java | 6 ++++++ .../pinot/common/metrics/BrokerMeter.java | 19 +++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/pinot-broker/src/main/java/org/apache/pinot/broker/requesthandler/MultiStageBrokerRequestHandler.java b/pinot-broker/src/main/java/org/apache/pinot/broker/requesthandler/MultiStageBrokerRequestHandler.java index 01e4884d6a08..01bfe456b5b1 
100644 --- a/pinot-broker/src/main/java/org/apache/pinot/broker/requesthandler/MultiStageBrokerRequestHandler.java +++ b/pinot-broker/src/main/java/org/apache/pinot/broker/requesthandler/MultiStageBrokerRequestHandler.java @@ -170,6 +170,12 @@ protected BrokerResponse handleRequest(long requestId, String query, @Nullable S DispatchableSubPlan dispatchableSubPlan = queryPlanResult.getQueryPlan(); Set tableNames = queryPlanResult.getTableNames(); + + _brokerMetrics.addMeteredGlobalValue(BrokerMeter.MULTI_STAGE_QUERIES_GLOBAL, 1); + for (String tableName : tableNames) { + _brokerMetrics.addMeteredTableValue(tableName, BrokerMeter.MULTI_STAGE_QUERIES, 1); + } + requestContext.setTableNames(List.copyOf(tableNames)); // Compilation Time. This includes the time taken for parsing, compiling, create stage plans and assigning workers. diff --git a/pinot-common/src/main/java/org/apache/pinot/common/metrics/BrokerMeter.java b/pinot-common/src/main/java/org/apache/pinot/common/metrics/BrokerMeter.java index bb76591ab003..006ee458ebb6 100644 --- a/pinot-common/src/main/java/org/apache/pinot/common/metrics/BrokerMeter.java +++ b/pinot-common/src/main/java/org/apache/pinot/common/metrics/BrokerMeter.java @@ -29,7 +29,26 @@ public enum BrokerMeter implements AbstractMetrics.Meter { UNCAUGHT_POST_EXCEPTIONS("exceptions", true), HEALTHCHECK_BAD_CALLS("healthcheck", true), HEALTHCHECK_OK_CALLS("healthcheck", true), + /** + * Number of queries executed. + *
+ * At this moment this counter does not include queries executed in multi-stage mode. + */ QUERIES("queries", false), + /** + * Number of multi-stage queries that have been started. + *
+ * Unlike {@link #MULTI_STAGE_QUERIES}, this metric is global and not attached to a particular table. + * That means it can be used to know how many multi-stage queries have been started in total. + */ + MULTI_STAGE_QUERIES_GLOBAL("queries", true), + /** + * Number of multi-stage queries that have been started touched a given table. + *
+ * In case the query touch multiple tables (ie using joins)1, this metric will be incremented for each table, so the + * sum of this metric across all tables should be greater or equal than {@link #MULTI_STAGE_QUERIES_GLOBAL}. + */ + MULTI_STAGE_QUERIES("queries", false), // These metrics track the exceptions caught during query execution in broker side. // Query rejected by Jersey thread pool executor From 571214daf14780cb4b832e142b2c65a395d86a0a Mon Sep 17 00:00:00 2001 From: Shounak kulkarni Date: Fri, 26 Apr 2024 11:44:13 +0500 Subject: [PATCH 37/58] Metric for count of tables configured with various tier backends (#12940) * Metric for count of tables using various tier backends * avoid multi tiered double counting * remove unused import * Handle metrics deletion * minor * Metric for count of tables using various tier backends * avoid multi tiered double counting * remove unused import * Handle metrics deletion * minor * reformat metric naming * Revert "Merge branch 'tier-backend-metric' of https://github.com/shounakmk219/pinot into tier-backend-metric" This reverts commit 9646c83517db2afd1a0a95805ea0c73f668b2a8e, reversing changes made to a3f3d10eb2e8f42ecef56eff9f345fd34292e61c. --- .../configs/controller.yml | 3 ++ .../pinot/common/metrics/AbstractMetrics.java | 4 +++ .../pinot/common/metrics/ControllerGauge.java | 1 + .../helix/SegmentStatusChecker.java | 28 +++++++++++++++++++ 4 files changed, 36 insertions(+) diff --git a/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/controller.yml b/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/controller.yml index a441f714e4a6..a036a130533e 100644 --- a/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/controller.yml +++ b/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/controller.yml @@ -85,6 +85,9 @@ rules: - pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_offlineTableCount_$1" cache: true +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" + name: "pinot_controller_tierBackendTableCount_$1_$2" + cache: true - pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_validateion_$4_$5" cache: true diff --git a/pinot-common/src/main/java/org/apache/pinot/common/metrics/AbstractMetrics.java b/pinot-common/src/main/java/org/apache/pinot/common/metrics/AbstractMetrics.java index ee13493e15db..dfdc2abb0f29 100644 --- a/pinot-common/src/main/java/org/apache/pinot/common/metrics/AbstractMetrics.java +++ b/pinot-common/src/main/java/org/apache/pinot/common/metrics/AbstractMetrics.java @@ -766,6 +766,10 @@ private String composeTableGaugeName(final String tableName, final String key, f return gauge.getGaugeName() + "." + getTableName(tableName) + "." + key; } + public String composePluginGaugeName(String pluginName, Gauge gauge) { + return gauge.getGaugeName() + "." + pluginName; + } + /** * Remove gauge from Pinot metrics. 
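   * (Editor-added illustration: the per-tier gauges created in SegmentStatusChecker below are named via
   * the new composePluginGaugeName helper, so ControllerGauge.TIER_BACKEND_TABLE_COUNT for a tier backend
   * named "deepstore" becomes "TierBackendTableCount.deepstore", and stale names are passed to this method
   * when a tier backend disappears; "deepstore" is an illustrative backend name.)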
* @param gaugeName gauge name diff --git a/pinot-common/src/main/java/org/apache/pinot/common/metrics/ControllerGauge.java b/pinot-common/src/main/java/org/apache/pinot/common/metrics/ControllerGauge.java index 938242ef7885..82c4e666e16a 100644 --- a/pinot-common/src/main/java/org/apache/pinot/common/metrics/ControllerGauge.java +++ b/pinot-common/src/main/java/org/apache/pinot/common/metrics/ControllerGauge.java @@ -65,6 +65,7 @@ public enum ControllerGauge implements AbstractMetrics.Gauge { NUM_MINION_SUBTASKS_ERROR("NumMinionSubtasksError", true), PERCENT_MINION_SUBTASKS_IN_QUEUE("PercentMinionSubtasksInQueue", true), PERCENT_MINION_SUBTASKS_IN_ERROR("PercentMinionSubtasksInError", true), + TIER_BACKEND_TABLE_COUNT("TierBackendTableCount", true), // Pinot controller leader PINOT_CONTROLLER_LEADER("PinotControllerLeader", true), diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/SegmentStatusChecker.java b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/SegmentStatusChecker.java index fa265c4f52f7..4bcdc2f51649 100644 --- a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/SegmentStatusChecker.java +++ b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/SegmentStatusChecker.java @@ -19,6 +19,7 @@ package org.apache.pinot.controller.helix; import com.google.common.annotations.VisibleForTesting; +import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; @@ -50,6 +51,7 @@ import org.apache.pinot.controller.util.TableSizeReader; import org.apache.pinot.spi.config.table.TableConfig; import org.apache.pinot.spi.config.table.TableType; +import org.apache.pinot.spi.config.table.TierConfig; import org.apache.pinot.spi.stream.StreamConfig; import org.apache.pinot.spi.utils.IngestionConfigUtils; import org.apache.pinot.spi.utils.builder.TableNameBuilder; @@ -78,6 +80,7 @@ public class SegmentStatusChecker extends ControllerPeriodicTask _tierBackendGauges = new HashSet<>(); /** * Constructs the segment status checker. @@ -135,6 +138,17 @@ protected void postprocess(Context context) { _controllerMetrics.setValueOfGlobalGauge(ControllerGauge.UPSERT_TABLE_COUNT, context._upsertTableCount); _controllerMetrics.setValueOfGlobalGauge(ControllerGauge.DISABLED_TABLE_COUNT, context._disabledTables.size()); + _tierBackendGauges.forEach(_controllerMetrics::removeGauge); + // metric for total number of tables using a particular tier backend + context._tierBackendTableCountMap.forEach((tier, count) -> { + String gaugeName = _controllerMetrics.composePluginGaugeName(tier, ControllerGauge.TIER_BACKEND_TABLE_COUNT); + _tierBackendGauges.add(gaugeName); + _controllerMetrics.setOrUpdateGauge(gaugeName, count); + }); + // metric for total number of tables having tier backend configured + _controllerMetrics.setOrUpdateGauge(ControllerGauge.TIER_BACKEND_TABLE_COUNT.getGaugeName(), + context._tierBackendConfiguredTableCount); + //emit a 0 for tables that are not paused/disabled. 
This makes alert expressions simpler as we don't have to deal // with missing metrics context._processedTables.forEach(tableNameWithType -> { @@ -171,6 +185,18 @@ private void updateTableConfigMetrics(String tableNameWithType, TableConfig tabl if (tableConfig.isUpsertEnabled()) { context._upsertTableCount++; } + List tierConfigList = tableConfig.getTierConfigsList(); + if (tierConfigList != null && !tierConfigList.isEmpty()) { + Set tierBackendSet = new HashSet<>(tierConfigList.size()); + for (TierConfig config : tierConfigList) { + if (config.getTierBackend() != null) { + tierBackendSet.add(config.getTierBackend()); + } + } + tierBackendSet.forEach(tierBackend -> context._tierBackendTableCountMap.put(tierBackend, + context._tierBackendTableCountMap.getOrDefault(tierBackend, 0) + 1)); + context._tierBackendConfiguredTableCount += tierBackendSet.isEmpty() ? 0 : 1; + } int replication = tableConfig.getReplication(); _controllerMetrics.setValueOfTableGauge(tableNameWithType, ControllerGauge.REPLICATION_FROM_CONFIG, replication); } @@ -391,6 +417,8 @@ public static final class Context { private int _realTimeTableCount; private int _offlineTableCount; private int _upsertTableCount; + private int _tierBackendConfiguredTableCount; + private Map _tierBackendTableCountMap = new HashMap<>(); private Set _processedTables = new HashSet<>(); private Set _disabledTables = new HashSet<>(); private Set _pausedTables = new HashSet<>(); From e9cba4991ad4542f1c5a62c197533ffbea1e48bb Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 26 Apr 2024 10:16:48 -0700 Subject: [PATCH 38/58] Bump aws.sdk.version from 2.25.38 to 2.25.39 (#13012) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 978320c58331..8a9e80ae6e45 100644 --- a/pom.xml +++ b/pom.xml @@ -172,7 +172,7 @@ 0.15.0 0.4.4 4.2.2 - 2.25.38 + 2.25.39 2.12.7 3.1.12 7.10.1 From cb687834c18d39fc7e59025188ceb68634b56789 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 26 Apr 2024 10:17:07 -0700 Subject: [PATCH 39/58] Bump circe.version from 0.14.6 to 0.14.7 (#13013) --- pinot-connectors/pinot-spark-common/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pinot-connectors/pinot-spark-common/pom.xml b/pinot-connectors/pinot-spark-common/pom.xml index 4da9dd3d5571..f26f6a0de25f 100644 --- a/pinot-connectors/pinot-spark-common/pom.xml +++ b/pinot-connectors/pinot-spark-common/pom.xml @@ -33,7 +33,7 @@ https://pinot.apache.org/ ${basedir}/../.. 
- 0.14.6 + 0.14.7 2.8 2.3.0 3.2.18 From 5fc89ce4530f856756a3ca6357d90deea9365032 Mon Sep 17 00:00:00 2001 From: "Xiaotian (Jackie) Jiang" <17555551+Jackie-Jiang@users.noreply.github.com> Date: Fri, 26 Apr 2024 13:06:25 -0700 Subject: [PATCH 40/58] Support NOT in StarTree Index (#12988) --- .../SortedIndexBasedFilterOperator.java | 6 +- ...BaseDictionaryBasedPredicateEvaluator.java | 55 ++++++-- .../predicate/BasePredicateEvaluator.java | 10 -- .../EqualsPredicateEvaluatorFactory.java | 16 +-- ...TBasedRegexpPredicateEvaluatorFactory.java | 30 ++--- .../InPredicateEvaluatorFactory.java | 37 ++---- .../NotEqualsPredicateEvaluatorFactory.java | 41 +----- .../NotInPredicateEvaluatorFactory.java | 62 +++------ .../filter/predicate/PredicateEvaluator.java | 23 +--- .../filter/predicate/PredicateUtils.java | 34 +++++ .../RangePredicateEvaluatorFactory.java | 96 ++++++++------ .../RegexpLikePredicateEvaluatorFactory.java | 22 +-- .../startree/CompositePredicateEvaluator.java | 17 +-- .../pinot/core/startree/StarTreeUtils.java | 125 ++++++++++++------ .../operator/StarTreeFilterOperator.java | 68 +++++++--- .../core/startree/v2/BaseStarTreeV2Test.java | 20 ++- .../perf/BenchmarkScanDocIdIterators.java | 10 -- 17 files changed, 357 insertions(+), 315 deletions(-) diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/SortedIndexBasedFilterOperator.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/SortedIndexBasedFilterOperator.java index d32c68c7e53e..09144af9ff6d 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/SortedIndexBasedFilterOperator.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/SortedIndexBasedFilterOperator.java @@ -20,7 +20,6 @@ import com.google.common.base.Preconditions; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collections; import java.util.List; import org.apache.pinot.core.common.BlockDocIdSet; @@ -90,10 +89,7 @@ protected BlockDocIdSet getNextBlockWithoutNullHandling() { return new SortedDocIdSet(Collections.singletonList(docIdRange)); } } else { - // Sort the dictIds in ascending order so that their respective docIdRanges are adjacent if they are adjacent - Arrays.sort(dictIds); - - // Merge adjacent docIdRanges + // Merge adjacent docIdRanges (dictIds are already sorted) List docIdRanges = new ArrayList<>(); IntPair lastDocIdRange = _sortedIndexReader.getDocIds(dictIds[0]); for (int i = 1; i < numDictIds; i++) { diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/BaseDictionaryBasedPredicateEvaluator.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/BaseDictionaryBasedPredicateEvaluator.java index 92050c3cefa0..18d15d367353 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/BaseDictionaryBasedPredicateEvaluator.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/BaseDictionaryBasedPredicateEvaluator.java @@ -18,17 +18,34 @@ */ package org.apache.pinot.core.operator.filter.predicate; +import it.unimi.dsi.fastutil.ints.IntArrayList; +import it.unimi.dsi.fastutil.ints.IntList; import java.math.BigDecimal; import org.apache.pinot.common.request.context.predicate.Predicate; +import org.apache.pinot.segment.spi.index.reader.Dictionary; import org.apache.pinot.spi.data.FieldSpec.DataType; public abstract class BaseDictionaryBasedPredicateEvaluator extends BasePredicateEvaluator { + protected final Dictionary _dictionary; protected 
boolean _alwaysTrue; protected boolean _alwaysFalse; + protected int[] _matchingDictIds; + protected int[] _nonMatchingDictIds; - protected BaseDictionaryBasedPredicateEvaluator(Predicate predicate) { + protected BaseDictionaryBasedPredicateEvaluator(Predicate predicate, Dictionary dictionary) { super(predicate); + _dictionary = dictionary; + } + + @Override + public final boolean isDictionaryBased() { + return true; + } + + @Override + public DataType getDataType() { + return DataType.INT; } @Override @@ -42,13 +59,33 @@ public boolean isAlwaysFalse() { } @Override - public final boolean isDictionaryBased() { - return true; + public int[] getMatchingDictIds() { + if (_matchingDictIds == null) { + _matchingDictIds = calculateMatchingDictIds(); + } + return _matchingDictIds; } - @Override - public DataType getDataType() { - return DataType.INT; + protected int[] calculateMatchingDictIds() { + IntList matchingDictIds = new IntArrayList(); + int dictionarySize = _dictionary.length(); + for (int dictId = 0; dictId < dictionarySize; dictId++) { + if (applySV(dictId)) { + matchingDictIds.add(dictId); + } + } + return matchingDictIds.toIntArray(); + } + + public int[] getNonMatchingDictIds() { + if (_nonMatchingDictIds == null) { + _nonMatchingDictIds = calculateNonMatchingDictIds(); + } + return _nonMatchingDictIds; + } + + protected int[] calculateNonMatchingDictIds() { + return PredicateUtils.flipDictIds(getMatchingDictIds(), _dictionary.length()); } @Override @@ -106,12 +143,6 @@ public final boolean applyMV(byte[][] values, int length) { throw new UnsupportedOperationException(); } - // NOTE: override it for exclusive predicate - @Override - public int[] getNonMatchingDictIds() { - throw new UnsupportedOperationException(); - } - /** * Apply a single-value entry to the predicate. 
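   * For a dictionary based evaluator the entry is a dictionary id, e.g. (illustrative)
   * applySV(3) returns whether dictionary id 3 satisfies the predicate.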
* diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/BasePredicateEvaluator.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/BasePredicateEvaluator.java index 0e04954675c7..407e619ae33d 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/BasePredicateEvaluator.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/BasePredicateEvaluator.java @@ -42,14 +42,4 @@ public Predicate.Type getPredicateType() { public final boolean isExclusive() { return getPredicateType().isExclusive(); } - - @Override - public int getNumMatchingDictIds() { - return getMatchingDictIds().length; - } - - @Override - public int getNumNonMatchingDictIds() { - return getNonMatchingDictIds().length; - } } diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/EqualsPredicateEvaluatorFactory.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/EqualsPredicateEvaluatorFactory.java index 14616b36be78..bf99e6c933e5 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/EqualsPredicateEvaluatorFactory.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/EqualsPredicateEvaluatorFactory.java @@ -62,8 +62,7 @@ public static BaseDictionaryBasedPredicateEvaluator newDictionaryBasedEvaluator( * @param dataType Data type for the column * @return Raw value based EQ predicate evaluator */ - public static EqRawPredicateEvaluator newRawValueBasedEvaluator(EqPredicate eqPredicate, - DataType dataType) { + public static EqRawPredicateEvaluator newRawValueBasedEvaluator(EqPredicate eqPredicate, DataType dataType) { String value = eqPredicate.getValue(); switch (dataType) { case INT: @@ -92,10 +91,9 @@ public static EqRawPredicateEvaluator newRawValueBasedEvaluator(EqPredicate eqPr private static final class DictionaryBasedEqPredicateEvaluator extends BaseDictionaryBasedPredicateEvaluator implements IntValue { final int _matchingDictId; - final int[] _matchingDictIds; DictionaryBasedEqPredicateEvaluator(EqPredicate eqPredicate, Dictionary dictionary, DataType dataType) { - super(eqPredicate); + super(eqPredicate, dictionary); String predicateValue = PredicateUtils.getStoredValue(eqPredicate.getValue(), dataType); _matchingDictId = dictionary.indexOf(predicateValue); if (_matchingDictId >= 0) { @@ -109,6 +107,11 @@ private static final class DictionaryBasedEqPredicateEvaluator extends BaseDicti } } + @Override + protected int[] calculateNonMatchingDictIds() { + return PredicateUtils.getDictIds(_dictionary.length(), _matchingDictId); + } + @Override public int getNumMatchingItems() { return 1; @@ -132,11 +135,6 @@ public int applySV(int limit, int[] docIds, int[] values) { return matches; } - @Override - public int[] getMatchingDictIds() { - return _matchingDictIds; - } - @Override public int getInt() { return _matchingDictId; diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/FSTBasedRegexpPredicateEvaluatorFactory.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/FSTBasedRegexpPredicateEvaluatorFactory.java index b1a0559a92a2..11dbe7aa995c 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/FSTBasedRegexpPredicateEvaluatorFactory.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/FSTBasedRegexpPredicateEvaluatorFactory.java @@ -50,30 +50,29 @@ public static 
BaseDictionaryBasedPredicateEvaluator newFSTBasedEvaluator(RegexpL * Matches regexp query using FSTIndexReader. */ private static class FSTBasedRegexpPredicateEvaluator extends BaseDictionaryBasedPredicateEvaluator { - final Dictionary _dictionary; - final ImmutableRoaringBitmap _dictIds; + final ImmutableRoaringBitmap _matchingDictIdBitmap; public FSTBasedRegexpPredicateEvaluator(RegexpLikePredicate regexpLikePredicate, TextIndexReader fstIndexReader, Dictionary dictionary) { - super(regexpLikePredicate); - _dictionary = dictionary; + super(regexpLikePredicate, dictionary); String searchQuery = RegexpPatternConverterUtils.regexpLikeToLuceneRegExp(regexpLikePredicate.getValue()); - _dictIds = fstIndexReader.getDictIds(searchQuery); - } - - @Override - public boolean isAlwaysFalse() { - return _dictIds.isEmpty(); + _matchingDictIdBitmap = fstIndexReader.getDictIds(searchQuery); + int numMatchingDictIds = _matchingDictIdBitmap.getCardinality(); + if (numMatchingDictIds == 0) { + _alwaysFalse = true; + } else if (dictionary.length() == numMatchingDictIds) { + _alwaysTrue = true; + } } @Override - public boolean isAlwaysTrue() { - return _dictIds.getCardinality() == _dictionary.length(); + protected int[] calculateMatchingDictIds() { + return _matchingDictIdBitmap.toArray(); } @Override public boolean applySV(int dictId) { - return _dictIds.contains(dictId); + return _matchingDictIdBitmap.contains(dictId); } @Override @@ -88,10 +87,5 @@ public int applySV(int limit, int[] docIds, int[] values) { } return matches; } - - @Override - public int[] getMatchingDictIds() { - return _dictIds.toArray(); - } } } diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/InPredicateEvaluatorFactory.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/InPredicateEvaluatorFactory.java index 9ad0a78014c2..5ebc9a1a6beb 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/InPredicateEvaluatorFactory.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/InPredicateEvaluatorFactory.java @@ -71,8 +71,7 @@ public static BaseDictionaryBasedPredicateEvaluator newDictionaryBasedEvaluator( * @param dataType Data type for the column * @return Raw value based IN predicate evaluator */ - public static InRawPredicateEvaluator newRawValueBasedEvaluator(InPredicate inPredicate, - DataType dataType) { + public static InRawPredicateEvaluator newRawValueBasedEvaluator(InPredicate inPredicate, DataType dataType) { switch (dataType) { case INT: { int[] intValues = inPredicate.getIntValues(); @@ -157,42 +156,34 @@ public static InRawPredicateEvaluator newRawValueBasedEvaluator(InPredicate inPr private static final class DictionaryBasedInPredicateEvaluator extends BaseDictionaryBasedPredicateEvaluator { final IntSet _matchingDictIdSet; - final int _numMatchingDictIds; - int[] _matchingDictIds; DictionaryBasedInPredicateEvaluator(InPredicate inPredicate, Dictionary dictionary, DataType dataType, @Nullable QueryContext queryContext) { - super(inPredicate); + super(inPredicate, dictionary); _matchingDictIdSet = PredicateUtils.getDictIdSet(inPredicate, dictionary, dataType, queryContext); - _numMatchingDictIds = _matchingDictIdSet.size(); - if (_numMatchingDictIds == 0) { + int numMatchingDictIds = _matchingDictIdSet.size(); + if (numMatchingDictIds == 0) { _alwaysFalse = true; - } else if (dictionary.length() == _numMatchingDictIds) { + } else if (dictionary.length() == numMatchingDictIds) { _alwaysTrue = true; } } 
@Override - public boolean applySV(int dictId) { - return _matchingDictIdSet.contains(dictId); - } - - @Override - public int getNumMatchingDictIds() { - return _numMatchingDictIds; + protected int[] calculateMatchingDictIds() { + int[] matchingDictIds = _matchingDictIdSet.toIntArray(); + Arrays.sort(matchingDictIds); + return matchingDictIds; } @Override public int getNumMatchingItems() { - return getNumMatchingDictIds(); + return _matchingDictIdSet.size(); } @Override - public int[] getMatchingDictIds() { - if (_matchingDictIds == null) { - _matchingDictIds = _matchingDictIdSet.toIntArray(); - } - return _matchingDictIds; + public boolean applySV(int dictId) { + return _matchingDictIdSet.contains(dictId); } @Override @@ -477,9 +468,7 @@ public boolean applySV(byte[] value) { @Override public R accept(MultiValueVisitor visitor) { - byte[][] bytes = _matchingValues.stream() - .map(ByteArray::getBytes) - .toArray(byte[][]::new); + byte[][] bytes = _matchingValues.stream().map(ByteArray::getBytes).toArray(byte[][]::new); return visitor.visitBytes(bytes); } } diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/NotEqualsPredicateEvaluatorFactory.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/NotEqualsPredicateEvaluatorFactory.java index 54ce7df58cb1..fdcff7c579c3 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/NotEqualsPredicateEvaluatorFactory.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/NotEqualsPredicateEvaluatorFactory.java @@ -58,8 +58,7 @@ public static BaseDictionaryBasedPredicateEvaluator newDictionaryBasedEvaluator( * @param dataType Data type for the column * @return Raw value based NOT_EQ predicate evaluator */ - public static NeqRawPredicateEvaluator newRawValueBasedEvaluator(NotEqPredicate notEqPredicate, - DataType dataType) { + public static NeqRawPredicateEvaluator newRawValueBasedEvaluator(NotEqPredicate notEqPredicate, DataType dataType) { String value = notEqPredicate.getValue(); switch (dataType) { case INT: @@ -87,12 +86,9 @@ public static NeqRawPredicateEvaluator newRawValueBasedEvaluator(NotEqPredicate private static final class DictionaryBasedNeqPredicateEvaluator extends BaseDictionaryBasedPredicateEvaluator { final int _nonMatchingDictId; - final int[] _nonMatchingDictIds; - final Dictionary _dictionary; - int[] _matchingDictIds; DictionaryBasedNeqPredicateEvaluator(NotEqPredicate notEqPredicate, Dictionary dictionary, DataType dataType) { - super(notEqPredicate); + super(notEqPredicate, dictionary); String predicateValue = PredicateUtils.getStoredValue(notEqPredicate.getValue(), dataType); _nonMatchingDictId = dictionary.indexOf(predicateValue); if (_nonMatchingDictId >= 0) { @@ -104,7 +100,11 @@ private static final class DictionaryBasedNeqPredicateEvaluator extends BaseDict _nonMatchingDictIds = new int[0]; _alwaysTrue = true; } - _dictionary = dictionary; + } + + @Override + protected int[] calculateMatchingDictIds() { + return PredicateUtils.getDictIds(_dictionary.length(), _nonMatchingDictId); } @Override @@ -129,33 +129,6 @@ public int applySV(int limit, int[] docIds, int[] values) { } return matches; } - - @Override - public int[] getMatchingDictIds() { - if (_matchingDictIds == null) { - int dictionarySize = _dictionary.length(); - if (_nonMatchingDictId >= 0) { - _matchingDictIds = new int[dictionarySize - 1]; - int index = 0; - for (int dictId = 0; dictId < dictionarySize; dictId++) { - if (dictId != 
_nonMatchingDictId) { - _matchingDictIds[index++] = dictId; - } - } - } else { - _matchingDictIds = new int[dictionarySize]; - for (int dictId = 0; dictId < dictionarySize; dictId++) { - _matchingDictIds[dictId] = dictId; - } - } - } - return _matchingDictIds; - } - - @Override - public int[] getNonMatchingDictIds() { - return _nonMatchingDictIds; - } } public static abstract class NeqRawPredicateEvaluator extends BaseRawValueBasedPredicateEvaluator { diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/NotInPredicateEvaluatorFactory.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/NotInPredicateEvaluatorFactory.java index 5fe7b51d3571..4682225aa7e1 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/NotInPredicateEvaluatorFactory.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/NotInPredicateEvaluatorFactory.java @@ -71,8 +71,7 @@ public static BaseDictionaryBasedPredicateEvaluator newDictionaryBasedEvaluator( * @param dataType Data type for the column * @return Raw value based NOT_IN predicate evaluator */ - public static NotInRawPredicateEvaluator newRawValueBasedEvaluator(NotInPredicate notInPredicate, - DataType dataType) { + public static NotInRawPredicateEvaluator newRawValueBasedEvaluator(NotInPredicate notInPredicate, DataType dataType) { switch (dataType) { case INT: { int[] intValues = notInPredicate.getIntValues(); @@ -157,27 +156,34 @@ public static NotInRawPredicateEvaluator newRawValueBasedEvaluator(NotInPredicat public static final class DictionaryBasedNotInPredicateEvaluator extends BaseDictionaryBasedPredicateEvaluator { final IntSet _nonMatchingDictIdSet; - final int _numNonMatchingDictIds; - final Dictionary _dictionary; - int[] _matchingDictIds; - int[] _nonMatchingDictIds; DictionaryBasedNotInPredicateEvaluator(NotInPredicate notInPredicate, Dictionary dictionary, DataType dataType, @Nullable QueryContext queryContext) { - super(notInPredicate); + super(notInPredicate, dictionary); _nonMatchingDictIdSet = PredicateUtils.getDictIdSet(notInPredicate, dictionary, dataType, queryContext); - _numNonMatchingDictIds = _nonMatchingDictIdSet.size(); - if (_numNonMatchingDictIds == 0) { + int numNonMatchingDictIds = _nonMatchingDictIdSet.size(); + if (numNonMatchingDictIds == 0) { _alwaysTrue = true; - } else if (dictionary.length() == _numNonMatchingDictIds) { + } else if (dictionary.length() == numNonMatchingDictIds) { _alwaysFalse = true; } - _dictionary = dictionary; + } + + @Override + protected int[] calculateMatchingDictIds() { + return PredicateUtils.flipDictIds(getNonMatchingDictIds(), _dictionary.length()); + } + + @Override + protected int[] calculateNonMatchingDictIds() { + int[] nonMatchingDictIds = _nonMatchingDictIdSet.toIntArray(); + Arrays.sort(nonMatchingDictIds); + return nonMatchingDictIds; } @Override public int getNumMatchingItems() { - return -_numNonMatchingDictIds; + return -_nonMatchingDictIdSet.size(); } @Override @@ -197,34 +203,6 @@ public int applySV(int limit, int[] docIds, int[] values) { } return matches; } - - @Override - public int[] getMatchingDictIds() { - if (_matchingDictIds == null) { - int dictionarySize = _dictionary.length(); - _matchingDictIds = new int[dictionarySize - _numNonMatchingDictIds]; - int index = 0; - for (int dictId = 0; dictId < dictionarySize; dictId++) { - if (!_nonMatchingDictIdSet.contains(dictId)) { - _matchingDictIds[index++] = dictId; - } - } - } - return _matchingDictIds; - } - - 
@Override - public int getNumNonMatchingDictIds() { - return _numNonMatchingDictIds; - } - - @Override - public int[] getNonMatchingDictIds() { - if (_nonMatchingDictIds == null) { - _nonMatchingDictIds = _nonMatchingDictIdSet.toIntArray(); - } - return _nonMatchingDictIds; - } } public static abstract class NotInRawPredicateEvaluator extends BaseRawValueBasedPredicateEvaluator { @@ -491,9 +469,7 @@ public boolean applySV(byte[] value) { @Override public R accept(MultiValueVisitor visitor) { - byte[][] bytes = _nonMatchingValues.stream() - .map(ByteArray::getBytes) - .toArray(byte[][]::new); + byte[][] bytes = _nonMatchingValues.stream().map(ByteArray::getBytes).toArray(byte[][]::new); return visitor.visitBytes(bytes); } } diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/PredicateEvaluator.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/PredicateEvaluator.java index 889e28710794..09b420f48f04 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/PredicateEvaluator.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/PredicateEvaluator.java @@ -102,35 +102,24 @@ default int applySV(int limit, int[] docIds, int[] values) { boolean applyMV(int[] values, int length); /** - * APIs for dictionary based predicate evaluator - */ - - /** - * return the number of matching items specified by predicate - * negative number indicates exclusive (not eq, not in) match - * return {@code Integer.MIN_VALUE} for not applicable + * Get the number of matching items. Negative number indicates exclusive (e.g. NOT_EQ, NOT_IN) match. Returns + * {@code Integer.MIN_VALUE} if not applicable. */ default int getNumMatchingItems() { return Integer.MIN_VALUE; - }; + } /** - * Get the number of matching dictionary ids. + * APIs for dictionary based predicate evaluator */ - int getNumMatchingDictIds(); /** - * Get the matching dictionary ids. + * Get the matching dictionary ids. The returned ids should be sorted. */ int[] getMatchingDictIds(); /** - * Get the number of non-matching dictionary ids. - */ - int getNumNonMatchingDictIds(); - - /** - * Get the non-matching dictionary ids. + * Get the non-matching dictionary ids. The returned ids should be sorted. 
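+   * For example (illustrative), with a dictionary of length 5 where only dict ids 1 and 3
+   * match, {@code getMatchingDictIds()} returns [1, 3] and this method returns [0, 2, 4].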
*/ int[] getNonMatchingDictIds(); diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/PredicateUtils.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/PredicateUtils.java index 9135a85f785b..c7b93cf086c7 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/PredicateUtils.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/PredicateUtils.java @@ -190,4 +190,38 @@ public static IntSet getDictIdSet(BaseInPredicate inPredicate, Dictionary dictio } return dictIdSet; } + + public static int[] flipDictIds(int[] dictIds, int length) { + int numDictIds = dictIds.length; + int[] flippedDictIds = new int[length - numDictIds]; + int flippedDictIdsIndex = 0; + int dictIdsIndex = 0; + for (int dictId = 0; dictId < length; dictId++) { + if (dictIdsIndex < numDictIds && dictId == dictIds[dictIdsIndex]) { + dictIdsIndex++; + } else { + flippedDictIds[flippedDictIdsIndex++] = dictId; + } + } + return flippedDictIds; + } + + public static int[] getDictIds(int length, int excludeId) { + int[] dictIds; + if (excludeId >= 0) { + dictIds = new int[length - 1]; + int index = 0; + for (int dictId = 0; dictId < length; dictId++) { + if (dictId != excludeId) { + dictIds[index++] = dictId; + } + } + } else { + dictIds = new int[length]; + for (int dictId = 0; dictId < length; dictId++) { + dictIds[dictId] = dictId; + } + } + return dictIds; + } } diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/RangePredicateEvaluatorFactory.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/RangePredicateEvaluatorFactory.java index ca8e1f126a2c..e9bd3b4b0a7d 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/RangePredicateEvaluatorFactory.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/RangePredicateEvaluatorFactory.java @@ -122,11 +122,10 @@ public static final class SortedDictionaryBasedRangePredicateEvaluator extends B // Exclusive final int _endDictId; final int _numMatchingDictIds; - int[] _matchingDictIds; SortedDictionaryBasedRangePredicateEvaluator(RangePredicate rangePredicate, Dictionary dictionary, DataType dataType) { - super(rangePredicate); + super(rangePredicate, dictionary); String lowerBound = rangePredicate.getLowerBound(); String upperBound = rangePredicate.getUpperBound(); boolean lowerInclusive = rangePredicate.isLowerInclusive(); @@ -161,8 +160,8 @@ public static final class SortedDictionaryBasedRangePredicateEvaluator extends B } } - _numMatchingDictIds = _endDictId - _startDictId; - if (_numMatchingDictIds <= 0) { + _numMatchingDictIds = Integer.max(_endDictId - _startDictId, 0); + if (_numMatchingDictIds == 0) { _alwaysFalse = true; } else if (dictionary.length() == _numMatchingDictIds) { _alwaysTrue = true; @@ -178,46 +177,61 @@ public int getEndDictId() { } @Override - public boolean applySV(int dictId) { - return _startDictId <= dictId && _endDictId > dictId; + protected int[] calculateMatchingDictIds() { + if (_numMatchingDictIds == 0) { + return new int[0]; + } else { + int[] matchingDictIds = new int[_numMatchingDictIds]; + for (int i = 0; i < _numMatchingDictIds; i++) { + matchingDictIds[i] = _startDictId + i; + } + return matchingDictIds; + } } @Override - public int applySV(int limit, int[] docIds, int[] dictIds) { - // reimplemented here to ensure applySV can be inlined - int matches = 0; - for (int i = 0; i < limit; i++) { - int dictId 
= dictIds[i]; - if (applySV(dictId)) { - docIds[matches++] = docIds[i]; + protected int[] calculateNonMatchingDictIds() { + int dictionarySize = _dictionary.length(); + if (_numMatchingDictIds == 0) { + int[] nonMatchingDictIds = new int[dictionarySize]; + for (int i = 0; i < dictionarySize; i++) { + nonMatchingDictIds[i] = i; + } + return nonMatchingDictIds; + } else { + int[] nonMatchingDictIds = new int[dictionarySize - _numMatchingDictIds]; + int index = 0; + for (int i = 0; i < _startDictId; i++) { + nonMatchingDictIds[index++] = i; + } + for (int i = _endDictId; i < dictionarySize; i++) { + nonMatchingDictIds[index++] = i; } + return nonMatchingDictIds; } - return matches; } @Override - public int getNumMatchingDictIds() { + public int getNumMatchingItems() { return _numMatchingDictIds; } @Override - public int[] getMatchingDictIds() { - if (_matchingDictIds == null) { - if (_numMatchingDictIds <= 0) { - _matchingDictIds = new int[0]; - } else { - _matchingDictIds = new int[_numMatchingDictIds]; - for (int i = 0; i < _numMatchingDictIds; i++) { - _matchingDictIds[i] = _startDictId + i; - } - } - } - return _matchingDictIds; + public boolean applySV(int dictId) { + return _startDictId <= dictId && _endDictId > dictId; } @Override - public int getNumMatchingItems() { - return Math.max(_numMatchingDictIds, 0); + public int applySV(int limit, int[] docIds, int[] dictIds) { + // reimplemented here to ensure applySV can be inlined + int matches = 0; + for (int i = 0; i < limit; i++) { + int dictId = dictIds[i]; + if (applySV(dictId)) { + docIds[matches++] = docIds[i]; + } + } + return matches; } @Override @@ -238,15 +252,13 @@ private static final class UnsortedDictionaryBasedRangePredicateEvaluator // TODO: Tune this threshold private static final int DICT_ID_SET_BASED_CARDINALITY_THRESHOLD = 1000; - final Dictionary _dictionary; final boolean _dictIdSetBased; final IntSet _matchingDictIdSet; final BaseRawValueBasedPredicateEvaluator _rawValueBasedEvaluator; UnsortedDictionaryBasedRangePredicateEvaluator(RangePredicate rangePredicate, Dictionary dictionary, DataType dataType) { - super(rangePredicate); - _dictionary = dictionary; + super(rangePredicate, dictionary); int cardinality = dictionary.length(); if (cardinality < DICT_ID_SET_BASED_CARDINALITY_THRESHOLD) { _dictIdSetBased = true; @@ -274,6 +286,16 @@ private static final class UnsortedDictionaryBasedRangePredicateEvaluator } } + @Override + public int[] getMatchingDictIds() { + throw new UnsupportedOperationException(); + } + + @Override + public int getNumMatchingItems() { + return _matchingDictIdSet != null ? _matchingDictIdSet.size() : Integer.MIN_VALUE; + } + @Override public boolean applySV(int dictId) { if (_dictIdSetBased) { @@ -299,16 +321,6 @@ public boolean applySV(int dictId) { } } } - - @Override - public int getNumMatchingItems() { - return _matchingDictIdSet == null ? 
super.getNumMatchingItems() : _matchingDictIdSet.size(); - } - - @Override - public int[] getMatchingDictIds() { - throw new UnsupportedOperationException(); - } } private static final class IntRawValueBasedRangePredicateEvaluator extends BaseRawValueBasedPredicateEvaluator diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/RegexpLikePredicateEvaluatorFactory.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/RegexpLikePredicateEvaluatorFactory.java index 82477c35600f..09ffdb62f0ac 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/RegexpLikePredicateEvaluatorFactory.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/predicate/RegexpLikePredicateEvaluatorFactory.java @@ -19,8 +19,6 @@ package org.apache.pinot.core.operator.filter.predicate; import com.google.common.base.Preconditions; -import it.unimi.dsi.fastutil.ints.IntArrayList; -import it.unimi.dsi.fastutil.ints.IntList; import java.util.regex.Matcher; import org.apache.pinot.common.request.context.predicate.RegexpLikePredicate; import org.apache.pinot.segment.spi.index.reader.Dictionary; @@ -66,12 +64,9 @@ private static final class DictionaryBasedRegexpLikePredicateEvaluator extends B // Reuse matcher to avoid excessive allocation. This is safe to do because the evaluator is always used // within the scope of a single thread. final Matcher _matcher; - final Dictionary _dictionary; - int[] _matchingDictIds; public DictionaryBasedRegexpLikePredicateEvaluator(RegexpLikePredicate regexpLikePredicate, Dictionary dictionary) { - super(regexpLikePredicate); - _dictionary = dictionary; + super(regexpLikePredicate, dictionary); _matcher = regexpLikePredicate.getPattern().matcher(""); } @@ -92,21 +87,6 @@ public int applySV(int limit, int[] docIds, int[] values) { } return matches; } - - @Override - public int[] getMatchingDictIds() { - if (_matchingDictIds == null) { - IntList matchingDictIds = new IntArrayList(); - int dictionarySize = _dictionary.length(); - for (int dictId = 0; dictId < dictionarySize; dictId++) { - if (applySV(dictId)) { - matchingDictIds.add(dictId); - } - } - _matchingDictIds = matchingDictIds.toIntArray(); - } - return _matchingDictIds; - } } private static final class RawValueBasedRegexpLikePredicateEvaluator extends BaseRawValueBasedPredicateEvaluator { diff --git a/pinot-core/src/main/java/org/apache/pinot/core/startree/CompositePredicateEvaluator.java b/pinot-core/src/main/java/org/apache/pinot/core/startree/CompositePredicateEvaluator.java index 9424bc0cdbd1..1725364aeeec 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/startree/CompositePredicateEvaluator.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/startree/CompositePredicateEvaluator.java @@ -18,6 +18,7 @@ */ package org.apache.pinot.core.startree; +import it.unimi.dsi.fastutil.objects.ObjectBooleanPair; import java.util.List; import org.apache.pinot.core.operator.filter.predicate.PredicateEvaluator; @@ -26,19 +27,19 @@ * Represents a composite predicate. * * A composite predicate evaluator represents a single predicate evaluator or multiple predicate evaluators conjoined - * with OR. - * Consider the given predicate: (d1 > 10 OR d1 < 50). A composite predicate will represent two predicates -- (d1 > 10) - * and (d1 < 50) and represent that they are related by the operator OR. + * with OR. Each predicate evaluator is associated with a boolean value indicating whether the predicate is negated. 
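+ * A pair (evaluator, true) is applied as NOT(evaluator), i.e. it matches a dict id exactly
+ * when the wrapped evaluator does not.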
+ * Consider the given predicate: (d1 > 10 OR NOT d1 > 50). A composite predicate will represent two predicates: + * (d1 > 10) and NOT(d1 > 50) and represent that they are related by the operator OR. */ public class CompositePredicateEvaluator { - private final List _predicateEvaluators; + private final List> _predicateEvaluators; - public CompositePredicateEvaluator(List predicateEvaluators) { + public CompositePredicateEvaluator(List> predicateEvaluators) { assert !predicateEvaluators.isEmpty(); _predicateEvaluators = predicateEvaluators; } - public List getPredicateEvaluators() { + public List> getPredicateEvaluators() { return _predicateEvaluators; } @@ -47,8 +48,8 @@ public List getPredicateEvaluators() { * predicate evaluator, {@code false} otherwise. */ public boolean apply(int dictId) { - for (PredicateEvaluator predicateEvaluator : _predicateEvaluators) { - if (predicateEvaluator.applySV(dictId)) { + for (ObjectBooleanPair predicateEvaluator : _predicateEvaluators) { + if (predicateEvaluator.left().applySV(dictId) != predicateEvaluator.rightBoolean()) { return true; } } diff --git a/pinot-core/src/main/java/org/apache/pinot/core/startree/StarTreeUtils.java b/pinot-core/src/main/java/org/apache/pinot/core/startree/StarTreeUtils.java index f79070ae9f6a..68bd26e7801a 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/startree/StarTreeUtils.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/startree/StarTreeUtils.java @@ -18,6 +18,7 @@ */ package org.apache.pinot.core.startree; +import it.unimi.dsi.fastutil.objects.ObjectBooleanPair; import java.util.ArrayDeque; import java.util.ArrayList; import java.util.Collections; @@ -75,13 +76,13 @@ public static AggregationFunctionColumnPair[] extractAggregationFunctionPairs( } /** - * Extracts a map from the column to a list of {@link PredicateEvaluator}s for it. Returns {@code null} if the filter - * cannot be solved by the star-tree. + * Extracts a map from the column to a list of {@link CompositePredicateEvaluator}s for it. Returns {@code null} if + * the filter cannot be solved by the star-tree. * * A predicate can be simple (d1 > 10) or composite (d1 > 10 AND d2 < 50) or multi levelled - * (d1 > 50 AND (d2 > 10 OR d2 < 35)). + * (d1 > 50 AND (d2 > 10 OR NOT d2 > 35)). * This method represents a list of CompositePredicates per dimension. For each dimension, all CompositePredicates in - * the list are implicitly ANDed together. Any OR predicates are nested within a CompositePredicate. + * the list are implicitly ANDed together. Any OR and NOT predicates are nested within a CompositePredicate. * * A map from predicates to their evaluators is passed in to accelerate the computation. 
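 * For example (illustrative), the filter (d1 > 50 AND (d2 > 10 OR NOT d2 > 35)) yields a map
 * with d1 mapped to [(d1 > 50)] and d2 mapped to [a single composite evaluator representing
 * (d2 > 10 OR NOT d2 > 35)].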
*/ @@ -102,21 +103,50 @@ public static Map> extractPredicateEva queue.addAll(filterNode.getChildren()); break; case OR: - Pair> pair = + Pair pair = isOrClauseValidForStarTree(indexSegment, filterNode, predicateEvaluatorMapping); if (pair == null) { return null; } - List predicateEvaluators = pair.getRight(); - // NOTE: Empty list means always true - if (!predicateEvaluators.isEmpty()) { - predicateEvaluatorsMap.computeIfAbsent(pair.getLeft(), k -> new ArrayList<>()) - .add(new CompositePredicateEvaluator(predicateEvaluators)); + // NOTE: Null identifier means always true + if (pair.getLeft() != null) { + predicateEvaluatorsMap.computeIfAbsent(pair.getLeft(), k -> new ArrayList<>()).add(pair.getRight()); } break; case NOT: - // TODO: Support NOT in star-tree - return null; + boolean negated = true; + FilterContext negatedChild = filterNode.getChildren().get(0); + while (true) { + FilterContext.Type type = negatedChild.getType(); + if (type == FilterContext.Type.PREDICATE) { + Predicate predicate = negatedChild.getPredicate(); + PredicateEvaluator predicateEvaluator = + getPredicateEvaluator(indexSegment, predicate, predicateEvaluatorMapping); + // Do not use star-tree when the predicate cannot be solved with star-tree + if (predicateEvaluator == null) { + return null; + } + // Do not use star-tree when the predicate is always false + if ((predicateEvaluator.isAlwaysTrue() && negated) || (predicateEvaluator.isAlwaysFalse() && !negated)) { + return null; + } + // Skip adding always true predicate + if ((predicateEvaluator.isAlwaysTrue() && !negated) || (predicateEvaluator.isAlwaysFalse() && negated)) { + break; + } + predicateEvaluatorsMap.computeIfAbsent(predicate.getLhs().getIdentifier(), k -> new ArrayList<>()) + .add(new CompositePredicateEvaluator(List.of(ObjectBooleanPair.of(predicateEvaluator, negated)))); + break; + } + if (type == FilterContext.Type.NOT) { + negated = !negated; + negatedChild = negatedChild.getChildren().get(0); + continue; + } + // Do not allow nested AND/OR under NOT + return null; + } + break; case PREDICATE: Predicate predicate = filterNode.getPredicate(); PredicateEvaluator predicateEvaluator = @@ -127,7 +157,7 @@ public static Map> extractPredicateEva } if (!predicateEvaluator.isAlwaysTrue()) { predicateEvaluatorsMap.computeIfAbsent(predicate.getLhs().getIdentifier(), k -> new ArrayList<>()) - .add(new CompositePredicateEvaluator(Collections.singletonList(predicateEvaluator))); + .add(new CompositePredicateEvaluator(List.of(ObjectBooleanPair.of(predicateEvaluator, false)))); } break; default: @@ -177,70 +207,91 @@ public static boolean isFitForStarTree(StarTreeV2Metadata starTreeV2Metadata, * StarTree supports OR predicates on a single dimension only (d1 < 10 OR d1 > 50). * * @return The pair of single identifier and predicate evaluators applied to it if true; {@code null} if the OR clause - * cannot be solved with star-tree; empty predicate evaluator list if the OR clause always evaluates to true. + * cannot be solved with star-tree; a pair of nulls if the OR clause always evaluates to true. 
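 * For example (illustrative), (d1 > 10 OR NOT d1 > 50) returns the pair of "d1" and a
 * composite evaluator over both predicates, while (d1 > 10 OR d2 < 50) returns null because
 * the predicates span multiple columns.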
*/ @Nullable - private static Pair> isOrClauseValidForStarTree(IndexSegment indexSegment, + private static Pair isOrClauseValidForStarTree(IndexSegment indexSegment, FilterContext filter, List> predicateEvaluatorMapping) { assert filter.getType() == FilterContext.Type.OR; - List predicates = new ArrayList<>(); + List> predicates = new ArrayList<>(); if (!extractOrClausePredicates(filter, predicates)) { return null; } String identifier = null; - List predicateEvaluators = new ArrayList<>(); - for (Predicate predicate : predicates) { - PredicateEvaluator predicateEvaluator = getPredicateEvaluator(indexSegment, predicate, predicateEvaluatorMapping); + List> predicateEvaluators = new ArrayList<>(); + for (ObjectBooleanPair predicate : predicates) { + PredicateEvaluator predicateEvaluator = + getPredicateEvaluator(indexSegment, predicate.left(), predicateEvaluatorMapping); if (predicateEvaluator == null) { // The predicate cannot be solved with star-tree return null; } - if (predicateEvaluator.isAlwaysTrue()) { - // Use empty predicate evaluators to represent always true - return Pair.of(null, Collections.emptyList()); + boolean negated = predicate.rightBoolean(); + // Use a pair of null values to represent always true + if ((predicateEvaluator.isAlwaysTrue() && !negated) || (predicateEvaluator.isAlwaysFalse() && negated)) { + return Pair.of(null, null); } - if (!predicateEvaluator.isAlwaysFalse()) { - String predicateIdentifier = predicate.getLhs().getIdentifier(); - if (identifier == null) { - identifier = predicateIdentifier; - } else { - if (!identifier.equals(predicateIdentifier)) { - // The predicates are applied to multiple columns - return null; - } + // Skip the always false predicate + if ((predicateEvaluator.isAlwaysTrue() && negated) || (predicateEvaluator.isAlwaysFalse() && !negated)) { + continue; + } + String predicateIdentifier = predicate.left().getLhs().getIdentifier(); + if (identifier == null) { + identifier = predicateIdentifier; + } else { + if (!identifier.equals(predicateIdentifier)) { + // The predicates are applied to multiple columns + return null; } - predicateEvaluators.add(predicateEvaluator); } + predicateEvaluators.add(ObjectBooleanPair.of(predicateEvaluator, negated)); } // When all predicates are always false, do not use star-tree if (predicateEvaluators.isEmpty()) { return null; } - return Pair.of(identifier, predicateEvaluators); + return Pair.of(identifier, new CompositePredicateEvaluator(predicateEvaluators)); } /** * Extracts the predicates under the given OR clause, returns {@code false} if there is nested AND or NOT under OR * clause. 
- * TODO: Support NOT in star-tree */ - private static boolean extractOrClausePredicates(FilterContext filter, List predicates) { + private static boolean extractOrClausePredicates(FilterContext filter, + List> predicates) { assert filter.getType() == FilterContext.Type.OR; for (FilterContext child : filter.getChildren()) { switch (child.getType()) { case AND: - case NOT: return false; case OR: if (!extractOrClausePredicates(child, predicates)) { return false; } break; + case NOT: + boolean negated = true; + FilterContext negatedChild = child.getChildren().get(0); + while (true) { + FilterContext.Type type = negatedChild.getType(); + if (type == FilterContext.Type.PREDICATE) { + predicates.add(ObjectBooleanPair.of(negatedChild.getPredicate(), negated)); + break; + } + if (type == FilterContext.Type.NOT) { + negated = !negated; + negatedChild = negatedChild.getChildren().get(0); + continue; + } + // Do not allow nested AND/OR under NOT + return false; + } + break; case PREDICATE: - predicates.add(child.getPredicate()); + predicates.add(ObjectBooleanPair.of(child.getPredicate(), false)); break; default: throw new IllegalStateException(); diff --git a/pinot-core/src/main/java/org/apache/pinot/core/startree/operator/StarTreeFilterOperator.java b/pinot-core/src/main/java/org/apache/pinot/core/startree/operator/StarTreeFilterOperator.java index 583dad5e0ac8..107390fecd60 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/startree/operator/StarTreeFilterOperator.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/startree/operator/StarTreeFilterOperator.java @@ -18,9 +18,11 @@ */ package org.apache.pinot.core.startree.operator; +import it.unimi.dsi.fastutil.ints.IntImmutableList; import it.unimi.dsi.fastutil.ints.IntIterator; import it.unimi.dsi.fastutil.ints.IntOpenHashSet; import it.unimi.dsi.fastutil.ints.IntSet; +import it.unimi.dsi.fastutil.objects.ObjectBooleanPair; import java.util.ArrayDeque; import java.util.ArrayList; import java.util.Collections; @@ -175,19 +177,17 @@ private BaseFilterOperator getFilterOperator() { _predicateEvaluatorsMap.get(remainingPredicateColumn); DataSource dataSource = _starTreeV2.getDataSource(remainingPredicateColumn); for (CompositePredicateEvaluator compositePredicateEvaluator : compositePredicateEvaluators) { - List predicateEvaluators = compositePredicateEvaluator.getPredicateEvaluators(); + List> predicateEvaluators = + compositePredicateEvaluator.getPredicateEvaluators(); int numPredicateEvaluators = predicateEvaluators.size(); if (numPredicateEvaluators == 1) { // Single predicate evaluator - childFilterOperators.add( - FilterOperatorUtils.getLeafFilterOperator(_queryContext, predicateEvaluators.get(0), dataSource, - numDocs)); + childFilterOperators.add(getFilterOperator(predicateEvaluators.get(0), dataSource, numDocs)); } else { // Predicate evaluators conjoined with OR List orChildFilterOperators = new ArrayList<>(numPredicateEvaluators); - for (PredicateEvaluator childPredicateEvaluator : predicateEvaluators) { - orChildFilterOperators.add( - FilterOperatorUtils.getLeafFilterOperator(_queryContext, childPredicateEvaluator, dataSource, numDocs)); + for (ObjectBooleanPair childPredicateEvaluator : predicateEvaluators) { + orChildFilterOperators.add(getFilterOperator(childPredicateEvaluator, dataSource, numDocs)); } childFilterOperators.add( FilterOperatorUtils.getOrFilterOperator(_queryContext, orChildFilterOperators, numDocs)); @@ -198,6 +198,17 @@ private BaseFilterOperator getFilterOperator() { return 
FilterOperatorUtils.getAndFilterOperator(_queryContext, childFilterOperators, numDocs); } + private BaseFilterOperator getFilterOperator(ObjectBooleanPair predicateEvaluator, + DataSource dataSource, int numDocs) { + BaseFilterOperator leafFilterOperator = + FilterOperatorUtils.getLeafFilterOperator(_queryContext, predicateEvaluator.left(), dataSource, numDocs); + if (predicateEvaluator.rightBoolean()) { + return FilterOperatorUtils.getNotFilterOperator(_queryContext, leafFilterOperator, numDocs); + } else { + return leafFilterOperator; + } + } + /** * Helper method to traverse the star tree, get matching documents and keep track of all the predicate columns that * are not matched. Returns {@code null} if no matching dictionary id found for a column (i.e. the result for the @@ -386,24 +397,28 @@ public int compare(CompositePredicateEvaluator o1, CompositePredicateEvaluator o } int getPriority(CompositePredicateEvaluator compositePredicateEvaluator) { - List predicateEvaluators = compositePredicateEvaluator.getPredicateEvaluators(); + List> predicateEvaluators = + compositePredicateEvaluator.getPredicateEvaluators(); if (predicateEvaluators.size() == 1) { - switch (predicateEvaluators.get(0).getPredicateType()) { + ObjectBooleanPair predicateEvaluator = predicateEvaluators.get(0); + boolean negated = predicateEvaluator.rightBoolean(); + switch (predicateEvaluator.left().getPredicateType()) { case EQ: - return 1; + return negated ? 5 : 1; case IN: - return 2; + return negated ? 4 : 2; case RANGE: return 3; - case NOT_EQ: case NOT_IN: - return 4; + return negated ? 2 : 4; + case NOT_EQ: + return negated ? 1 : 5; default: - throw new UnsupportedOperationException(); + throw new IllegalStateException(); } } else { // Process OR at last - return 5; + return 6; } } }); @@ -433,12 +448,25 @@ int getPriority(CompositePredicateEvaluator compositePredicateEvaluator) { * Returns the matching dictionary ids for the given composite predicate evaluator. 
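 * For a negated predicate the complement is used, e.g. (illustrative) NOT(d1 = 5)
 * contributes the non-matching dict ids of (d1 = 5).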
*/ private IntSet getMatchingDictIds(CompositePredicateEvaluator compositePredicateEvaluator) { - IntSet matchingDictIds = new IntOpenHashSet(); - for (PredicateEvaluator predicateEvaluator : compositePredicateEvaluator.getPredicateEvaluators()) { - for (int matchingDictId : predicateEvaluator.getMatchingDictIds()) { - matchingDictIds.add(matchingDictId); + List> predicateEvaluators = + compositePredicateEvaluator.getPredicateEvaluators(); + if (predicateEvaluators.size() == 1) { + ObjectBooleanPair predicateEvaluator = predicateEvaluators.get(0); + if (predicateEvaluator.rightBoolean()) { + return new IntOpenHashSet(predicateEvaluator.left().getNonMatchingDictIds()); + } else { + return new IntOpenHashSet(predicateEvaluator.left().getMatchingDictIds()); } + } else { + IntSet matchingDictIds = new IntOpenHashSet(); + for (ObjectBooleanPair predicateEvaluator : predicateEvaluators) { + if (predicateEvaluator.rightBoolean()) { + matchingDictIds.addAll(new IntImmutableList(predicateEvaluator.left().getNonMatchingDictIds())); + } else { + matchingDictIds.addAll(new IntImmutableList(predicateEvaluator.left().getMatchingDictIds())); + } + } + return matchingDictIds; } - return matchingDictIds; } } diff --git a/pinot-core/src/test/java/org/apache/pinot/core/startree/v2/BaseStarTreeV2Test.java b/pinot-core/src/test/java/org/apache/pinot/core/startree/v2/BaseStarTreeV2Test.java index d4e6d3da4694..d323f6d55042 100644 --- a/pinot-core/src/test/java/org/apache/pinot/core/startree/v2/BaseStarTreeV2Test.java +++ b/pinot-core/src/test/java/org/apache/pinot/core/startree/v2/BaseStarTreeV2Test.java @@ -103,20 +103,25 @@ abstract class BaseStarTreeV2Test { private static final String QUERY_FILTER_AND = " WHERE d1__COLUMN_NAME = 0 AND __d2 < 10"; // StarTree supports OR predicates only on a single dimension private static final String QUERY_FILTER_OR = " WHERE d1__COLUMN_NAME > 10 OR d1__COLUMN_NAME < 50"; + private static final String QUERY_FILTER_NOT = " WHERE NOT d1__COLUMN_NAME > 10"; + private static final String QUERY_FILTER_AND_NOT = " WHERE d1__COLUMN_NAME > 10 AND NOT __d2 < 10"; + private static final String QUERY_FILTER_OR_NOT = " WHERE d1__COLUMN_NAME > 50 OR NOT d1__COLUMN_NAME > 10"; + private static final String QUERY_NOT_NOT = " WHERE NOT NOT d1__COLUMN_NAME > 10"; private static final String QUERY_FILTER_COMPLEX_OR_MULTIPLE_DIMENSIONS = - " WHERE __d2 < 95 AND (d1__COLUMN_NAME > 10 OR d1__COLUMN_NAME < 50)"; + " WHERE __d2 < 95 AND (NOT d1__COLUMN_NAME > 10 OR d1__COLUMN_NAME > 50)"; private static final String QUERY_FILTER_COMPLEX_AND_MULTIPLE_DIMENSIONS_THREE_PREDICATES = - " WHERE __d2 < 95 AND __d2 > 25 AND (d1__COLUMN_NAME > 10 OR d1__COLUMN_NAME < 50)"; + " WHERE __d2 < 95 AND NOT __d2 < 25 AND (d1__COLUMN_NAME > 10 OR d1__COLUMN_NAME < 50)"; private static final String QUERY_FILTER_COMPLEX_OR_MULTIPLE_DIMENSIONS_THREE_PREDICATES = " WHERE (__d2 > 95 OR __d2 < 25) AND (d1__COLUMN_NAME > 10 OR d1__COLUMN_NAME < 50)"; private static final String QUERY_FILTER_COMPLEX_OR_SINGLE_DIMENSION = - " WHERE d1__COLUMN_NAME = 95 AND (d1__COLUMN_NAME > 90 OR d1__COLUMN_NAME < 100)"; + " WHERE NOT d1__COLUMN_NAME = 95 AND (d1__COLUMN_NAME > 90 OR d1__COLUMN_NAME < 100)"; // Unsupported filters private static final String QUERY_FILTER_OR_MULTIPLE_DIMENSIONS = " WHERE d1__COLUMN_NAME > 10 OR __d2 < 50"; private static final String QUERY_FILTER_OR_ON_AND = " WHERE (d1__COLUMN_NAME > 10 AND d1__COLUMN_NAME < 50) OR d1__COLUMN_NAME < 50"; - private static final String QUERY_FILTER_OR_ON_NOT = " WHERE 
(NOT d1__COLUMN_NAME > 10) OR d1__COLUMN_NAME < 50"; + private static final String QUERY_FILTER_NOT_ON_AND = " WHERE NOT (d1__COLUMN_NAME > 10 AND d1__COLUMN_NAME < 50)"; + private static final String QUERY_FILTER_NOT_ON_OR = " WHERE NOT (d1__COLUMN_NAME < 10 OR d1__COLUMN_NAME > 50)"; // Always false filters private static final String QUERY_FILTER_ALWAYS_FALSE = " WHERE d1__COLUMN_NAME > 100"; private static final String QUERY_FILTER_OR_ALWAYS_FALSE = " WHERE d1__COLUMN_NAME > 100 OR d1__COLUMN_NAME < 0"; @@ -199,7 +204,8 @@ public void testUnsupportedFilters() { String query = String.format("SELECT %s FROM %s", _aggregation, TABLE_NAME); testUnsupportedFilter(query + QUERY_FILTER_OR_MULTIPLE_DIMENSIONS); testUnsupportedFilter(query + QUERY_FILTER_OR_ON_AND); - testUnsupportedFilter(query + QUERY_FILTER_OR_ON_NOT); + testUnsupportedFilter(query + QUERY_FILTER_NOT_ON_AND); + testUnsupportedFilter(query + QUERY_FILTER_NOT_ON_OR); testUnsupportedFilter(query + QUERY_FILTER_ALWAYS_FALSE); testUnsupportedFilter(query + QUERY_FILTER_OR_ALWAYS_FALSE); } @@ -213,6 +219,10 @@ public void testQueries() testQuery(query); testQuery(query + QUERY_FILTER_AND); testQuery(query + QUERY_FILTER_OR); + testQuery(query + QUERY_FILTER_NOT); + testQuery(query + QUERY_FILTER_AND_NOT); + testQuery(query + QUERY_FILTER_OR_NOT); + testQuery(query + QUERY_NOT_NOT); testQuery(query + QUERY_FILTER_COMPLEX_OR_MULTIPLE_DIMENSIONS); testQuery(query + QUERY_FILTER_COMPLEX_AND_MULTIPLE_DIMENSIONS_THREE_PREDICATES); testQuery(query + QUERY_FILTER_COMPLEX_OR_MULTIPLE_DIMENSIONS_THREE_PREDICATES); diff --git a/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkScanDocIdIterators.java b/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkScanDocIdIterators.java index 718d99e9baf5..9669b82d91bb 100644 --- a/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkScanDocIdIterators.java +++ b/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkScanDocIdIterators.java @@ -179,21 +179,11 @@ public boolean applyMV(int[] values, int length) { return false; } - @Override - public int getNumMatchingDictIds() { - return 0; - } - @Override public int[] getMatchingDictIds() { return new int[0]; } - @Override - public int getNumNonMatchingDictIds() { - return 0; - } - @Override public int[] getNonMatchingDictIds() { return new int[0]; From 0be51ca9558580e5f03979a1b65972459d83aaf7 Mon Sep 17 00:00:00 2001 From: Xiang Fu Date: Sat, 27 Apr 2024 14:22:37 +0800 Subject: [PATCH 41/58] Allow apply both environment variables and system properties to user and table configs, Environment variables take precedence over system properties (#13011) --- .../common/metadata/ZKMetadataProvider.java | 4 +- .../apache/pinot/spi/config/ConfigUtils.java | 41 ++++++++++++------- .../pinot/spi/config/ConfigUtilsTest.java | 30 +++++++++++--- 3 files changed, 52 insertions(+), 23 deletions(-) diff --git a/pinot-common/src/main/java/org/apache/pinot/common/metadata/ZKMetadataProvider.java b/pinot-common/src/main/java/org/apache/pinot/common/metadata/ZKMetadataProvider.java index d69d386a261f..0fdf94388a0b 100644 --- a/pinot-common/src/main/java/org/apache/pinot/common/metadata/ZKMetadataProvider.java +++ b/pinot-common/src/main/java/org/apache/pinot/common/metadata/ZKMetadataProvider.java @@ -313,7 +313,7 @@ public static UserConfig getUserConfig(ZkHelixPropertyStore propertySt } try { UserConfig userConfig = AccessControlUserConfigUtils.fromZNRecord(znRecord); - return ConfigUtils.applyConfigWithEnvVariables(userConfig); + return 
ConfigUtils.applyConfigWithEnvVariablesAndSystemProperties(userConfig); } catch (Exception e) { LOGGER.error("Caught exception while getting user configuration for user: {}", username, e); return null; @@ -422,7 +422,7 @@ private static TableConfig toTableConfig(@Nullable ZNRecord znRecord) { } try { TableConfig tableConfig = TableConfigUtils.fromZNRecord(znRecord); - return ConfigUtils.applyConfigWithEnvVariables(tableConfig); + return ConfigUtils.applyConfigWithEnvVariablesAndSystemProperties(tableConfig); } catch (Exception e) { LOGGER.error("Caught exception while creating table config from ZNRecord: {}", znRecord.getId(), e); return null; diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/config/ConfigUtils.java b/pinot-spi/src/main/java/org/apache/pinot/spi/config/ConfigUtils.java index 5bd637411bbd..289eef7db00a 100644 --- a/pinot-spi/src/main/java/org/apache/pinot/spi/config/ConfigUtils.java +++ b/pinot-spi/src/main/java/org/apache/pinot/spi/config/ConfigUtils.java @@ -23,6 +23,7 @@ import com.fasterxml.jackson.databind.node.JsonNodeFactory; import com.fasterxml.jackson.databind.node.JsonNodeType; import java.io.IOException; +import java.util.HashMap; import java.util.Iterator; import java.util.Map; import org.apache.pinot.spi.utils.JsonUtils; @@ -35,37 +36,47 @@ private ConfigUtils() { private static final Map ENVIRONMENT_VARIABLES = System.getenv(); /** - * Apply environment variables to any given BaseJsonConfig. + * Apply system properties and environment variables to any given BaseJsonConfig. + * Environment variables take precedence over system properties. + * Since the System properties are mutable, this method will read it at runtime. * - * @return Config with environment variable applied. + * @return Config with both system properties and environment variables applied. */ - public static T applyConfigWithEnvVariables(T config) { - return applyConfigWithEnvVariables(ENVIRONMENT_VARIABLES, config); + public static T applyConfigWithEnvVariablesAndSystemProperties(T config) { + Map combinedMap = new HashMap<>(); + // Add all system properties to the map + System.getProperties().forEach((key, value) -> combinedMap.put(String.valueOf(key), String.valueOf(value))); + // Add all environment variables to the map, potentially overwriting system properties + combinedMap.putAll(ENVIRONMENT_VARIABLES); + return applyConfigWithEnvVariablesAndSystemProperties(combinedMap, config); } /** - * Apply environment variables to any given BaseJsonConfig. + * Apply a map of config to any given BaseJsonConfig with templates. * - * @return Config with environment variable applied. + * @return Config with the configs applied. 
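+   * For example (illustrative), a template value "${LOAD_MODE:HEAP}" resolves to
+   * configValues.get("LOAD_MODE") when the key is present, and to the default "HEAP"
+   * otherwise.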
*/ - public static T applyConfigWithEnvVariables(Map environment, T config) { + public static T applyConfigWithEnvVariablesAndSystemProperties( + Map configValues, T configTemplate) { JsonNode jsonNode; try { - jsonNode = applyConfigWithEnvVariables(environment, config.toJsonNode()); + jsonNode = applyConfigWithEnvVariablesAndSystemProperties(configValues, configTemplate.toJsonNode()); } catch (RuntimeException e) { throw new RuntimeException(String - .format("Unable to apply environment variables on json config class [%s].", config.getClass().getName()), e); + .format("Unable to apply environment variables on json config class [%s].", + configTemplate.getClass().getName()), e); } try { - return (T) JsonUtils.jsonNodeToObject(jsonNode, config.getClass()); + return (T) JsonUtils.jsonNodeToObject(jsonNode, configTemplate.getClass()); } catch (IOException e) { throw new RuntimeException(String .format("Unable to read JsonConfig to class [%s] after applying environment variables, jsonConfig is: '%s'.", - config.getClass().getName(), jsonNode.toString()), e); + configTemplate.getClass().getName(), jsonNode.toString()), e); } } - private static JsonNode applyConfigWithEnvVariables(Map environment, JsonNode jsonNode) { + private static JsonNode applyConfigWithEnvVariablesAndSystemProperties(Map configValues, + JsonNode jsonNode) { final JsonNodeType nodeType = jsonNode.getNodeType(); switch (nodeType) { case OBJECT: @@ -73,7 +84,7 @@ private static JsonNode applyConfigWithEnvVariables(Map environm Iterator> iterator = jsonNode.fields(); while (iterator.hasNext()) { final Map.Entry next = iterator.next(); - next.setValue(applyConfigWithEnvVariables(environment, next.getValue())); + next.setValue(applyConfigWithEnvVariablesAndSystemProperties(configValues, next.getValue())); } } break; @@ -82,7 +93,7 @@ private static JsonNode applyConfigWithEnvVariables(Map environm ArrayNode arrayNode = (ArrayNode) jsonNode; for (int i = 0; i < arrayNode.size(); i++) { JsonNode arrayElement = arrayNode.get(i); - arrayNode.set(i, applyConfigWithEnvVariables(environment, arrayElement)); + arrayNode.set(i, applyConfigWithEnvVariablesAndSystemProperties(configValues, arrayElement)); } } break; @@ -91,7 +102,7 @@ private static JsonNode applyConfigWithEnvVariables(Map environm if (field.startsWith("${") && field.endsWith("}")) { String[] envVarSplits = field.substring(2, field.length() - 1).split(":", 2); String envVarKey = envVarSplits[0]; - String value = environment.get(envVarKey); + String value = configValues.get(envVarKey); if (value != null) { return JsonNodeFactory.instance.textNode(value); } else if (envVarSplits.length > 1) { diff --git a/pinot-spi/src/test/java/org/apache/pinot/spi/config/ConfigUtilsTest.java b/pinot-spi/src/test/java/org/apache/pinot/spi/config/ConfigUtilsTest.java index 07028659faf1..296bd00d2774 100644 --- a/pinot-spi/src/test/java/org/apache/pinot/spi/config/ConfigUtilsTest.java +++ b/pinot-spi/src/test/java/org/apache/pinot/spi/config/ConfigUtilsTest.java @@ -39,6 +39,25 @@ public class ConfigUtilsTest { @Test public void testIndexing() { + Map environment = + ImmutableMap.of("LOAD_MODE", "MMAP", "AWS_ACCESS_KEY", "default_aws_access_key", "AWS_SECRET_KEY", + "default_aws_secret_key"); + testIndexingWithConfig(environment); + } + + @Test + public void testIndexingWithSystemProperties() { + // Use default System properties + System.setProperty("LOAD_MODE", "MMAP"); + System.setProperty("AWS_ACCESS_KEY", "default_aws_access_key"); + System.setProperty("AWS_SECRET_KEY", 
"default_aws_secret_key"); + testIndexingWithConfig(null); + System.clearProperty("LOAD_MODE"); + System.clearProperty("AWS_ACCESS_KEY"); + System.clearProperty("AWS_SECRET_KEY"); + } + + private void testIndexingWithConfig(Map configOverride) { IndexingConfig indexingConfig = new IndexingConfig(); indexingConfig.setLoadMode("${LOAD_MODE}"); indexingConfig.setAggregateMetrics(true); @@ -80,12 +99,11 @@ public void testIndexing() { streamConfigMap.put(StreamConfigProperties.constructStreamProperty(streamType, "aws.secretKey"), "${AWS_SECRET_KEY}"); indexingConfig.setStreamConfigs(streamConfigMap); - - Map environment = - ImmutableMap.of("LOAD_MODE", "MMAP", "AWS_ACCESS_KEY", "default_aws_access_key", "AWS_SECRET_KEY", - "default_aws_secret_key"); - - indexingConfig = ConfigUtils.applyConfigWithEnvVariables(environment, indexingConfig); + if (configOverride != null) { + indexingConfig = ConfigUtils.applyConfigWithEnvVariablesAndSystemProperties(configOverride, indexingConfig); + } else { + indexingConfig = ConfigUtils.applyConfigWithEnvVariablesAndSystemProperties(indexingConfig); + } assertEquals(indexingConfig.getLoadMode(), "MMAP"); assertTrue(indexingConfig.isAggregateMetrics()); assertEquals(indexingConfig.getInvertedIndexColumns(), invertedIndexColumns); From fc967d0d15ff0296b58c52d28d472b95569ffd37 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 29 Apr 2024 11:03:13 -0700 Subject: [PATCH 42/58] Bump org.testng:testng from 7.10.1 to 7.10.2 (#13021) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 8a9e80ae6e45..a3128a7db4d8 100644 --- a/pom.xml +++ b/pom.xml @@ -175,7 +175,7 @@ 2.25.39 2.12.7 3.1.12 - 7.10.1 + 7.10.2 6.6.2 8.3.4 From 2a7f3205935db8d637c2512652ebe97eebf74f9d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 29 Apr 2024 11:04:42 -0700 Subject: [PATCH 43/58] Bump aws.sdk.version from 2.25.39 to 2.25.40 (#13022) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index a3128a7db4d8..0a3a2940b4fb 100644 --- a/pom.xml +++ b/pom.xml @@ -172,7 +172,7 @@ 0.15.0 0.4.4 4.2.2 - 2.25.39 + 2.25.40 2.12.7 3.1.12 7.10.2 From e2cadfabf11ea2effdc0b45cdec96c4d86819676 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 29 Apr 2024 11:05:04 -0700 Subject: [PATCH 44/58] Bump com.google.errorprone:error_prone_annotations from 2.26.1 to 2.27.0 (#13023) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 0a3a2940b4fb..6de9cfc63a94 100644 --- a/pom.xml +++ b/pom.xml @@ -884,7 +884,7 @@ com.google.errorprone error_prone_annotations - 2.26.1 + 2.27.0 From bbf63c7d72c6aa0aaf79e282d8e373f26a0a5bfa Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 29 Apr 2024 11:05:25 -0700 Subject: [PATCH 45/58] Bump org.apache.datasketches:datasketches-java from 5.0.2 to 6.0.0 (#13024) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 6de9cfc63a94..b0e6fa2cb885 100644 --- a/pom.xml +++ b/pom.xml @@ -1223,7 +1223,7 @@ org.apache.datasketches datasketches-java - 5.0.2 + 6.0.0 com.dynatrace.hash4j From bdfb34a4b82f2c700dfd476c7734bb6cb076ec18 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 29 Apr 2024 11:05:42 -0700 
Subject: [PATCH 46/58] Bump commons-codec:commons-codec from 1.16.1 to 1.17.0 (#13025) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index b0e6fa2cb885..656d28d4baef 100644 --- a/pom.xml +++ b/pom.xml @@ -192,7 +192,7 @@ 1.10.0 2.10.1 2.16.1 - 1.16.1 + 1.17.0 1.7.0 3.10.0 1.8.0 From 14651a2fef171141848f603cc1997972cabd7f66 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 29 Apr 2024 11:13:04 -0700 Subject: [PATCH 47/58] Bump com.puppycrawl.tools:checkstyle from 10.15.0 to 10.16.0 (#13027) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 656d28d4baef..e362a240fced 100644 --- a/pom.xml +++ b/pom.xml @@ -1942,7 +1942,7 @@ com.puppycrawl.tools checkstyle - 10.15.0 + 10.16.0 From 7b06b9f5c3b945c585eba0fa3a7118845e58802e Mon Sep 17 00:00:00 2001 From: Aditya Mahajan Date: Tue, 30 Apr 2024 01:54:15 +0530 Subject: [PATCH 48/58] Issue #12367 (#12922) --- .../function/CaseTransformFunctionTest.java | 31 ++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/CaseTransformFunctionTest.java b/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/CaseTransformFunctionTest.java index 79a415e5a92f..315b53e9e08d 100644 --- a/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/CaseTransformFunctionTest.java +++ b/pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/CaseTransformFunctionTest.java @@ -36,6 +36,7 @@ import org.testng.annotations.Test; import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertNotEquals; public class CaseTransformFunctionTest extends BaseTransformFunctionTest { @@ -106,7 +107,8 @@ public void testCaseTransformFunctionWithIntResults() { testCaseQueries(String.format("%s(%s, %s)", functionType.getName(), LONG_SV_COLUMN, String.format("%d", _longSVValues[INDEX_TO_COMPARE])), getPredicateResults(LONG_SV_COLUMN, functionType)); testCaseQueries(String.format("%s(%s, %s)", functionType.getName(), FLOAT_SV_COLUMN, - String.format("%f", _floatSVValues[INDEX_TO_COMPARE])), getPredicateResults(FLOAT_SV_COLUMN, functionType)); + "CAST(" + String.format("%f", _floatSVValues[INDEX_TO_COMPARE]) + " AS FLOAT)"), + getPredicateResults(FLOAT_SV_COLUMN, functionType)); testCaseQueries(String.format("%s(%s, %s)", functionType.getName(), DOUBLE_SV_COLUMN, String.format("%.20f", _doubleSVValues[INDEX_TO_COMPARE])), getPredicateResults(DOUBLE_SV_COLUMN, functionType)); @@ -116,6 +118,33 @@ public void testCaseTransformFunctionWithIntResults() { } } + @Test + public void testCaseTransformFunctionWithoutCastForFloatValues() { + boolean[] predicateResults = new boolean[1]; + Arrays.fill(predicateResults, true); + int[] expectedValues = new int[1]; + int index = -1; + for (int i = 0; i < NUM_ROWS; i++) { + if (Double.compare(_floatSVValues[i], Double.parseDouble(String.format("%f", _floatSVValues[i]))) != 0) { + index = i; + expectedValues[0] = predicateResults[0] ? 
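// String.format("%f", ...) keeps only six fractional digits, so this row's
// float value differs from the double parsed back from its formatted string;
// matching on the uncast literal therefore misses, and assertNotEquals below
// verifies the mismatch.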
_intSVValues[i] : 10; + break; + } + } + + if (index != -1) { + String predicate = String.format("%s(%s, %s)", TransformFunctionType.EQUALS, FLOAT_SV_COLUMN, + String.format("%f", _floatSVValues[index])); + String expression = String.format("CASE WHEN %s THEN %s ELSE 10 END", predicate, INT_SV_COLUMN); + ExpressionContext expressionContext = RequestContextUtils.getExpression(expression); + TransformFunction transformFunction = TransformFunctionFactory.get(expressionContext, _dataSourceMap); + Assert.assertTrue(transformFunction instanceof CaseTransformFunction); + assertEquals(transformFunction.getResultMetadata().getDataType(), DataType.INT); + int[] intValues = transformFunction.transformToIntValuesSV(_projectionBlock); + assertNotEquals(intValues[index], expectedValues[0]); + } + } + @DataProvider public static String[] illegalExpressions() { //@formatter:off From 475708f14f3f27a3a8b03ae710d84e2cb70eda74 Mon Sep 17 00:00:00 2001 From: Yash Mayya Date: Wed, 1 May 2024 00:31:22 +0530 Subject: [PATCH 49/58] Use try-with-resources to close file walk stream in LocalPinotFS (#13029) --- .../apache/pinot/spi/filesystem/LocalPinotFS.java | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/filesystem/LocalPinotFS.java b/pinot-spi/src/main/java/org/apache/pinot/spi/filesystem/LocalPinotFS.java index 5eae4d92671f..7fd8ca5906ea 100644 --- a/pinot-spi/src/main/java/org/apache/pinot/spi/filesystem/LocalPinotFS.java +++ b/pinot-spi/src/main/java/org/apache/pinot/spi/filesystem/LocalPinotFS.java @@ -32,6 +32,7 @@ import java.util.Arrays; import java.util.List; import java.util.stream.Collectors; +import java.util.stream.Stream; import org.apache.commons.io.FileUtils; import org.apache.pinot.spi.env.PinotConfiguration; @@ -112,8 +113,9 @@ public String[] listFiles(URI fileUri, boolean recursive) if (!recursive) { return Arrays.stream(file.list()).map(s -> new File(file, s)).map(File::getAbsolutePath).toArray(String[]::new); } else { - return Files.walk(Paths.get(fileUri)). 
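// Files.walk returns a lazily populated Stream that holds open directory
// resources; its Javadoc requires closing it (ideally with try-with-resources),
// which the replacement below does to avoid leaking file descriptors.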
- filter(s -> !s.equals(file.toPath())).map(Path::toString).toArray(String[]::new); + try (Stream pathStream = Files.walk(Paths.get(fileUri))) { + return pathStream.filter(s -> !s.equals(file.toPath())).map(Path::toString).toArray(String[]::new); + } } } @@ -124,8 +126,10 @@ public List listFilesWithMetadata(URI fileUri, boolean recursive) if (!recursive) { return Arrays.stream(file.list()).map(s -> getFileMetadata(new File(file, s))).collect(Collectors.toList()); } else { - return Files.walk(Paths.get(fileUri)).filter(s -> !s.equals(file.toPath())).map(p -> getFileMetadata(p.toFile())) - .collect(Collectors.toList()); + try (Stream pathStream = Files.walk(Paths.get(fileUri))) { + return pathStream.filter(s -> !s.equals(file.toPath())).map(p -> getFileMetadata(p.toFile())) + .collect(Collectors.toList()); + } } } From 7413e993ee784e34dcc00942cb24f594333d206e Mon Sep 17 00:00:00 2001 From: "Xiaotian (Jackie) Jiang" <17555551+Jackie-Jiang@users.noreply.github.com> Date: Tue, 30 Apr 2024 12:29:03 -0700 Subject: [PATCH 50/58] Upgrade s3mock to 2.17.0 (#13028) --- pinot-plugins/pinot-file-system/pinot-s3/pom.xml | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/pinot-plugins/pinot-file-system/pinot-s3/pom.xml b/pinot-plugins/pinot-file-system/pinot-s3/pom.xml index 0f4d2eea78f0..d94330574fa8 100644 --- a/pinot-plugins/pinot-file-system/pinot-s3/pom.xml +++ b/pinot-plugins/pinot-file-system/pinot-s3/pom.xml @@ -35,15 +35,10 @@ https://pinot.apache.org ${basedir}/../../.. - 2.12.2 + 2.17.0 - - org.apache.pinot - pinot-spi - - software.amazon.awssdk s3 From ea0c71b3c88e800ba2fb610826a85e188d896150 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 30 Apr 2024 12:37:53 -0700 Subject: [PATCH 51/58] Bump org.scala-lang:scala-library from 2.11.11 to 2.11.12 and from 2.12.18 to 2.12.19 (#13034) --- .../pinot-batch-ingestion-spark-2.4/pom.xml | 2 +- pom.xml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-spark-2.4/pom.xml b/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-spark-2.4/pom.xml index 748d6de20c34..bf0d055afa3e 100644 --- a/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-spark-2.4/pom.xml +++ b/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-spark-2.4/pom.xml @@ -38,7 +38,7 @@ package 2.11 2.4.6 - 2.11.11 + 2.11.12 diff --git a/pom.xml b/pom.xml index e362a240fced..9683078cbf3d 100644 --- a/pom.xml +++ b/pom.xml @@ -237,7 +237,7 @@ 1.61.1 - 2.12.18 + 2.12.19 2.12 From f1530112ec7d5a07d564637e425dc611f557a93c Mon Sep 17 00:00:00 2001 From: Abhishek Sharma Date: Tue, 30 Apr 2024 17:16:29 -0400 Subject: [PATCH 52/58] Upgrade jna to version 5.14.0 for Mac M1/M2 local execution support (#13018) --- LICENSE-binary | 4 ++-- pinot-plugins/pinot-file-system/pinot-adls/pom.xml | 10 ---------- pom.xml | 8 +++++++- 3 files changed, 9 insertions(+), 13 deletions(-) diff --git a/LICENSE-binary b/LICENSE-binary index 5944cc2bba50..0303ffe9c16b 100644 --- a/LICENSE-binary +++ b/LICENSE-binary @@ -371,8 +371,8 @@ jakarta.validation:jakarta.validation-api:2.0.2 javax.inject:javax.inject:1 javax.validation:validation-api:2.0.1.Final joda-time:joda-time:2.12.5 -net.java.dev.jna:jna-platform:5.6.0 -net.java.dev.jna:jna:5.5.0 +net.java.dev.jna:jna-platform:5.14.0 +net.java.dev.jna:jna:5.14.0 net.minidev:accessors-smart:2.5.0 net.minidev:json-smart:2.5.0 net.openhft:chronicle-analytics:2.24ea0 diff --git 
a/pinot-plugins/pinot-file-system/pinot-adls/pom.xml b/pinot-plugins/pinot-file-system/pinot-adls/pom.xml index f2f31cb65f6a..33767b2f63f1 100644 --- a/pinot-plugins/pinot-file-system/pinot-adls/pom.xml +++ b/pinot-plugins/pinot-file-system/pinot-adls/pom.xml @@ -64,16 +64,6 @@ reactor-core 3.6.5 - - net.java.dev.jna - jna-platform - 5.14.0 - - - net.java.dev.jna - jna - 5.6.0 - com.microsoft.azure msal4j diff --git a/pom.xml b/pom.xml index 9683078cbf3d..e3d1f26a9bfc 100644 --- a/pom.xml +++ b/pom.xml @@ -251,6 +251,7 @@ 9.37.3 1.78 0.26 + 5.14.0 @@ -1442,10 +1443,15 @@ asm 9.7 + + net.java.dev.jna + jna-platform + ${jna.version} + net.java.dev.jna jna - 5.5.0 + ${jna.version} From 087fca37d03bf18930ecaad55b2955ef5d20ecac Mon Sep 17 00:00:00 2001 From: "Xiaotian (Jackie) Jiang" <17555551+Jackie-Jiang@users.noreply.github.com> Date: Tue, 30 Apr 2024 14:17:08 -0700 Subject: [PATCH 53/58] Ensure all the lists used in PinotQuery are ArrayList (#13017) --- .../BaseBrokerRequestHandler.java | 20 +- .../common/utils/request/RequestUtils.java | 39 ++ .../pinot/sql/parsers/CalciteSqlParser.java | 83 +-- .../sql/parsers/rewriter/ClpRewriter.java | 77 +-- .../parsers/rewriter/ExprMinMaxRewriter.java | 4 +- ...egationGroupByToDistinctQueryRewriter.java | 11 +- .../sql/parsers/rewriter/OrdinalsUpdater.java | 8 +- .../rewriter/PredicateComparisonRewriter.java | 20 +- .../sql/parsers/CalciteSqlCompilerTest.java | 654 +++++++++--------- .../filter/MergeEqInFilterOptimizer.java | 9 +- .../filter/MergeRangeFilterOptimizer.java | 7 +- .../filter/TextMatchFilterOptimizer.java | 22 +- .../filter/TimePredicateFilterOptimizer.java | 19 +- .../maker/QueryOverrideWithHintsTest.java | 12 +- .../query/optimizer/QueryOptimizerTest.java | 12 +- .../plan/server/ServerPlanRequestUtils.java | 48 +- 16 files changed, 505 insertions(+), 540 deletions(-) diff --git a/pinot-broker/src/main/java/org/apache/pinot/broker/requesthandler/BaseBrokerRequestHandler.java b/pinot-broker/src/main/java/org/apache/pinot/broker/requesthandler/BaseBrokerRequestHandler.java index b2d4e24d3f32..2fdc36e1ea3f 100644 --- a/pinot-broker/src/main/java/org/apache/pinot/broker/requesthandler/BaseBrokerRequestHandler.java +++ b/pinot-broker/src/main/java/org/apache/pinot/broker/requesthandler/BaseBrokerRequestHandler.java @@ -24,7 +24,6 @@ import com.google.common.collect.ImmutableMap; import java.net.URI; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collections; import java.util.HashSet; import java.util.List; @@ -1361,8 +1360,7 @@ private static void handleApproximateFunctionOverride(Expression expression) { try { int percentile = Integer.parseInt(remainingFunctionName); function.setOperator("percentilesmarttdigest"); - function.setOperands( - Arrays.asList(function.getOperands().get(0), RequestUtils.getLiteralExpression(percentile))); + function.addToOperands(RequestUtils.getLiteralExpression(percentile)); } catch (Exception e) { throw new BadQueryRequestException("Illegal function name: " + functionName); } @@ -1370,8 +1368,7 @@ private static void handleApproximateFunctionOverride(Expression expression) { try { int percentile = Integer.parseInt(remainingFunctionName.substring(0, remainingFunctionName.length() - 2)); function.setOperator("percentilesmarttdigest"); - function.setOperands( - Arrays.asList(function.getOperands().get(0), RequestUtils.getLiteralExpression(percentile))); + function.addToOperands(RequestUtils.getLiteralExpression(percentile)); } catch (Exception e) { throw new 
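// The try-block above rewrites e.g. percentile95(col) into
// percentilesmarttdigest(col, 95); a suffix that fails to parse as an integer
// percentile lands here instead.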
BadQueryRequestException("Illegal function name: " + functionName); } @@ -1849,18 +1846,17 @@ static void validateRequest(PinotQuery pinotQuery, int queryResponseLimit) { */ private static void attachTimeBoundary(PinotQuery pinotQuery, TimeBoundaryInfo timeBoundaryInfo, boolean isOfflineRequest) { + String functionName = isOfflineRequest ? FilterKind.LESS_THAN_OR_EQUAL.name() : FilterKind.GREATER_THAN.name(); String timeColumn = timeBoundaryInfo.getTimeColumn(); String timeValue = timeBoundaryInfo.getTimeValue(); - Expression timeFilterExpression = RequestUtils.getFunctionExpression( - isOfflineRequest ? FilterKind.LESS_THAN_OR_EQUAL.name() : FilterKind.GREATER_THAN.name()); - timeFilterExpression.getFunctionCall().setOperands( - Arrays.asList(RequestUtils.getIdentifierExpression(timeColumn), RequestUtils.getLiteralExpression(timeValue))); + Expression timeFilterExpression = + RequestUtils.getFunctionExpression(functionName, RequestUtils.getIdentifierExpression(timeColumn), + RequestUtils.getLiteralExpression(timeValue)); Expression filterExpression = pinotQuery.getFilterExpression(); if (filterExpression != null) { - Expression andFilterExpression = RequestUtils.getFunctionExpression(FilterKind.AND.name()); - andFilterExpression.getFunctionCall().setOperands(Arrays.asList(filterExpression, timeFilterExpression)); - pinotQuery.setFilterExpression(andFilterExpression); + pinotQuery.setFilterExpression( + RequestUtils.getFunctionExpression(FilterKind.AND.name(), filterExpression, timeFilterExpression)); } else { pinotQuery.setFilterExpression(timeFilterExpression); } diff --git a/pinot-common/src/main/java/org/apache/pinot/common/utils/request/RequestUtils.java b/pinot-common/src/main/java/org/apache/pinot/common/utils/request/RequestUtils.java index 42abb0b80dd9..f6818371a059 100644 --- a/pinot-common/src/main/java/org/apache/pinot/common/utils/request/RequestUtils.java +++ b/pinot-common/src/main/java/org/apache/pinot/common/utils/request/RequestUtils.java @@ -24,8 +24,10 @@ import com.google.common.base.Splitter; import com.google.common.collect.ImmutableSet; import java.math.BigDecimal; +import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; +import java.util.List; import java.util.Map; import java.util.Set; import java.util.stream.Collectors; @@ -301,6 +303,43 @@ public static Expression getLiteralExpression(Object object) { return RequestUtils.getLiteralExpression(object.toString()); } + public static Function getFunction(String canonicalName, List operands) { + Function function = new Function(canonicalName); + function.setOperands(operands); + return function; + } + + public static Function getFunction(String canonicalName, Expression operand) { + // NOTE: Create an ArrayList because we might need to modify the list later + List operands = new ArrayList<>(1); + operands.add(operand); + return getFunction(canonicalName, operands); + } + + public static Function getFunction(String canonicalName, Expression... 
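// Usage sketch: getFunctionExpression("plus", a, b) produces the expression
// plus(a, b) whose operand list is a mutable ArrayList, so downstream
// rewriters can safely call addToOperands or List.set on it, unlike the
// fixed-size Arrays.asList and immutable singleton lists these helpers
// replace.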
operands) { + // NOTE: Create an ArrayList because we might need to modify the list later + return getFunction(canonicalName, new ArrayList<>(Arrays.asList(operands))); + } + + public static Expression getFunctionExpression(Function function) { + Expression expression = new Expression(ExpressionType.FUNCTION); + expression.setFunctionCall(function); + return expression; + } + + public static Expression getFunctionExpression(String canonicalName, List operands) { + return getFunctionExpression(getFunction(canonicalName, operands)); + } + + public static Expression getFunctionExpression(String canonicalName, Expression operand) { + return getFunctionExpression(getFunction(canonicalName, operand)); + } + + public static Expression getFunctionExpression(String canonicalName, Expression... operands) { + return getFunctionExpression(getFunction(canonicalName, operands)); + } + + @Deprecated public static Expression getFunctionExpression(String canonicalName) { assert canonicalName.equalsIgnoreCase(canonicalizeFunctionNamePreservingSpecialKey(canonicalName)); Expression expression = new Expression(ExpressionType.FUNCTION); diff --git a/pinot-common/src/main/java/org/apache/pinot/sql/parsers/CalciteSqlParser.java b/pinot-common/src/main/java/org/apache/pinot/sql/parsers/CalciteSqlParser.java index 3232cfad2cc9..0787aacf9fe8 100644 --- a/pinot-common/src/main/java/org/apache/pinot/sql/parsers/CalciteSqlParser.java +++ b/pinot-common/src/main/java/org/apache/pinot/sql/parsers/CalciteSqlParser.java @@ -23,10 +23,8 @@ import com.google.common.collect.ImmutableSet; import java.io.StringReader; import java.util.ArrayList; -import java.util.Collections; import java.util.HashMap; import java.util.HashSet; -import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; @@ -379,8 +377,7 @@ public static Set extractIdentifiers(List expressions, boole Function function = expression.getFunctionCall(); if (function != null) { if (excludeAs && function.getOperator().equals("as")) { - identifiers.addAll( - extractIdentifiers(new ArrayList<>(Collections.singletonList(function.getOperands().get(0))), true)); + identifiers.addAll(extractIdentifiers(List.of(function.getOperands().get(0)), true)); } else { identifiers.addAll(extractIdentifiers(function.getOperands(), excludeAs)); } @@ -589,25 +586,22 @@ private static Map extractOptionsMap(List optionsStateme } private static List convertDistinctSelectList(SqlNodeList selectList) { - List selectExpr = new ArrayList<>(); + // NOTE: Create an ArrayList because we might need to modify the list later + List selectExpr = new ArrayList<>(1); selectExpr.add(convertDistinctAndSelectListToFunctionExpression(selectList)); return selectExpr; } private static List convertSelectList(SqlNodeList selectList) { - List selectExpr = new ArrayList<>(); - - final Iterator iterator = selectList.iterator(); - while (iterator.hasNext()) { - final SqlNode next = iterator.next(); - selectExpr.add(toExpression(next)); + List selectExpr = new ArrayList<>(selectList.size()); + for (SqlNode sqlNode : selectList) { + selectExpr.add(toExpression(sqlNode)); } - return selectExpr; } private static List convertOrderByList(SqlNodeList orderList) { - List orderByExpr = new ArrayList<>(); + List orderByExpr = new ArrayList<>(orderList.size()); for (SqlNode sqlNode : orderList) { orderByExpr.add(convertOrderBy(sqlNode, true)); } @@ -621,19 +615,17 @@ private static Expression convertOrderBy(SqlNode node, boolean createAscExpressi Expression expression; if (node.getKind() == 
SqlKind.NULLS_LAST) { SqlBasicCall basicCall = (SqlBasicCall) node; - expression = RequestUtils.getFunctionExpression(NULLS_LAST); - expression.getFunctionCall().addToOperands(convertOrderBy(basicCall.getOperandList().get(0), true)); + expression = + RequestUtils.getFunctionExpression(NULLS_LAST, convertOrderBy(basicCall.getOperandList().get(0), true)); } else if (node.getKind() == SqlKind.NULLS_FIRST) { SqlBasicCall basicCall = (SqlBasicCall) node; - expression = RequestUtils.getFunctionExpression(NULLS_FIRST); - expression.getFunctionCall().addToOperands(convertOrderBy(basicCall.getOperandList().get(0), true)); + expression = + RequestUtils.getFunctionExpression(NULLS_FIRST, convertOrderBy(basicCall.getOperandList().get(0), true)); } else if (node.getKind() == SqlKind.DESCENDING) { SqlBasicCall basicCall = (SqlBasicCall) node; - expression = RequestUtils.getFunctionExpression(DESC); - expression.getFunctionCall().addToOperands(convertOrderBy(basicCall.getOperandList().get(0), false)); + expression = RequestUtils.getFunctionExpression(DESC, convertOrderBy(basicCall.getOperandList().get(0), false)); } else if (createAscExpression) { - expression = RequestUtils.getFunctionExpression(ASC); - expression.getFunctionCall().addToOperands(toExpression(node)); + expression = RequestUtils.getFunctionExpression(ASC, toExpression(node)); } else { return toExpression(node); } @@ -648,7 +640,7 @@ private static Expression convertOrderBy(SqlNode node, boolean createAscExpressi * @return DISTINCT function expression */ private static Expression convertDistinctAndSelectListToFunctionExpression(SqlNodeList selectList) { - Expression functionExpression = RequestUtils.getFunctionExpression("distinct"); + List operands = new ArrayList<>(selectList.size()); for (SqlNode node : selectList) { Expression columnExpression = toExpression(node); if (columnExpression.getType() == ExpressionType.IDENTIFIER && columnExpression.getIdentifier().getName() @@ -662,9 +654,9 @@ private static Expression convertDistinctAndSelectListToFunctionExpression(SqlNo "Syntax error: Use of DISTINCT with aggregation functions is not supported"); } } - functionExpression.getFunctionCall().addToOperands(columnExpression); + operands.add(columnExpression); } - return functionExpression; + return RequestUtils.getFunctionExpression("distinct", operands); } private static Expression toExpression(SqlNode node) { @@ -705,10 +697,7 @@ private static Expression toExpression(SqlNode node) { return leftExpr; } } - Expression asFuncExpr = RequestUtils.getFunctionExpression("as"); - asFuncExpr.getFunctionCall().addToOperands(leftExpr); - asFuncExpr.getFunctionCall().addToOperands(rightExpr); - return asFuncExpr; + return RequestUtils.getFunctionExpression("as", leftExpr, rightExpr); case CASE: // CASE WHEN Statement is model as a function with variable length parameters. // Assume N is number of WHEN Statements, total number of parameters is (2 * N + 1). @@ -717,26 +706,22 @@ private static Expression toExpression(SqlNode node) { // - 1: Convert ELSE Statement into an Expression. 
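// For example (hypothetical query), CASE WHEN a > 1 THEN 'x' WHEN b > 2 THEN 'y'
// ELSE 'z' END compiles to case(a > 1, 'x', b > 2, 'y', 'z'): WHEN conditions at
// even operand indexes (0, 2), THEN results at odd indexes (1, 3), and the ELSE
// result as the final operand at index 2 * N (here 4).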
SqlCase caseSqlNode = (SqlCase) node; SqlNodeList whenOperands = caseSqlNode.getWhenOperands(); + int numWhenOperands = whenOperands.size(); SqlNodeList thenOperands = caseSqlNode.getThenOperands(); + Preconditions.checkState(numWhenOperands == thenOperands.size()); SqlNode elseOperand = caseSqlNode.getElseOperand(); - Expression caseFuncExpr = RequestUtils.getFunctionExpression("case"); - Preconditions.checkState(whenOperands.size() == thenOperands.size()); - for (int i = 0; i < whenOperands.size(); i++) { - SqlNode whenSqlNode = whenOperands.get(i); - Expression whenExpression = toExpression(whenSqlNode); - caseFuncExpr.getFunctionCall().addToOperands(whenExpression); - - SqlNode thenSqlNode = thenOperands.get(i); - Expression thenExpression = toExpression(thenSqlNode); - caseFuncExpr.getFunctionCall().addToOperands(thenExpression); + List caseOperands = new ArrayList<>(2 * numWhenOperands + 1); + for (int i = 0; i < numWhenOperands; i++) { + caseOperands.add(toExpression(whenOperands.get(i))); + caseOperands.add(toExpression(thenOperands.get(i))); } Expression elseExpression = toExpression(elseOperand); if (isAggregateExpression(elseExpression)) { throw new SqlCompilationException( "Aggregation functions inside ELSE Clause is not supported - " + elseExpression); } - caseFuncExpr.getFunctionCall().addToOperands(elseExpression); - return caseFuncExpr; + caseOperands.add(elseExpression); + return RequestUtils.getFunctionExpression("case", caseOperands); default: if (node instanceof SqlDataTypeSpec) { // This is to handle expression like: CAST(col AS INT) @@ -808,15 +793,9 @@ private static Expression compileFunctionExpression(SqlBasicCall functionNode) { } } ParserUtils.validateFunction(canonicalName, operands); - Expression functionExpression = RequestUtils.getFunctionExpression(canonicalName); - functionExpression.getFunctionCall().setOperands(operands); + Expression functionExpression = RequestUtils.getFunctionExpression(canonicalName, operands); if (negated) { - Expression negatedFunctionExpression = RequestUtils.getFunctionExpression(FilterKind.NOT.name()); - // Do not use `Collections.singletonList()` because we might modify the operand later - List negatedFunctionOperands = new ArrayList<>(1); - negatedFunctionOperands.add(functionExpression); - negatedFunctionExpression.getFunctionCall().setOperands(negatedFunctionOperands); - return negatedFunctionExpression; + return RequestUtils.getFunctionExpression(FilterKind.NOT.name(), functionExpression); } else { return functionExpression; } @@ -886,9 +865,7 @@ private static Expression compileAndExpression(SqlBasicCall andNode) { operands.add(toExpression(childNode)); } } - Expression andExpression = RequestUtils.getFunctionExpression(FilterKind.AND.name()); - andExpression.getFunctionCall().setOperands(operands); - return andExpression; + return RequestUtils.getFunctionExpression(FilterKind.AND.name(), operands); } /** @@ -904,9 +881,7 @@ private static Expression compileOrExpression(SqlBasicCall orNode) { operands.add(toExpression(childNode)); } } - Expression andExpression = RequestUtils.getFunctionExpression(FilterKind.OR.name()); - andExpression.getFunctionCall().setOperands(operands); - return andExpression; + return RequestUtils.getFunctionExpression(FilterKind.OR.name(), operands); } public static boolean isLiteralOnlyExpression(Expression e) { diff --git a/pinot-common/src/main/java/org/apache/pinot/sql/parsers/rewriter/ClpRewriter.java b/pinot-common/src/main/java/org/apache/pinot/sql/parsers/rewriter/ClpRewriter.java index 
4fdddac57d79..599545897520 100644 --- a/pinot-common/src/main/java/org/apache/pinot/sql/parsers/rewriter/ClpRewriter.java +++ b/pinot-common/src/main/java/org/apache/pinot/sql/parsers/rewriter/ClpRewriter.java @@ -23,6 +23,7 @@ import com.yscope.clp.compressorfrontend.ByteSegment; import com.yscope.clp.compressorfrontend.EightByteClpEncodedSubquery; import com.yscope.clp.compressorfrontend.EightByteClpWildcardQueryEncoder; +import java.util.ArrayList; import java.util.List; import javax.annotation.Nullable; import org.apache.calcite.sql.SqlKind; @@ -403,58 +404,39 @@ private void rewriteCLPDecodeFunction(Expression expression) { private ClpSqlSubqueryGenerationResult convertSubqueryToSql(String logtypeColumnName, String dictionaryVarsColumnName, String encodedVarsColumnName, String wildcardQuery, int subqueryIdx, EightByteClpEncodedSubquery[] subqueries) { EightByteClpEncodedSubquery subquery = subqueries[subqueryIdx]; - + Function logtypeMatchFunction = createLogtypeMatchFunction(logtypeColumnName, subquery.getLogtypeQueryAsString(), + subquery.logtypeQueryContainsWildcards()); if (!subquery.containsVariables()) { - Function f = createLogtypeMatchFunction(logtypeColumnName, subquery.getLogtypeQueryAsString(), - subquery.logtypeQueryContainsWildcards()); - return new ClpSqlSubqueryGenerationResult(false, f); + return new ClpSqlSubqueryGenerationResult(false, logtypeMatchFunction); } - Function subqueryFunc = new Function(SqlKind.AND.name()); - - Expression e; + List subqueryFunctionOperands = new ArrayList<>(); // Add logtype query - Function f = createLogtypeMatchFunction(logtypeColumnName, subquery.getLogtypeQueryAsString(), - subquery.logtypeQueryContainsWildcards()); - e = new Expression(ExpressionType.FUNCTION); - e.setFunctionCall(f); - subqueryFunc.addToOperands(e); + subqueryFunctionOperands.add(RequestUtils.getFunctionExpression(logtypeMatchFunction)); // Add any dictionary variables int numDictVars = 0; for (ByteSegment dictVar : subquery.getDictVars()) { - f = createStringColumnMatchFunction(SqlKind.EQUALS.name(), dictionaryVarsColumnName, dictVar.toString()); - e = new Expression(ExpressionType.FUNCTION); - e.setFunctionCall(f); - subqueryFunc.addToOperands(e); - - ++numDictVars; + subqueryFunctionOperands.add(RequestUtils.getFunctionExpression( + createStringColumnMatchFunction(SqlKind.EQUALS.name(), dictionaryVarsColumnName, dictVar.toString()))); + numDictVars++; } // Add any encoded variables int numEncodedVars = 0; for (long encodedVar : subquery.getEncodedVars()) { - f = new Function(SqlKind.EQUALS.name()); - f.addToOperands(RequestUtils.getIdentifierExpression(encodedVarsColumnName)); - f.addToOperands(RequestUtils.getLiteralExpression(encodedVar)); - - e = new Expression(ExpressionType.FUNCTION); - e.setFunctionCall(f); - subqueryFunc.addToOperands(e); - - ++numEncodedVars; + subqueryFunctionOperands.add(RequestUtils.getFunctionExpression(SqlKind.EQUALS.name(), + RequestUtils.getIdentifierExpression(encodedVarsColumnName), RequestUtils.getLiteralExpression(encodedVar))); + numEncodedVars++; } // Add any wildcard dictionary variables for (VariableWildcardQuery varWildcardQuery : subquery.getDictVarWildcardQueries()) { - f = createStringColumnMatchFunction(_REGEXP_LIKE_LOWERCASE_FUNCTION_NAME, dictionaryVarsColumnName, - wildcardQueryToRegex(varWildcardQuery.getQuery().toString())); - e = new Expression(ExpressionType.FUNCTION); - e.setFunctionCall(f); - subqueryFunc.addToOperands(e); - - ++numDictVars; + subqueryFunctionOperands.add(RequestUtils.getFunctionExpression( + 
createStringColumnMatchFunction(_REGEXP_LIKE_LOWERCASE_FUNCTION_NAME, dictionaryVarsColumnName, + wildcardQueryToRegex(varWildcardQuery.getQuery().toString())))); + numDictVars++; } // Add any wildcard encoded variables @@ -464,20 +446,14 @@ private ClpSqlSubqueryGenerationResult convertSubqueryToSql(String logtypeColumn // Create call to clpEncodedVarsMatch Expression clpEncodedVarsExp = RequestUtils.getFunctionExpression( RequestUtils.canonicalizeFunctionNamePreservingSpecialKey( - TransformFunctionType.CLP_ENCODED_VARS_MATCH.getName())); - f = clpEncodedVarsExp.getFunctionCall(); - f.addToOperands(RequestUtils.getIdentifierExpression(logtypeColumnName)); - f.addToOperands(RequestUtils.getIdentifierExpression(encodedVarsColumnName)); - f.addToOperands(RequestUtils.getLiteralExpression(wildcardQuery)); - f.addToOperands(RequestUtils.getLiteralExpression(subqueryIdx)); + TransformFunctionType.CLP_ENCODED_VARS_MATCH.getName()), + RequestUtils.getIdentifierExpression(logtypeColumnName), + RequestUtils.getIdentifierExpression(encodedVarsColumnName), RequestUtils.getLiteralExpression(wildcardQuery), + RequestUtils.getLiteralExpression(subqueryIdx)); // Create `clpEncodedVarsMatch(...) = true` - e = RequestUtils.getFunctionExpression(SqlKind.EQUALS.name()); - f = e.getFunctionCall(); - f.addToOperands(clpEncodedVarsExp); - f.addToOperands(RequestUtils.getLiteralExpression(true)); - - subqueryFunc.addToOperands(e); + subqueryFunctionOperands.add(RequestUtils.getFunctionExpression(SqlKind.EQUALS.name(), clpEncodedVarsExp, + RequestUtils.getLiteralExpression(true))); } // We require a decompress and match in the following cases: @@ -494,7 +470,8 @@ private ClpSqlSubqueryGenerationResult convertSubqueryToSql(String logtypeColumn // value "user dv123 joined" but it could also match "user dv456 joined dv123". 
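// Equivalently: decompress-and-match can be skipped only when there is at most
// one dictionary variable, at most one encoded variable, and the logtype query
// has no wildcards; every other combination admits false positives like the
// reordering above, so the flag below is the negation of that safe case.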
boolean requiresDecompAndMatch = !(numDictVars < 2 && numEncodedVars < 2 && !subquery.logtypeQueryContainsWildcards()); - return new ClpSqlSubqueryGenerationResult(requiresDecompAndMatch, subqueryFunc); + return new ClpSqlSubqueryGenerationResult(requiresDecompAndMatch, + RequestUtils.getFunction(SqlKind.AND.name(), subqueryFunctionOperands)); } private Function createLogtypeMatchFunction(String columnName, String query, boolean containsWildcards) { @@ -511,10 +488,8 @@ private Function createLogtypeMatchFunction(String columnName, String query, boo } private Function createStringColumnMatchFunction(String canonicalName, String columnName, String query) { - Function func = new Function(canonicalName); - func.addToOperands(RequestUtils.getIdentifierExpression(columnName)); - func.addToOperands(RequestUtils.getLiteralExpression(query)); - return func; + return RequestUtils.getFunction(canonicalName, RequestUtils.getIdentifierExpression(columnName), + RequestUtils.getLiteralExpression(query)); } /** diff --git a/pinot-common/src/main/java/org/apache/pinot/sql/parsers/rewriter/ExprMinMaxRewriter.java b/pinot-common/src/main/java/org/apache/pinot/sql/parsers/rewriter/ExprMinMaxRewriter.java index 2363ae5c42ac..62ce44540a74 100644 --- a/pinot-common/src/main/java/org/apache/pinot/sql/parsers/rewriter/ExprMinMaxRewriter.java +++ b/pinot-common/src/main/java/org/apache/pinot/sql/parsers/rewriter/ExprMinMaxRewriter.java @@ -101,14 +101,12 @@ private void appendParentExprMinMaxFunctions(boolean isMax, List sel for (Map.Entry, Set> entry : exprMinMaxFunctionMap.entrySet()) { List measuringColumns = entry.getKey(); Set projectionColumns = entry.getValue(); - Expression functionExpression = RequestUtils.getFunctionExpression(isMax ? EXPR_MAX_PARENT : EXPR_MIN_PARENT); List operands = new ArrayList<>(2 + measuringColumns.size() + projectionColumns.size()); operands.add(RequestUtils.getLiteralExpression((int) exprMinMaxFunctionIDMap.get(measuringColumns))); operands.add(RequestUtils.getLiteralExpression(measuringColumns.size())); operands.addAll(measuringColumns); operands.addAll(projectionColumns); - functionExpression.getFunctionCall().setOperands(operands); - selectList.add(functionExpression); + selectList.add(RequestUtils.getFunctionExpression(isMax ? 
EXPR_MAX_PARENT : EXPR_MIN_PARENT, operands)); } } diff --git a/pinot-common/src/main/java/org/apache/pinot/sql/parsers/rewriter/NonAggregationGroupByToDistinctQueryRewriter.java b/pinot-common/src/main/java/org/apache/pinot/sql/parsers/rewriter/NonAggregationGroupByToDistinctQueryRewriter.java index 8005dbd759b4..2a51829f7ebf 100644 --- a/pinot-common/src/main/java/org/apache/pinot/sql/parsers/rewriter/NonAggregationGroupByToDistinctQueryRewriter.java +++ b/pinot-common/src/main/java/org/apache/pinot/sql/parsers/rewriter/NonAggregationGroupByToDistinctQueryRewriter.java @@ -18,8 +18,9 @@ */ package org.apache.pinot.sql.parsers.rewriter; -import java.util.Collections; +import java.util.ArrayList; import java.util.HashSet; +import java.util.List; import java.util.Set; import org.apache.pinot.common.request.Expression; import org.apache.pinot.common.request.Function; @@ -76,9 +77,11 @@ public PinotQuery rewrite(PinotQuery pinotQuery) { } Set groupByExpressions = new HashSet<>(pinotQuery.getGroupByList()); if (selectExpressions.equals(groupByExpressions)) { - Expression distinct = RequestUtils.getFunctionExpression("distinct"); - distinct.getFunctionCall().setOperands(pinotQuery.getSelectList()); - pinotQuery.setSelectList(Collections.singletonList(distinct)); + Expression distinct = RequestUtils.getFunctionExpression("distinct", pinotQuery.getSelectList()); + // NOTE: Create an ArrayList because we might need to modify the list later + List newSelectList = new ArrayList<>(1); + newSelectList.add(distinct); + pinotQuery.setSelectList(newSelectList); pinotQuery.setGroupByList(null); return pinotQuery; } else { diff --git a/pinot-common/src/main/java/org/apache/pinot/sql/parsers/rewriter/OrdinalsUpdater.java b/pinot-common/src/main/java/org/apache/pinot/sql/parsers/rewriter/OrdinalsUpdater.java index 204ccca2b3da..605f9488a589 100644 --- a/pinot-common/src/main/java/org/apache/pinot/sql/parsers/rewriter/OrdinalsUpdater.java +++ b/pinot-common/src/main/java/org/apache/pinot/sql/parsers/rewriter/OrdinalsUpdater.java @@ -18,7 +18,7 @@ */ package org.apache.pinot.sql.parsers.rewriter; -import java.util.Collections; +import java.util.ArrayList; import java.util.List; import org.apache.pinot.common.request.Expression; import org.apache.pinot.common.request.Function; @@ -49,8 +49,10 @@ public PinotQuery rewrite(PinotQuery pinotQuery) { if (isNullsLast != null) { functionToSet = functionToSet.getOperands().get(0).getFunctionCall(); } - functionToSet.setOperands( - Collections.singletonList(getExpressionFromOrdinal(pinotQuery.getSelectList(), ordinal))); + // NOTE: Create an ArrayList because we might need to modify the list later + List newOperands = new ArrayList<>(1); + newOperands.add(getExpressionFromOrdinal(pinotQuery.getSelectList(), ordinal)); + functionToSet.setOperands(newOperands); } } return pinotQuery; diff --git a/pinot-common/src/main/java/org/apache/pinot/sql/parsers/rewriter/PredicateComparisonRewriter.java b/pinot-common/src/main/java/org/apache/pinot/sql/parsers/rewriter/PredicateComparisonRewriter.java index c59b5126ec4a..929ef701a3bb 100644 --- a/pinot-common/src/main/java/org/apache/pinot/sql/parsers/rewriter/PredicateComparisonRewriter.java +++ b/pinot-common/src/main/java/org/apache/pinot/sql/parsers/rewriter/PredicateComparisonRewriter.java @@ -19,8 +19,6 @@ package org.apache.pinot.sql.parsers.rewriter; import com.google.common.base.Preconditions; -import java.util.ArrayList; -import java.util.Arrays; import java.util.List; import org.apache.commons.lang3.EnumUtils; import 
org.apache.pinot.common.request.Expression; @@ -96,10 +94,7 @@ private static Expression updateFunctionExpression(Expression expression) { case AND: case OR: case NOT: - for (int i = 0; i < operands.size(); i++) { - Expression operand = operands.get(i); - operands.set(i, updatePredicate(operand)); - } + operands.replaceAll(PredicateComparisonRewriter::updatePredicate); break; case EQUALS: case NOT_EQUALS: @@ -122,8 +117,7 @@ private static Expression updateFunctionExpression(Expression expression) { // Handle predicate like 'a > b' -> 'a - b > 0' if (!secondOperand.isSetLiteral()) { - Expression minusExpression = RequestUtils.getFunctionExpression("minus"); - minusExpression.getFunctionCall().setOperands(Arrays.asList(firstOperand, secondOperand)); + Expression minusExpression = RequestUtils.getFunctionExpression("minus", firstOperand, secondOperand); operands.set(0, minusExpression); operands.set(1, RequestUtils.getLiteralExpression(0)); break; @@ -181,14 +175,8 @@ private static Expression updateFunctionExpression(Expression expression) { * @return Rewritten expression */ private static Expression convertPredicateToEqualsBooleanExpression(Expression expression) { - Expression newExpression; - newExpression = RequestUtils.getFunctionExpression(FilterKind.EQUALS.name()); - List operands = new ArrayList<>(); - operands.add(expression); - operands.add(RequestUtils.getLiteralExpression(true)); - newExpression.getFunctionCall().setOperands(operands); - - return newExpression; + return RequestUtils.getFunctionExpression(FilterKind.EQUALS.name(), expression, + RequestUtils.getLiteralExpression(true)); } /** diff --git a/pinot-common/src/test/java/org/apache/pinot/sql/parsers/CalciteSqlCompilerTest.java b/pinot-common/src/test/java/org/apache/pinot/sql/parsers/CalciteSqlCompilerTest.java index 55d607b6ed09..574d6ad429da 100644 --- a/pinot-common/src/test/java/org/apache/pinot/sql/parsers/CalciteSqlCompilerTest.java +++ b/pinot-common/src/test/java/org/apache/pinot/sql/parsers/CalciteSqlCompilerTest.java @@ -21,6 +21,7 @@ import java.time.Instant; import java.time.ZoneId; import java.time.format.DateTimeFormatter; +import java.util.ArrayList; import java.util.List; import java.util.concurrent.TimeUnit; import org.apache.calcite.sql.SqlNumericLiteral; @@ -48,21 +49,66 @@ public class CalciteSqlCompilerTest { private static final long ONE_HOUR_IN_MS = TimeUnit.HOURS.toMillis(1); + /* Verify all lists in PinotQuery are ArrayLists because we might need to modify them during query optimization */ + + private Expression compileToExpression(String expressionStr) { + Expression expression = CalciteSqlParser.compileToExpression(expressionStr); + verifyListInExpression(expression); + return expression; + } + + private void verifyListInExpression(Expression expression) { + Function function = expression.getFunctionCall(); + if (function != null) { + verifyListInExpressions(function.getOperands()); + } + } + + private void verifyListInExpressions(List expressions) { + Assert.assertTrue(expressions instanceof ArrayList); + for (Expression expression : expressions) { + verifyListInExpression(expression); + } + } + + private PinotQuery compileToPinotQuery(String sql) { + PinotQuery query = CalciteSqlParser.compileToPinotQuery(sql); + List selectList = query.getSelectList(); + verifyListInExpressions(selectList); + Expression filterExpression = query.getFilterExpression(); + if (filterExpression != null) { + verifyListInExpression(filterExpression); + } + List groupByList = query.getGroupByList(); + if 
(groupByList != null) { + verifyListInExpressions(groupByList); + } + List orderByList = query.getOrderByList(); + if (orderByList != null) { + verifyListInExpressions(orderByList); + } + Expression havingExpression = query.getHavingExpression(); + if (havingExpression != null) { + verifyListInExpression(havingExpression); + } + return query; + } + @Test public void testCanonicalFunctionName() { - Expression expression = CalciteSqlParser.compileToExpression("dIsTiNcT_cOuNt(AbC)"); + Expression expression = compileToExpression("dIsTiNcT_cOuNt(AbC)"); Function function = expression.getFunctionCall(); Assert.assertEquals(function.getOperator(), AggregationFunctionType.DISTINCTCOUNT.name().toLowerCase()); Assert.assertEquals(function.getOperands().size(), 1); Assert.assertEquals(function.getOperands().get(0).getIdentifier().getName(), "AbC"); - expression = CalciteSqlParser.compileToExpression("ReGeXpLiKe(AbC)"); + expression = compileToExpression("ReGeXpLiKe(AbC)"); function = expression.getFunctionCall(); Assert.assertEquals(function.getOperator(), FilterKind.REGEXP_LIKE.name()); Assert.assertEquals(function.getOperands().size(), 1); Assert.assertEquals(function.getOperands().get(0).getIdentifier().getName(), "AbC"); - expression = CalciteSqlParser.compileToExpression("aBc > DeF"); + expression = compileToExpression("aBc > DeF"); function = expression.getFunctionCall(); Assert.assertEquals(function.getOperator(), FilterKind.GREATER_THAN.name()); Assert.assertEquals(function.getOperands().size(), 2); @@ -73,7 +119,7 @@ public void testCanonicalFunctionName() { @Test public void testCaseWhenStatements() { //@formatter:off - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery( + PinotQuery pinotQuery = compileToPinotQuery( "SELECT OrderID, Quantity,\n" + "CASE\n" + " WHEN Quantity > 30 THEN 'The quantity is greater than 30'\n" @@ -102,7 +148,7 @@ public void testCaseWhenStatements() { Assert.assertEquals(caseFunc.getOperands().get(4).getLiteral().getFieldValue(), "The quantity is under 30"); //@formatter:off - pinotQuery = CalciteSqlParser.compileToPinotQuery( + pinotQuery = compileToPinotQuery( "SELECT Quantity,\n" + "SUM(CASE\n" + " WHEN Quantity > 30 THEN 3\n" @@ -141,7 +187,7 @@ public void testCaseWhenStatements() { @Test public void testAggregationInCaseWhenStatementsWithGroupBy() { //@formatter:off - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery( + PinotQuery pinotQuery = compileToPinotQuery( "SELECT OrderID, SUM(Quantity),\n" + "CASE\n" + " WHEN sum(Quantity) > 30 THEN 'The quantity is greater than 30'\n" @@ -168,7 +214,7 @@ public void testAggregationInCaseWhenStatementsWithGroupBy() { @Test public void testAggregationInCaseWhenStatements() { //@formatter:off - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery( + PinotQuery pinotQuery = compileToPinotQuery( "SELECT sum(Quantity),\n" + "CASE\n" + " WHEN sum(Quantity) > 30 THEN 'The quantity is greater than 30'\n" @@ -193,24 +239,22 @@ public void testAggregationInCaseWhenStatements() { @Test public void testQuotedStrings() { - PinotQuery pinotQuery = - CalciteSqlParser.compileToPinotQuery("select * from vegetables where origin = 'Martha''s Vineyard'"); + PinotQuery pinotQuery = compileToPinotQuery("select * from vegetables where origin = 'Martha''s Vineyard'"); Assert.assertEquals( pinotQuery.getFilterExpression().getFunctionCall().getOperands().get(1).getLiteral().getStringValue(), "Martha's Vineyard"); - pinotQuery = CalciteSqlParser.compileToPinotQuery("select * from vegetables where origin = 
'Martha\"\"s Vineyard'"); + pinotQuery = compileToPinotQuery("select * from vegetables where origin = 'Martha\"\"s Vineyard'"); Assert.assertEquals( pinotQuery.getFilterExpression().getFunctionCall().getOperands().get(1).getLiteral().getStringValue(), "Martha\"\"s Vineyard"); - pinotQuery = - CalciteSqlParser.compileToPinotQuery("select * from vegetables where origin = \"Martha\"\"s Vineyard\""); + pinotQuery = compileToPinotQuery("select * from vegetables where origin = \"Martha\"\"s Vineyard\""); Assert.assertEquals( pinotQuery.getFilterExpression().getFunctionCall().getOperands().get(0).getFunctionCall().getOperands().get(1) .getIdentifier().getName(), "Martha\"s Vineyard"); - pinotQuery = CalciteSqlParser.compileToPinotQuery("select * from vegetables where origin = \"Martha''s Vineyard\""); + pinotQuery = compileToPinotQuery("select * from vegetables where origin = \"Martha''s Vineyard\""); Assert.assertEquals( pinotQuery.getFilterExpression().getFunctionCall().getOperands().get(0).getFunctionCall().getOperands().get(1) .getIdentifier().getName(), "Martha''s Vineyard"); @@ -220,28 +264,28 @@ public void testQuotedStrings() { public void testExtract() { { // Case 1 -- Year and date format ('2017-06-15') - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery("SELECT EXTRACT(YEAR FROM '2017-06-15')"); + PinotQuery pinotQuery = compileToPinotQuery("SELECT EXTRACT(YEAR FROM '2017-06-15')"); Function function = pinotQuery.getSelectList().get(0).getFunctionCall(); Assert.assertEquals(function.getOperands().get(0).getLiteral().getStringValue(), "YEAR"); Assert.assertEquals(function.getOperands().get(1).getLiteral().getStringValue(), "2017-06-15"); } { // Case 2 -- date format ('2017-06-15 09:34:21') - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery("SELECT EXTRACT(YEAR FROM '2017-06-15 09:34:21')"); + PinotQuery pinotQuery = compileToPinotQuery("SELECT EXTRACT(YEAR FROM '2017-06-15 09:34:21')"); Function function = pinotQuery.getSelectList().get(0).getFunctionCall(); Assert.assertEquals(function.getOperands().get(0).getLiteral().getStringValue(), "YEAR"); Assert.assertEquals(function.getOperands().get(1).getLiteral().getStringValue(), "2017-06-15 09:34:21"); } { // Case 3 -- Month - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery("SELECT EXTRACT(MONTH FROM '2017-06-15')"); + PinotQuery pinotQuery = compileToPinotQuery("SELECT EXTRACT(MONTH FROM '2017-06-15')"); Function function = pinotQuery.getSelectList().get(0).getFunctionCall(); Assert.assertEquals(function.getOperands().get(0).getLiteral().getStringValue(), "MONTH"); Assert.assertEquals(function.getOperands().get(1).getLiteral().getStringValue(), "2017-06-15"); } { // Case 4 -- Day - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery("SELECT EXTRACT(DAY FROM '2017-06-15')"); + PinotQuery pinotQuery = compileToPinotQuery("SELECT EXTRACT(DAY FROM '2017-06-15')"); Function function = pinotQuery.getSelectList().get(0).getFunctionCall(); Assert.assertEquals(function.getOperands().get(0).getLiteral().getStringValue(), "DAY"); Assert.assertEquals(function.getOperands().get(1).getLiteral().getStringValue(), "2017-06-15"); @@ -251,7 +295,7 @@ public void testExtract() { @Test public void testFilterClauses() { { - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery("select * from vegetables where a > 1.5"); + PinotQuery pinotQuery = compileToPinotQuery("select * from vegetables where a > 1.5"); Function func = pinotQuery.getFilterExpression().getFunctionCall(); 
      Assert.assertEquals(func.getOperator(), FilterKind.GREATER_THAN.name());
      Assert.assertEquals(func.getOperands().get(0).getIdentifier().getName(), "a");
@@ -259,7 +303,7 @@ public void testFilterClauses() {
    }
    {
-      PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery("select * from vegetables where b < 100");
+      PinotQuery pinotQuery = compileToPinotQuery("select * from vegetables where b < 100");
      Function func = pinotQuery.getFilterExpression().getFunctionCall();
      Assert.assertEquals(func.getOperator(), FilterKind.LESS_THAN.name());
      Assert.assertEquals(func.getOperands().get(0).getIdentifier().getName(), "b");
@@ -267,7 +311,7 @@ public void testFilterClauses() {
    }
    {
-      PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery("select * from vegetables where c >= 10");
+      PinotQuery pinotQuery = compileToPinotQuery("select * from vegetables where c >= 10");
      Function func = pinotQuery.getFilterExpression().getFunctionCall();
      Assert.assertEquals(func.getOperator(), FilterKind.GREATER_THAN_OR_EQUAL.name());
      Assert.assertEquals(func.getOperands().get(0).getIdentifier().getName(), "c");
@@ -275,7 +319,7 @@ public void testFilterClauses() {
    }
    {
-      PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery("select * from vegetables where d <= 50");
+      PinotQuery pinotQuery = compileToPinotQuery("select * from vegetables where d <= 50");
      Function func = pinotQuery.getFilterExpression().getFunctionCall();
      Assert.assertEquals(func.getOperator(), FilterKind.LESS_THAN_OR_EQUAL.name());
      Assert.assertEquals(func.getOperands().get(0).getIdentifier().getName(), "d");
@@ -283,8 +327,7 @@ public void testFilterClauses() {
    }
    {
-      PinotQuery pinotQuery =
-          CalciteSqlParser.compileToPinotQuery("select * from vegetables where e BETWEEN 70 AND 80");
+      PinotQuery pinotQuery = compileToPinotQuery("select * from vegetables where e BETWEEN 70 AND 80");
      Function func = pinotQuery.getFilterExpression().getFunctionCall();
      Assert.assertEquals(func.getOperator(), FilterKind.BETWEEN.name());
      Assert.assertEquals(func.getOperands().get(0).getIdentifier().getName(), "e");
@@ -293,8 +336,7 @@ public void testFilterClauses() {
    }
    {
-      PinotQuery pinotQuery =
-          CalciteSqlParser.compileToPinotQuery("select * from vegetables where regexp_like(E, '^U.*')");
+      PinotQuery pinotQuery = compileToPinotQuery("select * from vegetables where regexp_like(E, '^U.*')");
      Function func = pinotQuery.getFilterExpression().getFunctionCall();
      Assert.assertEquals(func.getOperator(), "REGEXP_LIKE");
      Assert.assertEquals(func.getOperands().get(0).getIdentifier().getName(), "E");
@@ -302,8 +344,7 @@ public void testFilterClauses() {
    }
    {
-      PinotQuery pinotQuery =
-          CalciteSqlParser.compileToPinotQuery("select * from vegetables where g IN (12, 13, 15.2, 17)");
+      PinotQuery pinotQuery = compileToPinotQuery("select * from vegetables where g IN (12, 13, 15.2, 17)");
      Function func = pinotQuery.getFilterExpression().getFunctionCall();
      Assert.assertEquals(func.getOperator(), FilterKind.IN.name());
      Assert.assertEquals(func.getOperands().get(0).getIdentifier().getName(), "g");
@@ -314,7 +355,7 @@ public void testFilterClauses() {
    }
    {
-      PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery("select * from vegetable where g");
+      PinotQuery pinotQuery = compileToPinotQuery("select * from vegetable where g");
      Function func = pinotQuery.getFilterExpression().getFunctionCall();
      Assert.assertEquals(func.getOperator(), FilterKind.EQUALS.name());
      Assert.assertEquals(func.getOperands().get(0).getIdentifier().getName(), "g");
@@ -322,7 +363,7 @@ public void testFilterClauses() {
    }
    {
-      PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery("select * from vegetable where g or f = true");
+      PinotQuery pinotQuery = compileToPinotQuery("select * from vegetable where g or f = true");
      Function func = pinotQuery.getFilterExpression().getFunctionCall();
      Assert.assertEquals(func.getOperator(), FilterKind.OR.name());
      List<Expression> operands = func.getOperands();
@@ -337,8 +378,7 @@ public void testFilterClauses() {
    }
    {
-      PinotQuery pinotQuery =
-          CalciteSqlParser.compileToPinotQuery("select * from vegetable where startsWith(g, 'str')");
+      PinotQuery pinotQuery = compileToPinotQuery("select * from vegetable where startsWith(g, 'str')");
      Function func = pinotQuery.getFilterExpression().getFunctionCall();
      Assert.assertEquals(func.getOperator(), FilterKind.EQUALS.name());
      Assert.assertEquals(func.getOperands().get(0).getFunctionCall().getOperator(), "startswith");
@@ -346,8 +386,8 @@ public void testFilterClauses() {
    }
    {
-      PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(
-          "select * from vegetable where startsWith(g, 'str')=true and startsWith(f, 'str')");
+      PinotQuery pinotQuery =
+          compileToPinotQuery("select * from vegetable where startsWith(g, 'str')=true and startsWith(f, 'str')");
      Function func = pinotQuery.getFilterExpression().getFunctionCall();
      Assert.assertEquals(func.getOperator(), FilterKind.AND.name());
      List<Expression> operands = func.getOperands();
@@ -365,7 +405,7 @@ public void testFilterClauses() {
    }
    {
-      PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(
+      PinotQuery pinotQuery = compileToPinotQuery(
          "select * from vegetable where (startsWith(g, 'str')=true and startsWith(f, 'str')) AND (e and d=true)");
      Function func = pinotQuery.getFilterExpression().getFunctionCall();
      Assert.assertEquals(func.getOperator(), FilterKind.AND.name());
@@ -394,8 +434,7 @@ public void testFilterClauses() {
    }
    {
-      PinotQuery pinotQuery =
-          CalciteSqlParser.compileToPinotQuery("select * from vegetable where isSubnetOf('192.168.0.1/24', foo)");
+      PinotQuery pinotQuery = compileToPinotQuery("select * from vegetable where isSubnetOf('192.168.0.1/24', foo)");
      Function func = pinotQuery.getFilterExpression().getFunctionCall();
      Assert.assertEquals(func.getOperator(), FilterKind.EQUALS.name());
      List<Expression> operands = func.getOperands();
@@ -405,7 +444,7 @@ public void testFilterClauses() {
    }
    {
-      PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(
+      PinotQuery pinotQuery = compileToPinotQuery(
          "select * from vegetable where isSubnetOf('192.168.0.1/24', foo)=true AND isSubnetOf('192.168.0.1/24', "
              + "foo)");
      Function func = pinotQuery.getFilterExpression().getFunctionCall();
@@ -429,7 +468,7 @@ public void testFilterClauses() {
  @Test
  public void testFilterClausesWithRightExpression() {
-    PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery("select * from vegetables where a > b");
+    PinotQuery pinotQuery = compileToPinotQuery("select * from vegetables where a > b");
    Function func = pinotQuery.getFilterExpression().getFunctionCall();
    Assert.assertEquals(func.getOperator(), FilterKind.GREATER_THAN.name());
    Assert.assertEquals(func.getOperands().get(0).getFunctionCall().getOperator(), "minus");
@@ -438,7 +477,7 @@ public void testFilterClausesWithRightExpression() {
    Assert.assertEquals(func.getOperands().get(0).getFunctionCall().getOperands().get(1).getIdentifier().getName(),
        "b");
    Assert.assertEquals(func.getOperands().get(1).getLiteral().getLongValue(), 0L);
-    pinotQuery = CalciteSqlParser.compileToPinotQuery("select * from vegetables where 0 < a-b");
+    pinotQuery = compileToPinotQuery("select * from vegetables where 0 < a-b");
    func = pinotQuery.getFilterExpression().getFunctionCall();
    Assert.assertEquals(func.getOperator(), FilterKind.GREATER_THAN.name());
    Assert.assertEquals(func.getOperands().get(0).getFunctionCall().getOperator(), "minus");
@@ -448,7 +487,7 @@ public void testFilterClausesWithRightExpression() {
        "b");
    Assert.assertEquals(func.getOperands().get(1).getLiteral().getLongValue(), 0L);
-    pinotQuery = CalciteSqlParser.compileToPinotQuery("select * from vegetables where b < 100 + c");
+    pinotQuery = compileToPinotQuery("select * from vegetables where b < 100 + c");
    func = pinotQuery.getFilterExpression().getFunctionCall();
    Assert.assertEquals(func.getOperator(), FilterKind.LESS_THAN.name());
    Assert.assertEquals(func.getOperands().get(0).getFunctionCall().getOperator(), "minus");
@@ -463,7 +502,7 @@ public void testFilterClausesWithRightExpression() {
        func.getOperands().get(0).getFunctionCall().getOperands().get(1).getFunctionCall().getOperands().get(1)
            .getIdentifier().getName(), "c");
    Assert.assertEquals(func.getOperands().get(1).getLiteral().getLongValue(), 0L);
-    pinotQuery = CalciteSqlParser.compileToPinotQuery("select * from vegetables where b -(100+c)< 0");
+    pinotQuery = compileToPinotQuery("select * from vegetables where b -(100+c)< 0");
    func = pinotQuery.getFilterExpression().getFunctionCall();
    Assert.assertEquals(func.getOperator(), FilterKind.LESS_THAN.name());
    Assert.assertEquals(func.getOperands().get(0).getFunctionCall().getOperator(), "minus");
@@ -479,8 +518,7 @@ public void testFilterClausesWithRightExpression() {
            .getIdentifier().getName(), "c");
    Assert.assertEquals(func.getOperands().get(1).getLiteral().getLongValue(), 0L);
-    pinotQuery =
-        CalciteSqlParser.compileToPinotQuery("select * from vegetables where foo1(bar1(a-b)) <= foo2(bar2(c+d))");
+    pinotQuery = compileToPinotQuery("select * from vegetables where foo1(bar1(a-b)) <= foo2(bar2(c+d))");
    func = pinotQuery.getFilterExpression().getFunctionCall();
    Assert.assertEquals(func.getOperator(), FilterKind.LESS_THAN_OR_EQUAL.name());
    Assert.assertEquals(func.getOperands().get(0).getFunctionCall().getOperator(), "minus");
@@ -517,8 +555,7 @@ public void testFilterClausesWithRightExpression() {
        .getFunctionCall().getOperands().get(0).getFunctionCall().getOperands().get(1).getIdentifier().getName(),
        "d");
    Assert.assertEquals(func.getOperands().get(1).getLiteral().getLongValue(), 0L);
-    pinotQuery =
-        CalciteSqlParser.compileToPinotQuery("select * from vegetables where foo1(bar1(a-b)) - foo2(bar2(c+d)) <= 0");
+    pinotQuery = compileToPinotQuery("select * from vegetables where foo1(bar1(a-b)) - foo2(bar2(c+d)) <= 0");
    func = pinotQuery.getFilterExpression().getFunctionCall();
    Assert.assertEquals(func.getOperator(), FilterKind.LESS_THAN_OR_EQUAL.name());
    Assert.assertEquals(func.getOperands().get(0).getFunctionCall().getOperator(), "minus");
@@ -556,12 +593,12 @@ public void testFilterClausesWithRightExpression() {
        "d");
    Assert.assertEquals(func.getOperands().get(1).getLiteral().getLongValue(), 0L);
-    pinotQuery = CalciteSqlParser.compileToPinotQuery("select * from vegetables where c >= 10");
+    pinotQuery = compileToPinotQuery("select * from vegetables where c >= 10");
    func = pinotQuery.getFilterExpression().getFunctionCall();
    Assert.assertEquals(func.getOperator(), FilterKind.GREATER_THAN_OR_EQUAL.name());
    Assert.assertEquals(func.getOperands().get(0).getIdentifier().getName(), "c");
    Assert.assertEquals(func.getOperands().get(1).getLiteral().getLongValue(), 10L);
-    pinotQuery = CalciteSqlParser.compileToPinotQuery("select * from vegetables where 10 <= c");
+    pinotQuery = compileToPinotQuery("select * from vegetables where 10 <= c");
    func = pinotQuery.getFilterExpression().getFunctionCall();
    Assert.assertEquals(func.getOperator(), FilterKind.GREATER_THAN_OR_EQUAL.name());
    Assert.assertEquals(func.getOperands().get(0).getIdentifier().getName(), "c");
@@ -584,7 +621,7 @@ public void testInvalidFilterClauses() {
  private void testInvalidFilterClause(String filter) {
    try {
-      CalciteSqlParser.compileToPinotQuery("select * from vegetables where " + filter);
+      compileToPinotQuery("select * from vegetables where " + filter);
    } catch (SqlCompilationException e) {
      // Expected
      return;
@@ -614,8 +651,7 @@ public void testTopZero() {
  public void testLimitOffsets() {
    PinotQuery pinotQuery;
    try {
-      pinotQuery =
-          CalciteSqlParser.compileToPinotQuery("select a, b, c from meetupRsvp order by a, b, c limit 100 offset 200");
+      pinotQuery = compileToPinotQuery("select a, b, c from meetupRsvp order by a, b, c limit 100 offset 200");
    } catch (SqlCompilationException e) {
      throw e;
    }
@@ -626,8 +662,7 @@ public void testLimitOffsets() {
    Assert.assertEquals(200, pinotQuery.getOffset());
    try {
-      pinotQuery =
-          CalciteSqlParser.compileToPinotQuery("select a, b, c from meetupRsvp order by a, b, c limit 200,100");
+      pinotQuery = compileToPinotQuery("select a, b, c from meetupRsvp order by a, b, c limit 200,100");
    } catch (SqlCompilationException e) {
      throw e;
    }
@@ -643,7 +678,7 @@ public void testGroupbys() {
    PinotQuery pinotQuery;
    try {
-      pinotQuery = CalciteSqlParser.compileToPinotQuery(
+      pinotQuery = compileToPinotQuery(
          "select sum(rsvp_count), count(*), group_city from meetupRsvp group by group_city order by sum(rsvp_count) "
              + "limit 10");
    } catch (SqlCompilationException e) {
@@ -660,7 +695,7 @@ public void testGroupbys() {
    Assert.assertEquals(10, pinotQuery.getLimit());
    try {
-      pinotQuery = CalciteSqlParser.compileToPinotQuery(
+      pinotQuery = compileToPinotQuery(
          "select sum(rsvp_count), count(*) from meetupRsvp group by group_city order by sum(rsvp_count) limit 10");
    } catch (SqlCompilationException e) {
      throw e;
@@ -676,7 +711,7 @@ public void testGroupbys() {
    Assert.assertEquals(10, pinotQuery.getLimit());
    try {
-      pinotQuery = CalciteSqlParser.compileToPinotQuery(
+      pinotQuery = compileToPinotQuery(
          "select group_city, sum(rsvp_count), count(*) from meetupRsvp group by group_city order by sum(rsvp_count),"
              + " count(*) limit 10");
    } catch (SqlCompilationException e) {
@@ -703,10 +738,9 @@ public void testGroupbys() {
    // nested functions in group by
    try {
-      pinotQuery = CalciteSqlParser.compileToPinotQuery(
-          "select concat(upper(playerName), lower(teamID), '-') playerTeam, "
-              + "upper(league) leagueUpper, count(playerName) cnt from baseballStats group by playerTeam, lower"
-              + "(teamID), leagueUpper having cnt > 1 order by cnt desc limit 10");
+      pinotQuery = compileToPinotQuery("select concat(upper(playerName), lower(teamID), '-') playerTeam, "
+          + "upper(league) leagueUpper, count(playerName) cnt from baseballStats group by playerTeam, lower"
+          + "(teamID), leagueUpper having cnt > 1 order by cnt desc limit 10");
    } catch (SqlCompilationException e) {
      throw e;
    }
@@ -723,7 +757,7 @@ public void testGroupbys() {
  private void assertCompilationFails(String query) {
    try {
-      CalciteSqlParser.compileToPinotQuery(query);
+      compileToPinotQuery(query);
    } catch (SqlCompilationException e) {
      // Expected
      return;
@@ -735,7 +769,7 @@ private void assertCompilationFails(String query) {
  private void testTopZeroFor(String s, final int expectedTopN, boolean parseException) {
    PinotQuery pinotQuery;
    try {
-      pinotQuery = CalciteSqlParser.compileToPinotQuery(s);
+      pinotQuery = compileToPinotQuery(s);
    } catch (SqlCompilationException e) {
      if (parseException) {
        return;
@@ -766,7 +800,7 @@ public void testParseExceptionHasCharacterPosition() {
    final String query = "select foo from bar where baz ? 2";
    try {
-      CalciteSqlParser.compileToPinotQuery(query);
+      compileToPinotQuery(query);
    } catch (SqlCompilationException e) {
      // Expected
      Assert.assertTrue(e.getCause().getMessage().contains("at line 1, column 31."),
@@ -780,11 +814,10 @@ public void testParseExceptionHasCharacterPosition() {
  @Test
  public void testCStyleInequalityOperator() {
-    PinotQuery pinotQuery =
-        CalciteSqlParser.compileToPinotQuery("select * from vegetables where name <> 'Brussels sprouts'");
+    PinotQuery pinotQuery = compileToPinotQuery("select * from vegetables where name <> 'Brussels sprouts'");
    Assert.assertEquals(pinotQuery.getFilterExpression().getFunctionCall().getOperator(), "NOT_EQUALS");
-    pinotQuery = CalciteSqlParser.compileToPinotQuery("select * from vegetables where name != 'Brussels sprouts'");
+    pinotQuery = compileToPinotQuery("select * from vegetables where name != 'Brussels sprouts'");
    Assert.assertEquals(pinotQuery.getFilterExpression().getFunctionCall().getOperator(), "NOT_EQUALS");
  }
@@ -792,18 +825,17 @@ public void testCStyleInequalityOperator() {
  @Deprecated
  // TODO: to be removed once OPTIONS REGEX match is deprecated
  public void testQueryOptions() {
-    PinotQuery pinotQuery =
-        CalciteSqlParser.compileToPinotQuery("select * from vegetables where name <> 'Brussels sprouts'");
+    PinotQuery pinotQuery = compileToPinotQuery("select * from vegetables where name <> 'Brussels sprouts'");
    Assert.assertEquals(pinotQuery.getQueryOptionsSize(), 0);
    Assert.assertNull(pinotQuery.getQueryOptions());
-    pinotQuery = CalciteSqlParser.compileToPinotQuery(
-        "select * from vegetables where name <> 'Brussels sprouts' OPTION (delicious=yes)");
+    pinotQuery =
+        compileToPinotQuery("select * from vegetables where name <> 'Brussels sprouts' OPTION (delicious=yes)");
    Assert.assertEquals(pinotQuery.getQueryOptionsSize(), 1);
    Assert.assertTrue(pinotQuery.getQueryOptions().containsKey("delicious"));
    Assert.assertEquals(pinotQuery.getQueryOptions().get("delicious"), "yes");
-    pinotQuery = CalciteSqlParser.compileToPinotQuery(
+    pinotQuery = compileToPinotQuery(
        "select * from vegetables where name <> 'Brussels sprouts' OPTION (delicious=yes, foo=1234, bar='potato')");
    Assert.assertEquals(pinotQuery.getQueryOptionsSize(), 3);
    Assert.assertTrue(pinotQuery.getQueryOptions().containsKey("delicious"));
@@ -813,7 +845,7 @@ public void testQueryOptions() {
    // Assert that wrongly inserted query option will not be parsed.
    try {
-      CalciteSqlParser.compileToPinotQuery(
+      compileToPinotQuery(
          "select * from vegetables where name <> 'Brussels sprouts' OPTION (delicious=yes) option(foo=1234) option"
              + "(bar='potato')");
    } catch (SqlCompilationException e) {
      Assert.assertTrue(e.getCause().getMessage().contains("OPTION"));
    }
    try {
-      CalciteSqlParser.compileToPinotQuery("select * from vegetables where name <> 'Brussels OPTION (delicious=yes)");
+      compileToPinotQuery("select * from vegetables where name <> 'Brussels OPTION (delicious=yes)");
    } catch (SqlCompilationException e) {
      Assert.assertTrue(e.getCause() instanceof ParseException);
    }
@@ -829,18 +861,16 @@
  @Test
  public void testQuerySetOptions() {
-    PinotQuery pinotQuery =
-        CalciteSqlParser.compileToPinotQuery("select * from vegetables where name <> 'Brussels sprouts'");
+    PinotQuery pinotQuery = compileToPinotQuery("select * from vegetables where name <> 'Brussels sprouts'");
    Assert.assertEquals(pinotQuery.getQueryOptionsSize(), 0);
    Assert.assertNull(pinotQuery.getQueryOptions());
-    pinotQuery = CalciteSqlParser.compileToPinotQuery(
-        "SET delicious='yes'; select * from vegetables where name <> 'Brussels sprouts'");
+    pinotQuery = compileToPinotQuery("SET delicious='yes'; select * from vegetables where name <> 'Brussels sprouts'");
    Assert.assertEquals(pinotQuery.getQueryOptionsSize(), 1);
    Assert.assertTrue(pinotQuery.getQueryOptions().containsKey("delicious"));
    Assert.assertEquals(pinotQuery.getQueryOptions().get("delicious"), "yes");
-    pinotQuery = CalciteSqlParser.compileToPinotQuery("SET delicious='yes'; SET foo='1234'; SET bar='''potato''';"
+    pinotQuery = compileToPinotQuery("SET delicious='yes'; SET foo='1234'; SET bar='''potato''';"
        + "select * from vegetables where name <> 'Brussels sprouts' ");
    Assert.assertEquals(pinotQuery.getQueryOptionsSize(), 3);
    Assert.assertTrue(pinotQuery.getQueryOptions().containsKey("delicious"));
@@ -848,7 +878,7 @@ public void testQuerySetOptions() {
    Assert.assertEquals(pinotQuery.getQueryOptions().get("foo"), "1234");
    Assert.assertEquals(pinotQuery.getQueryOptions().get("bar"), "'potato'");
-    pinotQuery = CalciteSqlParser.compileToPinotQuery("SET delicious='yes'; SET foo='1234'; "
+    pinotQuery = compileToPinotQuery("SET delicious='yes'; SET foo='1234'; "
        + "SET bar='''potato'''; select * from vegetables where name <> 'Brussels sprouts' ");
    Assert.assertEquals(pinotQuery.getQueryOptionsSize(), 3);
    Assert.assertTrue(pinotQuery.getQueryOptions().containsKey("delicious"));
@@ -856,7 +886,7 @@ public void testQuerySetOptions() {
    Assert.assertEquals(pinotQuery.getQueryOptions().get("foo"), "1234");
    Assert.assertEquals(pinotQuery.getQueryOptions().get("bar"), "'potato'");
-    pinotQuery = CalciteSqlParser.compileToPinotQuery("SET delicious='yes'; SET foo='1234'; "
+    pinotQuery = compileToPinotQuery("SET delicious='yes'; SET foo='1234'; "
        + "select * from vegetables where name <> 'Brussels sprouts'; SET bar='''potato'''; ");
    Assert.assertEquals(pinotQuery.getQueryOptionsSize(), 3);
    Assert.assertTrue(pinotQuery.getQueryOptions().containsKey("delicious"));
@@ -866,23 +896,21 @@ public void testQuerySetOptions() {
    // test invalid options
    try {
-      CalciteSqlParser.compileToPinotQuery(
-          "select * from vegetables SET delicious='yes', foo='1234' where name <> 'Brussels sprouts'");
+      compileToPinotQuery("select * from vegetables SET delicious='yes', foo='1234' where name <> 'Brussels sprouts'");
      Assert.fail("SQL should not be compiled");
    } catch (SqlCompilationException sce) {
      // expected.
    }
    try {
-      CalciteSqlParser.compileToPinotQuery(
-          "select * from vegetables where name <> 'Brussels sprouts'; SET (delicious='yes', foo=1234)");
+      compileToPinotQuery("select * from vegetables where name <> 'Brussels sprouts'; SET (delicious='yes', foo=1234)");
      Assert.fail("SQL should not be compiled");
    } catch (SqlCompilationException sce) {
      // expected.
    }
    try {
-      CalciteSqlParser.compileToPinotQuery(
+      compileToPinotQuery(
          "select * from vegetables where name <> 'Brussels sprouts'; SET (delicious='yes', foo=1234); select * from "
              + "meat");
      Assert.fail("SQL should not be compiled");
@@ -934,15 +962,15 @@ public void testRemoveComments() {
  }
  private void testRemoveComments(String sqlWithComments, String expectedSqlWithoutComments) {
-    PinotQuery commentedResult = CalciteSqlParser.compileToPinotQuery(sqlWithComments);
-    PinotQuery expectedResult = CalciteSqlParser.compileToPinotQuery(expectedSqlWithoutComments);
+    PinotQuery commentedResult = compileToPinotQuery(sqlWithComments);
+    PinotQuery expectedResult = compileToPinotQuery(expectedSqlWithoutComments);
    Assert.assertEquals(commentedResult, expectedResult);
  }
  @Test
  public void testIdentifierQuoteCharacter() {
-    PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(
-        "select avg(attributes.age) as avg_age from person group by attributes.address_city");
+    PinotQuery pinotQuery =
+        compileToPinotQuery("select avg(attributes.age) as avg_age from person group by attributes.address_city");
    Assert.assertEquals(
        pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(0).getFunctionCall().getOperands().get(0)
            .getIdentifier().getName(), "attributes.age");
@@ -955,8 +983,7 @@ public void testStringLiteral() {
    assertCompilationFails("SELECT 'foo' FROM table");
    // Allow string literal column in aggregation and group-by query
-    PinotQuery pinotQuery =
-        CalciteSqlParser.compileToPinotQuery("SELECT SUM('foo'), MAX(bar) FROM myTable GROUP BY 'foo', bar");
+    PinotQuery pinotQuery = compileToPinotQuery("SELECT SUM('foo'), MAX(bar) FROM myTable GROUP BY 'foo', bar");
    List<Expression> selectFunctionList = pinotQuery.getSelectList();
    Assert.assertEquals(selectFunctionList.size(), 2);
    Assert.assertEquals(selectFunctionList.get(0).getFunctionCall().getOperands().get(0).getLiteral().getStringValue(),
@@ -969,8 +996,7 @@ public void testStringLiteral() {
    Assert.assertEquals(groupbyList.get(1).getIdentifier().getName(), "bar");
    // For UDF, string literal won't be treated as column but as LITERAL
-    pinotQuery = CalciteSqlParser.compileToPinotQuery(
-        "SELECT SUM(ADD(foo, 'bar')) FROM myTable GROUP BY sub(foo, bar), SUB(BAR, FOO)");
+    pinotQuery = compileToPinotQuery("SELECT SUM(ADD(foo, 'bar')) FROM myTable GROUP BY sub(foo, bar), SUB(BAR, FOO)");
    selectFunctionList = pinotQuery.getSelectList();
    Assert.assertEquals(selectFunctionList.size(), 1);
    Assert.assertEquals(selectFunctionList.get(0).getFunctionCall().getOperator(), "sum");
@@ -1000,8 +1026,7 @@ public void testStringLiteral() {
  @Test
  public void testFilterUdf() {
-    PinotQuery pinotQuery =
-        CalciteSqlParser.compileToPinotQuery("select count(*) from baseballStats where DIV(numberOfGames,10) = 100");
+    PinotQuery pinotQuery = compileToPinotQuery("select count(*) from baseballStats where DIV(numberOfGames,10) = 100");
    Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator(), "count");
    Assert.assertEquals(
        pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(0).getIdentifier().getName(), "*");
@@ -1017,7 +1042,7 @@ public void testFilterUdf() {
    Assert.assertEquals(
        pinotQuery.getFilterExpression().getFunctionCall().getOperands().get(1).getLiteral().getLongValue(), 100);
-    pinotQuery = CalciteSqlParser.compileToPinotQuery(
+    pinotQuery = compileToPinotQuery(
        "SELECT count(*) FROM mytable WHERE timeConvert(DaysSinceEpoch,'DAYS','SECONDS') = 1394323200");
    Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator(), "count");
    Assert.assertEquals(
@@ -1042,8 +1067,8 @@ public void testFilterUdf() {
  @Test
  public void testSelectionTransformFunction() {
-    PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(
-        " select mapKey(mapField,k1) from baseballStats where mapKey(mapField,k1) = 'v1'");
+    PinotQuery pinotQuery =
+        compileToPinotQuery(" select mapKey(mapField,k1) from baseballStats where mapKey(mapField,k1) = 'v1'");
    Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator(), "mapkey");
    Assert.assertEquals(
        pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(0).getIdentifier().getName(), "mapField");
@@ -1067,7 +1092,7 @@ public void testSelectionTransformFunction() {
  @Test
  public void testTimeTransformFunction() {
    PinotQuery pinotQuery =
-        CalciteSqlParser.compileToPinotQuery(" select hour(ts), d1, sum(m1) from baseballStats group by hour(ts), d1");
+        compileToPinotQuery(" select hour(ts), d1, sum(m1) from baseballStats group by hour(ts), d1");
    Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator(), "hour");
    Assert.assertEquals(
        pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(0).getIdentifier().getName(), "ts");
@@ -1083,7 +1108,7 @@ public void testTimeTransformFunction() {
  public void testSqlDistinctQueryCompilation() {
    // test single column DISTINCT
    String sql = "SELECT DISTINCT c1 FROM foo";
-    PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(sql);
+    PinotQuery pinotQuery = compileToPinotQuery(sql);
    List<Expression> selectListExpressions = pinotQuery.getSelectList();
    Assert.assertEquals(selectListExpressions.size(), 1);
    Assert.assertEquals(selectListExpressions.get(0).getType(), ExpressionType.FUNCTION);
@@ -1097,7 +1122,7 @@ public void testSqlDistinctQueryCompilation() {
    // test multi column DISTINCT
    sql = "SELECT DISTINCT c1, c2 FROM foo";
-    pinotQuery = CalciteSqlParser.compileToPinotQuery(sql);
+    pinotQuery = compileToPinotQuery(sql);
    selectListExpressions = pinotQuery.getSelectList();
    Assert.assertEquals(selectListExpressions.size(), 1);
    Assert.assertEquals(selectListExpressions.get(0).getType(), ExpressionType.FUNCTION);
@@ -1113,7 +1138,7 @@ public void testSqlDistinctQueryCompilation() {
    // test multi column DISTINCT with filter
    sql = "SELECT DISTINCT c1, c2, c3 FROM foo WHERE c3 > 100";
-    pinotQuery = CalciteSqlParser.compileToPinotQuery(sql);
+    pinotQuery = compileToPinotQuery(sql);
    selectListExpressions = pinotQuery.getSelectList();
    Assert.assertEquals(selectListExpressions.size(), 1);
@@ -1138,7 +1163,7 @@ public void testSqlDistinctQueryCompilation() {
    // not supported by Calcite SQL (this is in compliance with SQL standard)
    try {
      sql = "SELECT sum(c1), DISTINCT c2 FROM foo";
-      CalciteSqlParser.compileToPinotQuery(sql);
+      compileToPinotQuery(sql);
      Assert.fail("Query should have failed compilation");
    } catch (Exception e) {
      Assert.assertTrue(e instanceof SqlCompilationException);
@@ -1147,7 +1172,7 @@ public void testSqlDistinctQueryCompilation() {
    // not supported by Calcite SQL (this is in compliance with SQL standard)
    try {
      sql = "SELECT c1, DISTINCT c2 FROM foo";
-      CalciteSqlParser.compileToPinotQuery(sql);
+      compileToPinotQuery(sql);
      Assert.fail("Query should have failed compilation");
    } catch (Exception e) {
      Assert.assertTrue(e instanceof SqlCompilationException);
@@ -1156,7 +1181,7 @@ public void testSqlDistinctQueryCompilation() {
    // not supported by Calcite SQL (this is in compliance with SQL standard)
    try {
      sql = "SELECT DIV(c1,c2), DISTINCT c3 FROM foo";
-      CalciteSqlParser.compileToPinotQuery(sql);
+      compileToPinotQuery(sql);
      Assert.fail("Query should have failed compilation");
    } catch (Exception e) {
      Assert.assertTrue(e instanceof SqlCompilationException);
@@ -1173,7 +1198,7 @@ public void testSqlDistinctQueryCompilation() {
    // transform
    try {
      sql = "SELECT DISTINCT c1, sum(c2) FROM foo";
-      CalciteSqlParser.compileToPinotQuery(sql);
+      compileToPinotQuery(sql);
      Assert.fail("Query should have failed compilation");
    } catch (Exception e) {
      Assert.assertTrue(e instanceof SqlCompilationException);
@@ -1184,7 +1209,7 @@ public void testSqlDistinctQueryCompilation() {
    // same reason as above
    try {
      sql = "SELECT DISTINCT sum(c1) FROM foo";
-      CalciteSqlParser.compileToPinotQuery(sql);
+      compileToPinotQuery(sql);
      Assert.fail("Query should have failed compilation");
    } catch (Exception e) {
      Assert.assertTrue(e instanceof SqlCompilationException);
@@ -1195,7 +1220,7 @@ public void testSqlDistinctQueryCompilation() {
    // Pinot currently does not support DISTINCT * syntax
    try {
      sql = "SELECT DISTINCT * FROM foo";
-      CalciteSqlParser.compileToPinotQuery(sql);
+      compileToPinotQuery(sql);
      Assert.fail("Query should have failed compilation");
    } catch (Exception e) {
      Assert.assertTrue(e instanceof SqlCompilationException);
@@ -1207,7 +1232,7 @@ public void testSqlDistinctQueryCompilation() {
    // Pinot currently does not support DISTINCT * syntax
    try {
      sql = "SELECT DISTINCT *, C1 FROM foo";
-      CalciteSqlParser.compileToPinotQuery(sql);
+      compileToPinotQuery(sql);
      Assert.fail("Query should have failed compilation");
    } catch (Exception e) {
      Assert.assertTrue(e instanceof SqlCompilationException);
@@ -1219,7 +1244,7 @@ public void testSqlDistinctQueryCompilation() {
    // Pinot currently does not support GROUP BY with DISTINCT
    try {
      sql = "SELECT DISTINCT C1, C2 FROM foo GROUP BY C1";
-      CalciteSqlParser.compileToPinotQuery(sql);
+      compileToPinotQuery(sql);
      Assert.fail("Query should have failed compilation");
    } catch (Exception e) {
      Assert.assertTrue(e instanceof SqlCompilationException);
@@ -1231,7 +1256,7 @@ public void testSqlDistinctQueryCompilation() {
    // test DISTINCT with single transform function
    sql = "SELECT DISTINCT add(col1,col2) FROM foo";
-    pinotQuery = CalciteSqlParser.compileToPinotQuery(sql);
+    pinotQuery = compileToPinotQuery(sql);
    selectListExpressions = pinotQuery.getSelectList();
    Assert.assertEquals(selectListExpressions.size(), 1);
    Assert.assertEquals(selectListExpressions.get(0).getType(), ExpressionType.FUNCTION);
@@ -1250,7 +1275,7 @@ public void testSqlDistinctQueryCompilation() {
    // multi-column distinct with multiple transform functions
    sql = "SELECT DISTINCT add(div(col1, col2), mul(col3, col4)), sub(col3, col4) FROM foo";
-    pinotQuery = CalciteSqlParser.compileToPinotQuery(sql);
+    pinotQuery = compileToPinotQuery(sql);
    selectListExpressions = pinotQuery.getSelectList();
    Assert.assertEquals(selectListExpressions.size(), 1);
    Assert.assertEquals(selectListExpressions.get(0).getType(), ExpressionType.FUNCTION);
@@ -1293,7 +1318,7 @@ public void testSqlDistinctQueryCompilation() {
    // multi-column distinct with multiple transform columns and additional identifiers
    sql = "SELECT DISTINCT add(div(col1, col2), mul(col3, col4)), sub(col3, col4), col5, col6 FROM foo";
-    pinotQuery = CalciteSqlParser.compileToPinotQuery(sql);
+    pinotQuery = compileToPinotQuery(sql);
    selectListExpressions = pinotQuery.getSelectList();
    Assert.assertEquals(selectListExpressions.size(), 1);
    Assert.assertEquals(selectListExpressions.get(0).getType(), ExpressionType.FUNCTION);
@@ -1349,7 +1374,7 @@ public void testQueryValidation() {
    String sql =
        "select group_country, sum(rsvp_count), count(*) from meetupRsvp group by group_city, group_country ORDER BY "
            + "sum(rsvp_count), count(*) limit 50";
-    PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(sql);
+    PinotQuery pinotQuery = compileToPinotQuery(sql);
    Assert.assertEquals(pinotQuery.getGroupByListSize(), 2);
    Assert.assertEquals(pinotQuery.getSelectListSize(), 3);
@@ -1357,7 +1382,7 @@ public void testQueryValidation() {
    try {
      sql =
          "select group_city, group_country, sum(rsvp_count), count(*) from meetupRsvp group by group_country ORDER "
              + "BY sum(rsvp_count), count(*) limit 50";
-      CalciteSqlParser.compileToPinotQuery(sql);
+      compileToPinotQuery(sql);
      Assert.fail("Query should have failed compilation");
    } catch (Exception e) {
      Assert.assertTrue(e instanceof SqlCompilationException);
@@ -1367,7 +1392,7 @@ public void testQueryValidation() {
    // Valid groupBy non-aggregate function should pass.
    sql = "select dateConvert(secondsSinceEpoch), sum(rsvp_count), count(*) from meetupRsvp group by dateConvert"
        + "(secondsSinceEpoch) limit 50";
-    pinotQuery = CalciteSqlParser.compileToPinotQuery(sql);
+    pinotQuery = compileToPinotQuery(sql);
    Assert.assertEquals(pinotQuery.getGroupByListSize(), 1);
    Assert.assertEquals(pinotQuery.getSelectListSize(), 3);
@@ -1375,7 +1400,7 @@ public void testQueryValidation() {
    try {
      sql = "select secondsSinceEpoch, dateConvert(secondsSinceEpoch), sum(rsvp_count), count(*) from meetupRsvp"
          + " group by dateConvert(secondsSinceEpoch) limit 50";
-      CalciteSqlParser.compileToPinotQuery(sql);
+      compileToPinotQuery(sql);
      Assert.fail("Query should have failed compilation");
    } catch (Exception e) {
      Assert.assertTrue(e instanceof SqlCompilationException);
@@ -1386,7 +1411,7 @@ public void testQueryValidation() {
    try {
      sql = "select sum(rsvp_count), count(*) from meetupRsvp group by group_country, sum(rsvp_count), count(*) limit "
          + "50";
-      CalciteSqlParser.compileToPinotQuery(sql);
+      compileToPinotQuery(sql);
      Assert.fail("Query should have failed compilation");
    } catch (Exception e) {
      Assert.assertTrue(e instanceof SqlCompilationException);
@@ -1401,7 +1426,7 @@ public void testAliasQuery() {
    // Valid alias in query.
    sql = "select secondsSinceEpoch, sum(rsvp_count) as sum_rsvp_count, count(*) as cnt from meetupRsvp"
        + " group by secondsSinceEpoch order by cnt, sum_rsvp_count DESC limit 50";
-    pinotQuery = CalciteSqlParser.compileToPinotQuery(sql);
+    pinotQuery = compileToPinotQuery(sql);
    Assert.assertEquals(pinotQuery.getSelectListSize(), 3);
    Assert.assertEquals(pinotQuery.getGroupByListSize(), 1);
    Assert.assertEquals(pinotQuery.getOrderByListSize(), 2);
@@ -1423,7 +1448,7 @@ public void testAliasQuery() {
    // Valid mixed alias expressions in query.
sql = "select secondsSinceEpoch, sum(rsvp_count), count(*) as cnt from meetupRsvp group by secondsSinceEpoch" + " order by cnt, sum(rsvp_count) DESC limit 50"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(sql); + pinotQuery = compileToPinotQuery(sql); Assert.assertEquals(pinotQuery.getSelectListSize(), 3); Assert.assertEquals(pinotQuery.getGroupByListSize(), 1); Assert.assertEquals(pinotQuery.getOrderByListSize(), 2); @@ -1445,7 +1470,7 @@ public void testAliasQuery() { sql = "select secondsSinceEpoch/86400 AS daysSinceEpoch, sum(rsvp_count) as sum_rsvp_count, count(*) as cnt" + " from meetupRsvp where daysSinceEpoch = 18523 group by daysSinceEpoch order by cnt, sum_rsvp_count DESC" + " limit 50"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(sql); + pinotQuery = compileToPinotQuery(sql); Assert.assertEquals(pinotQuery.getSelectListSize(), 3); // Alias should not be applied to filter Assert.assertEquals(pinotQuery.getFilterExpression().getFunctionCall().getOperator(), FilterKind.EQUALS.name()); @@ -1466,7 +1491,7 @@ public void testAliasQuery() { // Invalid groupBy clause shouldn't contain aggregate expression, like sum(rsvp_count), count(*). try { sql = "select sum(rsvp_count), count(*) as cnt from meetupRsvp group by group_country, cnt limit 50"; - CalciteSqlParser.compileToPinotQuery(sql); + compileToPinotQuery(sql); Assert.fail("Query should have failed compilation"); } catch (Exception e) { Assert.assertTrue(e instanceof SqlCompilationException); @@ -1478,7 +1503,7 @@ public void testAliasQuery() { public void testAliasInSelection() { // Alias should not be applied String sql = "SELECT C1 AS ALIAS_C1, C2 AS ALIAS_C2, ALIAS_C1 + ALIAS_C2 FROM Foo"; - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(sql); + PinotQuery pinotQuery = compileToPinotQuery(sql); Assert.assertEquals(pinotQuery.getSelectListSize(), 3); Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator(), "as"); Assert.assertEquals( @@ -1504,7 +1529,7 @@ public void testSameAliasInSelection() { String sql; PinotQuery pinotQuery; sql = "SELECT C1 AS C1, C2 AS C2 FROM Foo"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(sql); + pinotQuery = compileToPinotQuery(sql); Assert.assertEquals(pinotQuery.getSelectListSize(), 2); Assert.assertEquals(pinotQuery.getSelectList().get(0).getIdentifier().getName(), "C1"); Assert.assertEquals(pinotQuery.getSelectList().get(1).getIdentifier().getName(), "C2"); @@ -1514,7 +1539,7 @@ public void testSameAliasInSelection() { public void testAliasInFilter() { // Alias should not be applied String sql = "SELECT C1 AS ALIAS_CI FROM Foo WHERE ALIAS_CI > 10"; - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(sql); + PinotQuery pinotQuery = compileToPinotQuery(sql); Assert.assertEquals( pinotQuery.getFilterExpression().getFunctionCall().getOperands().get(0).getIdentifier().getName(), "ALIAS_CI"); } @@ -1522,7 +1547,7 @@ public void testAliasInFilter() { @Test public void testColumnOverride() { String sql = "SELECT C1 + 1 AS C1, COUNT(*) AS cnt FROM Foo GROUP BY 1"; - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(sql); + PinotQuery pinotQuery = compileToPinotQuery(sql); Assert.assertEquals(pinotQuery.getGroupByList().get(0).getFunctionCall().getOperator(), "plus"); Assert.assertEquals( pinotQuery.getGroupByList().get(0).getFunctionCall().getOperands().get(0).getIdentifier().getName(), "C1"); @@ -1532,7 +1557,7 @@ public void testColumnOverride() { @Test public void testArithmeticOperator() { - PinotQuery pinotQuery = 
CalciteSqlParser.compileToPinotQuery("select a,b+2,c*5,(d+5)*2 from myTable"); + PinotQuery pinotQuery = compileToPinotQuery("select a,b+2,c*5,(d+5)*2 from myTable"); Assert.assertEquals(pinotQuery.getSelectListSize(), 4); Assert.assertEquals(pinotQuery.getSelectList().get(1).getFunctionCall().getOperator(), "plus"); Assert.assertEquals( @@ -1557,7 +1582,7 @@ public void testArithmeticOperator() { Assert.assertEquals( pinotQuery.getSelectList().get(3).getFunctionCall().getOperands().get(1).getLiteral().getLongValue(), 2); - pinotQuery = CalciteSqlParser.compileToPinotQuery("select a % 200 + b * 5 from myTable"); + pinotQuery = compileToPinotQuery("select a % 200 + b * 5 from myTable"); Assert.assertEquals(pinotQuery.getSelectListSize(), 1); Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator(), "plus"); Assert.assertEquals( @@ -1588,7 +1613,7 @@ public void testArithmeticOperator() { public void testReservedKeywords() { // min, max, avg, sum, value, count, groups - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery( + PinotQuery pinotQuery = compileToPinotQuery( "select max(value) as max, min(value) as min, sum(value) as sum, count(*) as count, avg(value) as avg from " + "myTable where groups = 'foo'"); Assert.assertEquals(pinotQuery.getSelectListSize(), 5); @@ -1644,7 +1669,7 @@ public void testReservedKeywords() { pinotQuery.getFilterExpression().getFunctionCall().getOperands().get(1).getLiteral().getStringValue(), "foo"); // language, module, return, position, system - pinotQuery = CalciteSqlParser.compileToPinotQuery( + pinotQuery = compileToPinotQuery( "select * from myTable where (language = 'en' or return > 100) and position < 10 order by module, system desc"); Assert.assertEquals(pinotQuery.getSelectListSize(), 1); Assert.assertEquals(pinotQuery.getFilterExpression().getFunctionCall().getOperator(), "AND"); @@ -1664,7 +1689,7 @@ public void testReservedKeywords() { // table - need to escape try { - CalciteSqlParser.compileToPinotQuery("select count(*) from myTable where table = 'foo'"); + compileToPinotQuery("select count(*) from myTable where table = 'foo'"); Assert.fail("Query should have failed to compile"); } catch (Exception e) { Assert.assertTrue(e instanceof SqlCompilationException); @@ -1672,7 +1697,7 @@ public void testReservedKeywords() { } // date - need to escape try { - CalciteSqlParser.compileToPinotQuery("select count(*) from myTable group by Date"); + compileToPinotQuery("select count(*) from myTable group by Date"); Assert.fail("Query should have failed to compile"); } catch (Exception e) { Assert.assertTrue(e instanceof SqlCompilationException); @@ -1680,7 +1705,7 @@ public void testReservedKeywords() { // timestamp - need to escape try { - CalciteSqlParser.compileToPinotQuery("select count(*) from myTable where timestamp < 1000"); + compileToPinotQuery("select count(*) from myTable where timestamp < 1000"); Assert.fail("Query should have failed to compile"); } catch (Exception e) { Assert.assertTrue(e instanceof SqlCompilationException); @@ -1688,7 +1713,7 @@ public void testReservedKeywords() { // time - need to escape try { - CalciteSqlParser.compileToPinotQuery("select count(*) from myTable where time > 100"); + compileToPinotQuery("select count(*) from myTable where time > 100"); Assert.fail("Query should have failed to compile"); } catch (Exception e) { Assert.assertTrue(e instanceof SqlCompilationException); @@ -1696,14 +1721,14 @@ public void testReservedKeywords() { // group - need to escape try { - 
CalciteSqlParser.compileToPinotQuery("select group from myTable where bar = 'foo'"); + compileToPinotQuery("select group from myTable where bar = 'foo'"); Assert.fail("Query should have failed to compile"); } catch (Exception e) { Assert.assertTrue(e instanceof SqlCompilationException); } // escaping the above works - pinotQuery = CalciteSqlParser.compileToPinotQuery( + pinotQuery = compileToPinotQuery( "select sum(foo) from \"table\" where \"Date\" = 2019 and (\"timestamp\" < 100 or \"time\" > 200) group by " + "\"group\""); Assert.assertEquals(pinotQuery.getSelectListSize(), 1); @@ -1722,23 +1747,23 @@ public void testReservedKeywords() { @Test public void testCastTransformation() { - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery("select CAST(25.65 AS int) from myTable"); + PinotQuery pinotQuery = compileToPinotQuery("select CAST(25.65 AS int) from myTable"); Assert.assertEquals(pinotQuery.getSelectListSize(), 1); Assert.assertEquals(pinotQuery.getSelectList().get(0).getLiteral().getLongValue(), 25); - pinotQuery = CalciteSqlParser.compileToPinotQuery("SELECT CAST('20170825' AS LONG) from myTable"); + pinotQuery = compileToPinotQuery("SELECT CAST('20170825' AS LONG) from myTable"); Assert.assertEquals(pinotQuery.getSelectListSize(), 1); Assert.assertEquals(pinotQuery.getSelectList().get(0).getLiteral().getLongValue(), 20170825); - pinotQuery = CalciteSqlParser.compileToPinotQuery("SELECT CAST(20170825.0 AS Float) from myTable"); + pinotQuery = compileToPinotQuery("SELECT CAST(20170825.0 AS Float) from myTable"); Assert.assertEquals(pinotQuery.getSelectListSize(), 1); Assert.assertEquals((float) pinotQuery.getSelectList().get(0).getLiteral().getDoubleValue(), 20170825.0F); - pinotQuery = CalciteSqlParser.compileToPinotQuery("SELECT CAST(20170825.0 AS dOuble) from myTable"); + pinotQuery = compileToPinotQuery("SELECT CAST(20170825.0 AS dOuble) from myTable"); Assert.assertEquals(pinotQuery.getSelectListSize(), 1); Assert.assertEquals((float) pinotQuery.getSelectList().get(0).getLiteral().getDoubleValue(), 20170825.0F); - pinotQuery = CalciteSqlParser.compileToPinotQuery("SELECT CAST(column1 AS STRING) from myTable"); + pinotQuery = compileToPinotQuery("SELECT CAST(column1 AS STRING) from myTable"); Assert.assertEquals(pinotQuery.getSelectListSize(), 1); Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator(), "cast"); Assert.assertEquals( @@ -1747,7 +1772,7 @@ public void testCastTransformation() { pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(1).getLiteral().getStringValue(), "STRING"); - pinotQuery = CalciteSqlParser.compileToPinotQuery("SELECT CAST(column1 AS varchar) from myTable"); + pinotQuery = compileToPinotQuery("SELECT CAST(column1 AS varchar) from myTable"); Assert.assertEquals(pinotQuery.getSelectListSize(), 1); Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator(), "cast"); Assert.assertEquals( @@ -1756,7 +1781,7 @@ public void testCastTransformation() { pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(1).getLiteral().getStringValue(), "VARCHAR"); - pinotQuery = CalciteSqlParser.compileToPinotQuery( + pinotQuery = compileToPinotQuery( "SELECT SUM(CAST(CAST(ArrTime AS STRING) AS LONG)) FROM mytable WHERE DaysSinceEpoch <> 16312 AND Carrier = " + "'DL'"); Assert.assertEquals(pinotQuery.getSelectListSize(), 1); @@ -1772,21 +1797,21 @@ public void testCastTransformation() { @Test public void testDistinctCountRewrite() { String query = "SELECT count(distinct 
bar) FROM foo"; - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + PinotQuery pinotQuery = compileToPinotQuery(query); Assert.assertEquals(pinotQuery.getSelectList().size(), 1); Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator(), "distinctcount"); Assert.assertEquals( pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(0).getIdentifier().getName(), "bar"); query = "SELECT count(distinct bar) FROM foo GROUP BY city"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); Assert.assertEquals(pinotQuery.getSelectList().size(), 1); Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator(), "distinctcount"); Assert.assertEquals( pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(0).getIdentifier().getName(), "bar"); query = "SELECT count(distinct bar), distinctCount(bar) FROM foo GROUP BY city"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); Assert.assertEquals(pinotQuery.getSelectList().size(), 2); Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator(), "distinctcount"); Assert.assertEquals( @@ -1797,14 +1822,14 @@ public void testDistinctCountRewrite() { pinotQuery.getSelectList().get(1).getFunctionCall().getOperands().get(0).getIdentifier().getName(), "bar"); query = "SELECT count(distinct bar), count(*), sum(a),min(a),max(b) FROM foo GROUP BY city"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); Assert.assertEquals(pinotQuery.getSelectList().size(), 5); Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator(), "distinctcount"); Assert.assertEquals( pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(0).getIdentifier().getName(), "bar"); query = "SELECT count(distinct bar) AS distinct_bar, count(*), sum(a),min(a),max(b) FROM foo GROUP BY city"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); Assert.assertEquals(pinotQuery.getSelectList().size(), 5); Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator(), "as"); Assert.assertEquals( @@ -1821,21 +1846,21 @@ public void testDistinctCountRewrite() { @Test public void testDistinctSumRewrite() { String query = "SELECT sum(distinct bar) FROM foo"; - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + PinotQuery pinotQuery = compileToPinotQuery(query); Assert.assertEquals(pinotQuery.getSelectList().size(), 1); Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator(), "distinctsum"); Assert.assertEquals( pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(0).getIdentifier().getName(), "bar"); query = "SELECT sum(distinct bar) FROM foo GROUP BY city"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); Assert.assertEquals(pinotQuery.getSelectList().size(), 1); Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator(), "distinctsum"); Assert.assertEquals( pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(0).getIdentifier().getName(), "bar"); query = "SELECT sum(distinct bar), distinctSum(bar) FROM foo GROUP BY city"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); 
    Assert.assertEquals(pinotQuery.getSelectList().size(), 2);
    Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator(), "distinctsum");
    Assert.assertEquals(
@@ -1846,14 +1871,14 @@ public void testDistinctSumRewrite() {
        pinotQuery.getSelectList().get(1).getFunctionCall().getOperands().get(0).getIdentifier().getName(), "bar");
    query = "SELECT sum(distinct bar), count(*), sum(a),min(a),max(b) FROM foo GROUP BY city";
-    pinotQuery = CalciteSqlParser.compileToPinotQuery(query);
+    pinotQuery = compileToPinotQuery(query);
    Assert.assertEquals(pinotQuery.getSelectList().size(), 5);
    Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator(), "distinctsum");
    Assert.assertEquals(
        pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(0).getIdentifier().getName(), "bar");
    query = "SELECT sum(distinct bar) AS distinct_bar, count(*), sum(a),min(a),max(b) FROM foo GROUP BY city";
-    pinotQuery = CalciteSqlParser.compileToPinotQuery(query);
+    pinotQuery = compileToPinotQuery(query);
    Assert.assertEquals(pinotQuery.getSelectList().size(), 5);
    Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator(), "as");
    Assert.assertEquals(
@@ -1868,7 +1893,7 @@ public void testDistinctSumRewrite() {
    query =
        "SELECT sum(distinct bar) AS distinct_bar, count(*), sum(a),min(a),max(b) FROM foo GROUP BY city ORDER BY "
            + "distinct_bar";
-    pinotQuery = CalciteSqlParser.compileToPinotQuery(query);
+    pinotQuery = compileToPinotQuery(query);
    Assert.assertEquals(pinotQuery.getSelectList().size(), 5);
    Function selectFunctionCall = pinotQuery.getSelectList().get(0).getFunctionCall();
    Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator(), "as");
    Assert.assertEquals(
@@ -1889,21 +1914,21 @@ public void testDistinctSumRewrite() {
  @Test
  public void testDistinctAvgRewrite() {
    String query = "SELECT avg(distinct bar) FROM foo";
-    PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(query);
+    PinotQuery pinotQuery = compileToPinotQuery(query);
    Assert.assertEquals(pinotQuery.getSelectList().size(), 1);
    Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator(), "distinctavg");
    Assert.assertEquals(
        pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(0).getIdentifier().getName(), "bar");
    query = "SELECT avg(distinct bar) FROM foo GROUP BY city";
-    pinotQuery = CalciteSqlParser.compileToPinotQuery(query);
+    pinotQuery = compileToPinotQuery(query);
    Assert.assertEquals(pinotQuery.getSelectList().size(), 1);
    Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator(), "distinctavg");
    Assert.assertEquals(
        pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(0).getIdentifier().getName(), "bar");
    query = "SELECT avg(distinct bar), distinctAvg(bar) FROM foo GROUP BY city";
-    pinotQuery = CalciteSqlParser.compileToPinotQuery(query);
+    pinotQuery = compileToPinotQuery(query);
    Assert.assertEquals(pinotQuery.getSelectList().size(), 2);
    Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator(), "distinctavg");
    Assert.assertEquals(
@@ -1914,14 +1939,14 @@ public void testDistinctAvgRewrite() {
        pinotQuery.getSelectList().get(1).getFunctionCall().getOperands().get(0).getIdentifier().getName(), "bar");
    query = "SELECT avg(distinct bar), count(*), avg(a),min(a),max(b) FROM foo GROUP BY city";
-    pinotQuery = CalciteSqlParser.compileToPinotQuery(query);
+    pinotQuery = compileToPinotQuery(query);
    Assert.assertEquals(pinotQuery.getSelectList().size(), 5);
    Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator(), "distinctavg");
    Assert.assertEquals(
        pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(0).getIdentifier().getName(), "bar");
    query = "SELECT avg(distinct bar) AS distinct_bar, count(*), avg(a),min(a),max(b) FROM foo GROUP BY city";
-    pinotQuery = CalciteSqlParser.compileToPinotQuery(query);
+    pinotQuery = compileToPinotQuery(query);
    Assert.assertEquals(pinotQuery.getSelectList().size(), 5);
    Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator(), "as");
    Assert.assertEquals(
@@ -1936,7 +1961,7 @@ public void testDistinctAvgRewrite() {
    query = "SELECT avg(distinct bar) AS distinct_bar, count(*), avg(a),min(a),max(b) FROM foo GROUP BY city ORDER BY"
        + " distinct_bar";
-    pinotQuery = CalciteSqlParser.compileToPinotQuery(query);
+    pinotQuery = compileToPinotQuery(query);
    Assert.assertEquals(pinotQuery.getSelectList().size(), 5);
    Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator(), "as");
    Assert.assertEquals(
@@ -1961,7 +1986,7 @@ public void testDistinctAvgRewrite() {
  public void testInvalidDistinctAggregationRewrite() {
    String query = "SELECT max(distinct bar) FROM foo";
    try {
-      PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(query);
+      PinotQuery pinotQuery = compileToPinotQuery(query);
    } catch (Exception e) {
      Assert.assertTrue(e instanceof SqlCompilationException);
      Assert.assertEquals(e.getMessage(), "Function 'max' on DISTINCT is not supported.");
@@ -1971,7 +1996,7 @@ public void testInvalidDistinctAggregationRewrite() {
  @Test
  public void testOrdinalsQueryRewrite() {
    String query = "SELECT foo, bar, count(*) FROM t GROUP BY 1, 2 ORDER BY 1, 2 DESC";
-    PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(query);
+    PinotQuery pinotQuery = compileToPinotQuery(query);
    Assert.assertEquals(pinotQuery.getSelectList().get(0).getIdentifier().getName(), "foo");
    Assert.assertEquals(pinotQuery.getSelectList().get(1).getIdentifier().getName(), "bar");
    Assert.assertEquals(pinotQuery.getGroupByList().get(0).getIdentifier().getName(), "foo");
@@ -1982,7 +2007,7 @@ public void testOrdinalsQueryRewrite() {
        pinotQuery.getOrderByList().get(1).getFunctionCall().getOperands().get(0).getIdentifier().getName(), "bar");
    query = "SELECT foo, bar, count(*) FROM t GROUP BY 2, 1 ORDER BY 2, 1 DESC";
-    pinotQuery = CalciteSqlParser.compileToPinotQuery(query);
+    pinotQuery = compileToPinotQuery(query);
    Assert.assertEquals(pinotQuery.getSelectList().get(0).getIdentifier().getName(), "foo");
    Assert.assertEquals(pinotQuery.getSelectList().get(1).getIdentifier().getName(), "bar");
    Assert.assertEquals(pinotQuery.getGroupByList().get(0).getIdentifier().getName(), "bar");
@@ -1993,7 +2018,7 @@ public void testOrdinalsQueryRewrite() {
        pinotQuery.getOrderByList().get(1).getFunctionCall().getOperands().get(0).getIdentifier().getName(), "foo");
    query = "SELECT foo as f, bar as b, count(*) FROM t GROUP BY 2, 1 ORDER BY 2, 1 DESC";
-    pinotQuery = CalciteSqlParser.compileToPinotQuery(query);
+    pinotQuery = compileToPinotQuery(query);
    Assert.assertEquals(pinotQuery.getGroupByList().get(0).getIdentifier().getName(), "bar");
    Assert.assertEquals(pinotQuery.getGroupByList().get(1).getIdentifier().getName(), "foo");
    Assert.assertEquals(
@@ -2002,7 +2027,7 @@
        pinotQuery.getOrderByList().get(1).getFunctionCall().getOperands().get(0).getIdentifier().getName(), "foo");
    query = "select a, b + 2, array_sum(c) as array_sum_c, count(*) from data group by a, 2, array_sum_c";
-    pinotQuery = CalciteSqlParser.compileToPinotQuery(query);
+    pinotQuery = compileToPinotQuery(query);
    Assert.assertEquals(pinotQuery.getGroupByList().get(0).getIdentifier().getName(), "a");
    Assert.assertEquals(pinotQuery.getGroupByList().get(1).getFunctionCall().getOperator(), "plus");
    Assert.assertEquals(
@@ -2014,9 +2039,9 @@ public void testOrdinalsQueryRewrite() {
        pinotQuery.getGroupByList().get(2).getFunctionCall().getOperands().get(0).getIdentifier().getName(), "c");
    Assert.expectThrows(SqlCompilationException.class,
-        () -> CalciteSqlParser.compileToPinotQuery("SELECT foo, bar, count(*) FROM t GROUP BY 0"));
+        () -> compileToPinotQuery("SELECT foo, bar, count(*) FROM t GROUP BY 0"));
    Assert.expectThrows(SqlCompilationException.class,
-        () -> CalciteSqlParser.compileToPinotQuery("SELECT foo, bar, count(*) FROM t GROUP BY 3"));
+        () -> compileToPinotQuery("SELECT foo, bar, count(*) FROM t GROUP BY 3"));
  }
  @Test
@@ -2024,7 +2049,7 @@ public void testOrdinalsQueryRewriteWithDistinctOrderBy() {
    String query =
        "SELECT baseballStats.playerName AS playerName FROM baseballStats GROUP BY baseballStats.playerName ORDER BY "
            + "1 ASC";
-    PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(query);
+    PinotQuery pinotQuery = compileToPinotQuery(query);
    Assert.assertEquals(
        pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(0).getFunctionCall().getOperands().get(0)
            .getIdentifier().getName(), "baseballStats.playerName");
@@ -2037,17 +2062,17 @@ public void testOrdinalsQueryRewriteWithDistinctOrderBy() {
  @Test
  public void testNoArgFunction() {
    String query = "SELECT noArgFunc() FROM foo ";
-    PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(query);
+    PinotQuery pinotQuery = compileToPinotQuery(query);
    Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator(), "noargfunc");
    query = "SELECT a FROM foo where time_col > noArgFunc()";
-    pinotQuery = CalciteSqlParser.compileToPinotQuery(query);
+    pinotQuery = compileToPinotQuery(query);
    Function greaterThan = pinotQuery.getFilterExpression().getFunctionCall();
    Function minus = greaterThan.getOperands().get(0).getFunctionCall();
    Assert.assertEquals(minus.getOperands().get(1).getFunctionCall().getOperator(), "noargfunc");
    query = "SELECT sum(a), noArgFunc() FROM foo group by noArgFunc()";
-    pinotQuery = CalciteSqlParser.compileToPinotQuery(query);
+    pinotQuery = compileToPinotQuery(query);
    Assert.assertEquals(pinotQuery.getGroupByList().get(0).getFunctionCall().getOperator(), "noargfunc");
  }
@@ -2055,7 +2080,7 @@ public void testCompilationInvokedFunction() {
    String query = "SELECT now() FROM foo";
    long lowerBound = System.currentTimeMillis();
-    PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(query);
+    PinotQuery pinotQuery = compileToPinotQuery(query);
    long nowTs = pinotQuery.getSelectList().get(0).getLiteral().getLongValue();
    long upperBound = System.currentTimeMillis();
    Assert.assertTrue(nowTs >= lowerBound);
@@ -2063,7 +2088,7 @@ public void testCompilationInvokedFunction() {
    query = "SELECT a FROM foo where time_col > now()";
    lowerBound = System.currentTimeMillis();
-    pinotQuery = CalciteSqlParser.compileToPinotQuery(query);
+    pinotQuery = compileToPinotQuery(query);
    Function greaterThan = pinotQuery.getFilterExpression().getFunctionCall();
    nowTs = greaterThan.getOperands().get(1).getLiteral().getLongValue();
    upperBound = System.currentTimeMillis();
@@ -2071,14 +2096,14 @@ public void testCompilationInvokedFunction() {
    Assert.assertTrue(nowTs <= upperBound);
    query = "SELECT a FROM foo where time_col > fromDateTime('2020-01-01 UTC', 'yyyy-MM-dd z')";
-    pinotQuery = CalciteSqlParser.compileToPinotQuery(query);
+    pinotQuery = compileToPinotQuery(query);
    greaterThan = pinotQuery.getFilterExpression().getFunctionCall();
    nowTs = greaterThan.getOperands().get(1).getLiteral().getLongValue();
    Assert.assertEquals(nowTs, 1577836800000L);
    query = "SELECT ago('PT1H') FROM foo";
    lowerBound = System.currentTimeMillis() - ONE_HOUR_IN_MS;
-    pinotQuery = CalciteSqlParser.compileToPinotQuery(query);
+    pinotQuery = compileToPinotQuery(query);
    nowTs = pinotQuery.getSelectList().get(0).getLiteral().getLongValue();
    upperBound = System.currentTimeMillis() - ONE_HOUR_IN_MS;
    Assert.assertTrue(nowTs >= lowerBound);
@@ -2086,7 +2111,7 @@ public void testCompilationInvokedFunction() {
    query = "SELECT a FROM foo where time_col > ago('PT1H')";
    lowerBound = System.currentTimeMillis() - ONE_HOUR_IN_MS;
-    pinotQuery = CalciteSqlParser.compileToPinotQuery(query);
+    pinotQuery = compileToPinotQuery(query);
    greaterThan = pinotQuery.getFilterExpression().getFunctionCall();
    nowTs = greaterThan.getOperands().get(1).getLiteral().getLongValue();
    upperBound = System.currentTimeMillis() - ONE_HOUR_IN_MS;
@@ -2095,7 +2120,7 @@ public void testCompilationInvokedFunction() {
    query = "select encodeUrl('key1=value 1&key2=value@!$2&key3=value%3'), "
        + "decodeUrl('key1%3Dvalue+1%26key2%3Dvalue%40%21%242%26key3%3Dvalue%253') from mytable";
-    pinotQuery = CalciteSqlParser.compileToPinotQuery(query);
+    pinotQuery = compileToPinotQuery(query);
    String encoded = pinotQuery.getSelectList().get(0).getLiteral().getStringValue();
    String decoded = pinotQuery.getSelectList().get(1).getLiteral().getStringValue();
    Assert.assertEquals(encoded, "key1%3Dvalue+1%26key2%3Dvalue%40%21%242%26key3%3Dvalue%253");
@@ -2105,7 +2130,7 @@ public void testCompilationInvokedFunction() {
        + "encodeUrl('key1=val1 key2=45% key3=#47 key4={''key'':[3,5]} + key5=1;2;3;4 key6=(a|b)&c key7= "
        + "key8=5*(6/4) key9=https://pinot@pinot.com key10=CFLAGS=\"-O2 -mcpu=pentiumpro\" key12=$JAVA_HOME'),'') "
        + "from mytable";
-    pinotQuery = CalciteSqlParser.compileToPinotQuery(query);
+    pinotQuery = compileToPinotQuery(query);
    encoded = pinotQuery.getSelectList().get(0).getLiteral().getStringValue();
    Assert.assertEquals(encoded, "https://www.google.com/search?q=key1%3Dval1+key2%3D45%25+key3%3D%2347+"
        + "key4%3D%7B%27key%27%3A%5B3%2C5%5D%7D+%2B+key5%3D1%3B2%3B3%3B4+"
@@ -2116,7 +2141,7 @@ public void testCompilationInvokedFunction() {
        + "key4%3D%7B%27key%27%3A%5B3%2C5%5D%7D+%2B+key5%3D1%3B2%3B3%3B4+key6%3D%28a%7Cb%29%26c+"
        + "key7%3D+key8%3D5*%286%2F4%29+key9%3Dhttps%3A%2F%2Fpinot%40pinot.com+"
        + "key10%3DCFLAGS%3D%22-O2+-mcpu%3Dpentiumpro%22+key12%3D%24JAVA_HOME') from mytable";
-    pinotQuery = CalciteSqlParser.compileToPinotQuery(query);
+    pinotQuery = compileToPinotQuery(query);
    decoded = pinotQuery.getSelectList().get(0).getLiteral().getStringValue();
    Assert.assertEquals(decoded, "https://www.google.com/search?q=key1=val1 key2=45% key3=#47 "
        + "key4={'key':[3,5]} + key5=1;2;3;4 key6=(a|b)&c key7= "
@@ -2124,7 +2149,7 @@ public void testCompilationInvokedFunction() {
    query = "select a from mytable where foo=encodeUrl('key1=value 1&key2=value@!$2&key3=value%3') and"
        + " bar=decodeUrl('key1%3Dvalue+1%26key2%3Dvalue%40%21%242%26key3%3Dvalue%253')";
-    pinotQuery = CalciteSqlParser.compileToPinotQuery(query);
+    pinotQuery = compileToPinotQuery(query);
    Function and = pinotQuery.getFilterExpression().getFunctionCall();
    encoded = and.getOperands().get(0).getFunctionCall().getOperands().get(1).getLiteral().getStringValue();
    decoded = and.getOperands().get(1).getFunctionCall().getOperands().get(1).getLiteral().getStringValue();
@@ -2132,14 +2157,14 @@ public void testCompilationInvokedFunction() {
    Assert.assertEquals(decoded, "key1=value 1&key2=value@!$2&key3=value%3");
    query = "select toBase64(toUtf8('hello!')), fromUtf8(fromBase64('aGVsbG8h')) from mytable";
-    pinotQuery = CalciteSqlParser.compileToPinotQuery(query);
+    pinotQuery = compileToPinotQuery(query);
    String encodedBase64 = pinotQuery.getSelectList().get(0).getLiteral().getStringValue();
    String decodedBase64 = pinotQuery.getSelectList().get(1).getLiteral().getStringValue();
    Assert.assertEquals(encodedBase64, "aGVsbG8h");
    Assert.assertEquals(decodedBase64, "hello!");
    query = "select toBase64(fromBase64('aGVsbG8h')), fromUtf8(fromBase64(toBase64(toUtf8('hello!')))) from mytable";
-    pinotQuery = CalciteSqlParser.compileToPinotQuery(query);
+    pinotQuery = compileToPinotQuery(query);
    encodedBase64 = pinotQuery.getSelectList().get(0).getLiteral().getStringValue();
    decodedBase64 = pinotQuery.getSelectList().get(1).getLiteral().getStringValue();
    Assert.assertEquals(encodedBase64, "aGVsbG8h");
@@ -2147,7 +2172,7 @@ public void testCompilationInvokedFunction() {
    query = "select toBase64(toUtf8(upper('hello!'))), fromUtf8(fromBase64(toBase64(toUtf8(upper('hello!'))))) from "
        + "mytable";
-    pinotQuery = CalciteSqlParser.compileToPinotQuery(query);
+    pinotQuery = compileToPinotQuery(query);
    encodedBase64 = pinotQuery.getSelectList().get(0).getLiteral().getStringValue();
    decodedBase64 = pinotQuery.getSelectList().get(1).getLiteral().getStringValue();
    Assert.assertEquals(encodedBase64, "SEVMTE8h");
@@ -2155,7 +2180,7 @@ public void testCompilationInvokedFunction() {
    query = "select reverse(fromUtf8(fromBase64(toBase64(toUtf8(upper('hello!')))))) from mytable where "
        + "fromUtf8(fromBase64(toBase64(toUtf8(upper('hello!'))))) = bar";
-    pinotQuery = CalciteSqlParser.compileToPinotQuery(query);
+    pinotQuery = compileToPinotQuery(query);
    String arg1 = pinotQuery.getSelectList().get(0).getLiteral().getStringValue();
    String leftOp =
        pinotQuery.getFilterExpression().getFunctionCall().getOperands().get(1).getLiteral().getStringValue();
@@ -2163,7 +2188,7 @@ public void testCompilationInvokedFunction() {
    Assert.assertEquals(leftOp, "HELLO!");
    query = "select a from mytable where foo = toBase64(toUtf8('hello!')) and bar = fromUtf8(fromBase64('aGVsbG8h'))";
-    pinotQuery = CalciteSqlParser.compileToPinotQuery(query);
+    pinotQuery = compileToPinotQuery(query);
    and = pinotQuery.getFilterExpression().getFunctionCall();
    encoded = and.getOperands().get(0).getFunctionCall().getOperands().get(1).getLiteral().getStringValue();
    decoded = and.getOperands().get(1).getFunctionCall().getOperands().get(1).getLiteral().getStringValue();
@@ -2173,7 +2198,7 @@ public void testCompilationInvokedFunction() {
    query = "select fromBase64('hello') from mytable";
    Exception expectedError = null;
    try {
-      CalciteSqlParser.compileToPinotQuery(query);
+      compileToPinotQuery(query);
    } catch (Exception e) {
      expectedError = e;
    }
@@ -2183,7 +2208,7 @@ public void testCompilationInvokedFunction() {
    query = "select toBase64('hello!') from mytable";
    expectedError = null;
    try {
-      CalciteSqlParser.compileToPinotQuery(query);
+      compileToPinotQuery(query);
    } catch (Exception e) {
      expectedError = e;
    }
@@ -2191,108 +2216,108 @@ public void testCompilationInvokedFunction() {
    Assert.assertTrue(expectedError instanceof SqlCompilationException);
    query = "select isSubnetOf('192.168.0.1/24', '192.168.0.225') from mytable";
-    pinotQuery = CalciteSqlParser.compileToPinotQuery(query);
+    pinotQuery = compileToPinotQuery(query);
    boolean result = pinotQuery.getSelectList().get(0).getLiteral().getBoolValue();
    Assert.assertTrue(result);
    query = "select isSubnetOf('192.168.0.1/24', '192.168.0.1') from mytable";
-    pinotQuery = CalciteSqlParser.compileToPinotQuery(query);
+    pinotQuery = compileToPinotQuery(query);
    result = pinotQuery.getSelectList().get(0).getLiteral().getBoolValue();
    Assert.assertTrue(result);
    query = "select isSubnetOf('130.191.23.32/27', '130.191.23.40') from mytable";
-    pinotQuery = CalciteSqlParser.compileToPinotQuery(query);
+    pinotQuery = compileToPinotQuery(query);
    result = pinotQuery.getSelectList().get(0).getLiteral().getBoolValue();
    Assert.assertTrue(result);
    query = "select isSubnetOf('130.191.23.32/26', '130.192.23.33') from mytable";
-    pinotQuery = CalciteSqlParser.compileToPinotQuery(query);
+    pinotQuery = compileToPinotQuery(query);
    result = pinotQuery.getSelectList().get(0).getLiteral().getBoolValue();
    Assert.assertFalse(result);
    query = "select isSubnetOf('153.87.199.160/28', '153.87.199.166') from mytable";
-    pinotQuery = CalciteSqlParser.compileToPinotQuery(query);
+    pinotQuery = compileToPinotQuery(query);
    result = pinotQuery.getSelectList().get(0).getLiteral().getBoolValue();
    Assert.assertTrue(result);
    query = "select isSubnetOf('2001:4800:7825:103::/64', '2001:4800:7825:103::2050') from mytable";
-    pinotQuery = CalciteSqlParser.compileToPinotQuery(query);
+    pinotQuery = compileToPinotQuery(query);
    result = pinotQuery.getSelectList().get(0).getLiteral().getBoolValue();
    Assert.assertTrue(result);
    query = "select isSubnetOf('130.191.23.32/26', '130.191.23.33') from mytable";
-    pinotQuery = CalciteSqlParser.compileToPinotQuery(query);
+    pinotQuery = compileToPinotQuery(query);
    result = pinotQuery.getSelectList().get(0).getLiteral().getBoolValue();
    Assert.assertTrue(result);
    query = "select isSubnetOf('2001:4801:7825:103:be76:4efe::/96', '2001:4801:7825:103:be76:4efe::e15') from mytable";
-    pinotQuery = CalciteSqlParser.compileToPinotQuery(query);
+    pinotQuery = compileToPinotQuery(query);
    result = pinotQuery.getSelectList().get(0).getLiteral().getBoolValue();
    Assert.assertTrue(result);
    query = "select isSubnetOf('122.152.15.0/26', '122.152.15.28') from mytable";
-    pinotQuery = CalciteSqlParser.compileToPinotQuery(query);
+    pinotQuery = compileToPinotQuery(query);
    result = pinotQuery.getSelectList().get(0).getLiteral().getBoolValue();
    Assert.assertTrue(result);
    query = "select isSubnetOf('96.141.228.254/26', '96.141.228.254') from mytable";
-    pinotQuery = CalciteSqlParser.compileToPinotQuery(query);
+    pinotQuery = compileToPinotQuery(query);
    result = pinotQuery.getSelectList().get(0).getLiteral().getBoolValue();
    Assert.assertTrue(result);
    query = "select isSubnetOf('3.175.47.128/26', '3.175.48.178') from mytable";
-    pinotQuery = CalciteSqlParser.compileToPinotQuery(query);
+    pinotQuery = compileToPinotQuery(query);
    result = pinotQuery.getSelectList().get(0).getLiteral().getBoolValue();
    Assert.assertFalse(result);
    query = "select isSubnetOf('192.168.0.1/24', '192.168.0.0') from mytable";
-    pinotQuery =
CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); result = pinotQuery.getSelectList().get(0).getLiteral().getBoolValue(); Assert.assertTrue(result); query = "select isSubnetOf('10.3.128.1/22', '10.3.128.123') from mytable"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); result = pinotQuery.getSelectList().get(0).getLiteral().getBoolValue(); Assert.assertTrue(result); query = "select isSubnetOf('10.3.128.1/22', '10.3.131.255') from mytable"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); result = pinotQuery.getSelectList().get(0).getLiteral().getBoolValue(); Assert.assertTrue(result); query = "select isSubnetOf('10.3.128.1/22', '1.2.3.1') from mytable"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); result = pinotQuery.getSelectList().get(0).getLiteral().getBoolValue(); Assert.assertFalse(result); query = "select isSubnetOf('1.2.3.128/1', '127.255.255.255') from mytable"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); result = pinotQuery.getSelectList().get(0).getLiteral().getBoolValue(); Assert.assertTrue(result); query = "select isSubnetOf('1.2.3.128/0', '192.168.5.1') from mytable"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); result = pinotQuery.getSelectList().get(0).getLiteral().getBoolValue(); Assert.assertTrue(result); query = "select isSubnetOf('2001:db8:85a3::8a2e:370:7334/62', '2001:0db8:85a3:0003:ffff:ffff:ffff:ffff') from " + "mytable"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); result = pinotQuery.getSelectList().get(0).getLiteral().getBoolValue(); Assert.assertTrue(result); query = "select isSubnetOf('123:db8:85a3::8a2e:370:7334/72', '124:db8:85a3::8a2e:370:7334') from mytable"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); result = pinotQuery.getSelectList().get(0).getLiteral().getBoolValue(); Assert.assertFalse(result); query = "select isSubnetOf('7890:db8:113::8a2e:370:7334/127', '7890:db8:113::8a2e:370:7336') from mytable"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); result = pinotQuery.getSelectList().get(0).getLiteral().getBoolValue(); Assert.assertFalse(result); query = "select isSubnetOf('7890:db8:113::8a2e:370:7334/127', '7890:db8:113::8a2e:370:7335') from mytable"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); result = pinotQuery.getSelectList().get(0).getLiteral().getBoolValue(); Assert.assertTrue(result); } @@ -2300,7 +2325,7 @@ public void testCompilationInvokedFunction() { @Test public void testCompilationInvokedNestedFunctions() { String query = "SELECT a FROM foo where time_col > toDateTime(now(), 'yyyy-MM-dd z')"; - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + PinotQuery pinotQuery = compileToPinotQuery(query); Function greaterThan = pinotQuery.getFilterExpression().getFunctionCall(); String today = greaterThan.getOperands().get(1).getLiteral().getStringValue(); String expectedTodayStr = @@ -2312,7 +2337,7 @@ public void testCompilationInvokedNestedFunctions() { public void testCompileTimeExpression() { final CompileTimeFunctionsInvoker compileTimeFunctionsInvoker = new 
CompileTimeFunctionsInvoker(); long lowerBound = System.currentTimeMillis(); - Expression expression = CalciteSqlParser.compileToExpression("now()"); + Expression expression = compileToExpression("now()"); Assert.assertNotNull(expression.getFunctionCall()); PinotQuery pinotQuery = new PinotQuery(); pinotQuery.setFilterExpression(expression); @@ -2324,7 +2349,7 @@ public void testCompileTimeExpression() { Assert.assertTrue(result >= lowerBound && result <= upperBound); lowerBound = TimeUnit.MILLISECONDS.toHours(System.currentTimeMillis()) + 1; - expression = CalciteSqlParser.compileToExpression("to_epoch_hours(now() + 3600000)"); + expression = compileToExpression("to_epoch_hours(now() + 3600000)"); Assert.assertNotNull(expression.getFunctionCall()); pinotQuery.setFilterExpression(expression); pinotQuery = compileTimeFunctionsInvoker.rewrite(pinotQuery); @@ -2335,7 +2360,7 @@ public void testCompileTimeExpression() { Assert.assertTrue(result >= lowerBound && result <= upperBound); lowerBound = System.currentTimeMillis() - ONE_HOUR_IN_MS; - expression = CalciteSqlParser.compileToExpression("ago('PT1H')"); + expression = compileToExpression("ago('PT1H')"); Assert.assertNotNull(expression.getFunctionCall()); pinotQuery.setFilterExpression(expression); pinotQuery = compileTimeFunctionsInvoker.rewrite(pinotQuery); @@ -2346,7 +2371,7 @@ public void testCompileTimeExpression() { Assert.assertTrue(result >= lowerBound && result <= upperBound); lowerBound = System.currentTimeMillis() + ONE_HOUR_IN_MS; - expression = CalciteSqlParser.compileToExpression("ago('PT-1H')"); + expression = compileToExpression("ago('PT-1H')"); Assert.assertNotNull(expression.getFunctionCall()); pinotQuery.setFilterExpression(expression); pinotQuery = compileTimeFunctionsInvoker.rewrite(pinotQuery); @@ -2356,7 +2381,7 @@ public void testCompileTimeExpression() { result = expression.getLiteral().getLongValue(); Assert.assertTrue(result >= lowerBound && result <= upperBound); - expression = CalciteSqlParser.compileToExpression("toDateTime(millisSinceEpoch)"); + expression = compileToExpression("toDateTime(millisSinceEpoch)"); Assert.assertNotNull(expression.getFunctionCall()); pinotQuery.setFilterExpression(expression); pinotQuery = compileTimeFunctionsInvoker.rewrite(pinotQuery); @@ -2366,7 +2391,7 @@ public void testCompileTimeExpression() { Assert.assertEquals(expression.getFunctionCall().getOperands().get(0).getIdentifier().getName(), "millisSinceEpoch"); - expression = CalciteSqlParser.compileToExpression("encodeUrl('key1=value 1&key2=value@!$2&key3=value%3')"); + expression = compileToExpression("encodeUrl('key1=value 1&key2=value@!$2&key3=value%3')"); Assert.assertNotNull(expression.getFunctionCall()); pinotQuery.setFilterExpression(expression); pinotQuery = compileTimeFunctionsInvoker.rewrite(pinotQuery); @@ -2375,8 +2400,7 @@ public void testCompileTimeExpression() { Assert.assertEquals(expression.getLiteral().getFieldValue(), "key1%3Dvalue+1%26key2%3Dvalue%40%21%242%26key3%3Dvalue%253"); - expression = - CalciteSqlParser.compileToExpression("decodeUrl('key1%3Dvalue+1%26key2%3Dvalue%40%21%242%26key3%3Dvalue%253')"); + expression = compileToExpression("decodeUrl('key1%3Dvalue+1%26key2%3Dvalue%40%21%242%26key3%3Dvalue%253')"); Assert.assertNotNull(expression.getFunctionCall()); pinotQuery.setFilterExpression(expression); pinotQuery = compileTimeFunctionsInvoker.rewrite(pinotQuery); @@ -2384,7 +2408,7 @@ public void testCompileTimeExpression() { Assert.assertNotNull(expression.getLiteral()); 
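// The CompileTimeFunctionsInvoker folds any function call whose operands are all literals into a single
// literal during the rewrite, which is why the filter expression above now carries a literal instead of a
// function call. A minimal sketch of that behavior, reusing this test's helpers (the decoded value "a b"
// is an illustrative assumption, not an assertion taken from this patch):
//   Expression folded = compileToExpression("decodeUrl('a%20b')");
//   PinotQuery sketchQuery = new PinotQuery();
//   sketchQuery.setFilterExpression(folded);
//   sketchQuery = compileTimeFunctionsInvoker.rewrite(sketchQuery);
//   Assert.assertEquals(sketchQuery.getFilterExpression().getLiteral().getFieldValue(), "a b");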
Assert.assertEquals(expression.getLiteral().getFieldValue(), "key1=value 1&key2=value@!$2&key3=value%3"); - expression = CalciteSqlParser.compileToExpression("reverse(playerName)"); + expression = compileToExpression("reverse(playerName)"); Assert.assertNotNull(expression.getFunctionCall()); pinotQuery.setFilterExpression(expression); pinotQuery = compileTimeFunctionsInvoker.rewrite(pinotQuery); @@ -2393,7 +2417,7 @@ public void testCompileTimeExpression() { Assert.assertEquals(expression.getFunctionCall().getOperator(), "reverse"); Assert.assertEquals(expression.getFunctionCall().getOperands().get(0).getIdentifier().getName(), "playerName"); - expression = CalciteSqlParser.compileToExpression("reverse('playerName')"); + expression = compileToExpression("reverse('playerName')"); Assert.assertNotNull(expression.getFunctionCall()); pinotQuery.setFilterExpression(expression); pinotQuery = compileTimeFunctionsInvoker.rewrite(pinotQuery); @@ -2401,7 +2425,7 @@ public void testCompileTimeExpression() { Assert.assertNotNull(expression.getLiteral()); Assert.assertEquals(expression.getLiteral().getFieldValue(), "emaNreyalp"); - expression = CalciteSqlParser.compileToExpression("reverse(123)"); + expression = compileToExpression("reverse(123)"); Assert.assertNotNull(expression.getFunctionCall()); pinotQuery.setFilterExpression(expression); pinotQuery = compileTimeFunctionsInvoker.rewrite(pinotQuery); @@ -2409,7 +2433,7 @@ public void testCompileTimeExpression() { Assert.assertNotNull(expression.getLiteral()); Assert.assertEquals(expression.getLiteral().getFieldValue(), "321"); - expression = CalciteSqlParser.compileToExpression("count(*)"); + expression = compileToExpression("count(*)"); Assert.assertNotNull(expression.getFunctionCall()); pinotQuery.setFilterExpression(expression); pinotQuery = compileTimeFunctionsInvoker.rewrite(pinotQuery); @@ -2418,7 +2442,7 @@ public void testCompileTimeExpression() { Assert.assertEquals(expression.getFunctionCall().getOperator(), "count"); Assert.assertEquals(expression.getFunctionCall().getOperands().get(0).getIdentifier().getName(), "*"); - expression = CalciteSqlParser.compileToExpression("toBase64(toUtf8('hello!'))"); + expression = compileToExpression("toBase64(toUtf8('hello!'))"); Assert.assertNotNull(expression.getFunctionCall()); pinotQuery.setFilterExpression(expression); pinotQuery = compileTimeFunctionsInvoker.rewrite(pinotQuery); @@ -2426,7 +2450,7 @@ public void testCompileTimeExpression() { Assert.assertNotNull(expression.getLiteral()); Assert.assertEquals(expression.getLiteral().getFieldValue(), "aGVsbG8h"); - expression = CalciteSqlParser.compileToExpression("fromUtf8(fromBase64('aGVsbG8h'))"); + expression = compileToExpression("fromUtf8(fromBase64('aGVsbG8h'))"); Assert.assertNotNull(expression.getFunctionCall()); pinotQuery.setFilterExpression(expression); pinotQuery = compileTimeFunctionsInvoker.rewrite(pinotQuery); @@ -2434,7 +2458,7 @@ public void testCompileTimeExpression() { Assert.assertNotNull(expression.getLiteral()); Assert.assertEquals(expression.getLiteral().getFieldValue(), "hello!"); - expression = CalciteSqlParser.compileToExpression("fromBase64(foo)"); + expression = compileToExpression("fromBase64(foo)"); Assert.assertNotNull(expression.getFunctionCall()); pinotQuery.setFilterExpression(expression); pinotQuery = compileTimeFunctionsInvoker.rewrite(pinotQuery); @@ -2443,7 +2467,7 @@ public void testCompileTimeExpression() { Assert.assertEquals(expression.getFunctionCall().getOperator(), "frombase64"); 
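// In contrast to the literal-only calls above, fromBase64(foo) takes an identifier operand, so it cannot
// be evaluated at compile time: the function call survives the rewrite and only its operator is
// canonicalized to lower case, as the next assertion on the "foo" operand confirms.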
Assert.assertEquals(expression.getFunctionCall().getOperands().get(0).getIdentifier().getName(), "foo"); - expression = CalciteSqlParser.compileToExpression("toBase64(foo)"); + expression = compileToExpression("toBase64(foo)"); Assert.assertNotNull(expression.getFunctionCall()); pinotQuery.setFilterExpression(expression); pinotQuery = compileTimeFunctionsInvoker.rewrite(pinotQuery); @@ -2455,56 +2479,54 @@ public void testCompileTimeExpression() { @Test public void testLiteralExpressionCheck() { - Assert.assertTrue(CalciteSqlParser.isLiteralOnlyExpression(CalciteSqlParser.compileToExpression("1123"))); - Assert.assertTrue(CalciteSqlParser.isLiteralOnlyExpression(CalciteSqlParser.compileToExpression("'ab'"))); - Assert.assertTrue( - CalciteSqlParser.isLiteralOnlyExpression(CalciteSqlParser.compileToExpression("AS('ab', randomStr)"))); - Assert.assertTrue( - CalciteSqlParser.isLiteralOnlyExpression(CalciteSqlParser.compileToExpression("AS(123, randomTime)"))); - Assert.assertFalse(CalciteSqlParser.isLiteralOnlyExpression(CalciteSqlParser.compileToExpression("sum(abc)"))); - Assert.assertFalse(CalciteSqlParser.isLiteralOnlyExpression(CalciteSqlParser.compileToExpression("count(*)"))); - Assert.assertFalse(CalciteSqlParser.isLiteralOnlyExpression(CalciteSqlParser.compileToExpression("a+B"))); - Assert.assertFalse(CalciteSqlParser.isLiteralOnlyExpression(CalciteSqlParser.compileToExpression("c+1"))); + Assert.assertTrue(CalciteSqlParser.isLiteralOnlyExpression(compileToExpression("1123"))); + Assert.assertTrue(CalciteSqlParser.isLiteralOnlyExpression(compileToExpression("'ab'"))); + Assert.assertTrue(CalciteSqlParser.isLiteralOnlyExpression(compileToExpression("AS('ab', randomStr)"))); + Assert.assertTrue(CalciteSqlParser.isLiteralOnlyExpression(compileToExpression("AS(123, randomTime)"))); + Assert.assertFalse(CalciteSqlParser.isLiteralOnlyExpression(compileToExpression("sum(abc)"))); + Assert.assertFalse(CalciteSqlParser.isLiteralOnlyExpression(compileToExpression("count(*)"))); + Assert.assertFalse(CalciteSqlParser.isLiteralOnlyExpression(compileToExpression("a+B"))); + Assert.assertFalse(CalciteSqlParser.isLiteralOnlyExpression(compileToExpression("c+1"))); } @Test public void testCaseInsensitiveFilter() { String query = "SELECT count(*) FROM foo where text_match(col, 'expr')"; - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + PinotQuery pinotQuery = compileToPinotQuery(query); Assert.assertEquals(pinotQuery.getFilterExpression().getFunctionCall().getOperator(), "TEXT_MATCH"); query = "SELECT count(*) FROM foo where TEXT_MATCH(col, 'expr')"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); Assert.assertEquals(pinotQuery.getFilterExpression().getFunctionCall().getOperator(), "TEXT_MATCH"); query = "SELECT count(*) FROM foo where regexp_like(col, 'expr')"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); Assert.assertEquals(pinotQuery.getFilterExpression().getFunctionCall().getOperator(), "REGEXP_LIKE"); query = "SELECT count(*) FROM foo where REGEXP_LIKE(col, 'expr')"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); Assert.assertEquals(pinotQuery.getFilterExpression().getFunctionCall().getOperator(), "REGEXP_LIKE"); query = "SELECT count(*) FROM foo where col is not null"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); 
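// IS NOT NULL compiles to the canonical IS_NOT_NULL filter function with the column as its only operand,
// regardless of the casing used in the query text, as the following assertions verify.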
Assert.assertEquals(pinotQuery.getFilterExpression().getFunctionCall().getOperator(), "IS_NOT_NULL"); Assert.assertEquals( pinotQuery.getFilterExpression().getFunctionCall().getOperands().get(0).getIdentifier().getName(), "col"); query = "SELECT count(*) FROM foo where col IS NOT NULL"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); Assert.assertEquals(pinotQuery.getFilterExpression().getFunctionCall().getOperator(), "IS_NOT_NULL"); Assert.assertEquals( pinotQuery.getFilterExpression().getFunctionCall().getOperands().get(0).getIdentifier().getName(), "col"); query = "SELECT count(*) FROM foo where col is null"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); Assert.assertEquals(pinotQuery.getFilterExpression().getFunctionCall().getOperator(), "IS_NULL"); Assert.assertEquals( pinotQuery.getFilterExpression().getFunctionCall().getOperands().get(0).getIdentifier().getName(), "col"); query = "SELECT count(*) FROM foo where col IS NULL"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); Assert.assertEquals(pinotQuery.getFilterExpression().getFunctionCall().getOperator(), "IS_NULL"); Assert.assertEquals( pinotQuery.getFilterExpression().getFunctionCall().getOperands().get(0).getIdentifier().getName(), "col"); @@ -2513,14 +2535,14 @@ public void testCaseInsensitiveFilter() { @Test public void testNonAggregationGroupByQuery() { String query = "SELECT col1 FROM foo GROUP BY col1"; - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + PinotQuery pinotQuery = compileToPinotQuery(query); Assert.assertEquals(pinotQuery.getSelectListSize(), 1); Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator().toUpperCase(), "DISTINCT"); Assert.assertEquals( pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(0).getIdentifier().getName(), "col1"); query = "SELECT col1, col2 FROM foo GROUP BY col1, col2"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); Assert.assertEquals(pinotQuery.getSelectListSize(), 1); Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator().toUpperCase(), "DISTINCT"); Assert.assertEquals( @@ -2529,7 +2551,7 @@ public void testNonAggregationGroupByQuery() { pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(1).getIdentifier().getName(), "col2"); query = "SELECT col1+col2*5 FROM foo GROUP BY col1+col2*5"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); Assert.assertEquals(pinotQuery.getSelectListSize(), 1); Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator().toUpperCase(), "DISTINCT"); Assert.assertEquals( @@ -2549,7 +2571,7 @@ public void testNonAggregationGroupByQuery() { .getFunctionCall().getOperands().get(1).getLiteral().getLongValue(), 5L); query = "SELECT col1+col2*5 AS col3 FROM foo GROUP BY col3"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); Assert.assertEquals(pinotQuery.getSelectListSize(), 1); Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator().toUpperCase(), "DISTINCT"); Assert.assertEquals( @@ -2579,20 +2601,20 @@ public void testNonAggregationGroupByQuery() { @Test public void testInvalidNonAggregationGroupBy() { Assert.assertThrows(SqlCompilationException.class, - () -> 
CalciteSqlParser.compileToPinotQuery("SELECT col1 FROM foo GROUP BY col1, col2")); + () -> compileToPinotQuery("SELECT col1 FROM foo GROUP BY col1, col2")); Assert.assertThrows(SqlCompilationException.class, - () -> CalciteSqlParser.compileToPinotQuery("SELECT col1, col2 FROM foo GROUP BY col1")); + () -> compileToPinotQuery("SELECT col1, col2 FROM foo GROUP BY col1")); Assert.assertThrows(SqlCompilationException.class, - () -> CalciteSqlParser.compileToPinotQuery("SELECT col1 + col2 FROM foo GROUP BY col1")); + () -> compileToPinotQuery("SELECT col1 + col2 FROM foo GROUP BY col1")); Assert.assertThrows(SqlCompilationException.class, - () -> CalciteSqlParser.compileToPinotQuery("SELECT col1+col2 FROM foo GROUP BY col1,col2")); + () -> compileToPinotQuery("SELECT col1+col2 FROM foo GROUP BY col1,col2")); } @Test public void testFlattenAndOr() { { String query = "SELECT * FROM foo WHERE col1 > 0 AND (col2 > 0 AND col3 > 0) AND col4 > 0"; - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + PinotQuery pinotQuery = compileToPinotQuery(query); Function functionCall = pinotQuery.getFilterExpression().getFunctionCall(); Assert.assertEquals(functionCall.getOperator(), FilterKind.AND.name()); List operands = functionCall.getOperands(); @@ -2603,7 +2625,7 @@ public void testFlattenAndOr() { } { String query = "SELECT * FROM foo WHERE col1 > 0 AND (col2 AND col3 > 0) AND startsWith(col4, 'myStr')"; - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + PinotQuery pinotQuery = compileToPinotQuery(query); Function functionCall = pinotQuery.getFilterExpression().getFunctionCall(); Assert.assertEquals(functionCall.getOperator(), FilterKind.AND.name()); List operands = functionCall.getOperands(); @@ -2621,7 +2643,7 @@ public void testFlattenAndOr() { } { String query = "SELECT * FROM foo WHERE col1 > 0 AND (col2 AND col3 > 0) AND col4 = true"; - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + PinotQuery pinotQuery = compileToPinotQuery(query); Function functionCall = pinotQuery.getFilterExpression().getFunctionCall(); Assert.assertEquals(functionCall.getOperator(), FilterKind.AND.name()); List operands = functionCall.getOperands(); @@ -2639,7 +2661,7 @@ public void testFlattenAndOr() { } { String query = "SELECT * FROM foo WHERE col1 <= 0 OR col2 <= 0 OR (col3 <= 0 OR col4 <= 0)"; - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + PinotQuery pinotQuery = compileToPinotQuery(query); Function functionCall = pinotQuery.getFilterExpression().getFunctionCall(); Assert.assertEquals(functionCall.getOperator(), FilterKind.OR.name()); List operands = functionCall.getOperands(); @@ -2650,7 +2672,7 @@ public void testFlattenAndOr() { } { String query = "SELECT * FROM foo WHERE col1 <= 0 OR col2 OR (col3 <= 0 OR col4)"; - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + PinotQuery pinotQuery = compileToPinotQuery(query); Function functionCall = pinotQuery.getFilterExpression().getFunctionCall(); Assert.assertEquals(functionCall.getOperator(), FilterKind.OR.name()); List operands = functionCall.getOperands(); @@ -2666,7 +2688,7 @@ public void testFlattenAndOr() { { String query = "SELECT * FROM foo WHERE col1 > 0 AND ((col2 > 0 AND col3 > 0) AND (col1 <= 0 OR (col2 <= 0 OR " + "(col3 <= 0 OR col4 <= 0) OR (col3 > 0 AND col4 > 0))))"; - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + PinotQuery pinotQuery = compileToPinotQuery(query); Function functionCall = 
pinotQuery.getFilterExpression().getFunctionCall(); Assert.assertEquals(functionCall.getOperator(), FilterKind.AND.name()); List<Expression> operands = functionCall.getOperands(); @@ -2695,7 +2717,7 @@ public void testFlattenAndOr() { public void testHavingClause() { { String query = "SELECT SUM(col1), col2 FROM foo WHERE true GROUP BY col2 HAVING SUM(col1) > 10"; - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + PinotQuery pinotQuery = compileToPinotQuery(query); Function functionCall = pinotQuery.getHavingExpression().getFunctionCall(); Assert.assertEquals(functionCall.getOperator(), FilterKind.GREATER_THAN.name()); List<Expression> operands = functionCall.getOperands(); @@ -2706,7 +2728,7 @@ { String query = "SELECT SUM(col1), col2 FROM foo WHERE true GROUP BY col2 " + "HAVING SUM(col1) > 10 AND SUM(col3) > 5 AND SUM(col4) > 15"; - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + PinotQuery pinotQuery = compileToPinotQuery(query); Function functionCall = pinotQuery.getHavingExpression().getFunctionCall(); Assert.assertEquals(functionCall.getOperator(), FilterKind.AND.name()); List<Expression> operands = functionCall.getOperands(); @@ -2721,7 +2743,7 @@ public void testHavingClause() { public void testPostAggregation() { { String query = "SELECT SUM(col1) * SUM(col2) FROM foo"; - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + PinotQuery pinotQuery = compileToPinotQuery(query); List<Expression> selectList = pinotQuery.getSelectList(); Assert.assertEquals(selectList.size(), 1); Function functionCall = selectList.get(0).getFunctionCall(); @@ -2734,7 +2756,7 @@ public void testPostAggregation() { } { String query = "SELECT SUM(col1), col2 FROM foo GROUP BY col2 ORDER BY MAX(col1) - MAX(col3)"; - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + PinotQuery pinotQuery = compileToPinotQuery(query); List<Expression> orderByList = pinotQuery.getOrderByList(); Assert.assertEquals(orderByList.size(), 1); Function functionCall = orderByList.get(0).getFunctionCall(); @@ -2752,7 +2774,7 @@ public void testPostAggregation() { { // Having will be rewritten to (SUM(col1) + SUM(col3)) - MAX(col4) > 0 String query = "SELECT SUM(col1), col2 FROM foo GROUP BY col2 HAVING SUM(col1) + SUM(col3) > MAX(col4)"; - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + PinotQuery pinotQuery = compileToPinotQuery(query); Function functionCall = pinotQuery.getHavingExpression().getFunctionCall(); Assert.assertEquals(functionCall.getOperator(), FilterKind.GREATER_THAN.name()); List<Expression> operands = functionCall.getOperands(); @@ -2777,7 +2799,7 @@ public void testArrayAggregationRewrite() { String sql; PinotQuery pinotQuery; sql = "SELECT sum(array_sum(a)) FROM Foo"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(sql); + pinotQuery = compileToPinotQuery(sql); Assert.assertEquals(pinotQuery.getSelectListSize(), 1); Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator(), "summv"); Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().size(), 1); @@ -2785,7 +2807,7 @@ pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(0).getIdentifier().getName(), "a"); sql = "SELECT MIN(ARRAYMIN(a)) FROM Foo"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(sql); + pinotQuery = compileToPinotQuery(sql); Assert.assertEquals(pinotQuery.getSelectListSize(), 1); 
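// As with sum(array_sum(a)) -> summv(a) above, MIN(ARRAYMIN(a)) collapses into the equivalent multi-value
// aggregation minmv(a), keeping the column as the single operand, as the next assertions show.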
Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator(), "minmv"); Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().size(), 1); @@ -2793,7 +2815,7 @@ public void testArrayAggregationRewrite() { pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(0).getIdentifier().getName(), "a"); sql = "SELECT Max(ArrayMax(a)) FROM Foo"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(sql); + pinotQuery = compileToPinotQuery(sql); Assert.assertEquals(pinotQuery.getSelectListSize(), 1); Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator(), "maxmv"); Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().size(), 1); @@ -2801,7 +2823,7 @@ public void testArrayAggregationRewrite() { pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().get(0).getIdentifier().getName(), "a"); sql = "SELECT Max(ArrayMax(a)) + 1 FROM Foo"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(sql); + pinotQuery = compileToPinotQuery(sql); Assert.assertEquals(pinotQuery.getSelectListSize(), 1); Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperator(), "plus"); Assert.assertEquals(pinotQuery.getSelectList().get(0).getFunctionCall().getOperands().size(), 2); @@ -2880,7 +2902,7 @@ public void testSupportedDistinctQueries() { private void testUnsupportedDistinctQuery(String query, String errorMessage) { try { - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + PinotQuery pinotQuery = compileToPinotQuery(query); Assert.fail("Query should have failed"); } catch (Exception e) { Assert.assertEquals(errorMessage, e.getMessage()); @@ -2888,7 +2910,7 @@ private void testUnsupportedDistinctQuery(String query, String errorMessage) { } private void testSupportedDistinctQuery(String query) { - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + PinotQuery pinotQuery = compileToPinotQuery(query); Assert.assertNotNull(pinotQuery); } @@ -2897,27 +2919,27 @@ public void testQueryWithSemicolon() { String sql; PinotQuery pinotQuery; sql = "SELECT col1, col2 FROM foo;"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(sql); + pinotQuery = compileToPinotQuery(sql); Assert.assertEquals(pinotQuery.getSelectListSize(), 2); Assert.assertEquals(pinotQuery.getSelectList().get(0).getIdentifier().getName(), "col1"); Assert.assertEquals(pinotQuery.getSelectList().get(1).getIdentifier().getName(), "col2"); // Query having extra white spaces before the semicolon sql = "SELECT col1, col2 FROM foo ;"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(sql); + pinotQuery = compileToPinotQuery(sql); Assert.assertEquals(pinotQuery.getSelectListSize(), 2); Assert.assertEquals(pinotQuery.getSelectList().get(0).getIdentifier().getName(), "col1"); Assert.assertEquals(pinotQuery.getSelectList().get(1).getIdentifier().getName(), "col2"); // Query having leading and trailing whitespaces sql = " SELECT col1, col2 FROM foo; "; - pinotQuery = CalciteSqlParser.compileToPinotQuery(sql); + pinotQuery = compileToPinotQuery(sql); Assert.assertEquals(pinotQuery.getSelectListSize(), 2); Assert.assertEquals(pinotQuery.getSelectList().get(0).getIdentifier().getName(), "col1"); Assert.assertEquals(pinotQuery.getSelectList().get(1).getIdentifier().getName(), "col2"); sql = "SELECT col1, count(*) FROM foo group by col1;"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(sql); + pinotQuery = compileToPinotQuery(sql); 
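// A trailing semicolon never changes the compiled query; a sketch of that equivalence using the same
// helper (a hypothetical follow-up check, not part of this patch):
//   Assert.assertEquals(compileToPinotQuery("SELECT col1 FROM foo").getSelectList(),
//       compileToPinotQuery("SELECT col1 FROM foo;").getSelectList());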
Assert.assertEquals(pinotQuery.getSelectListSize(), 2); Assert.assertEquals(pinotQuery.getSelectList().get(0).getIdentifier().getName(), "col1"); Assert.assertEquals(pinotQuery.getGroupByListSize(), 1); @@ -2927,14 +2949,14 @@ public void testQueryWithSemicolon() { // Check for Option SQL Query // TODO: change to SET syntax sql = "SELECT col1, count(*) FROM foo group by col1 option(skipUpsert=true);"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(sql); + pinotQuery = compileToPinotQuery(sql); Assert.assertEquals(pinotQuery.getQueryOptionsSize(), 1); Assert.assertTrue(pinotQuery.getQueryOptions().containsKey("skipUpsert")); // Check for the query where the literal has semicolon // TODO: change to SET syntax sql = "select col1, count(*) from foo where col1 = 'x;y' GROUP BY col1 option(skipUpsert=true);"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(sql); + pinotQuery = compileToPinotQuery(sql); Assert.assertEquals(pinotQuery.getQueryOptionsSize(), 1); Assert.assertTrue(pinotQuery.getQueryOptions().containsKey("skipUpsert")); } @@ -2943,41 +2965,40 @@ public void testQueryWithSemicolon() { public void testCatalogNameResolvedToDefault() { // Pinot doesn't support catalog. However, for backward compatibility, if a catalog is provided, we will resolve // the table from our default catalog. this means `a.foo` will be equivalent to `foo`. - PinotQuery randomCatalogQuery = CalciteSqlParser.compileToPinotQuery("SELECT count(*) FROM rand_catalog.foo"); - PinotQuery defaultCatalogQuery = CalciteSqlParser.compileToPinotQuery("SELECT count(*) FROM default.foo"); + PinotQuery randomCatalogQuery = compileToPinotQuery("SELECT count(*) FROM rand_catalog.foo"); + PinotQuery defaultCatalogQuery = compileToPinotQuery("SELECT count(*) FROM default.foo"); Assert.assertEquals(randomCatalogQuery.getDataSource().getTableName(), "rand_catalog.foo"); Assert.assertEquals(defaultCatalogQuery.getDataSource().getTableName(), "default.foo"); } @Test public void testInvalidQueryWithSemicolon() { - Assert.expectThrows(SqlCompilationException.class, () -> CalciteSqlParser.compileToPinotQuery(";")); + Assert.expectThrows(SqlCompilationException.class, () -> compileToPinotQuery(";")); - Assert.expectThrows(SqlCompilationException.class, () -> CalciteSqlParser.compileToPinotQuery(";;;;")); + Assert.expectThrows(SqlCompilationException.class, () -> compileToPinotQuery(";;;;")); Assert.expectThrows(SqlCompilationException.class, - () -> CalciteSqlParser.compileToPinotQuery("SELECT col1, count(*) FROM foo GROUP BY ; col1")); + () -> compileToPinotQuery("SELECT col1, count(*) FROM foo GROUP BY ; col1")); // Query having multiple SQL statements - Assert.expectThrows(SqlCompilationException.class, () -> CalciteSqlParser.compileToPinotQuery( + Assert.expectThrows(SqlCompilationException.class, () -> compileToPinotQuery( "SELECT col1, count(*) FROM foo GROUP BY col1; SELECT col2, count(*) FROM foo GROUP BY col2")); // Query having multiple SQL statements with trailing and leading whitespaces - Assert.expectThrows(SqlCompilationException.class, () -> CalciteSqlParser.compileToPinotQuery( + Assert.expectThrows(SqlCompilationException.class, () -> compileToPinotQuery( " SELECT col1, count(*) FROM foo GROUP BY col1; " + "SELECT col2, count(*) FROM foo GROUP BY col2 ")); } @Test public void testInvalidQueryWithAggregateFunction() { - Assert.expectThrows(SqlCompilationException.class, - () -> CalciteSqlParser.compileToPinotQuery("SELECT col1, count(*) from foo")); + Assert.expectThrows(SqlCompilationException.class, () -> 
compileToPinotQuery("SELECT col1, count(*) from foo")); Assert.expectThrows(SqlCompilationException.class, - () -> CalciteSqlParser.compileToPinotQuery("SELECT UPPER(col1), count(*) from foo")); + () -> compileToPinotQuery("SELECT UPPER(col1), count(*) from foo")); Assert.expectThrows(SqlCompilationException.class, - () -> CalciteSqlParser.compileToPinotQuery("SELECT UPPER(col1), avg(col2) from foo")); + () -> compileToPinotQuery("SELECT UPPER(col1), avg(col2) from foo")); } /** @@ -2986,7 +3007,8 @@ public void testInvalidQueryWithAggregateFunction() { @Test public void testParserExtensionImpl() { String customSql = "INSERT INTO db.tbl FROM FILE 'file:///tmp/file1', FILE 'file:///tmp/file2'"; - SqlNodeAndOptions sqlNodeAndOptions = CalciteSqlParser.compileToSqlNodeAndOptions(customSql);; + SqlNodeAndOptions sqlNodeAndOptions = CalciteSqlParser.compileToSqlNodeAndOptions(customSql); + ; Assert.assertTrue(sqlNodeAndOptions.getSqlNode() instanceof SqlInsertFromFile); Assert.assertEquals(sqlNodeAndOptions.getSqlType(), PinotSqlType.DML); } @@ -2997,7 +3019,7 @@ public void shouldParseBasicAtTimeZoneExtension() { String sql = "SELECT ts AT TIME ZONE 'pst' FROM myTable;"; // When: - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(sql); + PinotQuery pinotQuery = compileToPinotQuery(sql); // Then: Assert.assertEquals(pinotQuery.getSelectListSize(), 1); @@ -3014,7 +3036,7 @@ public void shouldParseNestedTimeExprAtTimeZoneExtension() { String sql = "SELECT ts + 123 AT TIME ZONE 'pst' FROM myTable;"; // When: - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(sql); + PinotQuery pinotQuery = compileToPinotQuery(sql); // Then: Assert.assertEquals(pinotQuery.getSelectListSize(), 1); @@ -3036,7 +3058,7 @@ public void shouldParseOutsideExprAtTimeZoneExtension() { String sql = "SELECT ts AT TIME ZONE 'pst' > 123 FROM myTable;"; // When: - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(sql); + PinotQuery pinotQuery = compileToPinotQuery(sql); // Then: Assert.assertEquals(pinotQuery.getSelectListSize(), 1); @@ -3054,7 +3076,7 @@ public void shouldParseOutsideExprAtTimeZoneExtension() { @Test public void testJoin() { String query = "SELECT T1.a, T2.b FROM T1 JOIN T2"; - PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + PinotQuery pinotQuery = compileToPinotQuery(query); DataSource dataSource = pinotQuery.getDataSource(); Assert.assertNull(dataSource.getTableName()); Assert.assertNull(dataSource.getSubquery()); @@ -3066,7 +3088,7 @@ public void testJoin() { Assert.assertNull(join.getCondition()); query = "SELECT T1.a, T2.b FROM T1 INNER JOIN T2 ON T1.key = T2.key"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); dataSource = pinotQuery.getDataSource(); Assert.assertNull(dataSource.getTableName()); Assert.assertNull(dataSource.getSubquery()); @@ -3075,10 +3097,10 @@ public void testJoin() { Assert.assertEquals(join.getType(), JoinType.INNER); Assert.assertEquals(join.getLeft().getTableName(), "T1"); Assert.assertEquals(join.getRight().getTableName(), "T2"); - Assert.assertEquals(join.getCondition(), CalciteSqlParser.compileToExpression("T1.key = T2.key")); + Assert.assertEquals(join.getCondition(), compileToExpression("T1.key = T2.key")); query = "SELECT T1.a, T2.b FROM T1 FULL JOIN T2 ON T1.key = T2.key"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); dataSource = pinotQuery.getDataSource(); Assert.assertNull(dataSource.getTableName()); 
Assert.assertNull(dataSource.getSubquery()); @@ -3087,10 +3109,10 @@ public void testJoin() { Assert.assertEquals(join.getType(), JoinType.FULL); Assert.assertEquals(join.getLeft().getTableName(), "T1"); Assert.assertEquals(join.getRight().getTableName(), "T2"); - Assert.assertEquals(join.getCondition(), CalciteSqlParser.compileToExpression("T1.key = T2.key")); + Assert.assertEquals(join.getCondition(), compileToExpression("T1.key = T2.key")); query = "SELECT T1.a, T2.b FROM T1 LEFT JOIN T2 ON T1.a > T2.b"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); dataSource = pinotQuery.getDataSource(); Assert.assertNull(dataSource.getTableName()); Assert.assertNull(dataSource.getSubquery()); @@ -3099,11 +3121,11 @@ public void testJoin() { Assert.assertEquals(join.getType(), JoinType.LEFT); Assert.assertEquals(join.getLeft().getTableName(), "T1"); Assert.assertEquals(join.getRight().getTableName(), "T2"); - Assert.assertEquals(join.getCondition(), CalciteSqlParser.compileToExpression("T1.a > T2.b")); + Assert.assertEquals(join.getCondition(), compileToExpression("T1.a > T2.b")); query = "SELECT T1.a, T2.b FROM T1 RIGHT JOIN (SELECT a, COUNT(*) AS b FROM T3 GROUP BY a) AS T2 ON T1.key = T2.key"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); dataSource = pinotQuery.getDataSource(); Assert.assertNull(dataSource.getTableName()); Assert.assertNull(dataSource.getSubquery()); @@ -3114,13 +3136,12 @@ public void testJoin() { DataSource right = join.getRight(); Assert.assertEquals(right.getTableName(), "T2"); PinotQuery rightSubquery = right.getSubquery(); - Assert.assertEquals(rightSubquery, - CalciteSqlParser.compileToPinotQuery("SELECT a, COUNT(*) AS b FROM T3 GROUP BY a")); - Assert.assertEquals(join.getCondition(), CalciteSqlParser.compileToExpression("T1.key = T2.key")); + Assert.assertEquals(rightSubquery, compileToPinotQuery("SELECT a, COUNT(*) AS b FROM T3 GROUP BY a")); + Assert.assertEquals(join.getCondition(), compileToExpression("T1.key = T2.key")); query = "SELECT T1.a, T2.b FROM T1 JOIN (SELECT key, COUNT(*) AS b FROM T3 JOIN T4 GROUP BY key) AS T2 " + "ON T1.key = T2.key"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); dataSource = pinotQuery.getDataSource(); Assert.assertNull(dataSource.getTableName()); Assert.assertNull(dataSource.getSubquery()); @@ -3131,13 +3152,12 @@ public void testJoin() { right = join.getRight(); Assert.assertEquals(right.getTableName(), "T2"); rightSubquery = right.getSubquery(); - Assert.assertEquals(rightSubquery, - CalciteSqlParser.compileToPinotQuery("SELECT key, COUNT(*) AS b FROM T3 JOIN T4 GROUP BY key")); - Assert.assertEquals(join.getCondition(), CalciteSqlParser.compileToExpression("T1.key = T2.key")); + Assert.assertEquals(rightSubquery, compileToPinotQuery("SELECT key, COUNT(*) AS b FROM T3 JOIN T4 GROUP BY key")); + Assert.assertEquals(join.getCondition(), compileToExpression("T1.key = T2.key")); // test for self join queries. 
query = "SELECT T1.a FROM T1 JOIN(SELECT key FROM T1) as self ON T1.key=self.key"; - pinotQuery = CalciteSqlParser.compileToPinotQuery(query); + pinotQuery = compileToPinotQuery(query); dataSource = pinotQuery.getDataSource(); Assert.assertNull(dataSource.getTableName()); Assert.assertNull(dataSource.getSubquery()); @@ -3148,72 +3168,72 @@ public void testJoin() { right = join.getRight(); Assert.assertEquals(right.getTableName(), "self"); rightSubquery = right.getSubquery(); - Assert.assertEquals(rightSubquery, CalciteSqlParser.compileToPinotQuery("SELECT key FROM T1")); - Assert.assertEquals(join.getCondition(), CalciteSqlParser.compileToExpression("T1.key = self.key")); + Assert.assertEquals(rightSubquery, compileToPinotQuery("SELECT key FROM T1")); + Assert.assertEquals(join.getCondition(), compileToExpression("T1.key = self.key")); } @Test public void testInPredicateWithOutNullPasses() { - CalciteSqlParser.compileToPinotQuery("SELECT * FROM testTable WHERE column1 IN (1, 2) AND column2 = 1"); + compileToPinotQuery("SELECT * FROM testTable WHERE column1 IN (1, 2) AND column2 = 1"); } @Test(expectedExceptions = {IllegalStateException.class}, expectedExceptionsMessageRegExp = "Using NULL in IN " + "filter is not supported") public void testSingleInPredicateWithNullFails() { - CalciteSqlParser.compileToPinotQuery("SELECT * FROM testTable WHERE column1 IN (1, 2, NULL)"); + compileToPinotQuery("SELECT * FROM testTable WHERE column1 IN (1, 2, NULL)"); } @Test(expectedExceptions = {IllegalStateException.class}, expectedExceptionsMessageRegExp = "Using NULL in NOT_IN " + "filter is not supported") public void testSingleNotInPredicateWithNullFails() { - CalciteSqlParser.compileToPinotQuery("SELECT * FROM testTable WHERE column1 NOT IN (1, 2, NULL)"); + compileToPinotQuery("SELECT * FROM testTable WHERE column1 NOT IN (1, 2, NULL)"); } @Test(expectedExceptions = {IllegalStateException.class}, expectedExceptionsMessageRegExp = "Using NULL in IN " + "filter is not supported") public void testAndFilterWithNullFails() { - CalciteSqlParser.compileToPinotQuery("SELECT * FROM testTable WHERE column1 IN (1, 2, NULL) AND column2 = 1"); + compileToPinotQuery("SELECT * FROM testTable WHERE column1 IN (1, 2, NULL) AND column2 = 1"); } @Test(expectedExceptions = {IllegalStateException.class}, expectedExceptionsMessageRegExp = "Using NULL in NOT_IN " + "filter is not supported") public void testOrFilterWithNullFails() { - CalciteSqlParser.compileToPinotQuery("SELECT * FROM testTable WHERE column1 NOT IN (1, 2, NULL) OR column2 = 1"); + compileToPinotQuery("SELECT * FROM testTable WHERE column1 NOT IN (1, 2, NULL) OR column2 = 1"); } @Test(expectedExceptions = {IllegalStateException.class}, expectedExceptionsMessageRegExp = "Using NULL in IN " + "filter is not supported") public void testNotFilterWithNullFails() { - CalciteSqlParser.compileToPinotQuery("SELECT * FROM testTable WHERE NOT(column1 IN (NULL, 1, 2))"); + compileToPinotQuery("SELECT * FROM testTable WHERE NOT(column1 IN (NULL, 1, 2))"); } @Test(expectedExceptions = {IllegalStateException.class}, expectedExceptionsMessageRegExp = "Using NULL in " + "GREATER_THAN filter is not supported") public void testGreaterThanNullFilterFails() { - CalciteSqlParser.compileToPinotQuery("SELECT * FROM testTable WHERE column1 > null"); + compileToPinotQuery("SELECT * FROM testTable WHERE column1 > null"); } @Test(expectedExceptions = {IllegalStateException.class}, expectedExceptionsMessageRegExp = "Using NULL in " + "LESS_THAN_OR_EQUAL filter is not supported") 
public void testLessThanOrEqualNullFilterFails() { - CalciteSqlParser.compileToPinotQuery("SELECT * FROM testTable WHERE column1 <= null"); + compileToPinotQuery("SELECT * FROM testTable WHERE column1 <= null"); } @Test(expectedExceptions = {IllegalStateException.class}, expectedExceptionsMessageRegExp = "Using NULL in LIKE " + "filter is not supported") public void testLikeFilterWithNullFails() { - CalciteSqlParser.compileToPinotQuery("SELECT * FROM testTable WHERE column1 LIKE null"); + compileToPinotQuery("SELECT * FROM testTable WHERE column1 LIKE null"); } @Test(expectedExceptions = {IllegalStateException.class}, expectedExceptionsMessageRegExp = "Using NULL in EQUALS " + "filter is not supported") public void testEqualFilterWithNullFails() { - CalciteSqlParser.compileToPinotQuery("SELECT * FROM testTable WHERE column1 = null"); + compileToPinotQuery("SELECT * FROM testTable WHERE column1 = null"); } @Test(expectedExceptions = {IllegalStateException.class}, expectedExceptionsMessageRegExp = "Using NULL in " + "NOT_EQUALS filter is not supported") public void testInEqualFilterWithNullFails() { - CalciteSqlParser.compileToPinotQuery("SELECT * FROM testTable WHERE column1 != null"); + compileToPinotQuery("SELECT * FROM testTable WHERE column1 != null"); } } diff --git a/pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/filter/MergeEqInFilterOptimizer.java b/pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/filter/MergeEqInFilterOptimizer.java index 49a9ad171f2c..6836f8022617 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/filter/MergeEqInFilterOptimizer.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/filter/MergeEqInFilterOptimizer.java @@ -19,7 +19,6 @@ package org.apache.pinot.core.query.optimizer.filter; import java.util.ArrayList; -import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; import java.util.List; @@ -164,16 +163,12 @@ private Expression optimize(Expression filterExpression) { private static Expression getFilterExpression(Expression lhs, Set<Expression> values) { int numValues = values.size(); if (numValues == 1) { - Expression eqFilter = RequestUtils.getFunctionExpression(FilterKind.EQUALS.name()); - eqFilter.getFunctionCall().setOperands(Arrays.asList(lhs, values.iterator().next())); - return eqFilter; + return RequestUtils.getFunctionExpression(FilterKind.EQUALS.name(), lhs, values.iterator().next()); } else { - Expression inFilter = RequestUtils.getFunctionExpression(FilterKind.IN.name()); List<Expression> operands = new ArrayList<>(numValues + 1); operands.add(lhs); operands.addAll(values); - inFilter.getFunctionCall().setOperands(operands); - return inFilter; + return RequestUtils.getFunctionExpression(FilterKind.IN.name(), operands); } } } diff --git a/pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/filter/MergeRangeFilterOptimizer.java b/pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/filter/MergeRangeFilterOptimizer.java index 1d0b91cc8809..68f895e0d69e 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/filter/MergeRangeFilterOptimizer.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/filter/MergeRangeFilterOptimizer.java @@ -19,7 +19,6 @@ package org.apache.pinot.core.query.optimizer.filter; import java.util.ArrayList; -import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -154,9 +153,7 @@ private static Comparable getComparable(Expression literalExpression, 
DataType d * Helper method to construct a RANGE predicate filter Expression from the given column and range. */ private static Expression getRangeFilterExpression(String column, Range range) { - Expression rangeFilter = RequestUtils.getFunctionExpression(FilterKind.RANGE.name()); - rangeFilter.getFunctionCall().setOperands(Arrays.asList(RequestUtils.getIdentifierExpression(column), - RequestUtils.getLiteralExpression(range.getRangeString()))); - return rangeFilter; + return RequestUtils.getFunctionExpression(FilterKind.RANGE.name(), RequestUtils.getIdentifierExpression(column), + RequestUtils.getLiteralExpression(range.getRangeString())); } } diff --git a/pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/filter/TextMatchFilterOptimizer.java b/pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/filter/TextMatchFilterOptimizer.java index 8c742cfc98a4..c6b2e29838f6 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/filter/TextMatchFilterOptimizer.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/filter/TextMatchFilterOptimizer.java @@ -19,8 +19,6 @@ package org.apache.pinot.core.query.optimizer.filter; import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -161,26 +159,20 @@ private Expression getNewFilter(String operator, List<Expression> newChildren, } else { mergedTextMatchFilter = String.join(SPACE + operator + SPACE, literals); } - Expression mergedTextMatchExpression = RequestUtils.getFunctionExpression(FilterKind.TEXT_MATCH.name()); - Expression mergedTextMatchFilterExpression = RequestUtils.getLiteralExpression("(" + mergedTextMatchFilter + ")"); - mergedTextMatchExpression.getFunctionCall() - .setOperands(Arrays.asList(entry.getKey(), mergedTextMatchFilterExpression)); - + Expression mergedTextMatchExpression = + RequestUtils.getFunctionExpression(FilterKind.TEXT_MATCH.name(), entry.getKey(), + RequestUtils.getLiteralExpression("(" + mergedTextMatchFilter + ")")); if (allNot) { - Expression notExpression = RequestUtils.getFunctionExpression(FilterKind.NOT.name()); - notExpression.getFunctionCall().setOperands(Collections.singletonList(mergedTextMatchExpression)); - newChildren.add(notExpression); - continue; + newChildren.add(RequestUtils.getFunctionExpression(FilterKind.NOT.name(), mergedTextMatchExpression)); + } else { + newChildren.add(mergedTextMatchExpression); } - newChildren.add(mergedTextMatchExpression); } if (newChildren.size() == 1) { return newChildren.get(0); } assert operator.equals(FilterKind.OR.name()) || operator.equals(FilterKind.AND.name()); - Expression newExpression = RequestUtils.getFunctionExpression(operator); - newExpression.getFunctionCall().setOperands(newChildren); - return newExpression; + return RequestUtils.getFunctionExpression(operator, newChildren); } } diff --git a/pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/filter/TimePredicateFilterOptimizer.java b/pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/filter/TimePredicateFilterOptimizer.java index 7a2603ce9089..3979eb029d0d 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/filter/TimePredicateFilterOptimizer.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/filter/TimePredicateFilterOptimizer.java @@ -20,7 +20,7 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; -import java.util.Arrays; +import 
java.util.ArrayList; import java.util.List; import java.util.concurrent.TimeUnit; import javax.annotation.Nullable; @@ -234,9 +234,7 @@ private void optimizeTimeConvert(Function filterFunction, FilterKind filterKind) // Step 3: Rewrite the filter function String rangeString = new Range(lowerValue, lowerInclusive, upperValue, upperInclusive).getRangeString(); - filterFunction.setOperator(FilterKind.RANGE.name()); - filterFunction.setOperands( - Arrays.asList(timeConvertOperands.get(0), RequestUtils.getLiteralExpression(rangeString))); + rewriteToRange(filterFunction, timeConvertOperands.get(0), rangeString); } catch (Exception e) { LOGGER.warn("Caught exception while optimizing TIME_CONVERT predicate: {}, skipping the optimization", filterFunction, e); @@ -400,9 +398,7 @@ && isStringLiteral(dateTimeConvertOperands.get(3)), // Step 3: Rewrite the filter function String rangeString = new Range(lowerValue, lowerInclusive, upperValue, upperInclusive).getRangeString(); - filterFunction.setOperator(FilterKind.RANGE.name()); - filterFunction.setOperands( - Arrays.asList(dateTimeConvertOperands.get(0), RequestUtils.getLiteralExpression(rangeString))); + rewriteToRange(filterFunction, dateTimeConvertOperands.get(0), rangeString); } catch (Exception e) { LOGGER.warn("Caught exception while optimizing DATE_TIME_CONVERT predicate: {}, skipping the optimization", filterFunction, e); @@ -419,4 +415,13 @@ private boolean isStringLiteral(Expression expression) { private long ceil(long millisValue, long granularityMillis) { return (millisValue + granularityMillis - 1) / granularityMillis * granularityMillis; } + + private static void rewriteToRange(Function filterFunction, Expression expression, String rangeString) { + filterFunction.setOperator(FilterKind.RANGE.name()); + // NOTE: Create an ArrayList because we might need to modify the list later + List<Expression> newOperands = new ArrayList<>(2); + newOperands.add(expression); + newOperands.add(RequestUtils.getLiteralExpression(rangeString)); + filterFunction.setOperands(newOperands); + } } diff --git a/pinot-core/src/test/java/org/apache/pinot/core/plan/maker/QueryOverrideWithHintsTest.java b/pinot-core/src/test/java/org/apache/pinot/core/plan/maker/QueryOverrideWithHintsTest.java index 57f169258ca8..522f5881916b 100644 --- a/pinot-core/src/test/java/org/apache/pinot/core/plan/maker/QueryOverrideWithHintsTest.java +++ b/pinot-core/src/test/java/org/apache/pinot/core/plan/maker/QueryOverrideWithHintsTest.java @@ -223,9 +223,9 @@ public void testNotOverrideWithExpressionOverrideHints() { public void testRewriteExpressionsWithHints() { PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery( "SELECT datetrunc('MONTH', ts), count(*), sum(abc) from myTable group by datetrunc('MONTH', ts) "); - Expression dateTruncFunctionExpr = RequestUtils.getFunctionExpression("datetrunc"); - dateTruncFunctionExpr.getFunctionCall().setOperands(new ArrayList<>( - ImmutableList.of(RequestUtils.getLiteralExpression("MONTH"), RequestUtils.getIdentifierExpression("ts")))); + Expression dateTruncFunctionExpr = + RequestUtils.getFunctionExpression("datetrunc", RequestUtils.getLiteralExpression("MONTH"), + RequestUtils.getIdentifierExpression("ts")); Expression timestampIndexColumn = RequestUtils.getIdentifierExpression("$ts$MONTH"); pinotQuery.setExpressionOverrideHints(ImmutableMap.of(dateTruncFunctionExpr, timestampIndexColumn)); QueryContext queryContext = QueryContextConverterUtils.getQueryContext(pinotQuery); @@ -238,9 +238,9 @@ 
public void testNotRewriteExpressionsWithHints() { PinotQuery pinotQuery = CalciteSqlParser.compileToPinotQuery( "SELECT datetrunc('DAY', ts), count(*), sum(abc) from myTable group by datetrunc('DAY', ts)"); - Expression dateTruncFunctionExpr = RequestUtils.getFunctionExpression("datetrunc"); - dateTruncFunctionExpr.getFunctionCall().setOperands(new ArrayList<>( - ImmutableList.of(RequestUtils.getLiteralExpression("DAY"), RequestUtils.getIdentifierExpression("ts")))); + Expression dateTruncFunctionExpr = + RequestUtils.getFunctionExpression("datetrunc", RequestUtils.getLiteralExpression("DAY"), + RequestUtils.getIdentifierExpression("ts")); Expression timestampIndexColumn = RequestUtils.getIdentifierExpression("$ts$DAY"); pinotQuery.setExpressionOverrideHints(ImmutableMap.of(dateTruncFunctionExpr, timestampIndexColumn)); QueryContext queryContext = QueryContextConverterUtils.getQueryContext(pinotQuery); diff --git a/pinot-core/src/test/java/org/apache/pinot/core/query/optimizer/QueryOptimizerTest.java b/pinot-core/src/test/java/org/apache/pinot/core/query/optimizer/QueryOptimizerTest.java index 848f458742d2..62cf2e2b44b8 100644 --- a/pinot-core/src/test/java/org/apache/pinot/core/query/optimizer/QueryOptimizerTest.java +++ b/pinot-core/src/test/java/org/apache/pinot/core/query/optimizer/QueryOptimizerTest.java @@ -79,10 +79,8 @@ public void testFlattenAndOrFilter() { } private static Expression getEqFilterExpression(String column, Object value) { - Expression eqFilterExpression = RequestUtils.getFunctionExpression(FilterKind.EQUALS.name()); - eqFilterExpression.getFunctionCall().setOperands( - Arrays.asList(RequestUtils.getIdentifierExpression(column), RequestUtils.getLiteralExpression(value))); - return eqFilterExpression; + return RequestUtils.getFunctionExpression(FilterKind.EQUALS.name(), RequestUtils.getIdentifierExpression(column), + RequestUtils.getLiteralExpression(value)); } @Test @@ -182,10 +180,8 @@ public void testMergeTextMatchFilter() { } private static Expression getRangeFilterExpression(String column, String rangeString) { - Expression rangeFilterExpression = RequestUtils.getFunctionExpression(FilterKind.RANGE.name()); - rangeFilterExpression.getFunctionCall().setOperands( - Arrays.asList(RequestUtils.getIdentifierExpression(column), RequestUtils.getLiteralExpression(rangeString))); - return rangeFilterExpression; + return RequestUtils.getFunctionExpression(FilterKind.RANGE.name(), RequestUtils.getIdentifierExpression(column), + RequestUtils.getLiteralExpression(rangeString)); } @Test diff --git a/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/plan/server/ServerPlanRequestUtils.java b/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/plan/server/ServerPlanRequestUtils.java index 9b8d19d77de5..f8cb04469805 100644 --- a/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/plan/server/ServerPlanRequestUtils.java +++ b/pinot-query-runtime/src/main/java/org/apache/pinot/query/runtime/plan/server/ServerPlanRequestUtils.java @@ -18,7 +18,6 @@ */ package org.apache.pinot.query.runtime.plan.server; -import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; import java.util.ArrayList; import java.util.Arrays; @@ -229,14 +228,13 @@ private static void attachTimeBoundary(PinotQuery pinotQuery, TimeBoundaryInfo t String timeColumn = timeBoundaryInfo.getTimeColumn(); String timeValue = timeBoundaryInfo.getTimeValue(); Expression timeFilterExpression = RequestUtils.getFunctionExpression( - isOfflineRequest ? 
FilterKind.LESS_THAN_OR_EQUAL.name() : FilterKind.GREATER_THAN.name()); - timeFilterExpression.getFunctionCall().setOperands( - Arrays.asList(RequestUtils.getIdentifierExpression(timeColumn), RequestUtils.getLiteralExpression(timeValue))); + isOfflineRequest ? FilterKind.LESS_THAN_OR_EQUAL.name() : FilterKind.GREATER_THAN.name(), + RequestUtils.getIdentifierExpression(timeColumn), RequestUtils.getLiteralExpression(timeValue)); Expression filterExpression = pinotQuery.getFilterExpression(); if (filterExpression != null) { - Expression andFilterExpression = RequestUtils.getFunctionExpression(FilterKind.AND.name()); - andFilterExpression.getFunctionCall().setOperands(Arrays.asList(filterExpression, timeFilterExpression)); + Expression andFilterExpression = + RequestUtils.getFunctionExpression(FilterKind.AND.name(), filterExpression, timeFilterExpression); pinotQuery.setFilterExpression(andFilterExpression); } else { pinotQuery.setFilterExpression(timeFilterExpression); @@ -253,21 +251,26 @@ static void attachDynamicFilter(PinotQuery pinotQuery, JoinNode.JoinKeys joinKey List expressions = new ArrayList<>(); for (int i = 0; i < leftJoinKeys.size(); i++) { Expression leftExpr = pinotQuery.getSelectList().get(leftJoinKeys.get(i)); - if (dataContainer.size() == 0) { + if (dataContainer.isEmpty()) { // put a constant false expression - Expression constantFalseExpr = RequestUtils.getLiteralExpression(false); - expressions.add(constantFalseExpr); + expressions.add(RequestUtils.getLiteralExpression(false)); } else { int rightIdx = rightJoinKeys.get(i); - Expression inFilterExpr = RequestUtils.getFunctionExpression(FilterKind.IN.name()); List operands = new ArrayList<>(dataContainer.size() + 1); operands.add(leftExpr); operands.addAll(computeInOperands(dataContainer, dataSchema, rightIdx)); - inFilterExpr.getFunctionCall().setOperands(operands); - expressions.add(inFilterExpr); + expressions.add(RequestUtils.getFunctionExpression(FilterKind.IN.name(), operands)); } } - attachFilterExpression(pinotQuery, FilterKind.AND, expressions); + Expression filterExpression = pinotQuery.getFilterExpression(); + if (filterExpression != null) { + expressions.add(filterExpression); + } + if (expressions.size() > 1) { + pinotQuery.setFilterExpression(RequestUtils.getFunctionExpression(FilterKind.AND.name(), expressions)); + } else { + pinotQuery.setFilterExpression(expressions.get(0)); + } } private static List computeInOperands(List dataContainer, DataSchema dataSchema, int colIdx) { @@ -335,23 +338,4 @@ private static List computeInOperands(List dataContainer, } return expressions; } - - /** - * Attach Filter Expression to existing PinotQuery. 
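The inlined logic above replaces the removed attachFilterExpression helper: the freshly built IN (or constant-false) expressions are collected, the pre-existing filter expression, if any, is appended, and an AND wrapper is added only when more than one predicate remains. A small sketch of that decision follows, using plain strings in place of the thrift Expression type; the predicate text is illustrative only.

import java.util.ArrayList;
import java.util.List;

public class AttachFilterSketch {
  public static void main(String[] args) {
    List<String> expressions = new ArrayList<>();
    expressions.add("col1 IN (1, 2, 3)"); // stands in for the dynamic IN filter
    String existingFilter = "ts > 1000";  // stands in for pinotQuery.getFilterExpression()
    if (existingFilter != null) {
      expressions.add(existingFilter);
    }
    // Wrap in AND only when there is more than one predicate to combine.
    String newFilter =
        expressions.size() > 1 ? "AND(" + String.join(", ", expressions) + ")" : expressions.get(0);
    System.out.println(newFilter); // AND(col1 IN (1, 2, 3), ts > 1000)
  }
}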
- */ - private static void attachFilterExpression(PinotQuery pinotQuery, FilterKind attachKind, List exprs) { - Preconditions.checkState(attachKind == FilterKind.AND || attachKind == FilterKind.OR); - Expression filterExpression = pinotQuery.getFilterExpression(); - List arrayList = new ArrayList<>(exprs); - if (filterExpression != null) { - arrayList.add(filterExpression); - } - if (arrayList.size() > 1) { - Expression attachFilterExpression = RequestUtils.getFunctionExpression(attachKind.name()); - attachFilterExpression.getFunctionCall().setOperands(arrayList); - pinotQuery.setFilterExpression(attachFilterExpression); - } else { - pinotQuery.setFilterExpression(arrayList.get(0)); - } - } } From bf28a83958e069f868e794f4a5271328d5a5f525 Mon Sep 17 00:00:00 2001 From: Yash Mayya Date: Wed, 1 May 2024 07:35:34 +0530 Subject: [PATCH 54/58] Use more efficient variants of URLEncoder::encode and URLDecoder::decode (#13030) --- .../function/scalar/StringFunctions.java | 15 ++++------- .../apache/pinot/common/utils/URIUtils.java | 15 +++-------- .../SegmentGenerationUtilsTest.java | 8 +++--- .../pinot/integration/tests/ClusterTest.java | 8 +++--- .../tests/HybridClusterIntegrationTest.java | 6 ++--- .../hadoop/HadoopSegmentCreationMapper.java | 6 ++--- .../SparkSegmentGenerationJobRunner.java | 6 ++--- .../SparkSegmentGenerationJobRunner.java | 6 ++--- .../SegmentGenerationJobRunner.java | 4 +-- .../plugin/filesystem/AzurePinotFSUtil.java | 7 +++-- .../filesystem/test/AzurePinotFSUtilTest.java | 6 +++-- .../pinot/plugin/filesystem/S3PinotFS.java | 15 ++--------- .../ParquetNativeRecordReaderFullTest.java | 4 ++- .../segment/local/utils/SegmentPushUtils.java | 4 +-- .../server/api/resources/TablesResource.java | 20 +++----------- .../pinot/spi/filesystem/LocalPinotFS.java | 8 ++---- .../builder/ControllerRequestURLBuilder.java | 26 ++++++------------- 17 files changed, 56 insertions(+), 108 deletions(-) diff --git a/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/StringFunctions.java b/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/StringFunctions.java index 31baeb5d2d44..21c086ffb71e 100644 --- a/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/StringFunctions.java +++ b/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/StringFunctions.java @@ -21,8 +21,6 @@ import it.unimi.dsi.fastutil.objects.ObjectLinkedOpenHashSet; import it.unimi.dsi.fastutil.objects.ObjectSet; import java.io.UnsupportedEncodingException; -import java.net.URLDecoder; -import java.net.URLEncoder; import java.nio.ByteBuffer; import java.nio.charset.StandardCharsets; import java.text.Normalizer; @@ -33,6 +31,7 @@ import javax.annotation.Nullable; import org.apache.commons.lang3.StringUtils; import org.apache.pinot.common.utils.RegexpPatternConverterUtils; +import org.apache.pinot.common.utils.URIUtils; import org.apache.pinot.spi.annotations.ScalarFunction; import org.apache.pinot.spi.utils.JsonUtils; @@ -804,24 +803,20 @@ public static int strcmp(String input1, String input2) { * * @param input plaintext string * @return url encoded string - * @throws UnsupportedEncodingException */ @ScalarFunction - public static String encodeUrl(String input) - throws UnsupportedEncodingException { - return URLEncoder.encode(input, StandardCharsets.UTF_8.toString()); + public static String encodeUrl(String input) { + return URIUtils.encode(input); } /** * * @param input url encoded string * @return plaintext string - * @throws UnsupportedEncodingException */ 
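The mechanical change at the heart of this commit: Java 10 added URLEncoder.encode(String, Charset) and URLDecoder.decode(String, Charset), which neither throw the checked UnsupportedEncodingException nor repeat a charset-name lookup on every call, so the old try/catch around the String-named overload can be dropped. A self-contained before/after sketch (class name and input are illustrative):

import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;

public class UrlCodecSketch {
  public static void main(String[] args) throws UnsupportedEncodingException {
    String input = "a b+c";
    // Old style: charset passed by name, forces a checked exception on callers.
    String oldStyle = URLEncoder.encode(input, StandardCharsets.UTF_8.toString());
    // New style (Java 10+): charset passed directly, no checked exception.
    String newStyle = URLEncoder.encode(input, StandardCharsets.UTF_8);
    System.out.println(oldStyle.equals(newStyle)); // true: both yield a+b%2Bc
    System.out.println(URLDecoder.decode(newStyle, StandardCharsets.UTF_8)); // a b+c
  }
}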
@ScalarFunction - public static String decodeUrl(String input) - throws UnsupportedEncodingException { - return URLDecoder.decode(input, StandardCharsets.UTF_8.toString()); + public static String decodeUrl(String input) { + return URIUtils.decode(input); } /** diff --git a/pinot-common/src/main/java/org/apache/pinot/common/utils/URIUtils.java b/pinot-common/src/main/java/org/apache/pinot/common/utils/URIUtils.java index 5aff60b07fe2..a857981f2a2d 100644 --- a/pinot-common/src/main/java/org/apache/pinot/common/utils/URIUtils.java +++ b/pinot-common/src/main/java/org/apache/pinot/common/utils/URIUtils.java @@ -23,6 +23,7 @@ import java.net.URISyntaxException; import java.net.URLDecoder; import java.net.URLEncoder; +import java.nio.charset.StandardCharsets; import java.util.Map; import java.util.StringJoiner; import org.apache.http.client.utils.URIBuilder; @@ -91,21 +92,11 @@ public static String constructDownloadUrl(String baseUrl, String rawTableName, S } public static String encode(String string) { - try { - return URLEncoder.encode(string, "UTF-8"); - } catch (Exception e) { - // Should never happen - throw new RuntimeException(e); - } + return URLEncoder.encode(string, StandardCharsets.UTF_8); } public static String decode(String string) { - try { - return URLDecoder.decode(string, "UTF-8"); - } catch (Exception e) { - // Should never happen - throw new RuntimeException(e); - } + return URLDecoder.decode(string, StandardCharsets.UTF_8); } /** diff --git a/pinot-common/src/test/java/org/apache/pinot/common/segment/generation/SegmentGenerationUtilsTest.java b/pinot-common/src/test/java/org/apache/pinot/common/segment/generation/SegmentGenerationUtilsTest.java index 983d8ededd33..d33c046207b1 100644 --- a/pinot-common/src/test/java/org/apache/pinot/common/segment/generation/SegmentGenerationUtilsTest.java +++ b/pinot-common/src/test/java/org/apache/pinot/common/segment/generation/SegmentGenerationUtilsTest.java @@ -22,13 +22,12 @@ import com.google.common.collect.Lists; import java.io.File; import java.io.IOException; -import java.io.UnsupportedEncodingException; import java.net.URI; import java.net.URISyntaxException; -import java.net.URLEncoder; import java.nio.file.Files; import java.util.List; import org.apache.commons.io.FileUtils; +import org.apache.pinot.common.utils.URIUtils; import org.apache.pinot.spi.filesystem.PinotFS; import org.apache.pinot.spi.filesystem.PinotFSFactory; import org.testng.Assert; @@ -73,7 +72,7 @@ public void testRelativeURIs() // Invalid segment tar name with space @Test public void testInvalidRelativeURIs() - throws URISyntaxException, UnsupportedEncodingException { + throws URISyntaxException { URI inputDirURI = new URI("hdfs://namenode1:9999/path/to/"); URI inputFileURI = new URI("hdfs://namenode1:9999/path/to/subdir/file"); URI outputDirURI = new URI("hdfs://namenode2/output/dir/"); @@ -85,8 +84,7 @@ public void testInvalidRelativeURIs() Assert.assertTrue(e instanceof URISyntaxException); } URI outputSegmentTarURI = SegmentGenerationUtils.getRelativeOutputPath(inputDirURI, inputFileURI, outputDirURI) - .resolve(new URI( - URLEncoder.encode("table_OFFLINE_2021-02-01_09:39:00.000_2021-02-01_11:59:00.000_2.tar.gz", "UTF-8"))); + .resolve(new URI(URIUtils.encode("table_OFFLINE_2021-02-01_09:39:00.000_2021-02-01_11:59:00.000_2.tar.gz"))); Assert.assertEquals(outputSegmentTarURI.toString(), "hdfs://namenode2/output/dir/subdir/table_OFFLINE_2021-02-01_09%3A39%3A00.000_2021-02-01_11%3A59%3A00.000_2" + ".tar.gz"); diff --git 
a/pinot-integration-test-base/src/test/java/org/apache/pinot/integration/tests/ClusterTest.java b/pinot-integration-test-base/src/test/java/org/apache/pinot/integration/tests/ClusterTest.java index 278f378b1ab7..49edf86264ed 100644 --- a/pinot-integration-test-base/src/test/java/org/apache/pinot/integration/tests/ClusterTest.java +++ b/pinot-integration-test-base/src/test/java/org/apache/pinot/integration/tests/ClusterTest.java @@ -25,8 +25,6 @@ import java.io.File; import java.io.IOException; import java.net.URI; -import java.net.URLEncoder; -import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -56,6 +54,7 @@ import org.apache.pinot.broker.broker.helix.HelixBrokerStarter; import org.apache.pinot.common.exception.HttpErrorStatusException; import org.apache.pinot.common.utils.FileUploadDownloadClient; +import org.apache.pinot.common.utils.URIUtils; import org.apache.pinot.common.utils.http.HttpClient; import org.apache.pinot.controller.helix.ControllerTest; import org.apache.pinot.minion.BaseMinionStarter; @@ -456,8 +455,9 @@ private int uploadSegmentWithOnlyMetadata(String tableName, TableType tableType, FileUploadDownloadClient fileUploadDownloadClient, File segmentTarFile) throws IOException, HttpErrorStatusException { List
<Header>
headers = ImmutableList.of(new BasicHeader(FileUploadDownloadClient.CustomHeaders.DOWNLOAD_URI, - "file://" + segmentTarFile.getParentFile().getAbsolutePath() + "/" + URLEncoder.encode(segmentTarFile.getName(), - StandardCharsets.UTF_8.toString())), new BasicHeader(FileUploadDownloadClient.CustomHeaders.UPLOAD_TYPE, + "file://" + segmentTarFile.getParentFile().getAbsolutePath() + "/" + + URIUtils.encode(segmentTarFile.getName())), + new BasicHeader(FileUploadDownloadClient.CustomHeaders.UPLOAD_TYPE, FileUploadDownloadClient.FileUploadType.METADATA.toString())); // Add table name and table type as request parameters NameValuePair tableNameValuePair = diff --git a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/HybridClusterIntegrationTest.java b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/HybridClusterIntegrationTest.java index 73d0dcb9bdfc..f66c194d8538 100644 --- a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/HybridClusterIntegrationTest.java +++ b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/HybridClusterIntegrationTest.java @@ -20,7 +20,6 @@ import com.fasterxml.jackson.databind.JsonNode; import java.io.File; -import java.net.URLEncoder; import java.util.Collections; import java.util.List; import java.util.Map; @@ -28,6 +27,7 @@ import org.apache.helix.model.ExternalView; import org.apache.helix.model.IdealState; import org.apache.pinot.broker.broker.helix.BaseBrokerStarter; +import org.apache.pinot.common.utils.URIUtils; import org.apache.pinot.common.utils.config.TagNameUtils; import org.apache.pinot.controller.ControllerConf; import org.apache.pinot.spi.config.table.TableConfig; @@ -269,9 +269,9 @@ public void testBrokerDebugRoutingTableSQL(boolean useMultiStageQueryEngine) String offlineTableName = TableNameBuilder.OFFLINE.tableNameWithType(tableName); String realtimeTableName = TableNameBuilder.REALTIME.tableNameWithType(tableName); String encodedSQL; - encodedSQL = URLEncoder.encode("select * from " + realtimeTableName, "UTF-8"); + encodedSQL = URIUtils.encode("select * from " + realtimeTableName); Assert.assertNotNull(getDebugInfo("debug/routingTable/sql?query=" + encodedSQL)); - encodedSQL = URLEncoder.encode("select * from " + offlineTableName, "UTF-8"); + encodedSQL = URIUtils.encode("select * from " + offlineTableName); Assert.assertNotNull(getDebugInfo("debug/routingTable/sql?query=" + encodedSQL)); } diff --git a/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-hadoop/src/main/java/org/apache/pinot/plugin/ingestion/batch/hadoop/HadoopSegmentCreationMapper.java b/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-hadoop/src/main/java/org/apache/pinot/plugin/ingestion/batch/hadoop/HadoopSegmentCreationMapper.java index 778276434006..3efad6b05d81 100644 --- a/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-hadoop/src/main/java/org/apache/pinot/plugin/ingestion/batch/hadoop/HadoopSegmentCreationMapper.java +++ b/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-hadoop/src/main/java/org/apache/pinot/plugin/ingestion/batch/hadoop/HadoopSegmentCreationMapper.java @@ -22,7 +22,6 @@ import java.io.File; import java.io.IOException; import java.net.URI; -import java.net.URLEncoder; import java.nio.file.Files; import java.util.List; import java.util.UUID; @@ -34,6 +33,7 @@ import org.apache.hadoop.mapreduce.Mapper; import org.apache.pinot.common.segment.generation.SegmentGenerationUtils; import 
org.apache.pinot.common.utils.TarGzCompressionUtils; +import org.apache.pinot.common.utils.URIUtils; import org.apache.pinot.plugin.ingestion.batch.common.SegmentGenerationJobUtils; import org.apache.pinot.plugin.ingestion.batch.common.SegmentGenerationTaskRunner; import org.apache.pinot.spi.env.PinotConfiguration; @@ -175,7 +175,7 @@ protected void map(LongWritable key, Text value, Context context) { // Tar segment directory to compress file File localSegmentDir = new File(localOutputTempDir, segmentName); - String segmentTarFileName = URLEncoder.encode(segmentName + Constants.TAR_GZ_FILE_EXT, "UTF-8"); + String segmentTarFileName = URIUtils.encode(segmentName + Constants.TAR_GZ_FILE_EXT); File localSegmentTarFile = new File(localOutputTempDir, segmentTarFileName); LOGGER.info("Tarring segment from: {} to: {}", localSegmentDir, localSegmentTarFile); TarGzCompressionUtils.createTarGzFile(localSegmentDir, localSegmentTarFile); @@ -190,7 +190,7 @@ protected void map(LongWritable key, Text value, Context context) { _spec.isOverwriteOutput()); // Create and upload segment metadata tar file - String metadataTarFileName = URLEncoder.encode(segmentName + Constants.METADATA_TAR_GZ_FILE_EXT, "UTF-8"); + String metadataTarFileName = URIUtils.encode(segmentName + Constants.METADATA_TAR_GZ_FILE_EXT); URI outputMetadataTarURI = relativeOutputPath.resolve(metadataTarFileName); if (outputDirFS.exists(outputMetadataTarURI) && (_spec.isOverwriteOutput() || !_spec.isCreateMetadataTarGz())) { LOGGER.info("Deleting existing metadata tar gz file: {}", outputMetadataTarURI); diff --git a/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-spark-2.4/src/main/java/org/apache/pinot/plugin/ingestion/batch/spark/SparkSegmentGenerationJobRunner.java b/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-spark-2.4/src/main/java/org/apache/pinot/plugin/ingestion/batch/spark/SparkSegmentGenerationJobRunner.java index 204884ab8d37..6ae7ff97ab23 100644 --- a/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-spark-2.4/src/main/java/org/apache/pinot/plugin/ingestion/batch/spark/SparkSegmentGenerationJobRunner.java +++ b/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-spark-2.4/src/main/java/org/apache/pinot/plugin/ingestion/batch/spark/SparkSegmentGenerationJobRunner.java @@ -22,7 +22,6 @@ import java.io.IOException; import java.io.Serializable; import java.net.URI; -import java.net.URLEncoder; import java.nio.file.Path; import java.nio.file.Paths; import java.util.ArrayList; @@ -34,6 +33,7 @@ import org.apache.commons.io.FileUtils; import org.apache.pinot.common.segment.generation.SegmentGenerationUtils; import org.apache.pinot.common.utils.TarGzCompressionUtils; +import org.apache.pinot.common.utils.URIUtils; import org.apache.pinot.plugin.ingestion.batch.common.SegmentGenerationJobUtils; import org.apache.pinot.plugin.ingestion.batch.common.SegmentGenerationTaskRunner; import org.apache.pinot.spi.env.PinotConfiguration; @@ -283,7 +283,7 @@ public void call(String pathAndIdx) // Tar segment directory to compress file File localSegmentDir = new File(localOutputTempDir, segmentName); - String segmentTarFileName = URLEncoder.encode(segmentName + Constants.TAR_GZ_FILE_EXT, "UTF-8"); + String segmentTarFileName = URIUtils.encode(segmentName + Constants.TAR_GZ_FILE_EXT); File localSegmentTarFile = new File(localOutputTempDir, segmentTarFileName); LOGGER.info("Tarring segment from: {} to: {}", localSegmentDir, localSegmentTarFile); TarGzCompressionUtils.createTarGzFile(localSegmentDir, 
localSegmentTarFile); @@ -299,7 +299,7 @@ public void call(String pathAndIdx) _spec.isOverwriteOutput()); // Create and upload segment metadata tar file - String metadataTarFileName = URLEncoder.encode(segmentName + Constants.METADATA_TAR_GZ_FILE_EXT, "UTF-8"); + String metadataTarFileName = URIUtils.encode(segmentName + Constants.METADATA_TAR_GZ_FILE_EXT); URI outputMetadataTarURI = relativeOutputPath.resolve(metadataTarFileName); if (finalOutputDirFS.exists(outputMetadataTarURI) && (_spec.isOverwriteOutput() diff --git a/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-spark-3/src/main/java/org/apache/pinot/plugin/ingestion/batch/spark3/SparkSegmentGenerationJobRunner.java b/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-spark-3/src/main/java/org/apache/pinot/plugin/ingestion/batch/spark3/SparkSegmentGenerationJobRunner.java index d595da66b5a6..ef1f6cea5d9a 100644 --- a/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-spark-3/src/main/java/org/apache/pinot/plugin/ingestion/batch/spark3/SparkSegmentGenerationJobRunner.java +++ b/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-spark-3/src/main/java/org/apache/pinot/plugin/ingestion/batch/spark3/SparkSegmentGenerationJobRunner.java @@ -22,7 +22,6 @@ import java.io.IOException; import java.io.Serializable; import java.net.URI; -import java.net.URLEncoder; import java.nio.file.Path; import java.nio.file.Paths; import java.util.ArrayList; @@ -34,6 +33,7 @@ import org.apache.commons.io.FileUtils; import org.apache.pinot.common.segment.generation.SegmentGenerationUtils; import org.apache.pinot.common.utils.TarGzCompressionUtils; +import org.apache.pinot.common.utils.URIUtils; import org.apache.pinot.plugin.ingestion.batch.common.SegmentGenerationJobUtils; import org.apache.pinot.plugin.ingestion.batch.common.SegmentGenerationTaskRunner; import org.apache.pinot.spi.env.PinotConfiguration; @@ -281,7 +281,7 @@ public void call(String pathAndIdx) // Tar segment directory to compress file File localSegmentDir = new File(localOutputTempDir, segmentName); - String segmentTarFileName = URLEncoder.encode(segmentName + Constants.TAR_GZ_FILE_EXT, "UTF-8"); + String segmentTarFileName = URIUtils.encode(segmentName + Constants.TAR_GZ_FILE_EXT); File localSegmentTarFile = new File(localOutputTempDir, segmentTarFileName); LOGGER.info("Tarring segment from: {} to: {}", localSegmentDir, localSegmentTarFile); TarGzCompressionUtils.createTarGzFile(localSegmentDir, localSegmentTarFile); @@ -297,7 +297,7 @@ public void call(String pathAndIdx) _spec.isOverwriteOutput()); // Create and upload segment metadata tar file - String metadataTarFileName = URLEncoder.encode(segmentName + Constants.METADATA_TAR_GZ_FILE_EXT, "UTF-8"); + String metadataTarFileName = URIUtils.encode(segmentName + Constants.METADATA_TAR_GZ_FILE_EXT); URI outputMetadataTarURI = relativeOutputPath.resolve(metadataTarFileName); if (finalOutputDirFS.exists(outputMetadataTarURI) && (_spec.isOverwriteOutput() || !_spec.isCreateMetadataTarGz())) { diff --git a/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-standalone/src/main/java/org/apache/pinot/plugin/ingestion/batch/standalone/SegmentGenerationJobRunner.java b/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-standalone/src/main/java/org/apache/pinot/plugin/ingestion/batch/standalone/SegmentGenerationJobRunner.java index 974316607768..a85247fc3d9b 100644 --- 
a/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-standalone/src/main/java/org/apache/pinot/plugin/ingestion/batch/standalone/SegmentGenerationJobRunner.java +++ b/pinot-plugins/pinot-batch-ingestion/pinot-batch-ingestion-standalone/src/main/java/org/apache/pinot/plugin/ingestion/batch/standalone/SegmentGenerationJobRunner.java @@ -22,7 +22,6 @@ import java.io.IOException; import java.net.URI; import java.net.URISyntaxException; -import java.net.URLEncoder; import java.nio.file.Paths; import java.util.ArrayList; import java.util.Collections; @@ -37,6 +36,7 @@ import org.apache.commons.io.FileUtils; import org.apache.pinot.common.segment.generation.SegmentGenerationUtils; import org.apache.pinot.common.utils.TarGzCompressionUtils; +import org.apache.pinot.common.utils.URIUtils; import org.apache.pinot.plugin.ingestion.batch.common.SegmentGenerationJobUtils; import org.apache.pinot.plugin.ingestion.batch.common.SegmentGenerationTaskRunner; import org.apache.pinot.segment.local.utils.ConsistentDataPushUtils; @@ -265,7 +265,7 @@ private void submitSegmentGenTask(File localTempDir, URI inputFileURI, int seqId String segmentName = taskRunner.run(); // Tar segment directory to compress file localSegmentDir = new File(localOutputTempDir, segmentName); - String segmentTarFileName = URLEncoder.encode(segmentName + Constants.TAR_GZ_FILE_EXT, "UTF-8"); + String segmentTarFileName = URIUtils.encode(segmentName + Constants.TAR_GZ_FILE_EXT); localSegmentTarFile = new File(localOutputTempDir, segmentTarFileName); LOGGER.info("Tarring segment from: {} to: {}", localSegmentDir, localSegmentTarFile); TarGzCompressionUtils.createTarGzFile(localSegmentDir, localSegmentTarFile); diff --git a/pinot-plugins/pinot-file-system/pinot-adls/src/main/java/org/apache/pinot/plugin/filesystem/AzurePinotFSUtil.java b/pinot-plugins/pinot-file-system/pinot-adls/src/main/java/org/apache/pinot/plugin/filesystem/AzurePinotFSUtil.java index 282b4346d6d6..c9665e7093b6 100644 --- a/pinot-plugins/pinot-file-system/pinot-adls/src/main/java/org/apache/pinot/plugin/filesystem/AzurePinotFSUtil.java +++ b/pinot-plugins/pinot-file-system/pinot-adls/src/main/java/org/apache/pinot/plugin/filesystem/AzurePinotFSUtil.java @@ -23,6 +23,7 @@ import java.io.IOException; import java.net.URI; import java.net.URLDecoder; +import java.nio.charset.StandardCharsets; /** @@ -41,12 +42,10 @@ private AzurePinotFSUtil() { * * @param uri a uri path * @return path in Azure Data Lake Gen2 format - * @throws IOException */ - public static String convertUriToAzureStylePath(URI uri) - throws IOException { + public static String convertUriToAzureStylePath(URI uri) { // Pinot side code uses `URLEncoder` when building uri - String path = URLDecoder.decode(uri.getRawPath(), "UTF-8"); + String path = URLDecoder.decode(uri.getRawPath(), StandardCharsets.UTF_8); if (path.startsWith(DIRECTORY_DELIMITER)) { path = path.substring(1); } diff --git a/pinot-plugins/pinot-file-system/pinot-adls/src/test/java/org/apache/pinot/plugin/filesystem/test/AzurePinotFSUtilTest.java b/pinot-plugins/pinot-file-system/pinot-adls/src/test/java/org/apache/pinot/plugin/filesystem/test/AzurePinotFSUtilTest.java index 80e68a659952..057a48147096 100644 --- a/pinot-plugins/pinot-file-system/pinot-adls/src/test/java/org/apache/pinot/plugin/filesystem/test/AzurePinotFSUtilTest.java +++ b/pinot-plugins/pinot-file-system/pinot-adls/src/test/java/org/apache/pinot/plugin/filesystem/test/AzurePinotFSUtilTest.java @@ -23,6 +23,7 @@ import java.io.IOException; import java.net.URI; import 
java.net.URLEncoder; +import java.nio.charset.StandardCharsets; import org.apache.pinot.plugin.filesystem.AzurePinotFSUtil; import org.testng.Assert; import org.testng.annotations.Test; @@ -67,11 +68,12 @@ public void testUriToAzureStylePath(String tableName, String segmentName, boolea throws Exception { // "/encode(dir)/encode(segment)" String expectedPath = String.join(File.separator, tableName, segmentName); - URI uri = createUri(URLEncoder.encode(tableName, "UTF-8"), URLEncoder.encode(segmentName, "UTF-8")); + URI uri = createUri(URLEncoder.encode(tableName, StandardCharsets.UTF_8), URLEncoder.encode(segmentName, + StandardCharsets.UTF_8)); checkUri(uri, expectedPath, urlEncoded); // "/encode(dir/segment)" - uri = createUri(URLEncoder.encode(String.join(File.separator, tableName, segmentName), "UTF-8")); + uri = createUri(URLEncoder.encode(String.join(File.separator, tableName, segmentName), StandardCharsets.UTF_8)); checkUri(uri, expectedPath, urlEncoded); // "/encode(dir/segment)" diff --git a/pinot-plugins/pinot-file-system/pinot-s3/src/main/java/org/apache/pinot/plugin/filesystem/S3PinotFS.java b/pinot-plugins/pinot-file-system/pinot-s3/src/main/java/org/apache/pinot/plugin/filesystem/S3PinotFS.java index e3a7daa55eef..4fc84f3541c2 100644 --- a/pinot-plugins/pinot-file-system/pinot-s3/src/main/java/org/apache/pinot/plugin/filesystem/S3PinotFS.java +++ b/pinot-plugins/pinot-file-system/pinot-s3/src/main/java/org/apache/pinot/plugin/filesystem/S3PinotFS.java @@ -26,7 +26,6 @@ import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; -import java.io.UnsupportedEncodingException; import java.net.URI; import java.net.URISyntaxException; import java.net.URLEncoder; @@ -325,12 +324,7 @@ private boolean isEmptyDirectory(URI uri) private boolean copyFile(URI srcUri, URI dstUri) throws IOException { try { - String encodedUrl = null; - try { - encodedUrl = URLEncoder.encode(srcUri.getHost() + srcUri.getPath(), StandardCharsets.UTF_8.toString()); - } catch (UnsupportedEncodingException e) { - throw new RuntimeException(e); - } + String encodedUrl = URLEncoder.encode(srcUri.getHost() + srcUri.getPath(), StandardCharsets.UTF_8); String dstPath = sanitizePath(dstUri.getPath()); CopyObjectRequest copyReq = generateCopyObjectRequest(encodedUrl, dstUri, dstPath, null); @@ -674,12 +668,7 @@ public boolean touch(URI uri) throws IOException { try { HeadObjectResponse s3ObjectMetadata = getS3ObjectMetadata(uri); - String encodedUrl = null; - try { - encodedUrl = URLEncoder.encode(uri.getHost() + uri.getPath(), StandardCharsets.UTF_8.toString()); - } catch (UnsupportedEncodingException e) { - throw new RuntimeException(e); - } + String encodedUrl = URLEncoder.encode(uri.getHost() + uri.getPath(), StandardCharsets.UTF_8); String path = sanitizePath(uri.getPath()); CopyObjectRequest request = generateCopyObjectRequest(encodedUrl, uri, path, diff --git a/pinot-plugins/pinot-input-format/pinot-parquet/src/test/java/org/apache/pinot/plugin/inputformat/parquet/ParquetNativeRecordReaderFullTest.java b/pinot-plugins/pinot-input-format/pinot-parquet/src/test/java/org/apache/pinot/plugin/inputformat/parquet/ParquetNativeRecordReaderFullTest.java index f0438a050835..6e84b19c9a12 100644 --- a/pinot-plugins/pinot-input-format/pinot-parquet/src/test/java/org/apache/pinot/plugin/inputformat/parquet/ParquetNativeRecordReaderFullTest.java +++ b/pinot-plugins/pinot-input-format/pinot-parquet/src/test/java/org/apache/pinot/plugin/inputformat/parquet/ParquetNativeRecordReaderFullTest.java @@ 
-20,6 +20,7 @@ import java.io.File; import java.net.URLDecoder; +import java.nio.charset.StandardCharsets; import org.apache.commons.io.FileUtils; import org.testng.annotations.Test; @@ -125,7 +126,8 @@ protected void testReadDataSet2() protected void testParquetFile(String filePath) throws Exception { - File dataFile = new File(URLDecoder.decode(getClass().getClassLoader().getResource(filePath).getFile(), "UTF-8")); + File dataFile = new File(URLDecoder.decode(getClass().getClassLoader().getResource(filePath).getFile(), + StandardCharsets.UTF_8)); ParquetNativeRecordReader recordReader = new ParquetNativeRecordReader(); recordReader.init(dataFile, null, null); while (recordReader.hasNext()) { diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/SegmentPushUtils.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/SegmentPushUtils.java index 0a6be1905b22..6ca93f24918e 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/SegmentPushUtils.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/SegmentPushUtils.java @@ -24,7 +24,6 @@ import java.io.Serializable; import java.net.URI; import java.net.URISyntaxException; -import java.net.URLDecoder; import java.nio.file.FileSystems; import java.nio.file.PathMatcher; import java.nio.file.Paths; @@ -44,6 +43,7 @@ import org.apache.pinot.common.utils.FileUploadDownloadClient; import org.apache.pinot.common.utils.SimpleHttpResponse; import org.apache.pinot.common.utils.TarGzCompressionUtils; +import org.apache.pinot.common.utils.URIUtils; import org.apache.pinot.common.utils.http.HttpClient; import org.apache.pinot.segment.spi.V1Constants; import org.apache.pinot.segment.spi.creator.name.SegmentNameUtils; @@ -407,7 +407,7 @@ public static File generateSegmentMetadataFile(PinotFS fileSystem, URI tarFileUR try { if (fileSystem instanceof LocalPinotFS) { // For local file system, we don't need to copy the tar file. 
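A note on the decode-before-File pattern in the surrounding hunks: java.io.File performs no URL decoding, so a raw, percent-encoded path (e.g. one built with URLEncoder, where ':' became %3A) must be decoded before being handed to the File constructor. A small self-contained sketch, with an illustrative path:

import java.io.File;
import java.net.URLDecoder;
import java.nio.charset.StandardCharsets;

public class RawPathDecodeSketch {
  public static void main(String[] args) {
    // Illustrative raw path; ':' was percent-encoded when the URI was built.
    String rawPath = "/output/dir/table_OFFLINE_2021-02-01_09%3A39%3A00.000_2.tar.gz";
    File tarFile = new File(URLDecoder.decode(rawPath, StandardCharsets.UTF_8));
    // Prints /output/dir/table_OFFLINE_2021-02-01_09:39:00.000_2.tar.gz
    System.out.println(tarFile.getPath());
  }
}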
- tarFile = new File(URLDecoder.decode(tarFileURI.getRawPath(), "UTF-8")); + tarFile = new File(URIUtils.decode(tarFileURI.getRawPath())); } else { // For other file systems, we need to download the file to local file system fileSystem.copyToLocalFile(tarFileURI, tarFile); diff --git a/pinot-server/src/main/java/org/apache/pinot/server/api/resources/TablesResource.java b/pinot-server/src/main/java/org/apache/pinot/server/api/resources/TablesResource.java index 1baa52ffa4c9..1c35dc21ece9 100644 --- a/pinot-server/src/main/java/org/apache/pinot/server/api/resources/TablesResource.java +++ b/pinot-server/src/main/java/org/apache/pinot/server/api/resources/TablesResource.java @@ -29,9 +29,7 @@ import io.swagger.annotations.SecurityDefinition; import io.swagger.annotations.SwaggerDefinition; import java.io.File; -import java.io.UnsupportedEncodingException; import java.net.URI; -import java.net.URLDecoder; import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.util.ArrayList; @@ -207,11 +205,7 @@ public String getSegmentMetadata( List decodedColumns = new ArrayList<>(columns.size()); for (String column : columns) { - try { - decodedColumns.add(URLDecoder.decode(column, StandardCharsets.UTF_8.name())); - } catch (UnsupportedEncodingException e) { - throw new RuntimeException(e.getCause()); - } + decodedColumns.add(URIUtils.decode(column)); } boolean allColumns = false; @@ -380,19 +374,11 @@ public String getSegmentMetadata( List columns, @Context HttpHeaders headers) { tableName = DatabaseUtils.translateTableName(tableName, headers); for (int i = 0; i < columns.size(); i++) { - try { - columns.set(i, URLDecoder.decode(columns.get(i), StandardCharsets.UTF_8.name())); - } catch (UnsupportedEncodingException e) { - throw new RuntimeException(e.getCause()); - } + columns.set(i, URIUtils.decode(columns.get(i))); } TableDataManager tableDataManager = ServerResourceUtils.checkGetTableDataManager(_serverInstance, tableName); - try { - segmentName = URLDecoder.decode(segmentName, StandardCharsets.UTF_8.name()); - } catch (UnsupportedEncodingException e) { - throw new RuntimeException(e.getCause()); - } + segmentName = URIUtils.decode(segmentName); SegmentDataManager segmentDataManager = tableDataManager.acquireSegment(segmentName); if (segmentDataManager == null) { throw new WebApplicationException(String.format("Table %s segments %s does not exist", tableName, segmentName), diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/filesystem/LocalPinotFS.java b/pinot-spi/src/main/java/org/apache/pinot/spi/filesystem/LocalPinotFS.java index 7fd8ca5906ea..b82490b5e028 100644 --- a/pinot-spi/src/main/java/org/apache/pinot/spi/filesystem/LocalPinotFS.java +++ b/pinot-spi/src/main/java/org/apache/pinot/spi/filesystem/LocalPinotFS.java @@ -23,9 +23,9 @@ import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; -import java.io.UnsupportedEncodingException; import java.net.URI; import java.net.URLDecoder; +import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; @@ -188,11 +188,7 @@ public InputStream open(URI uri) private static File toFile(URI uri) { // NOTE: Do not use new File(uri) because scheme might not exist and it does not decode '+' to ' ' // Do not use uri.getPath() because it does not decode '+' to ' ' - try { - return new File(URLDecoder.decode(uri.getRawPath(), "UTF-8")); - } catch (UnsupportedEncodingException e) { - throw new RuntimeException(e); - } + return new 
File(URLDecoder.decode(uri.getRawPath(), StandardCharsets.UTF_8)); } private static void copy(File srcFile, File dstFile, boolean recursive) diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/utils/builder/ControllerRequestURLBuilder.java b/pinot-spi/src/main/java/org/apache/pinot/spi/utils/builder/ControllerRequestURLBuilder.java index 3b22e04941f1..f4133fee59d2 100644 --- a/pinot-spi/src/main/java/org/apache/pinot/spi/utils/builder/ControllerRequestURLBuilder.java +++ b/pinot-spi/src/main/java/org/apache/pinot/spi/utils/builder/ControllerRequestURLBuilder.java @@ -18,7 +18,6 @@ */ package org.apache.pinot.spi.utils.builder; -import java.io.UnsupportedEncodingException; import java.net.URLEncoder; import java.nio.charset.StandardCharsets; import java.util.List; @@ -488,31 +487,27 @@ public String forInstanceReplace(String tableName, @Nullable InstancePartitionsT return url; } - public String forIngestFromFile(String tableNameWithType, String batchConfigMapStr) - throws UnsupportedEncodingException { + public String forIngestFromFile(String tableNameWithType, String batchConfigMapStr) { return String.format("%s?tableNameWithType=%s&batchConfigMapStr=%s", StringUtil.join("/", _baseUrl, "ingestFromFile"), tableNameWithType, - URLEncoder.encode(batchConfigMapStr, StandardCharsets.UTF_8.toString())); + URLEncoder.encode(batchConfigMapStr, StandardCharsets.UTF_8)); } - public String forIngestFromFile(String tableNameWithType, Map batchConfigMap) - throws UnsupportedEncodingException { + public String forIngestFromFile(String tableNameWithType, Map batchConfigMap) { String batchConfigMapStr = batchConfigMap.entrySet().stream().map(e -> String.format("\"%s\":\"%s\"", e.getKey(), e.getValue())) .collect(Collectors.joining(",", "{", "}")); return forIngestFromFile(tableNameWithType, batchConfigMapStr); } - public String forIngestFromURI(String tableNameWithType, String batchConfigMapStr, String sourceURIStr) - throws UnsupportedEncodingException { + public String forIngestFromURI(String tableNameWithType, String batchConfigMapStr, String sourceURIStr) { return String.format("%s?tableNameWithType=%s&batchConfigMapStr=%s&sourceURIStr=%s", StringUtil.join("/", _baseUrl, "ingestFromURI"), tableNameWithType, - URLEncoder.encode(batchConfigMapStr, StandardCharsets.UTF_8.toString()), - URLEncoder.encode(sourceURIStr, StandardCharsets.UTF_8.toString())); + URLEncoder.encode(batchConfigMapStr, StandardCharsets.UTF_8), + URLEncoder.encode(sourceURIStr, StandardCharsets.UTF_8)); } - public String forIngestFromURI(String tableNameWithType, Map batchConfigMap, String sourceURIStr) - throws UnsupportedEncodingException { + public String forIngestFromURI(String tableNameWithType, Map batchConfigMap, String sourceURIStr) { String batchConfigMapStr = batchConfigMap.entrySet().stream().map(e -> String.format("\"%s\":\"%s\"", e.getKey(), e.getValue())) .collect(Collectors.joining(",", "{", "}")); @@ -573,12 +568,7 @@ public String forUpdateTagsValidation() { } private static String encode(String s) { - try { - return URLEncoder.encode(s, "UTF-8"); - } catch (Exception e) { - // Should never happen - throw new RuntimeException(e); - } + return URLEncoder.encode(s, StandardCharsets.UTF_8); } public String forSegmentUpload() { From ad7068619a0c1c7152a707f4cb59fd8dbff2b06d Mon Sep 17 00:00:00 2001 From: David Cromberge Date: Wed, 1 May 2024 20:04:31 +0100 Subject: [PATCH 55/58] Enhancement: Sketch value aggregator performance (#13020) --- .../DistinctCountCPCSketchStarTreeV2Test.java | 24 +++- 
...ntIntegerSumTupleSketchStarTreeV2Test.java | 25 +++- ...istinctCountThetaSketchStarTreeV2Test.java | 23 ++- ...DistinctCountCPCSketchValueAggregator.java | 88 +++++++----- ...stinctCountThetaSketchValueAggregator.java | 133 +++++++++--------- .../IntegerTupleSketchValueAggregator.java | 90 ++++++++---- ...inctCountCPCSketchValueAggregatorTest.java | 64 +++++---- ...ctCountThetaSketchValueAggregatorTest.java | 89 +++++++----- ...IntegerTupleSketchValueAggregatorTest.java | 27 ++-- .../pinot/spi/utils/CommonConstants.java | 4 +- 10 files changed, 353 insertions(+), 214 deletions(-) diff --git a/pinot-core/src/test/java/org/apache/pinot/core/startree/v2/DistinctCountCPCSketchStarTreeV2Test.java b/pinot-core/src/test/java/org/apache/pinot/core/startree/v2/DistinctCountCPCSketchStarTreeV2Test.java index 3732d3553b57..c7129a71f21b 100644 --- a/pinot-core/src/test/java/org/apache/pinot/core/startree/v2/DistinctCountCPCSketchStarTreeV2Test.java +++ b/pinot-core/src/test/java/org/apache/pinot/core/startree/v2/DistinctCountCPCSketchStarTreeV2Test.java @@ -21,6 +21,7 @@ import java.util.Collections; import java.util.Random; import org.apache.datasketches.cpc.CpcSketch; +import org.apache.datasketches.cpc.CpcUnion; import org.apache.pinot.segment.local.aggregator.DistinctCountCPCSketchValueAggregator; import org.apache.pinot.segment.local.aggregator.ValueAggregator; import org.apache.pinot.spi.data.FieldSpec.DataType; @@ -28,10 +29,10 @@ import static org.testng.Assert.assertEquals; -public class DistinctCountCPCSketchStarTreeV2Test extends BaseStarTreeV2Test { +public class DistinctCountCPCSketchStarTreeV2Test extends BaseStarTreeV2Test { @Override - ValueAggregator getValueAggregator() { + ValueAggregator getValueAggregator() { return new DistinctCountCPCSketchValueAggregator(Collections.emptyList()); } @@ -46,7 +47,22 @@ Object getRandomRawValue(Random random) { } @Override - void assertAggregatedValue(CpcSketch starTreeResult, CpcSketch nonStarTreeResult) { - assertEquals((long) starTreeResult.getEstimate(), (long) nonStarTreeResult.getEstimate()); + void assertAggregatedValue(Object starTreeResult, Object nonStarTreeResult) { + // Use error at (lgK=12, stddev=2) from: + // https://datasketches.apache.org/docs/CPC/CpcPerformance.html + double delta = (1 << 12) * 0.01; + assertEquals((long) toSketch(starTreeResult).getEstimate(), (long) toSketch(nonStarTreeResult).getEstimate(), + delta); + } + + private CpcSketch toSketch(Object value) { + if (value instanceof CpcUnion) { + return ((CpcUnion) value).getResult(); + } else if (value instanceof CpcSketch) { + return (CpcSketch) value; + } else { + throw new IllegalStateException( + "Unsupported data type for CPC Sketch aggregation: " + value.getClass().getSimpleName()); + } } } diff --git a/pinot-core/src/test/java/org/apache/pinot/core/startree/v2/DistinctCountIntegerSumTupleSketchStarTreeV2Test.java b/pinot-core/src/test/java/org/apache/pinot/core/startree/v2/DistinctCountIntegerSumTupleSketchStarTreeV2Test.java index b9c52bf95875..d10efb94595a 100644 --- a/pinot-core/src/test/java/org/apache/pinot/core/startree/v2/DistinctCountIntegerSumTupleSketchStarTreeV2Test.java +++ b/pinot-core/src/test/java/org/apache/pinot/core/startree/v2/DistinctCountIntegerSumTupleSketchStarTreeV2Test.java @@ -20,6 +20,7 @@ import java.util.Random; import org.apache.datasketches.tuple.Sketch; +import org.apache.datasketches.tuple.Union; import org.apache.datasketches.tuple.aninteger.IntegerSketch; import org.apache.datasketches.tuple.aninteger.IntegerSummary; import 
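The CPC test now tolerates approximate results because the aggregator keeps a CpcUnion accumulator instead of compacting to a CpcSketch after every update. A self-contained sketch of the union flow these tests exercise; the lgK value mirrors the test's error-bound comment, and the item counts are illustrative.

import org.apache.datasketches.cpc.CpcSketch;
import org.apache.datasketches.cpc.CpcUnion;

public class CpcUnionSketch {
  public static void main(String[] args) {
    int lgK = 12;
    CpcSketch a = new CpcSketch(lgK);
    CpcSketch b = new CpcSketch(lgK);
    for (long i = 0; i < 1000; i++) {
      a.update(i);       // items 0..999
      b.update(i + 500); // items 500..1499, overlapping a
    }
    // Accumulate into a union; getResult() is only called when an estimate is needed.
    CpcUnion union = new CpcUnion(lgK);
    union.update(a);
    union.update(b);
    System.out.println(union.getResult().getEstimate()); // ~1500 (approximate)
  }
}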
org.apache.pinot.core.common.ObjectSerDeUtils; @@ -30,11 +31,10 @@ import static org.testng.Assert.assertEquals; -public class DistinctCountIntegerSumTupleSketchStarTreeV2Test - extends BaseStarTreeV2Test> { +public class DistinctCountIntegerSumTupleSketchStarTreeV2Test extends BaseStarTreeV2Test { @Override - ValueAggregator> getValueAggregator() { + ValueAggregator getValueAggregator() { return new IntegerTupleSketchValueAggregator(IntegerSummary.Mode.Sum); } @@ -51,7 +51,22 @@ byte[] getRandomRawValue(Random random) { } @Override - void assertAggregatedValue(Sketch starTreeResult, Sketch nonStarTreeResult) { - assertEquals(starTreeResult.getEstimate(), nonStarTreeResult.getEstimate()); + void assertAggregatedValue(Object starTreeResult, Object nonStarTreeResult) { + // Use error at (lgK=14, stddev=2) from: + // https://datasketches.apache.org/docs/Theta/ThetaErrorTable.html + double delta = (1 << 14) * 0.01563; + assertEquals(toSketch(starTreeResult).getEstimate(), toSketch(nonStarTreeResult).getEstimate(), delta); + } + + @SuppressWarnings("unchecked") + private Sketch toSketch(Object value) { + if (value instanceof Union) { + return ((Union) value).getResult(); + } else if (value instanceof Sketch) { + return ((Sketch) value); + } else { + throw new IllegalStateException( + "Unsupported data type for Integer Tuple Sketch aggregation: " + value.getClass().getSimpleName()); + } } } diff --git a/pinot-core/src/test/java/org/apache/pinot/core/startree/v2/DistinctCountThetaSketchStarTreeV2Test.java b/pinot-core/src/test/java/org/apache/pinot/core/startree/v2/DistinctCountThetaSketchStarTreeV2Test.java index 4e924c9d0c65..9fd34dc8c075 100644 --- a/pinot-core/src/test/java/org/apache/pinot/core/startree/v2/DistinctCountThetaSketchStarTreeV2Test.java +++ b/pinot-core/src/test/java/org/apache/pinot/core/startree/v2/DistinctCountThetaSketchStarTreeV2Test.java @@ -20,6 +20,7 @@ import java.util.Random; import org.apache.datasketches.theta.Sketch; +import org.apache.datasketches.theta.Union; import org.apache.pinot.segment.local.aggregator.DistinctCountThetaSketchValueAggregator; import org.apache.pinot.segment.local.aggregator.ValueAggregator; import org.apache.pinot.spi.data.FieldSpec.DataType; @@ -27,10 +28,10 @@ import static org.testng.Assert.assertEquals; -public class DistinctCountThetaSketchStarTreeV2Test extends BaseStarTreeV2Test { +public class DistinctCountThetaSketchStarTreeV2Test extends BaseStarTreeV2Test { @Override - ValueAggregator getValueAggregator() { + ValueAggregator getValueAggregator() { return new DistinctCountThetaSketchValueAggregator(); } @@ -45,7 +46,21 @@ Object getRandomRawValue(Random random) { } @Override - void assertAggregatedValue(Sketch starTreeResult, Sketch nonStarTreeResult) { - assertEquals(starTreeResult.getEstimate(), nonStarTreeResult.getEstimate()); + void assertAggregatedValue(Object starTreeResult, Object nonStarTreeResult) { + // Use error at (lgK=14, stddev=2) from: + // https://datasketches.apache.org/docs/Theta/ThetaErrorTable.html + double delta = (1 << 14) * 0.01563; + assertEquals(toSketch(starTreeResult).getEstimate(), toSketch(nonStarTreeResult).getEstimate(), delta); + } + + private Sketch toSketch(Object value) { + if (value instanceof Union) { + return ((Union) value).getResult(); + } else if (value instanceof Sketch) { + return (Sketch) value; + } else { + throw new IllegalStateException( + "Unsupported data type for Theta Sketch aggregation: " + value.getClass().getSimpleName()); + } } } diff --git 
a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/aggregator/DistinctCountCPCSketchValueAggregator.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/aggregator/DistinctCountCPCSketchValueAggregator.java index 7ac3090188da..203b900a32da 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/aggregator/DistinctCountCPCSketchValueAggregator.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/aggregator/DistinctCountCPCSketchValueAggregator.java @@ -28,13 +28,11 @@ import org.apache.pinot.spi.utils.CommonConstants; -public class DistinctCountCPCSketchValueAggregator implements ValueAggregator { +public class DistinctCountCPCSketchValueAggregator implements ValueAggregator { public static final DataType AGGREGATED_VALUE_TYPE = DataType.BYTES; private final int _lgK; - private int _maxByteSize; - public DistinctCountCPCSketchValueAggregator(List arguments) { // length 1 means we use the Helix default if (arguments.size() <= 1) { @@ -55,64 +53,61 @@ public DataType getAggregatedValueType() { } @Override - public CpcSketch getInitialAggregatedValue(Object rawValue) { - CpcSketch initialValue; + public Object getInitialAggregatedValue(Object rawValue) { + CpcUnion cpcUnion = new CpcUnion(_lgK); if (rawValue instanceof byte[]) { // Serialized Sketch byte[] bytes = (byte[]) rawValue; - initialValue = deserializeAggregatedValue(bytes); - _maxByteSize = Math.max(_maxByteSize, bytes.length); + cpcUnion.update(deserializeAggregatedValue(bytes)); } else if (rawValue instanceof byte[][]) { // Multiple Serialized Sketches byte[][] serializedSketches = (byte[][]) rawValue; - CpcUnion union = new CpcUnion(_lgK); for (byte[] bytes : serializedSketches) { - union.update(deserializeAggregatedValue(bytes)); + cpcUnion.update(deserializeAggregatedValue(bytes)); } - initialValue = union.getResult(); - updateMaxByteSize(initialValue); } else { - initialValue = empty(); - addObjectToSketch(rawValue, initialValue); - updateMaxByteSize(initialValue); + CpcSketch pristineSketch = empty(); + addObjectToSketch(rawValue, pristineSketch); + cpcUnion.update(pristineSketch); } - return initialValue; + return cpcUnion; } @Override - public CpcSketch applyRawValue(CpcSketch value, Object rawValue) { + public Object applyRawValue(Object aggregatedValue, Object rawValue) { + CpcUnion cpcUnion = extractUnion(aggregatedValue); if (rawValue instanceof byte[]) { byte[] bytes = (byte[]) rawValue; - CpcSketch sketch = union(value, deserializeAggregatedValue(bytes)); - updateMaxByteSize(sketch); - return sketch; + CpcSketch sketch = deserializeAggregatedValue(bytes); + cpcUnion.update(sketch); } else { - addObjectToSketch(rawValue, value); - updateMaxByteSize(value); - return value; + CpcSketch pristineSketch = empty(); + addObjectToSketch(rawValue, pristineSketch); + cpcUnion.update(pristineSketch); } + return cpcUnion; } @Override - public CpcSketch applyAggregatedValue(CpcSketch value, CpcSketch aggregatedValue) { - CpcSketch result = union(value, aggregatedValue); - updateMaxByteSize(result); - return result; + public Object applyAggregatedValue(Object value, Object aggregatedValue) { + CpcUnion cpcUnion = extractUnion(aggregatedValue); + CpcSketch sketch = extractSketch(value); + cpcUnion.update(sketch); + return cpcUnion; } @Override - public CpcSketch cloneAggregatedValue(CpcSketch value) { + public Object cloneAggregatedValue(Object value) { return deserializeAggregatedValue(serializeAggregatedValue(value)); } @Override public int 
getMaxAggregatedValueByteSize() { - // NOTE: For aggregated metrics, initial aggregated value might have not been generated. Returns the byte size - // based on lgK. - return _maxByteSize > 0 ? _maxByteSize : CpcSketch.getMaxSerializedBytes(_lgK); + return CpcSketch.getMaxSerializedBytes(_lgK); } @Override - public byte[] serializeAggregatedValue(CpcSketch value) { - return CustomSerDeUtils.DATA_SKETCH_CPC_SER_DE.serialize(value); + public byte[] serializeAggregatedValue(Object value) { + CpcSketch sketch = extractSketch(value); + return CustomSerDeUtils.DATA_SKETCH_CPC_SER_DE.serialize(sketch); } @Override @@ -181,9 +176,32 @@ private void addObjectsToSketch(Object[] rawValues, CpcSketch sketch) { } } - private void updateMaxByteSize(CpcSketch sketch) { - if (sketch != null) { - _maxByteSize = Math.max(_maxByteSize, sketch.toByteArray().length); + private CpcUnion extractUnion(Object value) { + if (value == null) { + return new CpcUnion(_lgK); + } else if (value instanceof CpcUnion) { + return (CpcUnion) value; + } else if (value instanceof CpcSketch) { + CpcSketch sketch = (CpcSketch) value; + CpcUnion cpcUnion = new CpcUnion(_lgK); + cpcUnion.update(sketch); + return cpcUnion; + } else { + throw new IllegalStateException( + "Unsupported data type for CPC Sketch aggregation: " + value.getClass().getSimpleName()); + } + } + + private CpcSketch extractSketch(Object value) { + if (value == null) { + return empty(); + } else if (value instanceof CpcUnion) { + return ((CpcUnion) value).getResult(); + } else if (value instanceof CpcSketch) { + return (CpcSketch) value; + } else { + throw new IllegalStateException( + "Unsupported data type for CPC Sketch aggregation: " + value.getClass().getSimpleName()); } } diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/aggregator/DistinctCountThetaSketchValueAggregator.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/aggregator/DistinctCountThetaSketchValueAggregator.java index f36f9a00e936..3222265f973c 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/aggregator/DistinctCountThetaSketchValueAggregator.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/aggregator/DistinctCountThetaSketchValueAggregator.java @@ -18,29 +18,26 @@ */ package org.apache.pinot.segment.local.aggregator; -import java.util.Arrays; -import java.util.stream.StreamSupport; +import org.apache.datasketches.theta.SetOperationBuilder; import org.apache.datasketches.theta.Sketch; -import org.apache.datasketches.theta.Sketches; import org.apache.datasketches.theta.Union; -import org.apache.datasketches.theta.UpdateSketch; import org.apache.pinot.segment.local.utils.CustomSerDeUtils; import org.apache.pinot.segment.spi.AggregationFunctionType; import org.apache.pinot.spi.data.FieldSpec.DataType; import org.apache.pinot.spi.utils.CommonConstants; -public class DistinctCountThetaSketchValueAggregator implements ValueAggregator { +public class DistinctCountThetaSketchValueAggregator implements ValueAggregator { public static final DataType AGGREGATED_VALUE_TYPE = DataType.BYTES; - private final Union _union; + private final SetOperationBuilder _setOperationBuilder; // This changes a lot similar to the Bitmap aggregator private int _maxByteSize; public DistinctCountThetaSketchValueAggregator() { - // TODO: Handle configurable nominal entries for StarTreeBuilder - _union = Union.builder().setNominalEntries(CommonConstants.Helix.DEFAULT_THETA_SKETCH_NOMINAL_ENTRIES).buildUnion(); + 
_setOperationBuilder = + Union.builder().setNominalEntries(CommonConstants.Helix.DEFAULT_THETA_SKETCH_NOMINAL_ENTRIES); } @Override @@ -53,51 +50,49 @@ public DataType getAggregatedValueType() { return AGGREGATED_VALUE_TYPE; } - // Utility method to create a theta sketch with one item in it - private Sketch singleItemSketch(Object rawValue) { - // TODO: Handle configurable nominal entries for StarTreeBuilder - UpdateSketch sketch = - Sketches.updateSketchBuilder().setNominalEntries(CommonConstants.Helix.DEFAULT_THETA_SKETCH_NOMINAL_ENTRIES) - .build(); + private void singleItemUpdate(Union thetaUnion, Object rawValue) { if (rawValue instanceof String) { - sketch.update((String) rawValue); + thetaUnion.update((String) rawValue); } else if (rawValue instanceof Integer) { - sketch.update((Integer) rawValue); + thetaUnion.update((Integer) rawValue); } else if (rawValue instanceof Long) { - sketch.update((Long) rawValue); + thetaUnion.update((Long) rawValue); } else if (rawValue instanceof Double) { - sketch.update((Double) rawValue); + thetaUnion.update((Double) rawValue); } else if (rawValue instanceof Float) { - sketch.update((Float) rawValue); + thetaUnion.update((Float) rawValue); } else if (rawValue instanceof Object[]) { - addObjectsToSketch((Object[]) rawValue, sketch); + multiItemUpdate(thetaUnion, (Object[]) rawValue); + } else if (rawValue instanceof Sketch) { + thetaUnion.union((Sketch) rawValue); + } else if (rawValue instanceof Union) { + thetaUnion.union(((Union) rawValue).getResult()); } else { throw new IllegalStateException( "Unsupported data type for Theta Sketch aggregation: " + rawValue.getClass().getSimpleName()); } - return sketch.compact(); } - private void addObjectsToSketch(Object[] rawValues, UpdateSketch updateSketch) { + private void multiItemUpdate(Union thetaUnion, Object[] rawValues) { if (rawValues instanceof String[]) { for (String s : (String[]) rawValues) { - updateSketch.update(s); + thetaUnion.update(s); } } else if (rawValues instanceof Integer[]) { for (Integer i : (Integer[]) rawValues) { - updateSketch.update(i); + thetaUnion.update(i); } } else if (rawValues instanceof Long[]) { for (Long l : (Long[]) rawValues) { - updateSketch.update(l); + thetaUnion.update(l); } } else if (rawValues instanceof Double[]) { for (Double d : (Double[]) rawValues) { - updateSketch.update(d); + thetaUnion.update(d); } } else if (rawValues instanceof Float[]) { for (Float f : (Float[]) rawValues) { - updateSketch.update(f); + thetaUnion.update(f); } } else { throw new IllegalStateException( @@ -105,59 +100,64 @@ private void addObjectsToSketch(Object[] rawValues, UpdateSketch updateSketch) { } } - // Utility method to merge two sketches - private Sketch union(Sketch left, Sketch right) { - return _union.union(left, right); - } - - // Utility method to make an empty sketch - private Sketch empty() { - // TODO: Handle configurable nominal entries for StarTreeBuilder - return Sketches.updateSketchBuilder().setNominalEntries(CommonConstants.Helix.DEFAULT_THETA_SKETCH_NOMINAL_ENTRIES) - .build().compact(); - } - @Override - public Sketch getInitialAggregatedValue(Object rawValue) { - Sketch initialValue; + public Object getInitialAggregatedValue(Object rawValue) { + Union thetaUnion = _setOperationBuilder.buildUnion(); if (rawValue instanceof byte[]) { // Serialized Sketch byte[] bytes = (byte[]) rawValue; - initialValue = deserializeAggregatedValue(bytes); - _maxByteSize = Math.max(_maxByteSize, bytes.length); + Sketch sketch = deserializeAggregatedValue(bytes); + 
thetaUnion.union(sketch); } else if (rawValue instanceof byte[][]) { // Multiple Serialized Sketches byte[][] serializedSketches = (byte[][]) rawValue; - initialValue = StreamSupport.stream(Arrays.stream(serializedSketches).spliterator(), false) - .map(this::deserializeAggregatedValue).reduce(this::union).orElseGet(this::empty); - _maxByteSize = Math.max(_maxByteSize, initialValue.getCurrentBytes()); + for (byte[] sketchBytes : serializedSketches) { + thetaUnion.union(deserializeAggregatedValue(sketchBytes)); + } } else { - initialValue = singleItemSketch(rawValue); - _maxByteSize = Math.max(_maxByteSize, initialValue.getCurrentBytes()); + singleItemUpdate(thetaUnion, rawValue); + } + _maxByteSize = Math.max(_maxByteSize, thetaUnion.getCurrentBytes()); + return thetaUnion; + } + + private Union extractUnion(Object value) { + if (value == null) { + return _setOperationBuilder.buildUnion(); + } else if (value instanceof Union) { + return (Union) value; + } else if (value instanceof Sketch) { + Sketch sketch = (Sketch) value; + Union thetaUnion = _setOperationBuilder.buildUnion(); + thetaUnion.union(sketch); + return thetaUnion; + } else { + throw new IllegalStateException( + "Unsupported data type for Theta Sketch aggregation: " + value.getClass().getSimpleName()); } - return initialValue; } @Override - public Sketch applyRawValue(Sketch value, Object rawValue) { - Sketch right; + public Object applyRawValue(Object aggregatedValue, Object rawValue) { + Union thetaUnion = extractUnion(aggregatedValue); if (rawValue instanceof byte[]) { - right = deserializeAggregatedValue((byte[]) rawValue); + Sketch sketch = deserializeAggregatedValue((byte[]) rawValue); + thetaUnion.union(sketch); } else { - right = singleItemSketch(rawValue); + singleItemUpdate(thetaUnion, rawValue); } - Sketch result = union(value, right).compact(); - _maxByteSize = Math.max(_maxByteSize, result.getCurrentBytes()); - return result; + _maxByteSize = Math.max(_maxByteSize, thetaUnion.getCurrentBytes()); + return thetaUnion; } @Override - public Sketch applyAggregatedValue(Sketch value, Sketch aggregatedValue) { - Sketch result = union(value, aggregatedValue); - _maxByteSize = Math.max(_maxByteSize, result.getCurrentBytes()); - return result; + public Object applyAggregatedValue(Object value, Object aggregatedValue) { + Union thetaUnion = extractUnion(aggregatedValue); + singleItemUpdate(thetaUnion, value); + _maxByteSize = Math.max(_maxByteSize, thetaUnion.getCurrentBytes()); + return thetaUnion; } @Override - public Sketch cloneAggregatedValue(Sketch value) { + public Object cloneAggregatedValue(Object value) { return deserializeAggregatedValue(serializeAggregatedValue(value)); } @@ -167,8 +167,15 @@ public int getMaxAggregatedValueByteSize() { } @Override - public byte[] serializeAggregatedValue(Sketch value) { - return CustomSerDeUtils.DATA_SKETCH_THETA_SER_DE.serialize(value); + public byte[] serializeAggregatedValue(Object value) { + if (value instanceof Union) { + return CustomSerDeUtils.DATA_SKETCH_THETA_SER_DE.serialize(((Union) value).getResult()); + } else if (value instanceof Sketch) { + return CustomSerDeUtils.DATA_SKETCH_THETA_SER_DE.serialize(((Sketch) value)); + } else { + throw new IllegalStateException( + "Unsupported data type for Theta Sketch aggregation: " + value.getClass().getSimpleName()); + } } @Override diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/aggregator/IntegerTupleSketchValueAggregator.java 
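The theta aggregator now follows the same accumulate-then-compact shape: raw values go straight into a single Union built from the shared SetOperationBuilder, pre-built sketches are merged with union(Sketch), and compaction is deferred to serialization. Reduced to its essentials, the flow looks like this (a minimal sketch with nominal entries matching DEFAULT_THETA_SKETCH_NOMINAL_ENTRIES; the class name is illustrative):

import org.apache.datasketches.theta.Sketch;
import org.apache.datasketches.theta.Union;

public class ThetaUnionExample {
  public static void main(String[] args) {
    // One union per aggregation, built the same way the aggregator does.
    Union union = Union.builder().setNominalEntries(16384).buildUnion();
    // Raw values go straight into the union, mirroring singleItemUpdate.
    union.update("hello");
    union.update("world");
    // Pre-aggregated sketches are merged with union(Sketch), not rebuilt.
    Union other = Union.builder().setNominalEntries(16384).buildUnion();
    other.update("world");
    union.union(other.getResult());
    // Compact only when the result is needed, as serializeAggregatedValue does.
    Sketch result = union.getResult();
    System.out.println(Math.round(result.getEstimate())); // 2: {hello, world}
  }
}

Note that _maxByteSize now tracks the live union's footprint via getCurrentBytes(), which is what the updated tests assert against.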
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/aggregator/IntegerTupleSketchValueAggregator.java index 1440e738d1d4..87d5c0f97e0c 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/aggregator/IntegerTupleSketchValueAggregator.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/aggregator/IntegerTupleSketchValueAggregator.java @@ -25,17 +25,19 @@ import org.apache.pinot.segment.local.utils.CustomSerDeUtils; import org.apache.pinot.segment.spi.AggregationFunctionType; import org.apache.pinot.spi.data.FieldSpec.DataType; +import org.apache.pinot.spi.utils.CommonConstants; -public class IntegerTupleSketchValueAggregator implements ValueAggregator> { +@SuppressWarnings("unchecked") +public class IntegerTupleSketchValueAggregator implements ValueAggregator { public static final DataType AGGREGATED_VALUE_TYPE = DataType.BYTES; - // This changes a lot similar to the Bitmap aggregator - private int _maxByteSize; + private final int _nominalEntries; private final IntegerSummary.Mode _mode; public IntegerTupleSketchValueAggregator(IntegerSummary.Mode mode) { + _nominalEntries = 1 << CommonConstants.Helix.DEFAULT_TUPLE_SKETCH_LGK; _mode = mode; } @@ -49,47 +51,85 @@ public DataType getAggregatedValueType() { return AGGREGATED_VALUE_TYPE; } - // Utility method to merge two sketches - private Sketch union(Sketch a, Sketch b) { - return new Union<>(new IntegerSummarySetOperations(_mode, _mode)).union(a, b); - } - @Override - public Sketch getInitialAggregatedValue(byte[] rawValue) { + public Object getInitialAggregatedValue(byte[] rawValue) { Sketch initialValue = deserializeAggregatedValue(rawValue); - _maxByteSize = Math.max(_maxByteSize, rawValue.length); - return initialValue; + Union tupleUnion = new Union<>(_nominalEntries, new IntegerSummarySetOperations(_mode, _mode)); + tupleUnion.union(initialValue); + return tupleUnion; + } + + private Union extractUnion(Object value) { + if (value == null) { + return new Union<>(_nominalEntries, new IntegerSummarySetOperations(_mode, _mode)); + } else if (value instanceof Union) { + return (Union) value; + } else if (value instanceof Sketch) { + Sketch sketch = (Sketch) value; + Union tupleUnion = new Union<>(_nominalEntries, new IntegerSummarySetOperations(_mode, _mode)); + tupleUnion.union(sketch); + return tupleUnion; + } else { + throw new IllegalStateException( + "Unsupported data type for Integer Tuple Sketch aggregation: " + value.getClass().getSimpleName()); + } + } + + private Sketch extractSketch(Object value) { + if (value instanceof Union) { + return ((Union) value).getResult(); + } else if (value instanceof Sketch) { + return ((Sketch) value); + } else { + throw new IllegalStateException( + "Unsupported data type for Integer Tuple Sketch aggregation: " + value.getClass().getSimpleName()); + } } @Override - public Sketch applyRawValue(Sketch value, byte[] rawValue) { - Sketch right = deserializeAggregatedValue(rawValue); - Sketch result = union(value, right).compact(); - _maxByteSize = Math.max(_maxByteSize, result.toByteArray().length); - return result; + public Object applyRawValue(Object aggregatedValue, byte[] rawValue) { + Union tupleUnion = extractUnion(aggregatedValue); + tupleUnion.union(deserializeAggregatedValue(rawValue)); + return tupleUnion; } @Override - public Sketch applyAggregatedValue(Sketch value, - Sketch aggregatedValue) { - Sketch result = union(value, aggregatedValue); - _maxByteSize = Math.max(_maxByteSize, result.toByteArray().length); - return result; 
+ public Object applyAggregatedValue(Object value, Object aggregatedValue) { + Union tupleUnion = extractUnion(aggregatedValue); + Sketch sketch = extractSketch(value); + tupleUnion.union(sketch); + return tupleUnion; } @Override - public Sketch cloneAggregatedValue(Sketch value) { + public Object cloneAggregatedValue(Object value) { return deserializeAggregatedValue(serializeAggregatedValue(value)); } + /** + * Returns the maximum number of storage bytes required for a Compact Integer Tuple Sketch with the given + * number of actual entries. Note that this assumes the worst case of the sketch in + * estimation mode, which requires storing theta and count. + * @return the maximum number of storage bytes required for a Compact Integer Tuple Sketch with the given number + * of entries. + */ @Override public int getMaxAggregatedValueByteSize() { - return _maxByteSize; + if (_nominalEntries == 0) { + return 8; + } + if (_nominalEntries == 1) { + return 16; + } + int longSizeInBytes = Long.BYTES; + int intSizeInBytes = Integer.BYTES; + return (_nominalEntries * (longSizeInBytes + intSizeInBytes)) + 24; } @Override - public byte[] serializeAggregatedValue(Sketch value) { - return CustomSerDeUtils.DATA_SKETCH_INT_TUPLE_SER_DE.serialize(value); + public byte[] serializeAggregatedValue(Object value) { + Sketch sketch = extractSketch(value); + return sketch.compact().toByteArray(); } @Override diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/aggregator/DistinctCountCPCSketchValueAggregatorTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/aggregator/DistinctCountCPCSketchValueAggregatorTest.java index b8dcb701f5ed..c9bc80f8264f 100644 --- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/aggregator/DistinctCountCPCSketchValueAggregatorTest.java +++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/aggregator/DistinctCountCPCSketchValueAggregatorTest.java @@ -34,19 +34,18 @@ public class DistinctCountCPCSketchValueAggregatorTest { @Test public void initialShouldCreateSingleItemSketch() { DistinctCountCPCSketchValueAggregator agg = new DistinctCountCPCSketchValueAggregator(Collections.emptyList()); - assertEquals(agg.getInitialAggregatedValue("hello world").getEstimate(), 1.0); + assertEquals(toSketch(agg.getInitialAggregatedValue("hello world")).getEstimate(), 1.0); } @Test public void initialShouldParseASketch() { - CpcSketch input = new CpcSketch(); - IntStream.range(0, 1000).forEach(input::update); + CpcSketch input = new CpcSketch(12); + IntStream.range(0, 100).forEach(input::update); DistinctCountCPCSketchValueAggregator agg = new DistinctCountCPCSketchValueAggregator(Collections.emptyList()); byte[] bytes = agg.serializeAggregatedValue(input); - assertEquals(agg.getInitialAggregatedValue(bytes).getEstimate(), input.getEstimate()); - - // and should update the max size - assertEquals(agg.getMaxAggregatedValueByteSize(), input.toByteArray().length); + assertEquals(Math.round(toSketch(agg.getInitialAggregatedValue(bytes)).getEstimate()), + Math.round(input.getEstimate())); + assertEquals(agg.getMaxAggregatedValueByteSize(), 2580); } @Test @@ -57,7 +56,7 @@ public void initialShouldParseMultiValueSketches() { input2.update("world"); DistinctCountCPCSketchValueAggregator agg = new DistinctCountCPCSketchValueAggregator(Collections.emptyList()); byte[][] bytes = {agg.serializeAggregatedValue(input1), agg.serializeAggregatedValue(input2)}; - 
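The new bound replaces the old observed-maximum bookkeeping with a closed-form worst case: each retained entry costs an 8-byte hash plus a 4-byte integer summary, plus a fixed 24 bytes of preamble (theta and count in estimation mode). With the default lgK of 14, the numbers work out as follows (a worked example; the variable names are only for readability):

public class TupleSketchSizeBound {
  public static void main(String[] args) {
    int nominalEntries = 1 << 14;                  // DEFAULT_TUPLE_SKETCH_LGK = 14
    int perEntry = Long.BYTES + Integer.BYTES;     // 8-byte hash + 4-byte summary
    int maxBytes = nominalEntries * perEntry + 24; // plus the 24-byte preamble
    System.out.println(maxBytes);                  // 196632
  }
}

196632 is exactly the constant asserted in the updated IntegerTupleSketchValueAggregatorTest.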
assertEquals(Math.round(agg.getInitialAggregatedValue(bytes).getEstimate()), 2); + assertEquals(Math.round(toSketch(agg.getInitialAggregatedValue(bytes)).getEstimate()), 2); } @Test @@ -67,7 +66,7 @@ public void applyAggregatedValueShouldUnion() { CpcSketch input2 = new CpcSketch(); IntStream.range(0, 1000).forEach(input2::update); DistinctCountCPCSketchValueAggregator agg = new DistinctCountCPCSketchValueAggregator(Collections.emptyList()); - CpcSketch result = agg.applyAggregatedValue(input1, input2); + CpcSketch result = toSketch(agg.applyAggregatedValue(input1, input2)); CpcUnion union = new CpcUnion(CommonConstants.Helix.DEFAULT_CPC_SKETCH_LGK); union.update(input1); @@ -75,9 +74,7 @@ public void applyAggregatedValueShouldUnion() { CpcSketch merged = union.getResult(); assertEquals(result.getEstimate(), merged.getEstimate()); - - // and should update the max size - assertEquals(agg.getMaxAggregatedValueByteSize(), merged.toByteArray().length); + assertEquals(agg.getMaxAggregatedValueByteSize(), 2580); } @Test @@ -88,7 +85,7 @@ public void applyRawValueShouldUnion() { IntStream.range(0, 1000).forEach(input2::update); DistinctCountCPCSketchValueAggregator agg = new DistinctCountCPCSketchValueAggregator(Collections.emptyList()); byte[] result2bytes = agg.serializeAggregatedValue(input2); - CpcSketch result = agg.applyRawValue(input1, result2bytes); + CpcSketch result = toSketch(agg.applyRawValue(input1, result2bytes)); CpcUnion union = new CpcUnion(CommonConstants.Helix.DEFAULT_CPC_SKETCH_LGK); union.update(input1); @@ -96,9 +93,7 @@ public void applyRawValueShouldUnion() { CpcSketch merged = union.getResult(); assertEquals(result.getEstimate(), merged.getEstimate()); - - // and should update the max size - assertEquals(agg.getMaxAggregatedValueByteSize(), merged.toByteArray().length); + assertEquals(agg.getMaxAggregatedValueByteSize(), 2580); } @Test @@ -106,13 +101,13 @@ public void applyRawValueShouldAdd() { CpcSketch input1 = new CpcSketch(); input1.update("hello".hashCode()); DistinctCountCPCSketchValueAggregator agg = new DistinctCountCPCSketchValueAggregator(Collections.emptyList()); - CpcSketch result = agg.applyRawValue(input1, "world"); + CpcSketch result = toSketch(agg.applyRawValue(input1, "world")); assertEquals(Math.round(result.getEstimate()), 2); CpcSketch pristine = new CpcSketch(); pristine.update("hello"); pristine.update("world"); - assertEquals(agg.getMaxAggregatedValueByteSize(), pristine.toByteArray().length); + assertEquals(agg.getMaxAggregatedValueByteSize(), 2580); } @Test @@ -121,7 +116,7 @@ public void applyRawValueShouldSupportMultiValue() { input1.update("hello"); DistinctCountCPCSketchValueAggregator agg = new DistinctCountCPCSketchValueAggregator(Collections.emptyList()); String[] strings = {"hello", "world", "this", "is", "some", "strings"}; - CpcSketch result = agg.applyRawValue(input1, strings); + CpcSketch result = toSketch(agg.applyRawValue(input1, strings)); assertEquals(Math.round(result.getEstimate()), 6); @@ -129,16 +124,16 @@ public void applyRawValueShouldSupportMultiValue() { for (String value : strings) { pristine.update(value); } - assertEquals(agg.getMaxAggregatedValueByteSize(), pristine.toByteArray().length); + assertEquals(agg.getMaxAggregatedValueByteSize(), 2580); } @Test public void getInitialValueShouldSupportDifferentTypes() { DistinctCountCPCSketchValueAggregator agg = new DistinctCountCPCSketchValueAggregator(Collections.emptyList()); - assertEquals(agg.getInitialAggregatedValue(12345).getEstimate(), 1.0); - 
assertEquals(agg.getInitialAggregatedValue(12345L).getEstimate(), 1.0); - assertEquals(agg.getInitialAggregatedValue(12.345f).getEstimate(), 1.0); - assertEquals(agg.getInitialAggregatedValue(12.345d).getEstimate(), 1.0); + assertEquals(toSketch(agg.getInitialAggregatedValue(12345)).getEstimate(), 1.0); + assertEquals(toSketch(agg.getInitialAggregatedValue(12345L)).getEstimate(), 1.0); + assertEquals(toSketch(agg.getInitialAggregatedValue(12.345f)).getEstimate(), 1.0); + assertEquals(toSketch(agg.getInitialAggregatedValue(12.345d)).getEstimate(), 1.0); assertThrows(() -> agg.getInitialAggregatedValue(new Object())); } @@ -146,16 +141,27 @@ public void getInitialValueShouldSupportDifferentTypes() { public void getInitialValueShouldSupportMultiValueTypes() { DistinctCountCPCSketchValueAggregator agg = new DistinctCountCPCSketchValueAggregator(Collections.emptyList()); Integer[] ints = {12345}; - assertEquals(agg.getInitialAggregatedValue(ints).getEstimate(), 1.0); + assertEquals(toSketch(agg.getInitialAggregatedValue(ints)).getEstimate(), 1.0); Long[] longs = {12345L}; - assertEquals(agg.getInitialAggregatedValue(longs).getEstimate(), 1.0); + assertEquals(toSketch(agg.getInitialAggregatedValue(longs)).getEstimate(), 1.0); Float[] floats = {12.345f}; - assertEquals(agg.getInitialAggregatedValue(floats).getEstimate(), 1.0); + assertEquals(toSketch(agg.getInitialAggregatedValue(floats)).getEstimate(), 1.0); Double[] doubles = {12.345d}; - assertEquals(agg.getInitialAggregatedValue(doubles).getEstimate(), 1.0); + assertEquals(toSketch(agg.getInitialAggregatedValue(doubles)).getEstimate(), 1.0); Object[] objects = {new Object()}; assertThrows(() -> agg.getInitialAggregatedValue(objects)); byte[][] zeroSketches = {}; - assertEquals(agg.getInitialAggregatedValue(zeroSketches).getEstimate(), 0.0); + assertEquals(toSketch(agg.getInitialAggregatedValue(zeroSketches)).getEstimate(), 0.0); + } + + private CpcSketch toSketch(Object value) { + if (value instanceof CpcUnion) { + return ((CpcUnion) value).getResult(); + } else if (value instanceof CpcSketch) { + return (CpcSketch) value; + } else { + throw new IllegalStateException( + "Unsupported data type for CPC Sketch aggregation: " + value.getClass().getSimpleName()); + } } } diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/aggregator/DistinctCountThetaSketchValueAggregatorTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/aggregator/DistinctCountThetaSketchValueAggregatorTest.java index fdc820c1200f..8bbdb9443a06 100644 --- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/aggregator/DistinctCountThetaSketchValueAggregatorTest.java +++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/aggregator/DistinctCountThetaSketchValueAggregatorTest.java @@ -37,7 +37,7 @@ public class DistinctCountThetaSketchValueAggregatorTest { @Test public void initialShouldCreateSingleItemSketch() { DistinctCountThetaSketchValueAggregator agg = new DistinctCountThetaSketchValueAggregator(); - assertEquals(agg.getInitialAggregatedValue("hello world").getEstimate(), 1.0); + assertEquals(toSketch(agg.getInitialAggregatedValue("hello world")).getEstimate(), 1.0); } @Test @@ -47,10 +47,13 @@ public void initialShouldParseASketch() { Sketch result = input.compact(); DistinctCountThetaSketchValueAggregator agg = new DistinctCountThetaSketchValueAggregator(); byte[] bytes = agg.serializeAggregatedValue(result); - assertEquals(agg.getInitialAggregatedValue(bytes).getEstimate(), 
result.getEstimate()); - + Sketch initSketch = toSketch(agg.getInitialAggregatedValue(bytes)); + Union union = + Union.builder().setNominalEntries(CommonConstants.Helix.DEFAULT_THETA_SKETCH_NOMINAL_ENTRIES).buildUnion(); + union.union(initSketch); + assertEquals(initSketch.getEstimate(), result.getEstimate()); // and should update the max size - assertEquals(agg.getMaxAggregatedValueByteSize(), result.getCurrentBytes()); + assertEquals(agg.getMaxAggregatedValueByteSize(), union.getCurrentBytes()); } @Test @@ -61,7 +64,7 @@ public void initialShouldParseMultiValueSketches() { input2.update("world"); DistinctCountThetaSketchValueAggregator agg = new DistinctCountThetaSketchValueAggregator(); byte[][] bytes = {agg.serializeAggregatedValue(input1), agg.serializeAggregatedValue(input2)}; - assertEquals(agg.getInitialAggregatedValue(bytes).getEstimate(), 2.0); + assertEquals(toSketch(agg.getInitialAggregatedValue(bytes)).getEstimate(), 2.0); } @Test @@ -73,16 +76,14 @@ public void applyAggregatedValueShouldUnion() { IntStream.range(0, 1000).forEach(input2::update); Sketch result2 = input2.compact(); DistinctCountThetaSketchValueAggregator agg = new DistinctCountThetaSketchValueAggregator(); - Sketch result = agg.applyAggregatedValue(result1, result2); + Sketch result = toSketch(agg.applyAggregatedValue(result1, result2)); Union union = Union.builder().setNominalEntries(CommonConstants.Helix.DEFAULT_THETA_SKETCH_NOMINAL_ENTRIES).buildUnion(); - - Sketch merged = union.union(result1, result2); - + union.union(result1); + union.union(result2); + Sketch merged = union.getResult(); assertEquals(result.getEstimate(), merged.getEstimate()); - - // and should update the max size - assertEquals(agg.getMaxAggregatedValueByteSize(), merged.getCurrentBytes()); + assertEquals(agg.getMaxAggregatedValueByteSize(), union.getCurrentBytes()); } @Test @@ -95,16 +96,15 @@ public void applyRawValueShouldUnion() { Sketch result2 = input2.compact(); DistinctCountThetaSketchValueAggregator agg = new DistinctCountThetaSketchValueAggregator(); byte[] result2bytes = agg.serializeAggregatedValue(result2); - Sketch result = agg.applyRawValue(result1, result2bytes); + Sketch result = toSketch(agg.applyRawValue(result1, result2bytes)); Union union = Union.builder().setNominalEntries(CommonConstants.Helix.DEFAULT_THETA_SKETCH_NOMINAL_ENTRIES).buildUnion(); - - Sketch merged = union.union(result1, result2); - + union.union(result1); + union.union(result2); + Sketch merged = union.getResult(); assertEquals(result.getEstimate(), merged.getEstimate()); - // and should update the max size - assertEquals(agg.getMaxAggregatedValueByteSize(), merged.getCurrentBytes()); + assertEquals(agg.getMaxAggregatedValueByteSize(), union.getCurrentBytes()); } @Test @@ -113,13 +113,13 @@ public void applyRawValueShouldAdd() { input1.update("hello".hashCode()); Sketch result1 = input1.compact(); DistinctCountThetaSketchValueAggregator agg = new DistinctCountThetaSketchValueAggregator(); - Sketch result = agg.applyRawValue(result1, "world"); - + Sketch result = toSketch(agg.applyRawValue(result1, "world")); + Union union = + Union.builder().setNominalEntries(CommonConstants.Helix.DEFAULT_THETA_SKETCH_NOMINAL_ENTRIES).buildUnion(); + union.union(result); assertEquals(result.getEstimate(), 2.0); - // and should update the max size - assertEquals(agg.getMaxAggregatedValueByteSize(), 32 // may change in future versions of datasketches - ); + assertEquals(agg.getMaxAggregatedValueByteSize(), union.getCurrentBytes()); } @Test @@ -129,22 +129,22 @@ public 
void applyRawValueShouldSupportMultiValue() { Sketch result1 = input1.compact(); DistinctCountThetaSketchValueAggregator agg = new DistinctCountThetaSketchValueAggregator(); String[] strings = {"hello", "world", "this", "is", "some", "strings"}; - Sketch result = agg.applyRawValue(result1, (Object) strings); - + Sketch result = toSketch(agg.applyRawValue(result1, (Object) strings)); + Union union = + Union.builder().setNominalEntries(CommonConstants.Helix.DEFAULT_THETA_SKETCH_NOMINAL_ENTRIES).buildUnion(); + union.union(result); assertEquals(result.getEstimate(), 6.0); - // and should update the max size - assertEquals(agg.getMaxAggregatedValueByteSize(), 64 // may change in future versions of datasketches - ); + assertEquals(agg.getMaxAggregatedValueByteSize(), union.getCurrentBytes()); } @Test public void getInitialValueShouldSupportDifferentTypes() { DistinctCountThetaSketchValueAggregator agg = new DistinctCountThetaSketchValueAggregator(); - assertEquals(agg.getInitialAggregatedValue(12345).getEstimate(), 1.0); - assertEquals(agg.getInitialAggregatedValue(12345L).getEstimate(), 1.0); - assertEquals(agg.getInitialAggregatedValue(12.345f).getEstimate(), 1.0); - assertEquals(agg.getInitialAggregatedValue(12.345d).getEstimate(), 1.0); + assertEquals(toSketch(agg.getInitialAggregatedValue(12345)).getEstimate(), 1.0); + assertEquals(toSketch(agg.getInitialAggregatedValue(12345L)).getEstimate(), 1.0); + assertEquals(toSketch(agg.getInitialAggregatedValue(12.345f)).getEstimate(), 1.0); + assertEquals(toSketch(agg.getInitialAggregatedValue(12.345d)).getEstimate(), 1.0); assertThrows(() -> agg.getInitialAggregatedValue(new Object())); } @@ -152,17 +152,17 @@ public void getInitialValueShouldSupportDifferentTypes() { public void getInitialValueShouldSupportMultiValueTypes() { DistinctCountThetaSketchValueAggregator agg = new DistinctCountThetaSketchValueAggregator(); Integer[] ints = {12345}; - assertEquals(agg.getInitialAggregatedValue(ints).getEstimate(), 1.0); + assertEquals(toSketch(agg.getInitialAggregatedValue(ints)).getEstimate(), 1.0); Long[] longs = {12345L}; - assertEquals(agg.getInitialAggregatedValue(longs).getEstimate(), 1.0); + assertEquals(toSketch(agg.getInitialAggregatedValue(longs)).getEstimate(), 1.0); Float[] floats = {12.345f}; - assertEquals(agg.getInitialAggregatedValue(floats).getEstimate(), 1.0); + assertEquals(toSketch(agg.getInitialAggregatedValue(floats)).getEstimate(), 1.0); Double[] doubles = {12.345d}; - assertEquals(agg.getInitialAggregatedValue(doubles).getEstimate(), 1.0); + assertEquals(toSketch(agg.getInitialAggregatedValue(doubles)).getEstimate(), 1.0); Object[] objects = {new Object()}; assertThrows(() -> agg.getInitialAggregatedValue(objects)); byte[][] zeroSketches = {}; - assertEquals(agg.getInitialAggregatedValue(zeroSketches).getEstimate(), 0.0); + assertEquals(toSketch(agg.getInitialAggregatedValue(zeroSketches)).getEstimate(), 0.0); } @Test @@ -172,7 +172,18 @@ public void shouldRetainSketchOrdering() { Sketch unordered = input.compact(false, null); Sketch ordered = input.compact(true, null); DistinctCountThetaSketchValueAggregator agg = new DistinctCountThetaSketchValueAggregator(); - assertTrue(agg.cloneAggregatedValue(ordered).isOrdered()); - assertFalse(agg.cloneAggregatedValue(unordered).isOrdered()); + assertTrue(toSketch(agg.cloneAggregatedValue(ordered)).isOrdered()); + assertFalse(toSketch(agg.cloneAggregatedValue(unordered)).isOrdered()); + } + + private Sketch toSketch(Object value) { + if (value instanceof Union) { + return ((Union) 
value).getResult(); + } else if (value instanceof Sketch) { + return (Sketch) value; + } else { + throw new IllegalStateException( + "Unsupported data type for Theta Sketch aggregation: " + value.getClass().getSimpleName()); + } } } diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/aggregator/IntegerTupleSketchValueAggregatorTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/aggregator/IntegerTupleSketchValueAggregatorTest.java index d108d799b040..cfc8b88f8ed7 100644 --- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/aggregator/IntegerTupleSketchValueAggregatorTest.java +++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/aggregator/IntegerTupleSketchValueAggregatorTest.java @@ -19,6 +19,7 @@ package org.apache.pinot.segment.local.aggregator; import org.apache.datasketches.tuple.Sketch; +import org.apache.datasketches.tuple.Union; import org.apache.datasketches.tuple.aninteger.IntegerSketch; import org.apache.datasketches.tuple.aninteger.IntegerSummary; import org.testng.annotations.Test; @@ -32,12 +33,12 @@ private byte[] sketchContaining(String key, int value) { IntegerSketch is = new IntegerSketch(16, IntegerSummary.Mode.Sum); is.update(key, value); return is.compact().toByteArray(); - }; + } @Test public void initialShouldParseASketch() { IntegerTupleSketchValueAggregator agg = new IntegerTupleSketchValueAggregator(IntegerSummary.Mode.Sum); - assertEquals(agg.getInitialAggregatedValue(sketchContaining("hello world", 1)).getEstimate(), 1.0); + assertEquals(toSketch(agg.getInitialAggregatedValue(sketchContaining("hello world", 1))).getEstimate(), 1.0); } @Test @@ -47,11 +48,9 @@ public void applyAggregatedValueShouldUnion() { s1.update("a", 1); s2.update("b", 1); IntegerTupleSketchValueAggregator agg = new IntegerTupleSketchValueAggregator(IntegerSummary.Mode.Sum); - Sketch merged = agg.applyAggregatedValue(s1, s2); + Sketch merged = toSketch(agg.applyAggregatedValue(s1, s2)); assertEquals(merged.getEstimate(), 2.0); - - // and should update the max size - assertEquals(agg.getMaxAggregatedValueByteSize(), agg.serializeAggregatedValue(merged).length); + assertEquals(agg.getMaxAggregatedValueByteSize(), 196632); } @Test @@ -61,10 +60,20 @@ public void applyRawValueShouldUnion() { s1.update("a", 1); s2.update("b", 1); IntegerTupleSketchValueAggregator agg = new IntegerTupleSketchValueAggregator(IntegerSummary.Mode.Sum); - Sketch merged = agg.applyRawValue(s1, agg.serializeAggregatedValue(s2)); + Sketch merged = toSketch(agg.applyRawValue(s1, agg.serializeAggregatedValue(s2))); assertEquals(merged.getEstimate(), 2.0); + assertEquals(agg.getMaxAggregatedValueByteSize(), 196632); + } - // and should update the max size - assertEquals(agg.getMaxAggregatedValueByteSize(), agg.serializeAggregatedValue(merged).length); + @SuppressWarnings("unchecked") + private Sketch toSketch(Object value) { + if (value instanceof Union) { + return ((Union) value).getResult(); + } else if (value instanceof Sketch) { + return ((Sketch) value); + } else { + throw new IllegalStateException( + "Unsupported data type for Integer Tuple Sketch aggregation: " + value.getClass().getSimpleName()); + } } } diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/utils/CommonConstants.java b/pinot-spi/src/main/java/org/apache/pinot/spi/utils/CommonConstants.java index befd5b57633e..24ea49cfa101 100644 --- a/pinot-spi/src/main/java/org/apache/pinot/spi/utils/CommonConstants.java +++ 
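For the tuple aggregator the same extract-union/extract-sketch pairing applies, with the union sized at 1 << DEFAULT_TUPLE_SKETCH_LGK and merges funneled through IntegerSummarySetOperations. In isolation the merge path these tests exercise reduces to the following (a minimal sketch; keys and lgK are arbitrary, and the class name is illustrative):

import org.apache.datasketches.tuple.Sketch;
import org.apache.datasketches.tuple.Union;
import org.apache.datasketches.tuple.aninteger.IntegerSketch;
import org.apache.datasketches.tuple.aninteger.IntegerSummary;
import org.apache.datasketches.tuple.aninteger.IntegerSummarySetOperations;

public class TupleUnionExample {
  public static void main(String[] args) {
    IntegerSummary.Mode mode = IntegerSummary.Mode.Sum;
    IntegerSketch a = new IntegerSketch(14, mode);
    IntegerSketch b = new IntegerSketch(14, mode);
    a.update("a", 1);
    b.update("b", 1);
    // Mirrors extractUnion: one union sized to the configured nominal entries.
    Union<IntegerSummary> union =
        new Union<>(1 << 14, new IntegerSummarySetOperations(mode, mode));
    union.union(a);
    union.union(b);
    // Mirrors extractSketch: getResult() yields the mergeable sketch view.
    Sketch<IntegerSummary> merged = union.getResult();
    System.out.println(Math.round(merged.getEstimate())); // 2
  }
}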
b/pinot-spi/src/main/java/org/apache/pinot/spi/utils/CommonConstants.java @@ -107,7 +107,9 @@ public static class Helix { // https://datasketches.apache.org/docs/Theta/ThetaErrorTable.html public static final int DEFAULT_THETA_SKETCH_NOMINAL_ENTRIES = 16384; - public static final int DEFAULT_TUPLE_SKETCH_LGK = 16; + // 2 to the power of 14, for tradeoffs see datasketches library documentation: + // https://datasketches.apache.org/docs/Theta/ThetaErrorTable.html + public static final int DEFAULT_TUPLE_SKETCH_LGK = 14; public static final int DEFAULT_CPC_SKETCH_LGK = 12; public static final int DEFAULT_ULTRALOGLOG_P = 12; From 0f28a5cc6f58688040c6e20c458581a1077ed0cd Mon Sep 17 00:00:00 2001 From: rohit Date: Thu, 2 May 2024 01:35:14 +0530 Subject: [PATCH 56/58] fix merging null multi value in partial upsert (#13031) --- .../local/upsert/PartialUpsertHandler.java | 16 ++++++++++++++-- .../local/upsert/PartialUpsertHandlerTest.java | 9 +++++++-- 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/upsert/PartialUpsertHandler.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/upsert/PartialUpsertHandler.java index 118412ab7725..ad73de9d70f5 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/upsert/PartialUpsertHandler.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/upsert/PartialUpsertHandler.java @@ -18,6 +18,7 @@ */ package org.apache.pinot.segment.local.upsert; +import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.TreeMap; @@ -47,12 +48,24 @@ public class PartialUpsertHandler { private final TreeMap _fieldSpecMap; private final PartialUpsertMerger _partialUpsertMerger; + private final Map _defaultNullValues = new HashMap<>(); + public PartialUpsertHandler(Schema schema, List comparisonColumns, UpsertConfig upsertConfig) { _primaryKeyColumns = schema.getPrimaryKeyColumns(); _comparisonColumns = comparisonColumns; _fieldSpecMap = schema.getFieldSpecMap(); _partialUpsertMerger = PartialUpsertMergerFactory.getPartialUpsertMerger(_primaryKeyColumns, comparisonColumns, upsertConfig); + // cache default null values to handle null merger results + for (Map.Entry entry : schema.getFieldSpecMap().entrySet()) { + String column = entry.getKey(); + FieldSpec fieldSpec = entry.getValue(); + if (fieldSpec.isSingleValueField()) { + _defaultNullValues.put(column, fieldSpec.getDefaultNullValue()); + } else { + _defaultNullValues.put(column, new Object[]{fieldSpec.getDefaultNullValue()}); + } + } } public void merge(LazyRow previousRow, GenericRow newRow, Map resultHolder) { @@ -83,8 +96,7 @@ private void setMergedValue(GenericRow row, String column, @Nullable Object merg row.removeNullValueField(column); row.putValue(column, mergedValue); } else { - // if column exists but mapped to a null value then merger result was a null value - row.putDefaultNullValue(column, _fieldSpecMap.get(column).getDefaultNullValue()); + row.putDefaultNullValue(column, _defaultNullValues.get(column)); } } } diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/upsert/PartialUpsertHandlerTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/upsert/PartialUpsertHandlerTest.java index 4b954aa1400e..fc8fdbdefbe4 100644 --- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/upsert/PartialUpsertHandlerTest.java +++ 
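The behavioral point of this patch is in _defaultNullValues: when the merger result for a column is null, the fallback default must match the column's cardinality, so single-value columns cache the scalar default while multi-value columns cache a one-element Object[] wrapping it. A minimal illustration of the two cached shapes (hypothetical standalone code; the field names and defaults mirror the test changes that follow):

import java.util.HashMap;
import java.util.Map;

public class DefaultNullCacheExample {
  public static void main(String[] args) {
    Map<String, Object> defaults = new HashMap<>();
    // Single-value LONG column: the scalar default null value.
    defaults.put("field1", Long.MIN_VALUE);
    // Multi-value INT column: a one-element array wrapping the default.
    defaults.put("field3", new Object[]{Integer.MIN_VALUE});
    System.out.println(defaults.get("field1"));                     // -9223372036854775808
    System.out.println(((Object[]) defaults.get("field3")).length); // 1
  }
}

The test below pins this down: a null merger result for the multi-value field3 must surface as new Object[]{Integer.MIN_VALUE}, not as a bare scalar.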
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/upsert/PartialUpsertHandlerTest.java @@ -86,11 +86,14 @@ public void testCustomPartialUpsertMergerWithNullResult() { newRowData.put("hoursSinceEpoch", null); // testing null comparison column GenericRow newRecord = initGenericRow(new GenericRow(), newRowData); LazyRow prevRecord = mock(LazyRow.class); - mockLazyRow(prevRecord, Map.of("pk", "pk1", "field1", 5L, "field2", "set", "hoursSinceEpoch", 2L)); - Map expectedData = new HashMap<>(Map.of("pk", "pk1", "field2", "reset", "hoursSinceEpoch", 2L)); + mockLazyRow(prevRecord, + Map.of("pk", "pk1", "field1", 5L, "field2", "set", "field3", new Integer[]{0}, "hoursSinceEpoch", 2L)); + Map expectedData = new HashMap<>( + Map.of("pk", "pk1", "field2", "reset", "hoursSinceEpoch", 2L)); expectedData.put("field1", Long.MIN_VALUE); GenericRow expectedRecord = initGenericRow(new GenericRow(), expectedData); expectedRecord.addNullValueField("field1"); + expectedRecord.putDefaultNullValue("field3", new Object[]{Integer.MIN_VALUE}); testCustomMerge(prevRecord, newRecord, expectedRecord, getCustomMerger()); } @@ -138,6 +141,7 @@ private void testCustomMerge(LazyRow prevRecord, GenericRow newRecord, GenericRo Schema schema = new Schema.SchemaBuilder().addSingleValueDimension("pk", FieldSpec.DataType.STRING) .addSingleValueDimension("field1", FieldSpec.DataType.LONG) .addSingleValueDimension("field2", FieldSpec.DataType.STRING) + .addMultiValueDimension("field3", FieldSpec.DataType.INT) .addDateTime("hoursSinceEpoch", FieldSpec.DataType.LONG, "1:HOURS:EPOCH", "1:HOURS") .setPrimaryKeyColumns(Arrays.asList("pk")).build(); @@ -169,6 +173,7 @@ public PartialUpsertMerger getCustomMerger() { } if ((newRow.getValue("field2")).equals("reset")) { resultHolder.put("field1", null); + resultHolder.put("field3", null); } }; } From c8b223f1fc2ab8df958175f8ae0c4b17902a2061 Mon Sep 17 00:00:00 2001 From: Abhishek Sharma Date: Wed, 1 May 2024 16:17:48 -0400 Subject: [PATCH 57/58] Upgrade lucene to 9.10.0 and compatibility changes to code. 
(#12866) --- .../impl/vector/MutableVectorIndex.java | 2 +- .../impl/inv/text/LuceneFSTIndexCreator.java | 2 +- .../impl/text/LuceneTextIndexCreator.java | 4 +-- .../impl/vector/HnswVectorIndexCreator.java | 3 +- .../{lucene95 => lucene99}/HnswCodec.java | 14 ++++---- .../HnswVectorsFormat.java | 10 +++--- .../SegmentV1V2ToV3FormatConverter.java | 8 ++--- .../local/segment/index/fst/FstIndexType.java | 3 +- .../loader/invertedindex/FSTIndexHandler.java | 4 +-- .../invertedindex/VectorIndexHandler.java | 5 +-- .../index/readers/LuceneFSTIndexReader.java | 3 +- .../segment/index/text/TextIndexType.java | 3 +- .../segment/index/vector/VectorIndexType.java | 3 +- .../local/segment/store/TextIndexUtils.java | 5 ++- .../local/segment/store/VectorIndexUtils.java | 24 +++++++------ .../segment/local/utils/fst/FSTBuilder.java | 13 ++++--- .../nativefst/NativeFSTIndexCreator.java | 2 +- .../creator/LuceneFSTIndexCreatorTest.java | 4 +-- .../creator/NativeFSTIndexCreatorTest.java | 4 +-- .../segment/index/loader/LoaderTest.java | 36 +++++++++---------- .../index/loader/SegmentPreProcessorTest.java | 4 +-- .../store/FilePerIndexDirectoryTest.java | 4 +-- .../store/SingleFileIndexDirectoryTest.java | 4 +-- .../local/utils/fst/FSTBuilderTest.java | 2 -- .../apache/pinot/segment/spi/V1Constants.java | 4 +++ .../spi/store/SegmentDirectoryPaths.java | 26 +++++++++++--- pom.xml | 2 +- 27 files changed, 117 insertions(+), 81 deletions(-) rename pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/vector/{lucene95 => lucene99}/HnswCodec.java (92%) rename pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/vector/{lucene95 => lucene99}/HnswVectorsFormat.java (92%) diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/vector/MutableVectorIndex.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/vector/MutableVectorIndex.java index a591650be4e7..47329a1c2a25 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/vector/MutableVectorIndex.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/vector/MutableVectorIndex.java @@ -83,7 +83,7 @@ public MutableVectorIndex(String segmentName, String vectorColumn, VectorIndexCo // to V3 if segmentVersion is set to V3 in SegmentGeneratorConfig. 
_indexDir = new File(FileUtils.getTempDirectory(), segmentName); _indexDirectory = FSDirectory.open( - new File(_indexDir, _vectorColumn + V1Constants.Indexes.VECTOR_HNSW_INDEX_FILE_EXTENSION).toPath()); + new File(_indexDir, _vectorColumn + V1Constants.Indexes.VECTOR_V99_HNSW_INDEX_FILE_EXTENSION).toPath()); LOGGER.info("Creating mutable HNSW index for segment: {}, column: {} at path: {} with {}", segmentName, vectorColumn, _indexDir.getAbsolutePath(), vectorIndexConfig.getProperties()); _indexWriter = new IndexWriter(_indexDirectory, VectorIndexUtils.getIndexWriterConfig(vectorIndexConfig)); diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/inv/text/LuceneFSTIndexCreator.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/inv/text/LuceneFSTIndexCreator.java index 60b903739bb1..2e51c19096a1 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/inv/text/LuceneFSTIndexCreator.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/inv/text/LuceneFSTIndexCreator.java @@ -55,7 +55,7 @@ public class LuceneFSTIndexCreator implements FSTIndexCreator { */ public LuceneFSTIndexCreator(File indexDir, String columnName, String[] sortedEntries) throws IOException { - _fstIndexFile = new File(indexDir, columnName + V1Constants.Indexes.LUCENE_V9_FST_INDEX_FILE_EXTENSION); + _fstIndexFile = new File(indexDir, columnName + V1Constants.Indexes.LUCENE_V99_FST_INDEX_FILE_EXTENSION); _fstBuilder = new FSTBuilder(); _dictId = 0; diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/text/LuceneTextIndexCreator.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/text/LuceneTextIndexCreator.java index 49306d9404af..2cdbf13f6af4 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/text/LuceneTextIndexCreator.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/text/LuceneTextIndexCreator.java @@ -330,7 +330,7 @@ public void close() } private File getV1TextIndexFile(File indexDir) { - String luceneIndexDirectory = _textColumn + V1Constants.Indexes.LUCENE_V9_TEXT_INDEX_FILE_EXTENSION; + String luceneIndexDirectory = _textColumn + V1Constants.Indexes.LUCENE_V99_TEXT_INDEX_FILE_EXTENSION; return new File(indexDir, luceneIndexDirectory); } @@ -339,7 +339,7 @@ private File getMutableIndexDir(File indexDir) { String tmpSegmentName = indexDir.getParentFile().getName(); String segmentName = tmpSegmentName.substring(tmpSegmentName.indexOf("tmp-") + 4, tmpSegmentName.lastIndexOf('-')); String mutableDir = indexDir.getParentFile().getParentFile().getParent() + "/consumers/" + segmentName + "/" - + _textColumn + V1Constants.Indexes.LUCENE_V9_TEXT_INDEX_FILE_EXTENSION; + + _textColumn + V1Constants.Indexes.LUCENE_V99_TEXT_INDEX_FILE_EXTENSION; return new File(mutableDir); } } diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/vector/HnswVectorIndexCreator.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/vector/HnswVectorIndexCreator.java index d13b45039762..c1f5cbb0b51e 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/vector/HnswVectorIndexCreator.java +++ 
b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/vector/HnswVectorIndexCreator.java @@ -61,7 +61,8 @@ public HnswVectorIndexCreator(String column, File segmentIndexDir, VectorIndexCo try { // segment generation is always in V1 and later we convert (as part of post creation processing) // to V3 if segmentVersion is set to V3 in SegmentGeneratorConfig. - File indexFile = new File(segmentIndexDir, _vectorColumn + V1Constants.Indexes.VECTOR_HNSW_INDEX_FILE_EXTENSION); + File indexFile = new File(segmentIndexDir, _vectorColumn + + V1Constants.Indexes.VECTOR_V99_HNSW_INDEX_FILE_EXTENSION); _indexDirectory = FSDirectory.open(indexFile.toPath()); LOGGER.info("Creating HNSW index for column: {} at path: {} with {} for segment: {}", column, indexFile.getAbsolutePath(), vectorIndexConfig.getProperties(), segmentIndexDir.getAbsolutePath()); diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/vector/lucene95/HnswCodec.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/vector/lucene99/HnswCodec.java similarity index 92% rename from pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/vector/lucene95/HnswCodec.java rename to pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/vector/lucene99/HnswCodec.java index ee7cf560df31..bfcfcff5ac48 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/vector/lucene95/HnswCodec.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/vector/lucene99/HnswCodec.java @@ -16,8 +16,9 @@ * specific language governing permissions and limitations * under the License. */ -package org.apache.pinot.segment.local.segment.creator.impl.vector.lucene95; +package org.apache.pinot.segment.local.segment.creator.impl.vector.lucene99; +import org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat; import org.apache.lucene.codecs.DocValuesFormat; import org.apache.lucene.codecs.FilterCodec; import org.apache.lucene.codecs.KnnVectorsFormat; @@ -25,8 +26,7 @@ import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat; import org.apache.lucene.codecs.lucene90.Lucene90PointsFormat; -import org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat; -import org.apache.lucene.codecs.lucene95.Lucene95Codec; +import org.apache.lucene.codecs.lucene99.Lucene99Codec; import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat; import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat; import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat; @@ -36,7 +36,7 @@ * Extend the Lucene 9.5 index format * The major change here is to allow custom: @link{org.apache.lucene.codecs.KnnVectorsFormat} * - * @see org.apache.lucene.codecs.lucene95 package documentation for file format details. + * @see org.apache.lucene.codecs.lucene99 package documentation for file format details. */ public class HnswCodec extends FilterCodec { @@ -73,8 +73,8 @@ public KnnVectorsFormat getKnnVectorsFormatForField(String field) { * * @param mode stored fields compression mode to use for newly flushed/merged segments. 
*/ - public HnswCodec(Lucene95Codec.Mode mode, KnnVectorsFormat defaultKnnVectorsFormat) { - super("Lucene95", new Lucene95Codec(mode)); + public HnswCodec(Lucene99Codec.Mode mode, KnnVectorsFormat defaultKnnVectorsFormat) { + super("Lucene99", new Lucene99Codec(mode)); _defaultKnnVectorsFormat = defaultKnnVectorsFormat; _defaultPostingsFormat = new Lucene90PostingsFormat(); _defaultDVFormat = new Lucene90DocValuesFormat(); @@ -123,7 +123,7 @@ public DocValuesFormat getDocValuesFormatForField(String field) { /** * Returns the vectors format that should be used for writing new segments of field * - *

The default implementation always returns "Lucene95". + *

The default implementation always returns "Lucene99". * *

WARNING: if you subclass, you are responsible for index backwards compatibility: * future version of Lucene are only guaranteed to be able to read the default implementation. diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/vector/lucene95/HnswVectorsFormat.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/vector/lucene99/HnswVectorsFormat.java similarity index 92% rename from pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/vector/lucene95/HnswVectorsFormat.java rename to pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/vector/lucene99/HnswVectorsFormat.java index b3f356c51e49..2ba2781445f4 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/vector/lucene95/HnswVectorsFormat.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/vector/lucene99/HnswVectorsFormat.java @@ -16,13 +16,13 @@ * specific language governing permissions and limitations * under the License. */ -package org.apache.pinot.segment.local.segment.creator.impl.vector.lucene95; +package org.apache.pinot.segment.local.segment.creator.impl.vector.lucene99; import java.io.IOException; import org.apache.lucene.codecs.KnnVectorsFormat; import org.apache.lucene.codecs.KnnVectorsReader; import org.apache.lucene.codecs.KnnVectorsWriter; -import org.apache.lucene.codecs.lucene95.Lucene95HnswVectorsFormat; +import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.util.hnsw.HnswGraph; @@ -41,7 +41,7 @@ public final class HnswVectorsFormat extends KnnVectorsFormat { public static final int DEFAULT_MAX_DIMENSIONS = 2048; private final int _maxDimensions; - private final Lucene95HnswVectorsFormat _delegate; + private final Lucene99HnswVectorsFormat _delegate; /** * Constructs a format using the given graph construction parameters. 
@@ -51,7 +51,7 @@ public final class HnswVectorsFormat extends KnnVectorsFormat { * @param maxDimensions the maximum number of dimensions supported by this format */ public HnswVectorsFormat(int maxConn, int beamWidth, int maxDimensions) { - super("Lucene95HnswVectorsFormat"); + super("Lucene99HnswVectorsFormat"); if (maxDimensions <= 0 || maxDimensions > DEFAULT_MAX_DIMENSIONS) { throw new IllegalArgumentException( "maxDimensions must be postive and less than or equal to" @@ -59,7 +59,7 @@ public HnswVectorsFormat(int maxConn, int beamWidth, int maxDimensions) { + "; maxDimensions=" + maxDimensions); } - _delegate = new Lucene95HnswVectorsFormat(maxConn, beamWidth); + _delegate = new Lucene99HnswVectorsFormat(maxConn, beamWidth); _maxDimensions = maxDimensions; } diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/converter/SegmentV1V2ToV3FormatConverter.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/converter/SegmentV1V2ToV3FormatConverter.java index ece4daf604d2..0e84a4b6ad5c 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/converter/SegmentV1V2ToV3FormatConverter.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/converter/SegmentV1V2ToV3FormatConverter.java @@ -110,10 +110,10 @@ private void deleteV2Files(File v2SegmentDirectory) if (file.isFile() && file.exists()) { FileUtils.deleteQuietly(file); } - if (file.isDirectory() && file.getName().endsWith(V1Constants.Indexes.LUCENE_V9_TEXT_INDEX_FILE_EXTENSION)) { + if (file.isDirectory() && file.getName().endsWith(V1Constants.Indexes.LUCENE_V99_TEXT_INDEX_FILE_EXTENSION)) { FileUtils.deleteDirectory(file); } - if (file.isDirectory() && file.getName().endsWith(V1Constants.Indexes.VECTOR_HNSW_INDEX_FILE_EXTENSION)) { + if (file.isDirectory() && file.getName().endsWith(V1Constants.Indexes.VECTOR_V99_HNSW_INDEX_FILE_EXTENSION)) { FileUtils.deleteDirectory(file); } } @@ -226,7 +226,7 @@ private void copyCreationMetadataIfExists(File currentDir, File v3Dir) private void copyLuceneTextIndexIfExists(File segmentDirectory, File v3Dir) throws IOException { // TODO: see if this can be done by reusing some existing methods - String suffix = V1Constants.Indexes.LUCENE_V9_TEXT_INDEX_FILE_EXTENSION; + String suffix = V1Constants.Indexes.LUCENE_V99_TEXT_INDEX_FILE_EXTENSION; File[] textIndexFiles = segmentDirectory.listFiles(new FilenameFilter() { @Override public boolean accept(File dir, String name) { @@ -263,7 +263,7 @@ public boolean accept(File dir, String name) { private void copyVectorIndexIfExists(File segmentDirectory, File v3Dir) throws IOException { // TODO: see if this can be done by reusing some existing methods - String suffix = V1Constants.Indexes.VECTOR_HNSW_INDEX_FILE_EXTENSION; + String suffix = V1Constants.Indexes.VECTOR_V99_HNSW_INDEX_FILE_EXTENSION; File[] vectorIndexFiles = segmentDirectory.listFiles(new FilenameFilter() { @Override public boolean accept(File dir, String name) { diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/fst/FstIndexType.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/fst/FstIndexType.java index d04ce7bc97fe..83e755f734ef 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/fst/FstIndexType.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/fst/FstIndexType.java @@ -65,7 +65,8 @@ public class 
FstIndexType extends AbstractIndexType EXTENSIONS = ImmutableList.of(V1Constants.Indexes.LUCENE_FST_INDEX_FILE_EXTENSION, - V1Constants.Indexes.LUCENE_V9_FST_INDEX_FILE_EXTENSION); + V1Constants.Indexes.LUCENE_V9_FST_INDEX_FILE_EXTENSION, + V1Constants.Indexes.LUCENE_V99_FST_INDEX_FILE_EXTENSION); protected FstIndexType() { super(StandardIndexes.FST_ID); diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/invertedindex/FSTIndexHandler.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/invertedindex/FSTIndexHandler.java index 778e92db0aed..b9d9c5096d84 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/invertedindex/FSTIndexHandler.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/invertedindex/FSTIndexHandler.java @@ -44,7 +44,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import static org.apache.pinot.segment.spi.V1Constants.Indexes.LUCENE_V9_FST_INDEX_FILE_EXTENSION; +import static org.apache.pinot.segment.spi.V1Constants.Indexes.LUCENE_V99_FST_INDEX_FILE_EXTENSION; /** @@ -157,7 +157,7 @@ private void createFSTIndexForColumn(SegmentDirectory.Writer segmentWriter, Colu String segmentName = _segmentDirectory.getSegmentMetadata().getName(); String columnName = columnMetadata.getColumnName(); File inProgress = new File(indexDir, columnName + ".fst.inprogress"); - File fstIndexFile = new File(indexDir, columnName + LUCENE_V9_FST_INDEX_FILE_EXTENSION); + File fstIndexFile = new File(indexDir, columnName + LUCENE_V99_FST_INDEX_FILE_EXTENSION); if (!inProgress.exists()) { // Create a marker file. diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/invertedindex/VectorIndexHandler.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/invertedindex/VectorIndexHandler.java index 584d4be1c37c..b3e5d2dfc38f 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/invertedindex/VectorIndexHandler.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/invertedindex/VectorIndexHandler.java @@ -115,9 +115,10 @@ private void createVectorIndexForColumn(SegmentDirectory.Writer segmentWriter, C String columnName = columnMetadata.getColumnName(); File inProgress = - new File(segmentDirectory, columnName + V1Constants.Indexes.VECTOR_HNSW_INDEX_FILE_EXTENSION + ".inprogress"); + new File(segmentDirectory, columnName + + V1Constants.Indexes.VECTOR_V99_HNSW_INDEX_FILE_EXTENSION + ".inprogress"); File vectorIndexFile = - new File(segmentDirectory, columnName + V1Constants.Indexes.VECTOR_HNSW_INDEX_FILE_EXTENSION); + new File(segmentDirectory, columnName + V1Constants.Indexes.VECTOR_V99_HNSW_INDEX_FILE_EXTENSION); if (!inProgress.exists()) { // Marker file does not exist, which means last run ended normally. 
diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/LuceneFSTIndexReader.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/LuceneFSTIndexReader.java index 6e579562c47e..3bf2c2a60134 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/LuceneFSTIndexReader.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/LuceneFSTIndexReader.java @@ -52,7 +52,8 @@ public LuceneFSTIndexReader(PinotDataBuffer pinotDataBuffer) _dataBufferIndexInput = new PinotBufferIndexInput(_dataBuffer, 0L, _dataBuffer.size()); _readFST = - new FST(_dataBufferIndexInput, _dataBufferIndexInput, PositiveIntOutputs.getSingleton(), new OffHeapFSTStore()); + new FST<>(FST.readMetadata(_dataBufferIndexInput, PositiveIntOutputs.getSingleton()), + _dataBufferIndexInput, new OffHeapFSTStore()); } @Override diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/text/TextIndexType.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/text/TextIndexType.java index 596380d81b51..cfbf6271f12e 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/text/TextIndexType.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/text/TextIndexType.java @@ -75,7 +75,8 @@ public class TextIndexType extends AbstractIndexType EXTENSIONS = Lists.newArrayList( V1Constants.Indexes.LUCENE_TEXT_INDEX_FILE_EXTENSION, V1Constants.Indexes.NATIVE_TEXT_INDEX_FILE_EXTENSION, - V1Constants.Indexes.LUCENE_V9_TEXT_INDEX_FILE_EXTENSION + V1Constants.Indexes.LUCENE_V9_TEXT_INDEX_FILE_EXTENSION, + V1Constants.Indexes.LUCENE_V99_TEXT_INDEX_FILE_EXTENSION ); protected TextIndexType() { diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/vector/VectorIndexType.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/vector/VectorIndexType.java index 59faf4c88036..cb228b81aa97 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/vector/VectorIndexType.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/vector/VectorIndexType.java @@ -129,7 +129,8 @@ public IndexHandler createIndexHandler(SegmentDirectory segmentDirectory, Map getFileExtensions(@Nullable ColumnMetadata columnMetadata) { - return List.of(V1Constants.Indexes.VECTOR_INDEX_FILE_EXTENSION); + return List.of(V1Constants.Indexes.VECTOR_INDEX_FILE_EXTENSION, + V1Constants.Indexes.VECTOR_V99_INDEX_FILE_EXTENSION); } private static class ReaderFactory implements IndexReaderFactory { diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/TextIndexUtils.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/TextIndexUtils.java index caa47adff73d..0c2369bdd8e3 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/TextIndexUtils.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/TextIndexUtils.java @@ -49,6 +49,8 @@ static void cleanupTextIndex(File segDir, String column) { FileUtils.deleteQuietly(luceneMappingFile); File luceneV9IndexFile = new File(segDir, column + Indexes.LUCENE_V9_TEXT_INDEX_FILE_EXTENSION); FileUtils.deleteQuietly(luceneV9IndexFile); + File luceneV99IndexFile = new File(segDir, column + 
Indexes.LUCENE_V99_TEXT_INDEX_FILE_EXTENSION); + FileUtils.deleteQuietly(luceneV99IndexFile); File luceneV9MappingFile = new File(segDir, column + Indexes.LUCENE_TEXT_INDEX_DOCID_MAPPING_FILE_EXTENSION); FileUtils.deleteQuietly(luceneV9MappingFile); @@ -61,7 +63,8 @@ static boolean hasTextIndex(File segDir, String column) { //@formatter:off return new File(segDir, column + Indexes.LUCENE_TEXT_INDEX_FILE_EXTENSION).exists() || new File(segDir, column + Indexes.LUCENE_V9_TEXT_INDEX_FILE_EXTENSION).exists() - || new File(segDir, column + Indexes.NATIVE_TEXT_INDEX_FILE_EXTENSION).exists(); + || new File(segDir, column + Indexes.NATIVE_TEXT_INDEX_FILE_EXTENSION).exists() + || new File(segDir, column + Indexes.LUCENE_V99_TEXT_INDEX_FILE_EXTENSION).exists(); //@formatter:on } diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/VectorIndexUtils.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/VectorIndexUtils.java index 15de36cf5fd3..698adcb31896 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/VectorIndexUtils.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/VectorIndexUtils.java @@ -20,12 +20,12 @@ import java.io.File; import org.apache.commons.io.FileUtils; -import org.apache.lucene.codecs.lucene95.Lucene95Codec; -import org.apache.lucene.codecs.lucene95.Lucene95HnswVectorsFormat; +import org.apache.lucene.codecs.lucene99.Lucene99Codec; +import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.VectorSimilarityFunction; -import org.apache.pinot.segment.local.segment.creator.impl.vector.lucene95.HnswCodec; -import org.apache.pinot.segment.local.segment.creator.impl.vector.lucene95.HnswVectorsFormat; +import org.apache.pinot.segment.local.segment.creator.impl.vector.lucene99.HnswCodec; +import org.apache.pinot.segment.local.segment.creator.impl.vector.lucene99.HnswVectorsFormat; import org.apache.pinot.segment.spi.V1Constants.Indexes; import org.apache.pinot.segment.spi.index.creator.VectorIndexConfig; @@ -38,17 +38,21 @@ static void cleanupVectorIndex(File segDir, String column) { // Remove the lucene index file and potentially the docId mapping file. 
diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/VectorIndexUtils.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/VectorIndexUtils.java
index 15de36cf5fd3..698adcb31896 100644
--- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/VectorIndexUtils.java
+++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/VectorIndexUtils.java
@@ -20,12 +20,12 @@
 import java.io.File;
 import org.apache.commons.io.FileUtils;
-import org.apache.lucene.codecs.lucene95.Lucene95Codec;
-import org.apache.lucene.codecs.lucene95.Lucene95HnswVectorsFormat;
+import org.apache.lucene.codecs.lucene99.Lucene99Codec;
+import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat;
 import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.VectorSimilarityFunction;
-import org.apache.pinot.segment.local.segment.creator.impl.vector.lucene95.HnswCodec;
-import org.apache.pinot.segment.local.segment.creator.impl.vector.lucene95.HnswVectorsFormat;
+import org.apache.pinot.segment.local.segment.creator.impl.vector.lucene99.HnswCodec;
+import org.apache.pinot.segment.local.segment.creator.impl.vector.lucene99.HnswVectorsFormat;
 import org.apache.pinot.segment.spi.V1Constants.Indexes;
 import org.apache.pinot.segment.spi.index.creator.VectorIndexConfig;
@@ -38,17 +38,21 @@ static void cleanupVectorIndex(File segDir, String column) {
     // Remove the lucene index file and potentially the docId mapping file.
     File luceneIndexFile = new File(segDir, column + Indexes.VECTOR_HNSW_INDEX_FILE_EXTENSION);
     FileUtils.deleteQuietly(luceneIndexFile);
+    File luceneV99IndexFile = new File(segDir, column + Indexes.VECTOR_V99_HNSW_INDEX_FILE_EXTENSION);
+    FileUtils.deleteQuietly(luceneV99IndexFile);
     File luceneMappingFile = new File(segDir, column + Indexes.VECTOR_HNSW_INDEX_DOCID_MAPPING_FILE_EXTENSION);
     FileUtils.deleteQuietly(luceneMappingFile);
 
     // Remove the native index file
     File nativeIndexFile = new File(segDir, column + Indexes.VECTOR_INDEX_FILE_EXTENSION);
     FileUtils.deleteQuietly(nativeIndexFile);
+    File nativeV99IndexFile = new File(segDir, column + Indexes.VECTOR_V99_INDEX_FILE_EXTENSION);
+    FileUtils.deleteQuietly(nativeV99IndexFile);
   }
 
   static boolean hasVectorIndex(File segDir, String column) {
-    return new File(segDir, column + Indexes.VECTOR_HNSW_INDEX_FILE_EXTENSION).exists() || new File(segDir,
-        column + Indexes.VECTOR_INDEX_FILE_EXTENSION).exists();
+    return new File(segDir, column + Indexes.VECTOR_V99_HNSW_INDEX_FILE_EXTENSION).exists() || new File(segDir,
+        column + Indexes.VECTOR_V99_INDEX_FILE_EXTENSION).exists();
   }
 
   public static VectorSimilarityFunction toSimilarityFunction(
@@ -81,17 +85,17 @@ public static IndexWriterConfig getIndexWriterConfig(VectorIndexConfig vectorInd
     indexWriterConfig.setUseCompoundFile(useCompoundFile);
 
     int maxCon = Integer.parseInt(vectorIndexConfig.getProperties()
-        .getOrDefault("maxCon", String.valueOf(Lucene95HnswVectorsFormat.DEFAULT_MAX_CONN)));
+        .getOrDefault("maxCon", String.valueOf(Lucene99HnswVectorsFormat.DEFAULT_MAX_CONN)));
     int beamWidth = Integer.parseInt(vectorIndexConfig.getProperties()
-        .getOrDefault("beamWidth", String.valueOf(Lucene95HnswVectorsFormat.DEFAULT_BEAM_WIDTH)));
+        .getOrDefault("beamWidth", String.valueOf(Lucene99HnswVectorsFormat.DEFAULT_BEAM_WIDTH)));
     int maxDimensions = Integer.parseInt(vectorIndexConfig.getProperties()
         .getOrDefault("maxDimensions", String.valueOf(HnswVectorsFormat.DEFAULT_MAX_DIMENSIONS)));
     HnswVectorsFormat knnVectorsFormat = new HnswVectorsFormat(maxCon, beamWidth, maxDimensions);
 
-    Lucene95Codec.Mode mode = Lucene95Codec.Mode.valueOf(vectorIndexConfig.getProperties()
-        .getOrDefault("mode", Lucene95Codec.Mode.BEST_SPEED.name()));
+    Lucene99Codec.Mode mode = Lucene99Codec.Mode.valueOf(vectorIndexConfig.getProperties()
+        .getOrDefault("mode", Lucene99Codec.Mode.BEST_SPEED.name()));
     indexWriterConfig.setCodec(new HnswCodec(mode, knnVectorsFormat));
     return indexWriterConfig;
   }
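For reference, the codec wiring that getIndexWriterConfig performs can be sketched with plain Lucene 9.10 classes. This is a hedged sketch that omits Pinot's HnswCodec/HnswVectorsFormat wrappers and uses the stock Lucene99 HNSW format instead:

```java
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.lucene99.Lucene99Codec;
import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat;
import org.apache.lucene.index.IndexWriterConfig;

final class HnswWriterConfigSketch {
  static IndexWriterConfig create() {
    int maxConn = Lucene99HnswVectorsFormat.DEFAULT_MAX_CONN;     // graph fan-out per node
    int beamWidth = Lucene99HnswVectorsFormat.DEFAULT_BEAM_WIDTH; // build-time search width
    KnnVectorsFormat hnswFormat = new Lucene99HnswVectorsFormat(maxConn, beamWidth);

    IndexWriterConfig config = new IndexWriterConfig();
    // Route every vector field to the HNSW format via the per-field codec hook.
    config.setCodec(new Lucene99Codec(Lucene99Codec.Mode.BEST_SPEED) {
      @Override
      public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
        return hnswFormat;
      }
    });
    return config;
  }
}
```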
diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/fst/FSTBuilder.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/fst/FSTBuilder.java
index 0a4596d173d4..a64f7817585b 100644
--- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/fst/FSTBuilder.java
+++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/fst/FSTBuilder.java
@@ -36,28 +36,31 @@
  */
 public class FSTBuilder {
   public static final Logger LOGGER = LoggerFactory.getLogger(FSTBuilder.class);
-  private final FSTCompiler<Long> _builder = new FSTCompiler<>(FST.INPUT_TYPE.BYTE4, PositiveIntOutputs.getSingleton());
+  private final FSTCompiler<Long> _fstCompiler =
+      (new FSTCompiler.Builder<>(FST.INPUT_TYPE.BYTE4, PositiveIntOutputs.getSingleton())).build();
   private final IntsRefBuilder _scratch = new IntsRefBuilder();
 
   public static FST<Long> buildFST(SortedMap<String, Integer> input)
       throws IOException {
     PositiveIntOutputs fstOutput = PositiveIntOutputs.getSingleton();
-    FSTCompiler<Long> fstCompiler = new FSTCompiler<>(FST.INPUT_TYPE.BYTE4, fstOutput);
+    FSTCompiler.Builder<Long> fstCompilerBuilder = new FSTCompiler.Builder<>(FST.INPUT_TYPE.BYTE4, fstOutput);
+    FSTCompiler<Long> fstCompiler = fstCompilerBuilder.build();
     IntsRefBuilder scratch = new IntsRefBuilder();
     for (Map.Entry<String, Integer> entry : input.entrySet()) {
       fstCompiler.add(Util.toUTF16(entry.getKey(), scratch), entry.getValue().longValue());
     }
-    return fstCompiler.compile();
+
+    return FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader());
   }
 
   public void addEntry(String key, Integer value)
       throws IOException {
-    _builder.add(Util.toUTF16(key, _scratch), value.longValue());
+    _fstCompiler.add(Util.toUTF16(key, _scratch), value.longValue());
   }
 
   public FST<Long> done()
       throws IOException {
-    return _builder.compile();
+    return FST.fromFSTReader(_fstCompiler.compile(), _fstCompiler.getFSTReader());
   }
 }
diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/nativefst/NativeFSTIndexCreator.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/nativefst/NativeFSTIndexCreator.java
index 1f69bc50f1a1..933106b4e8bd 100644
--- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/nativefst/NativeFSTIndexCreator.java
+++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/nativefst/NativeFSTIndexCreator.java
@@ -47,7 +47,7 @@ public class NativeFSTIndexCreator implements FSTIndexCreator {
    * @param sortedEntries Sorted entries of the unique values of the column.
    */
   public NativeFSTIndexCreator(File indexDir, String columnName, String[] sortedEntries) {
-    _fstIndexFile = new File(indexDir, columnName + V1Constants.Indexes.LUCENE_V9_FST_INDEX_FILE_EXTENSION);
+    _fstIndexFile = new File(indexDir, columnName + V1Constants.Indexes.LUCENE_V99_FST_INDEX_FILE_EXTENSION);
 
     _fstBuilder = new FSTBuilder();
     _dictId = 0;
diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/creator/LuceneFSTIndexCreatorTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/creator/LuceneFSTIndexCreatorTest.java
index b9c568000e8f..e0e6168c9a28 100644
--- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/creator/LuceneFSTIndexCreatorTest.java
+++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/creator/LuceneFSTIndexCreatorTest.java
@@ -32,7 +32,7 @@
 import org.testng.annotations.BeforeClass;
 import org.testng.annotations.Test;
 
-import static org.apache.pinot.segment.spi.V1Constants.Indexes.LUCENE_V9_FST_INDEX_FILE_EXTENSION;
+import static org.apache.pinot.segment.spi.V1Constants.Indexes.LUCENE_V99_FST_INDEX_FILE_EXTENSION;
 
 
 public class LuceneFSTIndexCreatorTest {
@@ -62,7 +62,7 @@ public void testIndexWriterReader()
     LuceneFSTIndexCreator creator = new LuceneFSTIndexCreator(
         INDEX_DIR, "testFSTColumn", uniqueValues);
     creator.seal();
-    File fstFile = new File(INDEX_DIR, "testFSTColumn" + LUCENE_V9_FST_INDEX_FILE_EXTENSION);
+    File fstFile = new File(INDEX_DIR, "testFSTColumn" + LUCENE_V99_FST_INDEX_FILE_EXTENSION);
     PinotDataBuffer pinotDataBuffer = PinotDataBuffer.mapFile(fstFile, true, 0, fstFile.length(),
         ByteOrder.BIG_ENDIAN, "fstIndexFile");
     LuceneFSTIndexReader reader = new LuceneFSTIndexReader(pinotDataBuffer);
diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/creator/NativeFSTIndexCreatorTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/creator/NativeFSTIndexCreatorTest.java
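The FSTBuilder change above reflects the Lucene 9.10 build API: FSTCompiler is obtained through its Builder, compile() now returns FST metadata rather than the FST itself, and FST.fromFSTReader pairs that metadata with the compiler's reader. A self-contained sketch of the same flow (inputs and outputs are illustrative):

```java
import java.io.IOException;
import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.FSTCompiler;
import org.apache.lucene.util.fst.PositiveIntOutputs;
import org.apache.lucene.util.fst.Util;

final class FstBuildSketch {
  static FST<Long> build() throws IOException {
    FSTCompiler<Long> compiler =
        new FSTCompiler.Builder<>(FST.INPUT_TYPE.BYTE4, PositiveIntOutputs.getSingleton()).build();
    IntsRefBuilder scratch = new IntsRefBuilder();
    // Inputs must be added in sorted order, as FSTBuilder.buildFST relies on.
    compiler.add(Util.toUTF16("hello", scratch), 1L);
    compiler.add(Util.toUTF16("world", scratch), 2L);
    // compile() yields the metadata; fromFSTReader attaches the arc storage.
    return FST.fromFSTReader(compiler.compile(), compiler.getFSTReader());
  }
}
```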
index d77268ef2825..f98324af588f 100644
--- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/creator/NativeFSTIndexCreatorTest.java
+++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/creator/NativeFSTIndexCreatorTest.java
@@ -29,7 +29,7 @@
 import org.testng.annotations.BeforeClass;
 import org.testng.annotations.Test;
 
-import static org.apache.pinot.segment.spi.V1Constants.Indexes.LUCENE_V9_FST_INDEX_FILE_EXTENSION;
+import static org.apache.pinot.segment.spi.V1Constants.Indexes.LUCENE_V99_FST_INDEX_FILE_EXTENSION;
 
 
 public class NativeFSTIndexCreatorTest {
@@ -59,7 +59,7 @@ public void testIndexWriterReader()
       creator.seal();
     }
 
-    File fstFile = new File(INDEX_DIR, "testFSTColumn" + LUCENE_V9_FST_INDEX_FILE_EXTENSION);
+    File fstFile = new File(INDEX_DIR, "testFSTColumn" + LUCENE_V99_FST_INDEX_FILE_EXTENSION);
     try (PinotDataBuffer dataBuffer = PinotDataBuffer.mapReadOnlyBigEndianFile(fstFile);
         NativeFSTIndexReader reader = new NativeFSTIndexReader(dataBuffer)) {
diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/loader/LoaderTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/loader/LoaderTest.java
index 87a1e5db2596..98dff135d584 100644
--- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/loader/LoaderTest.java
+++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/loader/LoaderTest.java
@@ -65,7 +65,7 @@
 import org.testng.annotations.Test;
 import org.testng.collections.Lists;
 
-import static org.apache.pinot.segment.spi.V1Constants.Indexes.LUCENE_V9_FST_INDEX_FILE_EXTENSION;
+import static org.apache.pinot.segment.spi.V1Constants.Indexes.LUCENE_V99_FST_INDEX_FILE_EXTENSION;
 import static org.testng.Assert.assertFalse;
 import static org.testng.Assert.assertTrue;
 
@@ -341,7 +341,7 @@ public void testFSTIndexLoad()
     fstIndexFile = SegmentDirectoryPaths.findFSTIndexIndexFile(_indexDir, FST_INDEX_COL_NAME);
     Assert.assertNotNull(fstIndexFile);
     Assert.assertFalse(fstIndexFile.isDirectory());
-    Assert.assertEquals(fstIndexFile.getName(), FST_INDEX_COL_NAME + LUCENE_V9_FST_INDEX_FILE_EXTENSION);
+    Assert.assertEquals(fstIndexFile.getName(), FST_INDEX_COL_NAME + LUCENE_V99_FST_INDEX_FILE_EXTENSION);
     Assert.assertEquals(fstIndexFile.getParentFile().getName(), new SegmentMetadataImpl(_indexDir).getName());
     indexSegment.destroy();
 
@@ -360,7 +360,7 @@ public void testFSTIndexLoad()
     fstIndexFile = SegmentDirectoryPaths.findFSTIndexIndexFile(_indexDir, FST_INDEX_COL_NAME);
     Assert.assertNotNull(fstIndexFile);
     Assert.assertFalse(fstIndexFile.isDirectory());
-    Assert.assertEquals(fstIndexFile.getName(), FST_INDEX_COL_NAME + LUCENE_V9_FST_INDEX_FILE_EXTENSION);
+    Assert.assertEquals(fstIndexFile.getName(), FST_INDEX_COL_NAME + LUCENE_V99_FST_INDEX_FILE_EXTENSION);
     Assert.assertEquals(fstIndexFile.getParentFile().getName(), new SegmentMetadataImpl(_indexDir).getName());
     indexSegment.destroy();
 
@@ -377,7 +377,7 @@ public void testFSTIndexLoad()
     fstIndexFile = SegmentDirectoryPaths.findFSTIndexIndexFile(_indexDir, FST_INDEX_COL_NAME);
     Assert.assertNotNull(fstIndexFile);
     Assert.assertFalse(fstIndexFile.isDirectory());
-    Assert.assertEquals(fstIndexFile.getName(), FST_INDEX_COL_NAME + LUCENE_V9_FST_INDEX_FILE_EXTENSION);
+    Assert.assertEquals(fstIndexFile.getName(), FST_INDEX_COL_NAME + LUCENE_V99_FST_INDEX_FILE_EXTENSION);
     Assert.assertEquals(fstIndexFile.getParentFile().getName(), new SegmentMetadataImpl(_indexDir).getName());
     indexSegment.destroy();
 
@@ -590,7 +590,7 @@ public void testTextIndexLoad()
     Assert.assertNotNull(textIndexFile);
     Assert.assertTrue(textIndexFile.isDirectory());
     Assert.assertEquals(textIndexFile.getName(),
-        TEXT_INDEX_COL_NAME + V1Constants.Indexes.LUCENE_V9_TEXT_INDEX_FILE_EXTENSION);
+        TEXT_INDEX_COL_NAME + V1Constants.Indexes.LUCENE_V99_TEXT_INDEX_FILE_EXTENSION);
     Assert.assertEquals(textIndexFile.getParentFile().getName(), SegmentDirectoryPaths.V3_SUBDIRECTORY_NAME);
 
     // CASE 1: don't set the segment version to load in IndexLoadingConfig
@@ -616,7 +616,7 @@ public void testTextIndexLoad()
     Assert.assertTrue(textIndexFile.isDirectory());
     Assert.assertFalse(textIndexDocIdMappingFile.isDirectory());
     Assert.assertEquals(textIndexFile.getName(),
-        TEXT_INDEX_COL_NAME + V1Constants.Indexes.LUCENE_V9_TEXT_INDEX_FILE_EXTENSION);
+        TEXT_INDEX_COL_NAME + V1Constants.Indexes.LUCENE_V99_TEXT_INDEX_FILE_EXTENSION);
     Assert.assertEquals(textIndexFile.getParentFile().getName(), SegmentDirectoryPaths.V3_SUBDIRECTORY_NAME);
     Assert.assertEquals(textIndexDocIdMappingFile.getName(),
         TEXT_INDEX_COL_NAME + V1Constants.Indexes.LUCENE_TEXT_INDEX_DOCID_MAPPING_FILE_EXTENSION);
@@ -644,7 +644,7 @@ public void testTextIndexLoad()
     Assert.assertTrue(textIndexFile.isDirectory());
     Assert.assertFalse(textIndexDocIdMappingFile.isDirectory());
     Assert.assertEquals(textIndexFile.getName(),
-        TEXT_INDEX_COL_NAME + V1Constants.Indexes.LUCENE_V9_TEXT_INDEX_FILE_EXTENSION);
+        TEXT_INDEX_COL_NAME + V1Constants.Indexes.LUCENE_V99_TEXT_INDEX_FILE_EXTENSION);
     Assert.assertEquals(textIndexFile.getParentFile().getName(), SegmentDirectoryPaths.V3_SUBDIRECTORY_NAME);
     Assert.assertEquals(textIndexDocIdMappingFile.getName(),
         TEXT_INDEX_COL_NAME + V1Constants.Indexes.LUCENE_TEXT_INDEX_DOCID_MAPPING_FILE_EXTENSION);
@@ -671,7 +671,7 @@ public void testTextIndexLoad()
     Assert.assertTrue(textIndexFile.isDirectory());
     Assert.assertFalse(textIndexDocIdMappingFile.isDirectory());
     Assert.assertEquals(textIndexFile.getName(),
-        TEXT_INDEX_COL_NAME + V1Constants.Indexes.LUCENE_V9_TEXT_INDEX_FILE_EXTENSION);
+        TEXT_INDEX_COL_NAME + V1Constants.Indexes.LUCENE_V99_TEXT_INDEX_FILE_EXTENSION);
     Assert.assertEquals(textIndexFile.getParentFile().getName(), new SegmentMetadataImpl(_indexDir).getName());
 
     // CASE 1: don't set the segment version to load in IndexLoadingConfig
@@ -694,7 +694,7 @@ public void testTextIndexLoad()
     Assert.assertNotNull(textIndexDocIdMappingFile);
     Assert.assertTrue(textIndexFile.isDirectory());
     Assert.assertEquals(textIndexFile.getName(),
-        TEXT_INDEX_COL_NAME + V1Constants.Indexes.LUCENE_V9_TEXT_INDEX_FILE_EXTENSION);
+        TEXT_INDEX_COL_NAME + V1Constants.Indexes.LUCENE_V99_TEXT_INDEX_FILE_EXTENSION);
     Assert.assertEquals(textIndexFile.getParentFile().getName(), new SegmentMetadataImpl(_indexDir).getName());
     Assert.assertEquals(textIndexDocIdMappingFile.getName(),
         TEXT_INDEX_COL_NAME + V1Constants.Indexes.LUCENE_TEXT_INDEX_DOCID_MAPPING_FILE_EXTENSION);
@@ -720,7 +720,7 @@ public void testTextIndexLoad()
     Assert.assertNotNull(textIndexDocIdMappingFile);
     Assert.assertTrue(textIndexFile.isDirectory());
     Assert.assertEquals(textIndexFile.getName(),
-        TEXT_INDEX_COL_NAME + V1Constants.Indexes.LUCENE_V9_TEXT_INDEX_FILE_EXTENSION);
+        TEXT_INDEX_COL_NAME + V1Constants.Indexes.LUCENE_V99_TEXT_INDEX_FILE_EXTENSION);
     Assert.assertEquals(textIndexFile.getParentFile().getName(), new SegmentMetadataImpl(_indexDir).getName());
     Assert.assertEquals(textIndexDocIdMappingFile.getName(),
        TEXT_INDEX_COL_NAME + V1Constants.Indexes.LUCENE_TEXT_INDEX_DOCID_MAPPING_FILE_EXTENSION);
 
@@ -746,7 +746,7 @@ public void testTextIndexLoad()
     Assert.assertNotNull(textIndexDocIdMappingFile);
     Assert.assertTrue(textIndexFile.isDirectory());
     Assert.assertEquals(textIndexFile.getName(),
-        TEXT_INDEX_COL_NAME + V1Constants.Indexes.LUCENE_V9_TEXT_INDEX_FILE_EXTENSION);
+        TEXT_INDEX_COL_NAME + V1Constants.Indexes.LUCENE_V99_TEXT_INDEX_FILE_EXTENSION);
     Assert.assertEquals(textIndexFile.getParentFile().getName(), SegmentDirectoryPaths.V3_SUBDIRECTORY_NAME);
     Assert.assertEquals(textIndexDocIdMappingFile.getName(),
         TEXT_INDEX_COL_NAME + V1Constants.Indexes.LUCENE_TEXT_INDEX_DOCID_MAPPING_FILE_EXTENSION);
@@ -776,7 +776,7 @@ public void testVectorIndexLoad()
     Assert.assertNotNull(vectorIndexFile);
     Assert.assertTrue(vectorIndexFile.isDirectory());
     Assert.assertEquals(vectorIndexFile.getName(),
-        VECTOR_INDEX_COL_NAME + V1Constants.Indexes.VECTOR_HNSW_INDEX_FILE_EXTENSION);
+        VECTOR_INDEX_COL_NAME + V1Constants.Indexes.VECTOR_V99_HNSW_INDEX_FILE_EXTENSION);
     Assert.assertEquals(vectorIndexFile.getParentFile().getName(), SegmentDirectoryPaths.V3_SUBDIRECTORY_NAME);
 
     // CASE 1: don't set the segment version to load in IndexLoadingConfig
@@ -801,7 +801,7 @@ public void testVectorIndexLoad()
     Assert.assertNotNull(vectorIndexFile);
     Assert.assertTrue(vectorIndexFile.isDirectory());
     Assert.assertEquals(vectorIndexFile.getName(),
-        VECTOR_INDEX_COL_NAME + V1Constants.Indexes.VECTOR_HNSW_INDEX_FILE_EXTENSION);
+        VECTOR_INDEX_COL_NAME + V1Constants.Indexes.VECTOR_V99_HNSW_INDEX_FILE_EXTENSION);
     Assert.assertEquals(vectorIndexFile.getParentFile().getName(), SegmentDirectoryPaths.V3_SUBDIRECTORY_NAME);
     indexSegment.destroy();
 
@@ -821,7 +821,7 @@ public void testVectorIndexLoad()
     Assert.assertNotNull(vectorIndexFile);
     Assert.assertTrue(vectorIndexFile.isDirectory());
     Assert.assertEquals(vectorIndexFile.getName(),
-        VECTOR_INDEX_COL_NAME + V1Constants.Indexes.VECTOR_HNSW_INDEX_FILE_EXTENSION);
+        VECTOR_INDEX_COL_NAME + V1Constants.Indexes.VECTOR_V99_HNSW_INDEX_FILE_EXTENSION);
     Assert.assertEquals(vectorIndexFile.getParentFile().getName(), SegmentDirectoryPaths.V3_SUBDIRECTORY_NAME);
     indexSegment.destroy();
 
@@ -843,7 +843,7 @@ public void testVectorIndexLoad()
     Assert.assertNotNull(vectorIndexFile);
     Assert.assertTrue(vectorIndexFile.isDirectory());
     Assert.assertEquals(vectorIndexFile.getName(),
-        VECTOR_INDEX_COL_NAME + V1Constants.Indexes.VECTOR_HNSW_INDEX_FILE_EXTENSION);
+        VECTOR_INDEX_COL_NAME + V1Constants.Indexes.VECTOR_V99_HNSW_INDEX_FILE_EXTENSION);
     Assert.assertEquals(vectorIndexFile.getParentFile().getName(), new SegmentMetadataImpl(_indexDir).getName());
 
     // CASE 1: don't set the segment version to load in IndexLoadingConfig
@@ -867,7 +867,7 @@ public void testVectorIndexLoad()
     Assert.assertNotNull(vectorIndexFile);
     Assert.assertTrue(vectorIndexFile.isDirectory());
     Assert.assertEquals(vectorIndexFile.getName(),
-        VECTOR_INDEX_COL_NAME + V1Constants.Indexes.VECTOR_HNSW_INDEX_FILE_EXTENSION);
+        VECTOR_INDEX_COL_NAME + V1Constants.Indexes.VECTOR_V99_HNSW_INDEX_FILE_EXTENSION);
     Assert.assertEquals(vectorIndexFile.getParentFile().getName(), new SegmentMetadataImpl(_indexDir).getName());
     indexSegment.destroy();
 
@@ -886,7 +886,7 @@ public void testVectorIndexLoad()
     Assert.assertNotNull(vectorIndexFile);
     Assert.assertTrue(vectorIndexFile.isDirectory());
     Assert.assertEquals(vectorIndexFile.getName(),
-        VECTOR_INDEX_COL_NAME + V1Constants.Indexes.VECTOR_HNSW_INDEX_FILE_EXTENSION);
+        VECTOR_INDEX_COL_NAME + V1Constants.Indexes.VECTOR_V99_HNSW_INDEX_FILE_EXTENSION);
     Assert.assertEquals(vectorIndexFile.getParentFile().getName(), new SegmentMetadataImpl(_indexDir).getName());
     indexSegment.destroy();
 
@@ -905,7 +905,7 @@ public void testVectorIndexLoad()
     Assert.assertNotNull(vectorIndexFile);
     Assert.assertTrue(vectorIndexFile.isDirectory());
     Assert.assertEquals(vectorIndexFile.getName(),
-        VECTOR_INDEX_COL_NAME + V1Constants.Indexes.VECTOR_HNSW_INDEX_FILE_EXTENSION);
+        VECTOR_INDEX_COL_NAME + V1Constants.Indexes.VECTOR_V99_HNSW_INDEX_FILE_EXTENSION);
     Assert.assertEquals(vectorIndexFile.getParentFile().getName(), SegmentDirectoryPaths.V3_SUBDIRECTORY_NAME);
     indexSegment.destroy();
   }
diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/loader/SegmentPreProcessorTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/loader/SegmentPreProcessorTest.java
index 3349821963a0..acdc67925655 100644
--- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/loader/SegmentPreProcessorTest.java
+++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/loader/SegmentPreProcessorTest.java
@@ -1441,8 +1441,8 @@ public void testV1CleanupIndices()
     // V1 use separate file for each column index.
     File iiFile = new File(_indexDir, strColumn + V1Constants.Indexes.BITMAP_INVERTED_INDEX_FILE_EXTENSION);
     File rgFile = new File(_indexDir, strColumn + V1Constants.Indexes.BITMAP_RANGE_INDEX_FILE_EXTENSION);
-    File txtFile = new File(_indexDir, strColumn + V1Constants.Indexes.LUCENE_V9_TEXT_INDEX_FILE_EXTENSION);
-    File fstFile = new File(_indexDir, strColumn + V1Constants.Indexes.LUCENE_V9_FST_INDEX_FILE_EXTENSION);
+    File txtFile = new File(_indexDir, strColumn + V1Constants.Indexes.LUCENE_V99_TEXT_INDEX_FILE_EXTENSION);
+    File fstFile = new File(_indexDir, strColumn + V1Constants.Indexes.LUCENE_V99_FST_INDEX_FILE_EXTENSION);
     File bfFile = new File(_indexDir, strColumn + V1Constants.Indexes.BLOOM_FILTER_FILE_EXTENSION);
 
     assertFalse(iiFile.exists());
diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/FilePerIndexDirectoryTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/FilePerIndexDirectoryTest.java
index 38eae8436ec8..a385a60b0347 100644
--- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/FilePerIndexDirectoryTest.java
+++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/FilePerIndexDirectoryTest.java
@@ -231,11 +231,11 @@ public void testRemoveTextIndices()
       // Both files for TextIndex should be removed.
       fpi.removeIndex("foo", StandardIndexes.text());
-      assertFalse(new File(TEMP_DIR, "foo" + V1Constants.Indexes.LUCENE_V9_TEXT_INDEX_FILE_EXTENSION).exists());
+      assertFalse(new File(TEMP_DIR, "foo" + V1Constants.Indexes.LUCENE_V99_TEXT_INDEX_FILE_EXTENSION).exists());
       assertFalse(
           new File(TEMP_DIR, "foo" + V1Constants.Indexes.LUCENE_TEXT_INDEX_DOCID_MAPPING_FILE_EXTENSION).exists());
     }
-    assertTrue(new File(TEMP_DIR, "bar" + V1Constants.Indexes.LUCENE_V9_TEXT_INDEX_FILE_EXTENSION).exists());
+    assertTrue(new File(TEMP_DIR, "bar" + V1Constants.Indexes.LUCENE_V99_TEXT_INDEX_FILE_EXTENSION).exists());
     assertTrue(new File(TEMP_DIR, "bar" + V1Constants.Indexes.LUCENE_TEXT_INDEX_DOCID_MAPPING_FILE_EXTENSION).exists());
 
     // Read indices back and check the content.
diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/SingleFileIndexDirectoryTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/SingleFileIndexDirectoryTest.java
index 7f0dcebb05f8..3a94ceec1193 100644
--- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/SingleFileIndexDirectoryTest.java
+++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/store/SingleFileIndexDirectoryTest.java
@@ -264,11 +264,11 @@ public void testRemoveTextIndices()
       // Both files for TextIndex should be removed.
       sfd.removeIndex("foo", StandardIndexes.text());
-      assertFalse(new File(TEMP_DIR, "foo" + V1Constants.Indexes.LUCENE_V9_TEXT_INDEX_FILE_EXTENSION).exists());
+      assertFalse(new File(TEMP_DIR, "foo" + V1Constants.Indexes.LUCENE_V99_TEXT_INDEX_FILE_EXTENSION).exists());
       assertFalse(
           new File(TEMP_DIR, "foo" + V1Constants.Indexes.LUCENE_TEXT_INDEX_DOCID_MAPPING_FILE_EXTENSION).exists());
     }
-    assertTrue(new File(TEMP_DIR, "bar" + V1Constants.Indexes.LUCENE_V9_TEXT_INDEX_FILE_EXTENSION).exists());
+    assertTrue(new File(TEMP_DIR, "bar" + V1Constants.Indexes.LUCENE_V99_TEXT_INDEX_FILE_EXTENSION).exists());
     assertTrue(
         new File(TEMP_DIR, "bar" + V1Constants.Indexes.LUCENE_TEXT_INDEX_DOCID_MAPPING_FILE_EXTENSION).exists());
 
diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/fst/FSTBuilderTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/fst/FSTBuilderTest.java
index 493e7b3449e9..edee3ebef21e 100644
--- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/fst/FSTBuilderTest.java
+++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/fst/FSTBuilderTest.java
@@ -28,7 +28,6 @@
 import org.apache.commons.io.FileUtils;
 import org.apache.lucene.store.OutputStreamDataOutput;
 import org.apache.lucene.util.fst.FST;
-import org.apache.lucene.util.fst.OffHeapFSTStore;
 import org.apache.lucene.util.fst.Outputs;
 import org.apache.lucene.util.fst.PositiveIntOutputs;
 import org.apache.pinot.segment.spi.memory.PinotDataBuffer;
@@ -78,7 +77,6 @@ public void testFSTBuilder()
     PinotDataBuffer pinotDataBuffer =
         PinotDataBuffer.mapFile(fstFile, true, 0, fstFile.length(), ByteOrder.BIG_ENDIAN, "");
     PinotBufferIndexInput indexInput = new PinotBufferIndexInput(pinotDataBuffer, 0L, fstFile.length());
-    FST<Long> readFST = new FST(indexInput, indexInput, outputs, new OffHeapFSTStore());
 
     List<Long> results = RegexpMatcher.regexMatch("hello.*123", fst);
     Assert.assertEquals(results.size(), 1);
diff --git a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/V1Constants.java b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/V1Constants.java
index 25ded5fa30d9..8827329a7bdd 100644
--- a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/V1Constants.java
+++ b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/V1Constants.java
@@ -54,8 +54,12 @@ public static class Indexes {
     public static final String LUCENE_TEXT_INDEX_FILE_EXTENSION = ".lucene.index";
     public static final String LUCENE_V9_FST_INDEX_FILE_EXTENSION = ".lucene.v9.fst";
     public static final String LUCENE_V9_TEXT_INDEX_FILE_EXTENSION = ".lucene.v9.index";
+    public static final String LUCENE_V99_FST_INDEX_FILE_EXTENSION = ".lucene.v99.fst";
+    public static final String LUCENE_V99_TEXT_INDEX_FILE_EXTENSION = ".lucene.v99.index";
     public static final String VECTOR_INDEX_FILE_EXTENSION = ".vector.index";
     public static final String VECTOR_HNSW_INDEX_FILE_EXTENSION = ".vector.hnsw.index";
+    public static final String VECTOR_V99_INDEX_FILE_EXTENSION = ".vector.v99.index";
+    public static final String VECTOR_V99_HNSW_INDEX_FILE_EXTENSION = ".vector.v99.hnsw.index";
     public static final String VECTOR_HNSW_INDEX_DOCID_MAPPING_FILE_EXTENSION = ".vector.hnsw.mapping";
   }
diff --git a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/store/SegmentDirectoryPaths.java b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/store/SegmentDirectoryPaths.java
index b8c09a0329e0..c873ab7e03b5 100644
--- a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/store/SegmentDirectoryPaths.java
+++ b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/store/SegmentDirectoryPaths.java
@@ -79,8 +79,14 @@ public static File findCreationMetaFile(File indexDir) {
    */
   @Nullable
   public static File findTextIndexIndexFile(File indexDir, String column) {
-    String luceneIndexDirectory = column + V1Constants.Indexes.LUCENE_V9_TEXT_INDEX_FILE_EXTENSION;
+    String luceneIndexDirectory = column + V1Constants.Indexes.LUCENE_V99_TEXT_INDEX_FILE_EXTENSION;
     File indexFormatFile = findFormatFile(indexDir, luceneIndexDirectory);
+    // Check for the V9 version if no V99 index is found
+    if (indexFormatFile == null) {
+      luceneIndexDirectory = column + V1Constants.Indexes.LUCENE_V9_TEXT_INDEX_FILE_EXTENSION;
+      indexFormatFile = findFormatFile(indexDir, luceneIndexDirectory);
+    }
+    // Check for the pre-V9 version if no V9 index is found
     if (indexFormatFile == null) {
       luceneIndexDirectory = column + V1Constants.Indexes.LUCENE_TEXT_INDEX_FILE_EXTENSION;
       indexFormatFile = findFormatFile(indexDir, luceneIndexDirectory);
@@ -101,8 +107,14 @@ public static File findNativeTextIndexIndexFile(File indexDir, String column) {
   }
 
   public static File findFSTIndexIndexFile(File indexDir, String column) {
-    String luceneIndexDirectory = column + V1Constants.Indexes.LUCENE_V9_FST_INDEX_FILE_EXTENSION;
+    String luceneIndexDirectory = column + V1Constants.Indexes.LUCENE_V99_FST_INDEX_FILE_EXTENSION;
     File formatFile = findFormatFile(indexDir, luceneIndexDirectory);
+    // Check for the V9 version if no V99 index is found
+    if (formatFile == null) {
+      luceneIndexDirectory = column + V1Constants.Indexes.LUCENE_V9_FST_INDEX_FILE_EXTENSION;
+      formatFile = findFormatFile(indexDir, luceneIndexDirectory);
+    }
+    // Check for the pre-V9 version if no V9 index is found
     if (formatFile == null) {
      luceneIndexDirectory = column + V1Constants.Indexes.LUCENE_FST_INDEX_FILE_EXTENSION;
      formatFile = findFormatFile(indexDir, luceneIndexDirectory);
@@ -120,8 +132,14 @@ public static File findTextIndexDocIdMappingFile(File indexDir, String column) {
   @Nullable
   @VisibleForTesting
   public static File findVectorIndexIndexFile(File segmentIndexDir, String column) {
-    String vectorIndexDirectory = column + V1Constants.Indexes.VECTOR_HNSW_INDEX_FILE_EXTENSION;
-    return findFormatFile(segmentIndexDir, vectorIndexDirectory);
+    String vectorIndexDirectory = column + V1Constants.Indexes.VECTOR_V99_HNSW_INDEX_FILE_EXTENSION;
+    File formatFile = findFormatFile(segmentIndexDir, vectorIndexDirectory);
+
+    if (formatFile == null) {
+      vectorIndexDirectory = column + V1Constants.Indexes.VECTOR_HNSW_INDEX_FILE_EXTENSION;
+      formatFile = findFormatFile(segmentIndexDir, vectorIndexDirectory);
+    }
+    return formatFile;
   }
 
   /**
diff --git a/pom.xml b/pom.xml
index e3d1f26a9bfc..1d7ea3245651 100644
--- a/pom.xml
+++ b/pom.xml
@@ -153,7 +153,7 @@
     2.5.1
     2.3.2
     1.36.0
-    <lucene.version>9.8.0</lucene.version>
+    <lucene.version>9.10.0</lucene.version>
     0.10.2
     0.17.0
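The SegmentDirectoryPaths changes above all follow one pattern: probe the newest extension first and fall back to older generations, so segments written before the upgrade stay loadable. An illustrative generalization of that pattern (findFormatFile here is a simplified stand-in for the real directory probe):

```java
import java.io.File;
import javax.annotation.Nullable;

final class IndexFileLookupSketch {
  // Newest generation first; the order encodes the fallback priority.
  private static final String[] TEXT_EXTENSIONS = {".lucene.v99.index", ".lucene.v9.index", ".lucene.index"};

  @Nullable
  static File findTextIndexFile(File indexDir, String column) {
    for (String extension : TEXT_EXTENSIONS) {
      File candidate = findFormatFile(indexDir, column + extension);
      if (candidate != null) {
        return candidate;
      }
    }
    return null;
  }

  @Nullable
  private static File findFormatFile(File indexDir, String name) {
    // Simplified stand-in: the real method also searches v3 subdirectories.
    File file = new File(indexDir, name);
    return file.exists() ? file : null;
  }
}
```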
From 5d1dc73cc608f5fa8cfea2c72a5fb8a2d112c143 Mon Sep 17 00:00:00 2001
From: Johan Adami <4760722+jadami10@users.noreply.github.com>
Date: Wed, 1 May 2024 16:56:50 -0400
Subject: [PATCH 58/58] log the log rate limiter rate for dropped broker logs (#13041)

---
 .../main/java/org/apache/pinot/broker/querylog/QueryLogger.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pinot-broker/src/main/java/org/apache/pinot/broker/querylog/QueryLogger.java b/pinot-broker/src/main/java/org/apache/pinot/broker/querylog/QueryLogger.java
index a1ccd63e6c34..28564cc1c67a 100644
--- a/pinot-broker/src/main/java/org/apache/pinot/broker/querylog/QueryLogger.java
+++ b/pinot-broker/src/main/java/org/apache/pinot/broker/querylog/QueryLogger.java
@@ -98,7 +98,7 @@ public void log(QueryLogParams params) {
       long numDroppedLogsSinceLastLog = _numDroppedLogs.getAndSet(0);
       if (numDroppedLogsSinceLastLog > 0) {
         _logger.warn("{} logs were dropped. (log max rate per second: {})", numDroppedLogsSinceLastLog,
-            _droppedLogRateLimiter.getRate());
+            _logRateLimiter.getRate());
       }
     }
   }
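For context on the one-line fix in patch 58: QueryLogger gates per-query logs with a Guava RateLimiter and periodically reports how many logs were dropped, and the bug was quoting the wrong limiter's rate in that warning. A minimal sketch of the corrected accounting (the class and field layout here are illustrative, not the actual QueryLogger):

```java
import java.util.concurrent.atomic.AtomicLong;
import com.google.common.util.concurrent.RateLimiter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

final class QueryLogRateSketch {
  private static final Logger LOGGER = LoggerFactory.getLogger(QueryLogRateSketch.class);
  // The limiter that actually gates query logs; 10 permits/second is an assumed rate.
  private final RateLimiter _logRateLimiter = RateLimiter.create(10.0);
  private final AtomicLong _numDroppedLogs = new AtomicLong();

  void log(String message) {
    if (_logRateLimiter.tryAcquire()) {
      LOGGER.info(message);
      // Report drops accumulated since the last successful log, quoting the
      // limiter that gated them (the limiter mix-up fixed in #13041).
      long dropped = _numDroppedLogs.getAndSet(0);
      if (dropped > 0) {
        LOGGER.warn("{} logs were dropped. (log max rate per second: {})", dropped, _logRateLimiter.getRate());
      }
    } else {
      _numDroppedLogs.incrementAndGet();
    }
  }
}
```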