From b6846ceba06df1548a6fd24cf380baf3ae9fce06 Mon Sep 17 00:00:00 2001 From: normanj-bitquill <78755797+normanj-bitquill@users.noreply.github.com> Date: Thu, 5 Dec 2024 09:59:33 -0800 Subject: [PATCH] Add FILLNULL command in PPL (#3032) (#3075) Signed-off-by: Norman Jordan Signed-off-by: normanj-bitquill <78755797+normanj-bitquill@users.noreply.github.com> Co-authored-by: Andrew Carbonetto --- .../org/opensearch/sql/analysis/Analyzer.java | 24 ++ .../sql/ast/AbstractNodeVisitor.java | 5 + .../org/opensearch/sql/ast/dsl/AstDSL.java | 21 ++ .../org/opensearch/sql/ast/tree/FillNull.java | 89 ++++++++ .../opensearch/sql/analysis/AnalyzerTest.java | 44 ++++ docs/category.json | 1 + docs/user/ppl/cmd/fillnull.rst | 62 +++++ .../org/opensearch/sql/ppl/ExplainIT.java | 11 + .../opensearch/sql/ppl/FillNullCommandIT.java | 214 ++++++++++++++++++ .../ppl/explain_fillnull_push.json | 28 +++ ppl/src/main/antlr/OpenSearchPPLLexer.g4 | 3 + ppl/src/main/antlr/OpenSearchPPLParser.g4 | 19 ++ .../opensearch/sql/ppl/parser/AstBuilder.java | 29 +++ .../sql/ppl/utils/PPLQueryDataAnonymizer.java | 28 +++ .../sql/ppl/antlr/PPLSyntaxParserTest.java | 12 + .../sql/ppl/parser/AstBuilderTest.java | 32 +++ .../ppl/utils/PPLQueryDataAnonymizerTest.java | 13 ++ 17 files changed, 635 insertions(+) create mode 100644 core/src/main/java/org/opensearch/sql/ast/tree/FillNull.java create mode 100644 docs/user/ppl/cmd/fillnull.rst create mode 100644 integ-test/src/test/java/org/opensearch/sql/ppl/FillNullCommandIT.java create mode 100644 integ-test/src/test/resources/expectedOutput/ppl/explain_fillnull_push.json diff --git a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java index d5e8b93b13..71db736f78 100644 --- a/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java +++ b/core/src/main/java/org/opensearch/sql/analysis/Analyzer.java @@ -46,6 +46,7 @@ import org.opensearch.sql.ast.tree.Dedupe; import org.opensearch.sql.ast.tree.Eval; import org.opensearch.sql.ast.tree.FetchCursor; +import org.opensearch.sql.ast.tree.FillNull; import org.opensearch.sql.ast.tree.Filter; import org.opensearch.sql.ast.tree.Head; import org.opensearch.sql.ast.tree.Kmeans; @@ -558,6 +559,29 @@ public LogicalPlan visitAD(AD node, AnalysisContext context) { return new LogicalAD(child, options); } + /** Build {@link LogicalEval} for fillnull command. */ + @Override + public LogicalPlan visitFillNull(final FillNull node, final AnalysisContext context) { + LogicalPlan child = node.getChild().get(0).accept(this, context); + + ImmutableList.Builder> expressionsBuilder = + new Builder<>(); + for (FillNull.NullableFieldFill fieldFill : node.getNullableFieldFills()) { + Expression fieldExpr = + expressionAnalyzer.analyze(fieldFill.getNullableFieldReference(), context); + ReferenceExpression ref = + DSL.ref(fieldFill.getNullableFieldReference().getField().toString(), fieldExpr.type()); + FunctionExpression ifNullFunction = + DSL.ifnull(ref, expressionAnalyzer.analyze(fieldFill.getReplaceNullWithMe(), context)); + expressionsBuilder.add(new ImmutablePair<>(ref, ifNullFunction)); + TypeEnvironment typeEnvironment = context.peek(); + // define the new reference in type env. + typeEnvironment.define(ref); + } + + return new LogicalEval(child, expressionsBuilder.build()); + } + /** Build {@link LogicalML} for ml command. */ @Override public LogicalPlan visitML(ML node, AnalysisContext context) { diff --git a/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java b/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java index 973b10310b..a0520dc70e 100644 --- a/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java @@ -45,6 +45,7 @@ import org.opensearch.sql.ast.tree.Dedupe; import org.opensearch.sql.ast.tree.Eval; import org.opensearch.sql.ast.tree.FetchCursor; +import org.opensearch.sql.ast.tree.FillNull; import org.opensearch.sql.ast.tree.Filter; import org.opensearch.sql.ast.tree.Head; import org.opensearch.sql.ast.tree.Kmeans; @@ -312,4 +313,8 @@ public T visitFetchCursor(FetchCursor cursor, C context) { public T visitCloseCursor(CloseCursor closeCursor, C context) { return visitChildren(closeCursor, context); } + + public T visitFillNull(FillNull fillNull, C context) { + return visitChildren(fillNull, context); + } } diff --git a/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java b/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java index 4f3056b0f7..8135731ff6 100644 --- a/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java +++ b/core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java @@ -5,10 +5,12 @@ package org.opensearch.sql.ast.dsl; +import com.google.common.collect.ImmutableList; import java.util.Arrays; import java.util.List; import java.util.stream.Collectors; import lombok.experimental.UtilityClass; +import org.apache.commons.lang3.tuple.ImmutablePair; import org.apache.commons.lang3.tuple.Pair; import org.opensearch.sql.ast.expression.AggregateFunction; import org.opensearch.sql.ast.expression.Alias; @@ -46,6 +48,7 @@ import org.opensearch.sql.ast.tree.Aggregation; import org.opensearch.sql.ast.tree.Dedupe; import org.opensearch.sql.ast.tree.Eval; +import org.opensearch.sql.ast.tree.FillNull; import org.opensearch.sql.ast.tree.Filter; import org.opensearch.sql.ast.tree.Head; import org.opensearch.sql.ast.tree.Limit; @@ -471,4 +474,22 @@ public static Parse parse( java.util.Map arguments) { return new Parse(parseMethod, sourceField, pattern, arguments, input); } + + public static FillNull fillNull(UnresolvedExpression replaceNullWithMe, Field... fields) { + return new FillNull( + FillNull.ContainNullableFieldFill.ofSameValue( + replaceNullWithMe, ImmutableList.copyOf(fields))); + } + + public static FillNull fillNull( + List> fieldAndReplacements) { + ImmutableList.Builder replacementsBuilder = ImmutableList.builder(); + for (ImmutablePair fieldAndReplacement : fieldAndReplacements) { + replacementsBuilder.add( + new FillNull.NullableFieldFill( + fieldAndReplacement.getLeft(), fieldAndReplacement.getRight())); + } + return new FillNull( + FillNull.ContainNullableFieldFill.ofVariousValue(replacementsBuilder.build())); + } } diff --git a/core/src/main/java/org/opensearch/sql/ast/tree/FillNull.java b/core/src/main/java/org/opensearch/sql/ast/tree/FillNull.java new file mode 100644 index 0000000000..e1e56229b4 --- /dev/null +++ b/core/src/main/java/org/opensearch/sql/ast/tree/FillNull.java @@ -0,0 +1,89 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ast.tree; + +import java.util.List; +import java.util.Objects; +import lombok.AllArgsConstructor; +import lombok.Getter; +import lombok.NonNull; +import lombok.RequiredArgsConstructor; +import org.opensearch.sql.ast.AbstractNodeVisitor; +import org.opensearch.sql.ast.Node; +import org.opensearch.sql.ast.expression.Field; +import org.opensearch.sql.ast.expression.UnresolvedExpression; + +/** AST node represent FillNull operation. */ +@RequiredArgsConstructor +@AllArgsConstructor +public class FillNull extends UnresolvedPlan { + + @Getter + @RequiredArgsConstructor + public static class NullableFieldFill { + @NonNull private final Field nullableFieldReference; + @NonNull private final UnresolvedExpression replaceNullWithMe; + } + + public interface ContainNullableFieldFill { + List getNullFieldFill(); + + static ContainNullableFieldFill ofVariousValue(List replacements) { + return new VariousValueNullFill(replacements); + } + + static ContainNullableFieldFill ofSameValue( + UnresolvedExpression replaceNullWithMe, List nullableFieldReferences) { + return new SameValueNullFill(replaceNullWithMe, nullableFieldReferences); + } + } + + private static class SameValueNullFill implements ContainNullableFieldFill { + @Getter(onMethod_ = @Override) + private final List nullFieldFill; + + public SameValueNullFill( + UnresolvedExpression replaceNullWithMe, List nullableFieldReferences) { + Objects.requireNonNull(replaceNullWithMe, "Null replacement is required"); + this.nullFieldFill = + Objects.requireNonNull(nullableFieldReferences, "Nullable field reference is required") + .stream() + .map(nullableReference -> new NullableFieldFill(nullableReference, replaceNullWithMe)) + .toList(); + } + } + + @RequiredArgsConstructor + private static class VariousValueNullFill implements ContainNullableFieldFill { + @NonNull + @Getter(onMethod_ = @Override) + private final List nullFieldFill; + } + + private UnresolvedPlan child; + + @NonNull private final ContainNullableFieldFill containNullableFieldFill; + + public List getNullableFieldFills() { + return containNullableFieldFill.getNullFieldFill(); + } + + @Override + public UnresolvedPlan attach(UnresolvedPlan child) { + this.child = child; + return this; + } + + @Override + public List getChild() { + return child == null ? List.of() : List.of(child); + } + + @Override + public T accept(AbstractNodeVisitor nodeVisitor, C context) { + return nodeVisitor.visitFillNull(this, context); + } +} diff --git a/core/src/test/java/org/opensearch/sql/analysis/AnalyzerTest.java b/core/src/test/java/org/opensearch/sql/analysis/AnalyzerTest.java index 2412bd9474..4f06ce9d23 100644 --- a/core/src/test/java/org/opensearch/sql/analysis/AnalyzerTest.java +++ b/core/src/test/java/org/opensearch/sql/analysis/AnalyzerTest.java @@ -73,6 +73,7 @@ import org.opensearch.sql.ast.dsl.AstDSL; import org.opensearch.sql.ast.expression.Argument; import org.opensearch.sql.ast.expression.DataType; +import org.opensearch.sql.ast.expression.Field; import org.opensearch.sql.ast.expression.HighlightFunction; import org.opensearch.sql.ast.expression.Literal; import org.opensearch.sql.ast.expression.ParseMethod; @@ -81,6 +82,7 @@ import org.opensearch.sql.ast.tree.AD; import org.opensearch.sql.ast.tree.CloseCursor; import org.opensearch.sql.ast.tree.FetchCursor; +import org.opensearch.sql.ast.tree.FillNull; import org.opensearch.sql.ast.tree.Kmeans; import org.opensearch.sql.ast.tree.ML; import org.opensearch.sql.ast.tree.Paginate; @@ -1437,6 +1439,48 @@ public void kmeanns_relation() { new Kmeans(AstDSL.relation("schema"), argumentMap)); } + @Test + public void fillnull_same_value() { + assertAnalyzeEqual( + LogicalPlanDSL.eval( + LogicalPlanDSL.relation("schema", table), + ImmutablePair.of( + DSL.ref("integer_value", INTEGER), + DSL.ifnull(DSL.ref("integer_value", INTEGER), DSL.literal(0))), + ImmutablePair.of( + DSL.ref("int_null_value", INTEGER), + DSL.ifnull(DSL.ref("int_null_value", INTEGER), DSL.literal(0)))), + new FillNull( + AstDSL.relation("schema"), + FillNull.ContainNullableFieldFill.ofSameValue( + AstDSL.intLiteral(0), + ImmutableList.builder() + .add(AstDSL.field("integer_value")) + .add(AstDSL.field("int_null_value")) + .build()))); + } + + @Test + public void fillnull_various_values() { + assertAnalyzeEqual( + LogicalPlanDSL.eval( + LogicalPlanDSL.relation("schema", table), + ImmutablePair.of( + DSL.ref("integer_value", INTEGER), + DSL.ifnull(DSL.ref("integer_value", INTEGER), DSL.literal(0))), + ImmutablePair.of( + DSL.ref("int_null_value", INTEGER), + DSL.ifnull(DSL.ref("int_null_value", INTEGER), DSL.literal(1)))), + new FillNull( + AstDSL.relation("schema"), + FillNull.ContainNullableFieldFill.ofVariousValue( + ImmutableList.of( + new FillNull.NullableFieldFill( + AstDSL.field("integer_value"), AstDSL.intLiteral(0)), + new FillNull.NullableFieldFill( + AstDSL.field("int_null_value"), AstDSL.intLiteral(1)))))); + } + @Test public void ad_batchRCF_relation() { Map argumentMap = diff --git a/docs/category.json b/docs/category.json index ca3d345e8b..aacfc43478 100644 --- a/docs/category.json +++ b/docs/category.json @@ -14,6 +14,7 @@ "user/ppl/cmd/information_schema.rst", "user/ppl/cmd/eval.rst", "user/ppl/cmd/fields.rst", + "user/ppl/cmd/fillnull.rst", "user/ppl/cmd/grok.rst", "user/ppl/cmd/head.rst", "user/ppl/cmd/parse.rst", diff --git a/docs/user/ppl/cmd/fillnull.rst b/docs/user/ppl/cmd/fillnull.rst new file mode 100644 index 0000000000..4a9e38d353 --- /dev/null +++ b/docs/user/ppl/cmd/fillnull.rst @@ -0,0 +1,62 @@ +============= +fillnull +============= + +.. rubric:: Table of contents + +.. contents:: + :local: + :depth: 2 + + +Description +============ +Using ``fillnull`` command to fill null with provided value in one or more fields in the search result. + + +Syntax +============ +`fillnull [with in ["," ]] | [using = ["," = ]]` + +* null-replacement: mandatory. The value used to replace `null`s. +* nullable-field: mandatory. Field reference. The `null` values in the field referred to by the property will be replaced with the values from the null-replacement. + +Example 1: fillnull one field +====================================================================== + +The example show fillnull one field. + +PPL query:: + + os> source=accounts | fields email, employer | fillnull with '' in email ; + fetched rows / total rows = 4/4 + +-----------------------+----------+ + | email | employer | + |-----------------------+----------| + | amberduke@pyrami.com | Pyrami | + | hattiebond@netagy.com | Netagy | + | | Quility | + | daleadams@boink.com | null | + +-----------------------+----------+ + +Example 2: fillnull applied to multiple fields +======================================================================== + +The example show fillnull applied to multiple fields. + +PPL query:: + + os> source=accounts | fields email, employer | fillnull using email = '', employer = '' ; + fetched rows / total rows = 4/4 + +-----------------------+---------------+ + | email | employer | + |-----------------------+---------------| + | amberduke@pyrami.com | Pyrami | + | hattiebond@netagy.com | Netagy | + | | Quility | + | daleadams@boink.com | | + +-----------------------+---------------+ + +Limitation +========== +The ``fillnull`` command is not rewritten to OpenSearch DSL, it is only executed on the coordination node. \ No newline at end of file diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java index c6b21e1605..b9c7f89ba0 100644 --- a/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java @@ -89,6 +89,17 @@ public void testLimitPushDownExplain() throws Exception { + "| fields ageMinus")); } + @Test + public void testFillNullPushDownExplain() throws Exception { + String expected = loadFromFile("expectedOutput/ppl/explain_fillnull_push.json"); + + assertJsonEquals( + expected, + explainQueryToString( + "source=opensearch-sql_test_index_account" + + " | fillnull with -1 in age,balance | fields age, balance")); + } + String loadFromFile(String filename) throws Exception { URI uri = Resources.getResource(filename).toURI(); return new String(Files.readAllBytes(Paths.get(uri))); diff --git a/integ-test/src/test/java/org/opensearch/sql/ppl/FillNullCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/ppl/FillNullCommandIT.java new file mode 100644 index 0000000000..d88d31c997 --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/ppl/FillNullCommandIT.java @@ -0,0 +1,214 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ppl; + +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_CALCS; +import static org.opensearch.sql.util.MatcherUtils.rows; +import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; + +import java.io.IOException; +import org.json.JSONObject; +import org.junit.jupiter.api.Test; + +public class FillNullCommandIT extends PPLIntegTestCase { + @Override + public void init() throws IOException { + loadIndex(Index.CALCS); + } + + @Test + public void testFillNullSameValueOneField() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s | fields str2, num0 | fillnull with -1 in num0", TEST_INDEX_CALCS)); + verifyDataRows( + result, + rows("one", 12.3), + rows("two", -12.3), + rows("three", 15.7), + rows(null, -15.7), + rows("five", 3.5), + rows("six", -3.5), + rows(null, 0), + rows("eight", -1), + rows("nine", 10), + rows("ten", -1), + rows("eleven", -1), + rows("twelve", -1), + rows(null, -1), + rows("fourteen", -1), + rows("fifteen", -1), + rows("sixteen", -1), + rows(null, -1)); + } + + @Test + public void testFillNullSameValueTwoFields() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s | fields num0, num2 | fillnull with -1 in num0,num2", TEST_INDEX_CALCS)); + verifyDataRows( + result, + rows(12.3, 17.86), + rows(-12.3, 16.73), + rows(15.7, -1), + rows(-15.7, 8.51), + rows(3.5, 6.46), + rows(-3.5, 8.98), + rows(0, 11.69), + rows(-1, 17.25), + rows(10, -1), + rows(-1, 11.5), + rows(-1, 6.8), + rows(-1, 3.79), + rows(-1, -1), + rows(-1, 13.04), + rows(-1, -1), + rows(-1, 10.98), + rows(-1, 7.87)); + } + + @Test + public void testFillNullVariousValuesOneField() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s | fields str2, num0 | fillnull using num0 = -1", TEST_INDEX_CALCS)); + verifyDataRows( + result, + rows("one", 12.3), + rows("two", -12.3), + rows("three", 15.7), + rows(null, -15.7), + rows("five", 3.5), + rows("six", -3.5), + rows(null, 0), + rows("eight", -1), + rows("nine", 10), + rows("ten", -1), + rows("eleven", -1), + rows("twelve", -1), + rows(null, -1), + rows("fourteen", -1), + rows("fifteen", -1), + rows("sixteen", -1), + rows(null, -1)); + } + + @Test + public void testFillNullVariousValuesTwoFields() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s | fields num0, num2 | fillnull using num0 = -1, num2 = -2", + TEST_INDEX_CALCS)); + verifyDataRows( + result, + rows(12.3, 17.86), + rows(-12.3, 16.73), + rows(15.7, -2), + rows(-15.7, 8.51), + rows(3.5, 6.46), + rows(-3.5, 8.98), + rows(0, 11.69), + rows(-1, 17.25), + rows(10, -2), + rows(-1, 11.5), + rows(-1, 6.8), + rows(-1, 3.79), + rows(-1, -2), + rows(-1, 13.04), + rows(-1, -2), + rows(-1, 10.98), + rows(-1, 7.87)); + } + + @Test + public void testFillNullWithOtherField() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s | fillnull using num0 = num1 | fields str2, num0", TEST_INDEX_CALCS)); + verifyDataRows( + result, + rows("one", 12.3), + rows("two", -12.3), + rows("three", 15.7), + rows(null, -15.7), + rows("five", 3.5), + rows("six", -3.5), + rows(null, 0), + rows("eight", 11.38), + rows("nine", 10), + rows("ten", 12.4), + rows("eleven", 10.32), + rows("twelve", 2.47), + rows(null, 12.05), + rows("fourteen", 10.37), + rows("fifteen", 7.1), + rows("sixteen", 16.81), + rows(null, 7.12)); + } + + @Test + public void testFillNullWithFunctionOnOtherField() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s | fillnull with ceil(num1) in num0 | fields str2, num0", + TEST_INDEX_CALCS)); + verifyDataRows( + result, + rows("one", 12.3), + rows("two", -12.3), + rows("three", 15.7), + rows(null, -15.7), + rows("five", 3.5), + rows("six", -3.5), + rows(null, 0), + rows("eight", 12), + rows("nine", 10), + rows("ten", 13), + rows("eleven", 11), + rows("twelve", 3), + rows(null, 13), + rows("fourteen", 11), + rows("fifteen", 8), + rows("sixteen", 17), + rows(null, 8)); + } + + @Test + public void testFillNullWithFunctionMultipleCommands() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s | fillnull with num1 in num0 | fields str2, num0 | fillnull with" + + " 'unknown' in str2", + TEST_INDEX_CALCS)); + verifyDataRows( + result, + rows("one", 12.3), + rows("two", -12.3), + rows("three", 15.7), + rows("unknown", -15.7), + rows("five", 3.5), + rows("six", -3.5), + rows("unknown", 0), + rows("eight", 11.38), + rows("nine", 10), + rows("ten", 12.4), + rows("eleven", 10.32), + rows("twelve", 2.47), + rows("unknown", 12.05), + rows("fourteen", 10.37), + rows("fifteen", 7.1), + rows("sixteen", 16.81), + rows("unknown", 7.12)); + } +} diff --git a/integ-test/src/test/resources/expectedOutput/ppl/explain_fillnull_push.json b/integ-test/src/test/resources/expectedOutput/ppl/explain_fillnull_push.json new file mode 100644 index 0000000000..7e5e1c1c20 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/ppl/explain_fillnull_push.json @@ -0,0 +1,28 @@ +{ + "root": { + "name": "ProjectOperator", + "description": { + "fields": "[age, balance]" + }, + "children": [ + { + "name": "EvalOperator", + "description": { + "expressions": { + "balance": "ifnull(balance, -1)", + "age": "ifnull(age, -1)" + } + }, + "children": [ + { + "name": "OpenSearchIndexScan", + "description": { + "request": "OpenSearchQueryRequest(indexName=opensearch-sql_test_index_account, sourceBuilder={\"from\":0,\"size\":10000,\"timeout\":\"1m\"}, needClean=true, searchDone=false, pitId=null, cursorKeepAlive=null, searchAfter=null, searchResponse=null)" + }, + "children": [] + } + ] + } + ] + } +} diff --git a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 index 21cee12675..3ba8da74f4 100644 --- a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 @@ -35,6 +35,7 @@ NEW_FIELD: 'NEW_FIELD'; KMEANS: 'KMEANS'; AD: 'AD'; ML: 'ML'; +FILLNULL: 'FILLNULL'; // COMMAND ASSIST KEYWORDS AS: 'AS'; @@ -44,6 +45,8 @@ INDEX: 'INDEX'; D: 'D'; DESC: 'DESC'; DATASOURCES: 'DATASOURCES'; +USING: 'USING'; +WITH: 'WITH'; // CLAUSE KEYWORDS SORTBY: 'SORTBY'; diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index 54ec23dcb9..89a32abe23 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -49,6 +49,7 @@ commands | kmeansCommand | adCommand | mlCommand + | fillnullCommand ; searchCommand @@ -127,6 +128,23 @@ patternsMethod | REGEX ; +fillnullCommand + : FILLNULL (fillNullWithTheSameValue + | fillNullWithFieldVariousValues) + ; + +fillNullWithTheSameValue + : WITH nullReplacement = valueExpression IN nullableFieldList = fieldList + ; + +fillNullWithFieldVariousValues + : USING nullReplacementExpression (COMMA nullReplacementExpression)* + ; + +nullReplacementExpression + : nullableField = fieldExpression EQUAL nullReplacement = valueExpression + ; + kmeansCommand : KMEANS (kmeansParameter)* ; @@ -843,6 +861,7 @@ keywordsCanBeId | DEDUP | SORT | EVAL + | FILLNULL | HEAD | TOP | RARE diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java index 78fe28b49e..2fccb8e635 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java @@ -51,6 +51,7 @@ import org.opensearch.sql.ast.tree.Aggregation; import org.opensearch.sql.ast.tree.Dedupe; import org.opensearch.sql.ast.tree.Eval; +import org.opensearch.sql.ast.tree.FillNull; import org.opensearch.sql.ast.tree.Filter; import org.opensearch.sql.ast.tree.Head; import org.opensearch.sql.ast.tree.Kmeans; @@ -392,6 +393,34 @@ public UnresolvedPlan visitMlCommand(OpenSearchPPLParser.MlCommandContext ctx) { return new ML(builder.build()); } + /** fillnull command. */ + @Override + public UnresolvedPlan visitFillNullWithTheSameValue( + OpenSearchPPLParser.FillNullWithTheSameValueContext ctx) { + return new FillNull( + FillNull.ContainNullableFieldFill.ofSameValue( + internalVisitExpression(ctx.nullReplacement), + ctx.nullableFieldList.fieldExpression().stream() + .map(f -> (Field) internalVisitExpression(f)) + .toList())); + } + + /** fillnull command. */ + @Override + public UnresolvedPlan visitFillNullWithFieldVariousValues( + OpenSearchPPLParser.FillNullWithFieldVariousValuesContext ctx) { + ImmutableList.Builder replacementsBuilder = ImmutableList.builder(); + for (int i = 0; i < ctx.nullReplacementExpression().size(); i++) { + replacementsBuilder.add( + new FillNull.NullableFieldFill( + (Field) internalVisitExpression(ctx.nullReplacementExpression(i).nullableField), + internalVisitExpression(ctx.nullReplacementExpression(i).nullReplacement))); + } + + return new FillNull( + FillNull.ContainNullableFieldFill.ofVariousValue(replacementsBuilder.build())); + } + /** Get original text in query. */ private String getTextInQuery(ParserRuleContext ctx) { Token start = ctx.getStart(); diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java index d28e5d122b..a1ca0fd69a 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizer.java @@ -34,6 +34,7 @@ import org.opensearch.sql.ast.tree.Aggregation; import org.opensearch.sql.ast.tree.Dedupe; import org.opensearch.sql.ast.tree.Eval; +import org.opensearch.sql.ast.tree.FillNull; import org.opensearch.sql.ast.tree.Filter; import org.opensearch.sql.ast.tree.Head; import org.opensearch.sql.ast.tree.Project; @@ -234,6 +235,33 @@ private String visitExpression(UnresolvedExpression expression) { return expressionAnalyzer.analyze(expression, null); } + @Override + public String visitFillNull(FillNull node, String context) { + String child = node.getChild().get(0).accept(this, context); + List fieldFills = node.getNullableFieldFills(); + final UnresolvedExpression firstReplacement = fieldFills.getFirst().getReplaceNullWithMe(); + if (fieldFills.stream().allMatch(n -> firstReplacement == n.getReplaceNullWithMe())) { + return StringUtils.format( + "%s | fillnull with %s in %s", + child, + firstReplacement, + node.getNullableFieldFills().stream() + .map(n -> visitExpression(n.getNullableFieldReference())) + .collect(Collectors.joining(", "))); + } else { + return StringUtils.format( + "%s | fillnull using %s", + child, + node.getNullableFieldFills().stream() + .map( + n -> + StringUtils.format( + "%s = %s", + visitExpression(n.getNullableFieldReference()), n.getReplaceNullWithMe())) + .collect(Collectors.joining(", "))); + } + } + private String groupBy(String groupBy) { return Strings.isNullOrEmpty(groupBy) ? "" : StringUtils.format("by %s", groupBy); } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/antlr/PPLSyntaxParserTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/antlr/PPLSyntaxParserTest.java index 943953d416..5601bda485 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/antlr/PPLSyntaxParserTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/antlr/PPLSyntaxParserTest.java @@ -417,4 +417,16 @@ public void testCanParseTimestampdiffFunction() { new PPLSyntaxParser() .parse("SOURCE=test | eval k = TIMESTAMPDIFF(WEEK,'2003-01-02','2003-01-02')")); } + + @Test + public void testCanParseFillNullSameValue() { + assertNotNull(new PPLSyntaxParser().parse("SOURCE=test | fillnull with 0 in a")); + assertNotNull(new PPLSyntaxParser().parse("SOURCE=test | fillnull with 0 in a, b")); + } + + @Test + public void testCanParseFillNullVariousValues() { + assertNotNull(new PPLSyntaxParser().parse("SOURCE=test | fillnull using a = 0")); + assertNotNull(new PPLSyntaxParser().parse("SOURCE=test | fillnull using a = 0, b = 1")); + } } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java index ced266ed78..488cb7dc14 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/parser/AstBuilderTest.java @@ -42,6 +42,7 @@ import static org.opensearch.sql.utils.SystemIndexUtils.DATASOURCES_TABLE_NAME; import static org.opensearch.sql.utils.SystemIndexUtils.mappingTable; +import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import java.util.Arrays; import org.junit.Ignore; @@ -50,10 +51,12 @@ import org.junit.rules.ExpectedException; import org.opensearch.sql.ast.Node; import org.opensearch.sql.ast.expression.DataType; +import org.opensearch.sql.ast.expression.Field; import org.opensearch.sql.ast.expression.Literal; import org.opensearch.sql.ast.expression.ParseMethod; import org.opensearch.sql.ast.expression.SpanUnit; import org.opensearch.sql.ast.tree.AD; +import org.opensearch.sql.ast.tree.FillNull; import org.opensearch.sql.ast.tree.Kmeans; import org.opensearch.sql.ast.tree.ML; import org.opensearch.sql.ast.tree.RareTopN.CommandType; @@ -660,6 +663,35 @@ public void testMLCommand() { .build())); } + @Test + public void testFillNullCommandSameValue() { + assertEqual( + "source=t | fillnull with 0 in a, b, c", + new FillNull( + relation("t"), + FillNull.ContainNullableFieldFill.ofSameValue( + intLiteral(0), + ImmutableList.builder() + .add(field("a")) + .add(field("b")) + .add(field("c")) + .build()))); + } + + @Test + public void testFillNullCommandVariousValues() { + assertEqual( + "source=t | fillnull using a = 1, b = 2, c = 3", + new FillNull( + relation("t"), + FillNull.ContainNullableFieldFill.ofVariousValue( + ImmutableList.builder() + .add(new FillNull.NullableFieldFill(field("a"), intLiteral(1))) + .add(new FillNull.NullableFieldFill(field("b"), intLiteral(2))) + .add(new FillNull.NullableFieldFill(field("c"), intLiteral(3))) + .build()))); + } + @Test public void testDescribeCommand() { assertEqual("describe t", relation(mappingTable("t"))); diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java index cd51ea07df..b5b4c97f13 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/utils/PPLQueryDataAnonymizerTest.java @@ -105,6 +105,19 @@ public void testEvalCommand() { assertEquals("source=t | eval r=abs(f)", anonymize("source=t | eval r=abs(f)")); } + @Test + public void testFillNullSameValue() { + assertEquals( + "source=t | fillnull with 0 in f1, f2", anonymize("source=t | fillnull with 0 in f1, f2")); + } + + @Test + public void testFillNullVariousValues() { + assertEquals( + "source=t | fillnull using f1 = 0, f2 = -1", + anonymize("source=t | fillnull using f1 = 0, f2 = -1")); + } + @Test public void testRareCommandWithGroupBy() { assertEquals("source=t | rare 10 a by b", anonymize("source=t | rare a by b"));