From 5ff7692665e049a9363ff7640c22e005fb02efb3 Mon Sep 17 00:00:00 2001 From: Xin Huang Date: Fri, 20 Dec 2024 15:48:28 -0800 Subject: [PATCH] Resolve comments 1/n, added a unicode test case --- .../io/delta/kernel/expressions/Predicate.java | 2 +- .../expressions/DefaultExpressionEvaluator.java | 8 ++++---- .../expressions/DefaultExpressionUtils.java | 13 ------------- .../expressions/LikeExpressionEvaluator.java | 14 ++++++++++++++ .../DefaultExpressionEvaluatorSuite.scala | 12 +++++++++++- 5 files changed, 30 insertions(+), 19 deletions(-) diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/Predicate.java b/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/Predicate.java index 0966ccc3a79..0da9479c553 100644 --- a/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/Predicate.java +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/Predicate.java @@ -104,7 +104,7 @@ *
  • Name: STARTS WITH * * * diff --git a/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/expressions/DefaultExpressionEvaluator.java b/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/expressions/DefaultExpressionEvaluator.java index d8ec0e63a06..38fb59e7146 100644 --- a/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/expressions/DefaultExpressionEvaluator.java +++ b/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/expressions/DefaultExpressionEvaluator.java @@ -307,12 +307,12 @@ ExpressionTransformResult visitStartsWith(final Predicate startsWith) { "Invalid number of inputs to STARTS_WITH expression. " + "Example usage: STARTS_WITH(column, 'test')"); } - ExpressionTransformResult leftResult = visit(startsWith.getChildren().get(0)); - ExpressionTransformResult rightResult = visit(startsWith.getChildren().get(1)); + ExpressionTransformResult leftResult = visit(childAt(startsWith, 0)); + ExpressionTransformResult rightResult = visit(childAt(startsWith, 1)); if (!(StringType.STRING.equivalent(leftResult.outputType) && StringType.STRING.equivalent(rightResult.outputType))) { throw unsupportedExpressionException( - startsWith, "'starts with' is only supported for string type expressions"); + startsWith, "'STARTS_WITH' is expects STRING type inputs"); } // TODO: support non literal as the second input of starts with. if (!(rightResult.expression instanceof Literal)) { @@ -329,7 +329,7 @@ ExpressionTransformResult visitStartsWith(final Predicate startsWith) { right.getValue() == null ? right : Literal.ofString( - DefaultExpressionUtils.escape( + LikeExpressionEvaluator.escape( String.valueOf(right.getValue()), /*escapeChar=*/ '%') .concat("%")))), BooleanType.BOOLEAN); diff --git a/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/expressions/DefaultExpressionUtils.java b/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/expressions/DefaultExpressionUtils.java index 4cbeacd7f61..b59db8689ab 100644 --- a/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/expressions/DefaultExpressionUtils.java +++ b/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/expressions/DefaultExpressionUtils.java @@ -383,17 +383,4 @@ private ColumnVector getVector(int rowId) { } }; } - /** Escapes characters escapeChar in the input String */ - static String escape(String input, char escapeChar) { - final int len = input.length(); - final StringBuilder escapedString = new StringBuilder(len + len); - for (int i = 0; i < len; i++) { - char c = input.charAt(i); - if (c == escapeChar) { - escapedString.append('\\'); - } - escapedString.append(c); - } - return escapedString.toString(); - } } diff --git a/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/expressions/LikeExpressionEvaluator.java b/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/expressions/LikeExpressionEvaluator.java index 1dc80d2b880..6584caf3dfd 100644 --- a/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/expressions/LikeExpressionEvaluator.java +++ b/kernel/kernel-defaults/src/main/java/io/delta/kernel/defaults/internal/expressions/LikeExpressionEvaluator.java @@ -183,4 +183,18 @@ private static String escapeLikeRegex(String pattern, char escape) { } return "(?s)" + javaPattern; } + + /** Escapes characters escapeChar in the input String */ + static String escape(String input, char escapeChar) { + final int len = input.length(); + final StringBuilder escapedString = new StringBuilder(len + len); + for (int i = 0; i < len; i++) { + char c = input.charAt(i); + if (c == escapeChar) { + escapedString.append('\\'); + } + escapedString.append(c); + } + return escapedString.toString(); + } } diff --git a/kernel/kernel-defaults/src/test/scala/io/delta/kernel/defaults/internal/expressions/DefaultExpressionEvaluatorSuite.scala b/kernel/kernel-defaults/src/test/scala/io/delta/kernel/defaults/internal/expressions/DefaultExpressionEvaluatorSuite.scala index ed87f204805..bb164bc89c4 100644 --- a/kernel/kernel-defaults/src/test/scala/io/delta/kernel/defaults/internal/expressions/DefaultExpressionEvaluatorSuite.scala +++ b/kernel/kernel-defaults/src/test/scala/io/delta/kernel/defaults/internal/expressions/DefaultExpressionEvaluatorSuite.scala @@ -615,6 +615,16 @@ class DefaultExpressionEvaluatorSuite extends AnyFunSuite with ExpressionSuiteBa checkBooleanVectors(new DefaultExpressionEvaluator( schema, startsWithExpressionAlwaysFalse, BooleanType.BOOLEAN).eval(input), allFalseVector) + val colUnicode = stringVector(Seq[String]("中文", "中", "文")) + val schemaUnicode = new StructType().add("col", StringType.STRING) + val inputUnicode = new DefaultColumnarBatch(colUnicode.getSize, + schemaUnicode, Array(colUnicode)) + val startsWithExpressionUnicode = startsWith(new Column("col"), Literal.ofString("中")) + val expOutputVectorLiteralUnicode = booleanVector(Seq[BooleanJ](true, true, false)) + checkBooleanVectors(new DefaultExpressionEvaluator(schemaUnicode, + startsWithExpressionUnicode, + BooleanType.BOOLEAN).eval(inputUnicode), expOutputVectorLiteralUnicode) + val startsWithExpressionExpression = startsWith(new Column("col1"), new Column("col2")) val e = intercept[UnsupportedOperationException] { new DefaultExpressionEvaluator( @@ -634,7 +644,7 @@ class DefaultExpressionEvaluatorSuite extends AnyFunSuite with ExpressionSuiteBa new DefaultExpressionEvaluator( schema, expr, BooleanType.BOOLEAN).eval(input) } - assert(e.getMessage.contains("'starts with' is only supported for string type expressions")) + assert(e.getMessage.contains("'STARTS_WITH' is expects STRING type inputs")) } checkUnsupportedTypes(BooleanType.BOOLEAN, BooleanType.BOOLEAN)