Skip to content

Commit

Permalink
Resolve comments 1/n, added a unicode test case
Browse files Browse the repository at this point in the history
  • Loading branch information
Xin Huang committed Dec 20, 2024
1 parent d13b6b6 commit 5ff7692
Show file tree
Hide file tree
Showing 5 changed files with 30 additions and 19 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@
* <li>Name: <code>STARTS WITH</code>
* <ul>
* <li>SQL semantic: <code>expr STARTS WITH expr</code>
* <li>Since version: 3.3.0
* <li>Since version: 3.4.0
* </ul>
* </ol>
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -307,12 +307,12 @@ ExpressionTransformResult visitStartsWith(final Predicate startsWith) {
"Invalid number of inputs to STARTS_WITH expression. "
+ "Example usage: STARTS_WITH(column, 'test')");
}
ExpressionTransformResult leftResult = visit(startsWith.getChildren().get(0));
ExpressionTransformResult rightResult = visit(startsWith.getChildren().get(1));
ExpressionTransformResult leftResult = visit(childAt(startsWith, 0));
ExpressionTransformResult rightResult = visit(childAt(startsWith, 1));
if (!(StringType.STRING.equivalent(leftResult.outputType)
&& StringType.STRING.equivalent(rightResult.outputType))) {
throw unsupportedExpressionException(
startsWith, "'starts with' is only supported for string type expressions");
startsWith, "'STARTS_WITH' is expects STRING type inputs");
}
// TODO: support a non-literal expression as the second input of STARTS_WITH.
if (!(rightResult.expression instanceof Literal)) {
Expand All @@ -329,7 +329,7 @@ ExpressionTransformResult visitStartsWith(final Predicate startsWith) {
right.getValue() == null
? right
: Literal.ofString(
DefaultExpressionUtils.escape(
LikeExpressionEvaluator.escape(
String.valueOf(right.getValue()), /*escapeChar=*/ '%')
.concat("%")))),
BooleanType.BOOLEAN);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -383,17 +383,4 @@ private ColumnVector getVector(int rowId) {
}
};
}
/**
 * Returns a copy of {@code input} in which every occurrence of {@code escapeChar} is preceded
 * by a single backslash. All other characters (including any backslashes already present) are
 * copied through unchanged.
 *
 * @param input the string to process; a null value results in a NullPointerException
 * @param escapeChar the character that gets a backslash inserted in front of it
 * @return the resulting string
 */
static String escape(String input, char escapeChar) {
  // Worst case every character needs a backslash prefix, doubling the length.
  final StringBuilder out = new StringBuilder(input.length() * 2);
  for (char ch : input.toCharArray()) {
    if (ch == escapeChar) {
      out.append('\\');
    }
    out.append(ch);
  }
  return out.toString();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -183,4 +183,18 @@ private static String escapeLikeRegex(String pattern, char escape) {
}
return "(?s)" + javaPattern;
}

/**
 * Inserts a backslash in front of each occurrence of {@code escapeChar} found in {@code input}
 * and returns the result. Characters other than {@code escapeChar} are copied as they are; in
 * particular, backslashes already present in the input are not doubled.
 *
 * @param input the string to process; a null value results in a NullPointerException
 * @param escapeChar the character to be prefixed with a backslash
 * @return the resulting string
 */
static String escape(String input, char escapeChar) {
  final int total = input.length();
  // Capacity covers the worst case where every character is prefixed.
  final StringBuilder buf = new StringBuilder(total + total);
  int segmentStart = 0;
  int hit = input.indexOf(escapeChar, segmentStart);
  while (hit >= 0) {
    // Copy the untouched run before the match, then the match itself behind a backslash.
    buf.append(input, segmentStart, hit).append('\\').append(escapeChar);
    segmentStart = hit + 1;
    hit = input.indexOf(escapeChar, segmentStart);
  }
  // Trailing run after the last match (or the whole input when there was none).
  buf.append(input, segmentStart, total);
  return buf.toString();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -615,6 +615,16 @@ class DefaultExpressionEvaluatorSuite extends AnyFunSuite with ExpressionSuiteBa
checkBooleanVectors(new DefaultExpressionEvaluator(
schema, startsWithExpressionAlwaysFalse, BooleanType.BOOLEAN).eval(input), allFalseVector)

val colUnicode = stringVector(Seq[String]("中文", "", ""))
val schemaUnicode = new StructType().add("col", StringType.STRING)
val inputUnicode = new DefaultColumnarBatch(colUnicode.getSize,
schemaUnicode, Array(colUnicode))
val startsWithExpressionUnicode = startsWith(new Column("col"), Literal.ofString(""))
val expOutputVectorLiteralUnicode = booleanVector(Seq[BooleanJ](true, true, false))
checkBooleanVectors(new DefaultExpressionEvaluator(schemaUnicode,
startsWithExpressionUnicode,
BooleanType.BOOLEAN).eval(inputUnicode), expOutputVectorLiteralUnicode)

val startsWithExpressionExpression = startsWith(new Column("col1"), new Column("col2"))
val e = intercept[UnsupportedOperationException] {
new DefaultExpressionEvaluator(
Expand All @@ -634,7 +644,7 @@ class DefaultExpressionEvaluatorSuite extends AnyFunSuite with ExpressionSuiteBa
new DefaultExpressionEvaluator(
schema, expr, BooleanType.BOOLEAN).eval(input)
}
assert(e.getMessage.contains("'starts with' is only supported for string type expressions"))
assert(e.getMessage.contains("'STARTS_WITH' is expects STRING type inputs"))
}

checkUnsupportedTypes(BooleanType.BOOLEAN, BooleanType.BOOLEAN)
Expand Down

0 comments on commit 5ff7692

Please sign in to comment.