From 762bfd1066b939dda1e5f459a5cd0dfe8f92998a Mon Sep 17 00:00:00 2001 From: ZKpLo <14148880+zkplo@user.noreply.gitee.com> Date: Tue, 1 Oct 2024 08:54:01 +0800 Subject: [PATCH] [INLONG-11236][SDK] Transform SQL supports FIND_IN_SET function --- .../process/function/FindInSetFunction.java | 66 +++++++++++++++++ .../function/TestFindInSetFunction.java | 70 +++++++++++++++++++ 2 files changed, 136 insertions(+) create mode 100644 inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/FindInSetFunction.java create mode 100644 inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/TestFindInSetFunction.java diff --git a/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/FindInSetFunction.java b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/FindInSetFunction.java new file mode 100644 index 00000000000..abd740bee0d --- /dev/null +++ b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/FindInSetFunction.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.inlong.sdk.transform.process.function; + +import org.apache.inlong.sdk.transform.decode.SourceData; +import org.apache.inlong.sdk.transform.process.Context; +import org.apache.inlong.sdk.transform.process.operator.OperatorTools; +import org.apache.inlong.sdk.transform.process.parser.ValueParser; + +import net.sf.jsqlparser.expression.Function; + +/** + * LengthFunction -> FindInSetFunction(str,strList) + * description: + * - return a value in the range of 1 to N if the string str is in the string list strList consisting of N substrings. + * - return 0 if str is not in strList or if strList is the empty string. + * - return NULL if either argument is NULL. + * Note: `strList` is a string composed of substrings separated by ',' characters. This function does not work properly + * if the first argument contains a comma (,) character. + */ +@TransformFunction(names = {"find_in_set"}) +public class FindInSetFunction implements ValueParser { + + private final ValueParser strParser; + private final ValueParser strListParser; + + public FindInSetFunction(Function expr) { + strParser = OperatorTools.buildParser(expr.getParameters().getExpressions().get(0)); + strListParser = OperatorTools.buildParser(expr.getParameters().getExpressions().get(1)); + } + + @Override + public Object parse(SourceData sourceData, int rowIndex, Context context) { + Object strObj = strParser.parse(sourceData, rowIndex, context); + Object strListObj = strListParser.parse(sourceData, rowIndex, context); + if (strObj == null || strListObj == null) { + return null; + } + String str = OperatorTools.parseString(strObj); + String strList = OperatorTools.parseString(strListObj); + if (!strList.isEmpty()) { + String[] strArray = strList.split(","); + for (int i = 0; i < strArray.length; i++) { + if (str.equals(strArray[i])) { + return i + 1; + } + } + } + return 0; + } +} diff --git 
a/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/TestFindInSetFunction.java b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/TestFindInSetFunction.java new file mode 100644 index 00000000000..509d1e7b2e7 --- /dev/null +++ b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/TestFindInSetFunction.java @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
package org.apache.inlong.sdk.transform.process.function;

import org.apache.inlong.sdk.transform.decode.SourceDecoderFactory;
import org.apache.inlong.sdk.transform.encode.SinkEncoderFactory;
import org.apache.inlong.sdk.transform.pojo.TransformConfig;
import org.apache.inlong.sdk.transform.process.TransformProcessor;
import org.apache.inlong.sdk.transform.process.function.string.AbstractFunctionStringTestBase;

import org.junit.Assert;
import org.junit.Test;

import java.util.HashMap;
import java.util.List;

/**
 * Tests for the FIND_IN_SET transform function.
 * Each input row is a '|'-separated record whose first two fields are bound to string1 and string2.
 */
public class TestFindInSetFunction extends AbstractFunctionStringTestBase {

    @Test
    public void testFindInSetFunction() throws Exception {
        String transformSql = "select FIND_IN_SET(string1,string2) from source";
        TransformConfig config = new TransformConfig(transformSql);
        // csvSource and kvSink are not declared in this class; presumably inherited from the test base.
        TransformProcessor<String, String> processor = TransformProcessor
                .create(config, SourceDecoderFactory.createCsvDecoder(csvSource),
                        SinkEncoderFactory.createKvEncoder(kvSink));

        // case1: FIND_IN_SET('b','a,b,b,c,d') -> position of the first match
        assertSingleResult(processor, "b|a,b,b,c,d|cloud|5|3|3", "result=2");

        // case2: FIND_IN_SET('','a,,b,c,d') -> an empty element inside the list can be matched
        assertSingleResult(processor, "|a,,b,c,d|cloud|5|3|3", "result=2");

        // case3: FIND_IN_SET(',','a,,b,c,d') -> a str containing the separator never matches
        assertSingleResult(processor, ",|a,,b,c,d|cloud|5|3|3", "result=0");

        // case4: FIND_IN_SET('','') -> an empty list returns 0
        assertSingleResult(processor, "||cloud|5|3|3", "result=0");

        // case5: FIND_IN_SET('z','a,b,c') -> str is not in the list
        assertSingleResult(processor, "z|a,b,c|cloud|5|3|3", "result=0");
    }

    /**
     * Transforms one input row and checks that exactly one output row with the expected content is produced.
     */
    private static void assertSingleResult(TransformProcessor<String, String> processor, String data,
            String expected) {
        List<String> output = processor.transform(data, new HashMap<>());
        Assert.assertEquals(1, output.size());
        Assert.assertEquals(expected, output.get(0));
    }
}