diff --git a/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/RegexpCountFunction.java b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/RegexpCountFunction.java new file mode 100644 index 00000000000..bc7091a0cea --- /dev/null +++ b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/RegexpCountFunction.java @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.inlong.sdk.transform.process.function; + +import org.apache.inlong.sdk.transform.decode.SourceData; +import org.apache.inlong.sdk.transform.process.Context; +import org.apache.inlong.sdk.transform.process.operator.OperatorTools; +import org.apache.inlong.sdk.transform.process.parser.ValueParser; + +import net.sf.jsqlparser.expression.Expression; +import net.sf.jsqlparser.expression.Function; + +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * RegexpCountFunction + * description: REGEXP_COUNT(str, regexp)--Returns the number of times str matches the regexp pattern. + * regexp must be a Java regular expression. 
+ * Returns an INTEGER representation of the number of matches. + * NULL if any of the arguments are NULL or regexp is invalid. + */ +@TransformFunction(names = {"regexp_count"}) +public class RegexpCountFunction implements ValueParser { + + private ValueParser inputStringParser; + + private ValueParser patternStringParser; + + public RegexpCountFunction(Function expr) { + if (expr.getParameters() != null) { + List expressions = expr.getParameters().getExpressions(); + if (expressions != null && expressions.size() >= 2) { + inputStringParser = OperatorTools.buildParser(expressions.get(0)); + patternStringParser = OperatorTools.buildParser(expressions.get(1)); + } + } + } + + @Override + public Object parse(SourceData sourceData, int rowIndex, Context context) { + if (inputStringParser == null || patternStringParser == null) { + return null; + } + String inputString = OperatorTools.parseString(inputStringParser.parse(sourceData, rowIndex, context)); + String patternString = OperatorTools.parseString(patternStringParser.parse(sourceData, rowIndex, context)); + Pattern pattern = Pattern.compile(patternString); + Matcher matcher = pattern.matcher(inputString); + int count = 0; + while (matcher.find()) { + count++; + } + return count; + } +} diff --git a/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/RegexpExtractAllFunction.java b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/RegexpExtractAllFunction.java new file mode 100644 index 00000000000..5c825915d38 --- /dev/null +++ b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/RegexpExtractAllFunction.java @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.inlong.sdk.transform.process.function; + +import org.apache.inlong.sdk.transform.decode.SourceData; +import org.apache.inlong.sdk.transform.process.Context; +import org.apache.inlong.sdk.transform.process.operator.OperatorTools; +import org.apache.inlong.sdk.transform.process.parser.ValueParser; + +import net.sf.jsqlparser.expression.Expression; +import net.sf.jsqlparser.expression.Function; + +import java.util.ArrayList; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * RegexpExtractAllFunction + * description: REGEXP_EXTRACT_ALL(str, regexp[, extractIndex])--Returns an ARRAY representation of all the matched substrings. + * NULL if any of the arguments are NULL or invalid.Extracts all the substrings in str that match the regexp + * expression and correspond to the regexp group extractIndex. regexp may contain multiple groups. extractIndex + * indicates which regexp group to extract and starts from 1, also the default value if not specified. + * 0 means matching the entire regular expression. 
+ * for example: REGEXP_EXTRACT_ALL("abc123def456ghi789", "(\\d+)", 0)--return [123, 456, 789] + * REGEXP_EXTRACT_ALL("Name: John, Age: 25, Location: NY", "Name: (\\w+), Age: (\\d+), Location: (\\w+)", 1)--return [John] + * REGEXP_EXTRACT_ALL("Name: John, Age: 25, Location: NY", "Name: (\\w+), Age: (\\d+), Location: (\\w+)", 0)--return [Name: John, Age: 25, Location: NY] + */ +@TransformFunction(names = {"regexp_extract_all"}) +public class RegexpExtractAllFunction implements ValueParser { + + private ValueParser inputStringParser; + + private ValueParser patternStringParser; + + private ValueParser indexIntegerParser; + + public RegexpExtractAllFunction(Function expr) { + if (expr.getParameters() != null) { + List expressions = expr.getParameters().getExpressions(); + if (expressions != null && expressions.size() >= 2) { + inputStringParser = OperatorTools.buildParser(expressions.get(0)); + patternStringParser = OperatorTools.buildParser(expressions.get(1)); + if (expressions.size() >= 3) { + indexIntegerParser = OperatorTools.buildParser(expressions.get(2)); + } + } + } + } + + @Override + public Object parse(SourceData sourceData, int rowIndex, Context context) { + if (inputStringParser == null || patternStringParser == null) { + return null; + } + String inputString = OperatorTools.parseString(inputStringParser.parse(sourceData, rowIndex, context)); + String patternString = OperatorTools.parseString(patternStringParser.parse(sourceData, rowIndex, context)); + int index = 0; + if (indexIntegerParser != null) { + index = OperatorTools.parseBigDecimal(indexIntegerParser.parse(sourceData, rowIndex, context)).intValue(); + } + if (index < 0) { + return null; + } + List resultList = new ArrayList<>(); + + Pattern pattern = Pattern.compile(patternString); + Matcher matcher = pattern.matcher(inputString); + while (matcher.find()) { + if (index <= matcher.groupCount()) { + resultList.add(matcher.group(index)); + } else { + return null; + } + } + + return 
resultList.isEmpty() ? null : resultList; + } +} diff --git a/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/RegexpExtractFunction.java b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/RegexpExtractFunction.java new file mode 100644 index 00000000000..20a2d28925b --- /dev/null +++ b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/RegexpExtractFunction.java @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.inlong.sdk.transform.process.function; + +import org.apache.inlong.sdk.transform.decode.SourceData; +import org.apache.inlong.sdk.transform.process.Context; +import org.apache.inlong.sdk.transform.process.operator.OperatorTools; +import org.apache.inlong.sdk.transform.process.parser.ValueParser; + +import net.sf.jsqlparser.expression.Expression; +import net.sf.jsqlparser.expression.Function; + +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * RegexpExtractFunction + * description: REGEXP_EXTRACT(string1, string2[, integer])--Returns a string from string1 which extracted with a specified + * regular expression string2 and a regexp match group index integer.The regexp match group index starts + * from 1 and 0 means matching the whole regexp. In addition, the regexp match group index should not exceed + * the number of the defined groups. + * for example: REGEXP_EXTRACT("abc123def", "(\\d+)", 1)--return 123 + * REGEXP_EXTRACT("Name: John, Age: 25, Location: NY", "Name: (\\w+), Age: (\\d+), Location: (\\w+)", 2)--return 25 + * REGEXP_EXTRACT("abc123def", "(\\d+)", 2)--return null + * REGEXP_EXTRACT("abc123def", "abcdef", 1)--return null + */ +@TransformFunction(names = {"regexp_extract"}) +public class RegexpExtractFunction implements ValueParser { + + private ValueParser inputStringParser; + + private ValueParser patternStringParser; + + private ValueParser indexIntegerParser; + + public RegexpExtractFunction(Function expr) { + if (expr.getParameters() != null) { + List expressions = expr.getParameters().getExpressions(); + if (expressions != null && expressions.size() >= 3) { + inputStringParser = OperatorTools.buildParser(expressions.get(0)); + patternStringParser = OperatorTools.buildParser(expressions.get(1)); + indexIntegerParser = OperatorTools.buildParser(expressions.get(2)); + } + } + } + + @Override + public Object parse(SourceData sourceData, int rowIndex, Context context) { + if 
(inputStringParser == null || patternStringParser == null || indexIntegerParser == null) { + return null; + } + String inputString = OperatorTools.parseString(inputStringParser.parse(sourceData, rowIndex, context)); + String patternString = OperatorTools.parseString(patternStringParser.parse(sourceData, rowIndex, context)); + int indexInteger = + OperatorTools.parseBigDecimal(indexIntegerParser.parse(sourceData, rowIndex, context)).intValue(); + if (indexInteger < 0) { + return null; + } + Pattern pattern = Pattern.compile(patternString); + Matcher matcher = pattern.matcher(inputString); + if (matcher.find()) { + if (indexInteger <= matcher.groupCount()) { + return matcher.group(indexInteger); + } + } + return null; + } +} diff --git a/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/RegexpFunction.java b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/RegexpFunction.java new file mode 100644 index 00000000000..518f4e7d852 --- /dev/null +++ b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/RegexpFunction.java @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.inlong.sdk.transform.process.function; + +import org.apache.inlong.sdk.transform.decode.SourceData; +import org.apache.inlong.sdk.transform.process.Context; +import org.apache.inlong.sdk.transform.process.operator.OperatorTools; +import org.apache.inlong.sdk.transform.process.parser.ValueParser; + +import net.sf.jsqlparser.expression.Expression; +import net.sf.jsqlparser.expression.Function; + +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +/** + * RegexpFunction + * description: REGEX(string1, string2)--Returns TRUE if any (possibly empty) substring of string1 matches the Java + * regular expression string2, otherwise FALSE. Returns NULL if any of arguments is NULL. + */ +@TransformFunction(names = {"regex"}) +public class RegexpFunction implements ValueParser { + + private ValueParser inputParser; + + private ValueParser patternParser; + + public RegexpFunction(Function expr) { + if (expr.getParameters() != null) { + List expressions = expr.getParameters().getExpressions(); + if (expressions != null && expressions.size() == 2) { + inputParser = OperatorTools.buildParser(expressions.get(0)); + patternParser = OperatorTools.buildParser(expressions.get(1)); + } + } + } + + @Override + public Object parse(SourceData sourceData, int rowIndex, Context context) { + if (inputParser == null || patternParser == null) { + return null; + } + String inputString = OperatorTools.parseString(inputParser.parse(sourceData, rowIndex, context)); + String patternString = OperatorTools.parseString(patternParser.parse(sourceData, rowIndex, context)); + Pattern pattern = Pattern.compile(patternString); + Matcher matcher = pattern.matcher(inputString); + return matcher.find(); + } +} diff --git a/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/RegexpInstrFunction.java 
b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/RegexpInstrFunction.java new file mode 100644 index 00000000000..9cd128e5329 --- /dev/null +++ b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/RegexpInstrFunction.java @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.inlong.sdk.transform.process.function; + +import org.apache.inlong.sdk.transform.decode.SourceData; +import org.apache.inlong.sdk.transform.process.Context; +import org.apache.inlong.sdk.transform.process.operator.OperatorTools; +import org.apache.inlong.sdk.transform.process.parser.ValueParser; + +import net.sf.jsqlparser.expression.Expression; +import net.sf.jsqlparser.expression.Function; + +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * RegexpInstrFunction + * description: REGEXP_INSTR(str, regexp)--Returns the position of the first substring in str that matches regexp. + * Result indexes begin at 1, 0 if there is no match. + * Returns an INTEGER representation of the first matched substring index. + * NULL if any of the arguments are NULL or regexp is invalid. 
+ */ +@TransformFunction(names = {"regexp_instr"}) +public class RegexpInstrFunction implements ValueParser { + + private ValueParser inputStringParser; + + private ValueParser patternStringParser; + + public RegexpInstrFunction(Function expr) { + if (expr.getParameters() != null) { + List expressions = expr.getParameters().getExpressions(); + if (expressions != null && expressions.size() >= 2) { + inputStringParser = OperatorTools.buildParser(expressions.get(0)); + patternStringParser = OperatorTools.buildParser(expressions.get(1)); + } + } + } + + @Override + public Object parse(SourceData sourceData, int rowIndex, Context context) { + if (inputStringParser == null || patternStringParser == null) { + return null; + } + String inputString = OperatorTools.parseString(inputStringParser.parse(sourceData, rowIndex, context)); + String patternString = OperatorTools.parseString(patternStringParser.parse(sourceData, rowIndex, context)); + Pattern pattern = Pattern.compile(patternString); + Matcher matcher = pattern.matcher(inputString); + if (matcher.find()) { + return matcher.start() + 1; + } else { + return 0; + } + } +} diff --git a/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/RegexpReplaceFunction.java b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/RegexpReplaceFunction.java new file mode 100644 index 00000000000..834739b801d --- /dev/null +++ b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/RegexpReplaceFunction.java @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.inlong.sdk.transform.process.function; + +import org.apache.inlong.sdk.transform.decode.SourceData; +import org.apache.inlong.sdk.transform.process.Context; +import org.apache.inlong.sdk.transform.process.operator.OperatorTools; +import org.apache.inlong.sdk.transform.process.parser.ValueParser; + +import net.sf.jsqlparser.expression.Expression; +import net.sf.jsqlparser.expression.Function; + +import java.util.List; +import java.util.regex.Pattern; + +/** + * RegexpReplaceFunction + * description: REGEXP_REPLACE(string1, string2, string3)--Returns a string from STRING1 with all the substrings that + * match a regular expression STRING2 consecutively being replaced with STRING3. 
+ */ +@TransformFunction(names = {"regexp_replace"}) +public class RegexpReplaceFunction implements ValueParser { + + private ValueParser inputStringParser; + + private ValueParser patternStringParser; + + private ValueParser replaceStringParser; + + public RegexpReplaceFunction(Function expr) { + if (expr.getParameters() != null) { + List expressions = expr.getParameters().getExpressions(); + if (expressions != null && expressions.size() >= 3) { + inputStringParser = OperatorTools.buildParser(expressions.get(0)); + patternStringParser = OperatorTools.buildParser(expressions.get(1)); + replaceStringParser = OperatorTools.buildParser(expressions.get(2)); + } + } + } + + @Override + public Object parse(SourceData sourceData, int rowIndex, Context context) { + if (inputStringParser == null || patternStringParser == null || replaceStringParser == null) { + return null; + } + String inputString = OperatorTools.parseString(inputStringParser.parse(sourceData, rowIndex, context)); + String patternString = OperatorTools.parseString(patternStringParser.parse(sourceData, rowIndex, context)); + String replaceString = OperatorTools.parseString(replaceStringParser.parse(sourceData, rowIndex, context)); + Pattern pattern = Pattern.compile(patternString); + return pattern.matcher(inputString).replaceAll(replaceString); + } +} diff --git a/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/RegexpSubstrFunction.java b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/RegexpSubstrFunction.java new file mode 100644 index 00000000000..9e2a46af36e --- /dev/null +++ b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/RegexpSubstrFunction.java @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.inlong.sdk.transform.process.function; + +import org.apache.inlong.sdk.transform.decode.SourceData; +import org.apache.inlong.sdk.transform.process.Context; +import org.apache.inlong.sdk.transform.process.operator.OperatorTools; +import org.apache.inlong.sdk.transform.process.parser.ValueParser; + +import net.sf.jsqlparser.expression.Expression; +import net.sf.jsqlparser.expression.Function; + +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * RegexpSubstrFunction + * description: REGEXP_SUBSTR(str, regexp)--Returns the first substring in str that matches regexp. + * Returns an STRING representation of the first matched substring. NULL if any of the arguments are NULL + * or regexp if invalid or pattern is not found. 
+ */ +@TransformFunction(names = {"regex_substr"}) +public class RegexpSubstrFunction implements ValueParser { + + private ValueParser inputStringParser; + + private ValueParser patternStringParser; + + public RegexpSubstrFunction(Function expr) { + if (expr.getParameters() != null) { + List expressions = expr.getParameters().getExpressions(); + if (expressions != null && expressions.size() >= 2) { + inputStringParser = OperatorTools.buildParser(expressions.get(0)); + patternStringParser = OperatorTools.buildParser(expressions.get(1)); + } + } + } + + @Override + public Object parse(SourceData sourceData, int rowIndex, Context context) { + if (inputStringParser == null || patternStringParser == null) { + return null; + } + String inputString = OperatorTools.parseString(inputStringParser.parse(sourceData, rowIndex, context)); + String patternString = OperatorTools.parseString(patternStringParser.parse(sourceData, rowIndex, context)); + Pattern pattern = Pattern.compile(patternString); + Matcher matcher = pattern.matcher(inputString); + if (matcher.find()) { + return matcher.group(0); + } else { + return null; + } + } +} diff --git a/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestRegexExtractFunction.java b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestRegexExtractFunction.java new file mode 100644 index 00000000000..706aba29a1f --- /dev/null +++ b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestRegexExtractFunction.java @@ -0,0 +1,91 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.inlong.sdk.transform.process.function.string; + +import org.apache.inlong.sdk.transform.decode.SourceDecoderFactory; +import org.apache.inlong.sdk.transform.encode.SinkEncoderFactory; +import org.apache.inlong.sdk.transform.pojo.TransformConfig; +import org.apache.inlong.sdk.transform.process.TransformProcessor; + +import org.junit.Assert; +import org.junit.Test; + +import java.util.HashMap; +import java.util.List; +import java.util.regex.Pattern; +import java.util.regex.PatternSyntaxException; + +import static org.junit.Assert.assertThrows; +import static org.junit.Assert.assertTrue; + +public class TestRegexExtractFunction extends AbstractFunctionStringTestBase { + + @Test + public void testRegexpExtractFunction() throws Exception { + String transformSql1 = "select regexp_extract(string1,string2,numeric1) from source"; + TransformConfig config1 = new TransformConfig(transformSql1); + TransformProcessor processor1 = TransformProcessor + .create(config1, SourceDecoderFactory.createCsvDecoder(csvSource), + SinkEncoderFactory.createKvEncoder(kvSink)); + // case1: regexp_extract("abc123def", "(\\d+)", 1) + List output1 = processor1.transform("abc123def|(\\\\d+)|2|1|3|4", new HashMap<>()); + Assert.assertEquals(1, output1.size()); + Assert.assertEquals(output1.get(0), "result=123"); + // case2: regexp_extract("abc123def123", "(\\d+)", 0) + List output2 = 
processor1.transform("abc123def124|(\\\\d+)|1|0|3", new HashMap<>()); + Assert.assertEquals(1, output2.size()); + Assert.assertEquals(output2.get(0), "result=123"); + // case3: regexp_extract("Name: John, Age: 25, Location: NY", "Name: (\\w+), Age: (\\d+), Location: (\\w+)", 2) + List output3 = processor1.transform( + "Name: John, Age: 25, Location: NY|Name: (\\\\w+), Age: (\\\\d+), Location: (\\\\w+)|1|2|3", + new HashMap<>()); + Assert.assertEquals(1, output3.size()); + Assert.assertEquals(output3.get(0), "result=25"); + // case4: regexp_extract("Email: john.doe@example.com", "([a-zA-Z]+)\\.([a-zA-Z]+)@([a-zA-Z]+)\\.([a-zA-Z]+)", + // 3) + List output4 = processor1.transform( + "Email: john.doe@example.com|([a-zA-Z]+)\\\\.([a-zA-Z]+)@([a-zA-Z]+)\\\\.([a-zA-Z]+)|1|3|2", + new HashMap<>()); + Assert.assertEquals(1, output4.size()); + Assert.assertEquals(output4.get(0), "result=example"); + + String transformSql2 = "select regexp_extract(string1) from source"; + TransformConfig config2 = new TransformConfig(transformSql2); + TransformProcessor processor2 = TransformProcessor + .create(config2, SourceDecoderFactory.createCsvDecoder(csvSource), + SinkEncoderFactory.createKvEncoder(kvSink)); + // case4: regexp_extract("The quick brown fox quick") + List output5 = + processor2.transform("The quick brown fox quick|quick|QAQ|2|1|3", new HashMap<>()); + Assert.assertEquals(1, output5.size()); + Assert.assertEquals(output5.get(0), "result=null"); + String transformSql3 = "select regexp_extract(string1,string2) from source"; + TransformConfig config3 = new TransformConfig(transformSql3); + TransformProcessor processor3 = TransformProcessor + .create(config3, SourceDecoderFactory.createCsvDecoder(csvSource), + SinkEncoderFactory.createKvEncoder(kvSink)); + // case5: regexp_extract("The quick brown fox quick", "[q-") + List output6 = + processor3.transform("The quick brown fox quick|[q-|QAQ|2|1|3", new HashMap<>()); + Assert.assertEquals(1, output6.size()); + 
PatternSyntaxException exception = assertThrows(PatternSyntaxException.class, () -> { + Pattern.compile("[q-"); + }); + assertTrue(exception.getMessage().contains("Illegal character range near index 3")); + } +} diff --git a/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestRegexpCountFunction.java b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestRegexpCountFunction.java new file mode 100644 index 00000000000..8f090633e44 --- /dev/null +++ b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestRegexpCountFunction.java @@ -0,0 +1,77 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.inlong.sdk.transform.process.function.string; + +import org.apache.inlong.sdk.transform.decode.SourceDecoderFactory; +import org.apache.inlong.sdk.transform.encode.SinkEncoderFactory; +import org.apache.inlong.sdk.transform.pojo.TransformConfig; +import org.apache.inlong.sdk.transform.process.TransformProcessor; + +import org.junit.Assert; +import org.junit.Test; + +import java.util.HashMap; +import java.util.List; +import java.util.regex.Pattern; +import java.util.regex.PatternSyntaxException; + +import static org.junit.Assert.assertThrows; +import static org.junit.Assert.assertTrue; + +public class TestRegexpCountFunction extends AbstractFunctionStringTestBase { + /** Covers regexp_count with a matching and a non-matching pattern, a missing pattern argument, and an invalid pattern. */ + @Test + public void testRegexpCountFunction() throws Exception { + String transformSql1 = "select regexp_count(string1,string2) from source"; + TransformConfig config1 = new TransformConfig(transformSql1); + TransformProcessor processor1 = TransformProcessor + .create(config1, SourceDecoderFactory.createCsvDecoder(csvSource), + SinkEncoderFactory.createKvEncoder(kvSink)); + // case1: regexp_count("The quick brown fox quick", "quick") + List output1 = processor1.transform("The quick brown fox quick|quick|slow|2|1|3", new HashMap<>()); + Assert.assertEquals(1, output1.size()); + Assert.assertEquals("result=2", output1.get(0)); + // case2: regexp_count("The quick brown fox quick", "slow") + List output2 = processor1.transform("The quick brown fox quick|slow|2|1|3", new HashMap<>()); + Assert.assertEquals(1, output2.size()); + Assert.assertEquals("result=0", output2.get(0)); + String transformSql2 = "select regexp_count(string1) from source"; + TransformConfig config2 = new TransformConfig(transformSql2); + TransformProcessor processor2 = TransformProcessor + .create(config2, SourceDecoderFactory.createCsvDecoder(csvSource), + SinkEncoderFactory.createKvEncoder(kvSink)); + // case3: regexp_count("The quick brown fox quick") + List output3 = + processor2.transform("The quick 
brown fox quick|quick|QAQ|2|1|3", new HashMap<>()); + Assert.assertEquals(1, output3.size()); + Assert.assertEquals("result=null", output3.get(0)); + String transformSql3 = "select regexp_count(string1,string2) from source"; + TransformConfig config3 = new TransformConfig(transformSql3); + TransformProcessor processor3 = TransformProcessor + .create(config3, SourceDecoderFactory.createCsvDecoder(csvSource), + SinkEncoderFactory.createKvEncoder(kvSink)); + // case4: regexp_count("The quick brown fox quick", "[q-") - invalid pattern; only the output row count is asserted + List output4 = + processor3.transform("The quick brown fox quick|[q-|QAQ|2|1|3", new HashMap<>()); + Assert.assertEquals(1, output4.size()); + PatternSyntaxException exception = assertThrows(PatternSyntaxException.class, () -> { + Pattern.compile("[q-"); + }); + assertTrue(exception.getMessage().contains("Illegal character range near index 3")); + } +} diff --git a/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestRegexpExtractAllFunction.java b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestRegexpExtractAllFunction.java new file mode 100644 index 00000000000..db0676f326c --- /dev/null +++ b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestRegexpExtractAllFunction.java @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.inlong.sdk.transform.process.function.string; + +import org.apache.inlong.sdk.transform.decode.SourceDecoderFactory; +import org.apache.inlong.sdk.transform.encode.SinkEncoderFactory; +import org.apache.inlong.sdk.transform.pojo.TransformConfig; +import org.apache.inlong.sdk.transform.process.TransformProcessor; + +import org.junit.Assert; +import org.junit.Test; + +import java.util.HashMap; +import java.util.List; +import java.util.regex.Pattern; +import java.util.regex.PatternSyntaxException; + +import static org.junit.Assert.assertThrows; +import static org.junit.Assert.assertTrue; + +public class TestRegexpExtractAllFunction extends AbstractFunctionStringTestBase { + /** Covers regexp_extract_all with and without the third argument, a case expecting null, and an invalid pattern. */ + @Test + public void testRegexpExtractAllFunction() throws Exception { + String transformSql1 = "select regexp_extract_all(string1,string2,numeric1) from source"; + TransformConfig config1 = new TransformConfig(transformSql1); + TransformProcessor processor1 = TransformProcessor + .create(config1, SourceDecoderFactory.createCsvDecoder(csvSource), + SinkEncoderFactory.createKvEncoder(kvSink)); + // case1: regexp_extract_all("abc123def456ghi789", "(\\d+)", 1) + List output1 = processor1.transform("abc123def456ghi789|(\\\\d+)|2|1|3|4", new HashMap<>()); + Assert.assertEquals(1, output1.size()); + Assert.assertEquals("result=[123, 456, 789]", output1.get(0)); + // case2: regexp_extract_all("abc123def124", "(\\d+)", 0) + List output2 = processor1.transform("abc123def124|(\\\\d+)|1|0|3", new HashMap<>()); + Assert.assertEquals(1, output2.size()); + 
Assert.assertEquals("result=[123, 124]", output2.get(0)); + // case3: regexp_extract_all("Name: John, Age: 25, Location: NY", "Name: (\\w+), Age: (\\d+) + // , Location: (\\w+)", 1) + List output3 = processor1.transform( + "Name: John, Age: 25, Location: NY|Name: (\\\\w+), Age: (\\\\d+), Location: (\\\\w+)|2|1|3", + new HashMap<>()); + Assert.assertEquals(1, output3.size()); + Assert.assertEquals("result=[John]", output3.get(0)); + // case4: regexp_extract_all("Name: John, Age: 25, Location: NY", "Name: (\\w+), Age: (\\d+) + // , Location: (\\w+)", 4) + List output4 = processor1.transform( + "Name: John, Age: 25, Location: NY|Name: (\\\\w+), Age: (\\\\d+), Location: (\\\\w+)|1|4|3", + new HashMap<>()); + Assert.assertEquals(1, output4.size()); + Assert.assertEquals("result=null", output4.get(0)); + + String transformSql2 = "select regexp_extract_all(string1,string2) from source"; + TransformConfig config2 = new TransformConfig(transformSql2); + TransformProcessor processor2 = TransformProcessor + .create(config2, SourceDecoderFactory.createCsvDecoder(csvSource), + SinkEncoderFactory.createKvEncoder(kvSink)); + // case5: regexp_extract_all("The quick brown fox quick", "quick") + List output5 = + processor2.transform("The quick brown fox quick|quick|QAQ|2|1|3", new HashMap<>()); + Assert.assertEquals(1, output5.size()); + Assert.assertEquals("result=[quick, quick]", output5.get(0)); + String transformSql3 = "select regexp_extract_all(string1,string2) from source"; + TransformConfig config3 = new TransformConfig(transformSql3); + TransformProcessor processor3 = TransformProcessor + .create(config3, SourceDecoderFactory.createCsvDecoder(csvSource), + SinkEncoderFactory.createKvEncoder(kvSink)); + // case6: regexp_extract_all("The quick brown fox quick", "[q-") - invalid pattern; only the output row count is asserted + List output6 = + processor3.transform("The quick brown fox quick|[q-|QAQ|2|1|3", new HashMap<>()); + Assert.assertEquals(1, output6.size()); + PatternSyntaxException exception = 
assertThrows(PatternSyntaxException.class, () -> { + Pattern.compile("[q-"); + }); + assertTrue(exception.getMessage().contains("Illegal character range near index 3")); + } +} diff --git a/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestRegexpFunction.java b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestRegexpFunction.java new file mode 100644 index 00000000000..bc49170a648 --- /dev/null +++ b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestRegexpFunction.java @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.inlong.sdk.transform.process.function.string; + +import org.apache.inlong.sdk.transform.decode.SourceDecoderFactory; +import org.apache.inlong.sdk.transform.encode.SinkEncoderFactory; +import org.apache.inlong.sdk.transform.pojo.TransformConfig; +import org.apache.inlong.sdk.transform.process.TransformProcessor; + +import org.junit.Assert; +import org.junit.Test; + +import java.util.HashMap; +import java.util.List; + +public class TestRegexpFunction extends AbstractFunctionStringTestBase { + /** Covers regex with a matching pattern, a non-matching pattern, and a missing pattern argument. */ + @Test + public void testRegexFunction() throws Exception { + String transformSql1 = "select regex(string1, string2) from source"; + TransformConfig config1 = new TransformConfig(transformSql1); + TransformProcessor processor1 = TransformProcessor + .create(config1, SourceDecoderFactory.createCsvDecoder(csvSource), + SinkEncoderFactory.createKvEncoder(kvSink)); + // case1: regex("The quick brown fox", "quick") + List output1 = processor1.transform("The quick brown fox|quick|5|2|1|3", new HashMap<>()); + Assert.assertEquals(1, output1.size()); + Assert.assertEquals("result=true", output1.get(0)); + + // case2: regex("The quick brown fox", "cold") + List output2 = processor1.transform("The quick brown fox|cold|5|2|1|3", new HashMap<>()); + Assert.assertEquals(1, output2.size()); + Assert.assertEquals("result=false", output2.get(0)); + + String transformSql2 = "select regex(string1) from source"; + TransformConfig config2 = new TransformConfig(transformSql2); + TransformProcessor processor2 = TransformProcessor + .create(config2, SourceDecoderFactory.createCsvDecoder(csvSource), + SinkEncoderFactory.createKvEncoder(kvSink)); + // case3: regex("User: Alice, ID: 12345") + List output3 = + processor2.transform("User: Alice, ID: 12345|User: (\\\\w+), ID: (\\\\d+)|5|2|1|3", new HashMap<>()); + Assert.assertEquals(1, output3.size()); + Assert.assertEquals("result=null", output3.get(0)); + } +} diff --git 
a/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestRegexpInstrFunction.java b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestRegexpInstrFunction.java new file mode 100644 index 00000000000..801d1d89265 --- /dev/null +++ b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestRegexpInstrFunction.java @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.inlong.sdk.transform.process.function.string; + +import org.apache.inlong.sdk.transform.decode.SourceDecoderFactory; +import org.apache.inlong.sdk.transform.encode.SinkEncoderFactory; +import org.apache.inlong.sdk.transform.pojo.TransformConfig; +import org.apache.inlong.sdk.transform.process.TransformProcessor; + +import org.junit.Assert; +import org.junit.Test; + +import java.util.HashMap; +import java.util.List; +import java.util.regex.Pattern; +import java.util.regex.PatternSyntaxException; + +import static org.junit.Assert.assertThrows; +import static org.junit.Assert.assertTrue; + +public class TestRegexpInstrFunction extends AbstractFunctionStringTestBase { + /** Covers regexp_instr with matching and non-matching patterns, a missing pattern argument, and an invalid pattern. */ + @Test + public void testRegexpInstrFunction() throws Exception { + String transformSql1 = "select regexp_instr(string1,string2) from source"; + TransformConfig config1 = new TransformConfig(transformSql1); + TransformProcessor processor1 = TransformProcessor + .create(config1, SourceDecoderFactory.createCsvDecoder(csvSource), + SinkEncoderFactory.createKvEncoder(kvSink)); + // case1: regexp_instr("abc123def", "(\\d+)") + List output1 = processor1.transform("abc123def|(\\\\d+)|2|1|3|4", new HashMap<>()); + Assert.assertEquals(1, output1.size()); + Assert.assertEquals("result=4", output1.get(0)); + // case2: regexp_instr("hello world!", "world") + List output2 = processor1.transform("hello world!|world|1|0|3", new HashMap<>()); + Assert.assertEquals(1, output2.size()); + Assert.assertEquals("result=7", output2.get(0)); + // case3: regexp_instr("abcdef", "\\d+") + List output3 = processor1.transform( + "abcdef|\\\\d+|1|2|3", + new HashMap<>()); + Assert.assertEquals(1, output3.size()); + Assert.assertEquals("result=0", output3.get(0)); + + String transformSql2 = "select regexp_instr(string1) from source"; + TransformConfig config2 = new TransformConfig(transformSql2); + TransformProcessor processor2 = TransformProcessor + .create(config2, 
SourceDecoderFactory.createCsvDecoder(csvSource), + SinkEncoderFactory.createKvEncoder(kvSink)); + // case4: regexp_instr("The quick brown fox quick") + List output5 = + processor2.transform("The quick brown fox quick|quick|QAQ|2|1|3", new HashMap<>()); + Assert.assertEquals(1, output5.size()); + Assert.assertEquals("result=null", output5.get(0)); + String transformSql3 = "select regexp_instr(string1,string2) from source"; + TransformConfig config3 = new TransformConfig(transformSql3); + TransformProcessor processor3 = TransformProcessor + .create(config3, SourceDecoderFactory.createCsvDecoder(csvSource), + SinkEncoderFactory.createKvEncoder(kvSink)); + // case5: regexp_instr("abc123def", "[q-") - invalid pattern; only the output row count is asserted + List output6 = + processor3.transform("abc123def|[q-|QAQ|2|1|3", new HashMap<>()); + Assert.assertEquals(1, output6.size()); + PatternSyntaxException exception = assertThrows(PatternSyntaxException.class, () -> { + Pattern.compile("[q-"); + }); + assertTrue(exception.getMessage().contains("Illegal character range near index 3")); + } +} diff --git a/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestRegexpReplaceFunction.java b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestRegexpReplaceFunction.java new file mode 100644 index 00000000000..f2f586af2aa --- /dev/null +++ b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestRegexpReplaceFunction.java @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.inlong.sdk.transform.process.function.string; + +import org.apache.inlong.sdk.transform.decode.SourceDecoderFactory; +import org.apache.inlong.sdk.transform.encode.SinkEncoderFactory; +import org.apache.inlong.sdk.transform.pojo.TransformConfig; +import org.apache.inlong.sdk.transform.process.TransformProcessor; + +import org.junit.Assert; +import org.junit.Test; + +import java.util.HashMap; +import java.util.List; + +public class TestRegexpReplaceFunction extends AbstractFunctionStringTestBase { + /** Covers regexp_replace with a literal pattern, a digit-class pattern, and a missing replacement argument. */ + @Test + public void testRegexpReplaceFunction() throws Exception { + String transformSql1 = "select regexp_replace(string1,string2,string3) from source"; + TransformConfig config1 = new TransformConfig(transformSql1); + TransformProcessor processor1 = TransformProcessor + .create(config1, SourceDecoderFactory.createCsvDecoder(csvSource), + SinkEncoderFactory.createKvEncoder(kvSink)); + // case1: regexp_replace("The quick brown fox quick", "quick", "slow") + List output1 = processor1.transform("The quick brown fox quick|quick|slow|2|1|3", new HashMap<>()); + Assert.assertEquals(1, output1.size()); + Assert.assertEquals("result=The slow brown fox slow", output1.get(0)); + String transformSql2 = "select regexp_replace(string1,string2,string3) from source"; + TransformConfig config2 = new TransformConfig(transformSql2); + TransformProcessor processor2 = TransformProcessor + .create(config2, SourceDecoderFactory.createCsvDecoder(csvSource), + SinkEncoderFactory.createKvEncoder(kvSink)); + // case2: regexp_replace("User: Alice, ID: 
12345", "\\d+", "QAQ") + List output2 = + processor2.transform("User: Alice, ID: 12345|\\\\d+|QAQ|2|1|3", new HashMap<>()); + Assert.assertEquals(1, output2.size()); + Assert.assertEquals("result=User: Alice, ID: QAQ", output2.get(0)); + String transformSql3 = "select regexp_replace(string1,string2) from source"; + TransformConfig config3 = new TransformConfig(transformSql3); + TransformProcessor processor3 = TransformProcessor + .create(config3, SourceDecoderFactory.createCsvDecoder(csvSource), + SinkEncoderFactory.createKvEncoder(kvSink)); + // case3: regexp_replace("User: Alice, ID: 12345", "\\d+") + List output3 = + processor3.transform("User: Alice, ID: 12345|\\\\d+|QAQ|2|1|3", new HashMap<>()); + Assert.assertEquals(1, output3.size()); + Assert.assertEquals("result=null", output3.get(0)); + } +} diff --git a/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestRegexpSubstrFunction.java b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestRegexpSubstrFunction.java new file mode 100644 index 00000000000..d51c0e4e99b --- /dev/null +++ b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestRegexpSubstrFunction.java @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.inlong.sdk.transform.process.function.string; + +import org.apache.inlong.sdk.transform.decode.SourceDecoderFactory; +import org.apache.inlong.sdk.transform.encode.SinkEncoderFactory; +import org.apache.inlong.sdk.transform.pojo.TransformConfig; +import org.apache.inlong.sdk.transform.process.TransformProcessor; + +import org.junit.Assert; +import org.junit.Test; + +import java.util.HashMap; +import java.util.List; +import java.util.regex.Pattern; +import java.util.regex.PatternSyntaxException; + +import static org.junit.Assert.assertThrows; +import static org.junit.Assert.assertTrue; + +public class TestRegexpSubstrFunction extends AbstractFunctionStringTestBase { + /** Covers regex_substr with matching and non-matching patterns, a missing pattern argument, and an invalid pattern. */ + @Test + public void testRegexpSubstrFunction() throws Exception { + String transformSql1 = "select regex_substr(string1,string2) from source"; + TransformConfig config1 = new TransformConfig(transformSql1); + TransformProcessor processor1 = TransformProcessor + .create(config1, SourceDecoderFactory.createCsvDecoder(csvSource), + SinkEncoderFactory.createKvEncoder(kvSink)); + // case1: regex_substr("abc123def", "(\\d+)") + List output1 = processor1.transform("abc123def|(\\\\d+)|2|1|3|4", new HashMap<>()); + Assert.assertEquals(1, output1.size()); + Assert.assertEquals("result=123", output1.get(0)); + // case2: regex_substr("hello world!", "\\w+") + List output2 = processor1.transform("hello world!|\\\\w+|1|0|3", new HashMap<>()); + Assert.assertEquals(1, output2.size()); + Assert.assertEquals("result=hello", output2.get(0)); + // case3: regex_substr("abcdef", "\\d+") + List output3 = processor1.transform( + "abcdef|\\\\d+|1|2|3", + new HashMap<>()); + Assert.assertEquals(1, output3.size()); + Assert.assertEquals("result=null", output3.get(0)); + + String transformSql2 = "select regex_substr(string1) from source"; + TransformConfig config2 = new 
TransformConfig(transformSql2); + TransformProcessor processor2 = TransformProcessor + .create(config2, SourceDecoderFactory.createCsvDecoder(csvSource), + SinkEncoderFactory.createKvEncoder(kvSink)); + // case4: regex_substr("The quick brown fox quick") + List output5 = + processor2.transform("The quick brown fox quick|quick|QAQ|2|1|3", new HashMap<>()); + Assert.assertEquals(1, output5.size()); + Assert.assertEquals("result=null", output5.get(0)); + String transformSql3 = "select regex_substr(string1,string2) from source"; + TransformConfig config3 = new TransformConfig(transformSql3); + TransformProcessor processor3 = TransformProcessor + .create(config3, SourceDecoderFactory.createCsvDecoder(csvSource), + SinkEncoderFactory.createKvEncoder(kvSink)); + // case5: regex_substr("abc123def", "[q-") - invalid pattern; only the output row count is asserted + List output6 = + processor3.transform("abc123def|[q-|QAQ|2|1|3", new HashMap<>()); + Assert.assertEquals(1, output6.size()); + PatternSyntaxException exception = assertThrows(PatternSyntaxException.class, () -> { + Pattern.compile("[q-"); + }); + assertTrue(exception.getMessage().contains("Illegal character range near index 3")); + } +}