forked from apache/inlong
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[INLONG-11053][SDK] Transform support SPLIT_INDEX function (apache#11054)
- Loading branch information
1 parent
639c5f7
commit 647f29d
Showing
2 changed files
with
181 additions
and
0 deletions.
There are no files selected for viewing
78 changes: 78 additions & 0 deletions
78
...dk/src/main/java/org/apache/inlong/sdk/transform/process/function/SplitIndexFunction.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one or more | ||
* contributor license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright ownership. | ||
* The ASF licenses this file to You under the Apache License, Version 2.0 | ||
* (the "License"); you may not use this file except in compliance with | ||
* the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package org.apache.inlong.sdk.transform.process.function; | ||
|
||
import org.apache.inlong.sdk.transform.decode.SourceData; | ||
import org.apache.inlong.sdk.transform.process.Context; | ||
import org.apache.inlong.sdk.transform.process.operator.OperatorTools; | ||
import org.apache.inlong.sdk.transform.process.parser.ValueParser; | ||
|
||
import net.sf.jsqlparser.expression.Expression; | ||
import net.sf.jsqlparser.expression.Function; | ||
|
||
import java.util.List; | ||
/* | ||
* SplitIndexFunction | ||
* | ||
* Description: | ||
* Split_index(string1, string2, integer) -> string | ||
* Splits string1 by delimiter string2 and returns the string at the given index integer(zero-based). | ||
* - Returns null if the index is negative or any of the arguments is null. | ||
* - Returns null if the index is out of bounds of the split strings. | ||
* | ||
*/ | ||
@TransformFunction(names = {"split_index", "splitindex"}) | ||
public class SplitIndexFunction implements ValueParser { | ||
|
||
private final ValueParser strParser; | ||
private final ValueParser delimiterParser; | ||
private final ValueParser indexParser; | ||
|
||
public SplitIndexFunction(Function expr) { | ||
List<Expression> expressions = expr.getParameters().getExpressions(); | ||
strParser = OperatorTools.buildParser(expressions.get(0)); | ||
delimiterParser = OperatorTools.buildParser(expressions.get(1)); | ||
indexParser = OperatorTools.buildParser(expressions.get(2)); | ||
} | ||
|
||
@Override | ||
public Object parse(SourceData sourceData, int rowIndex, Context context) { | ||
Object strObject = strParser.parse(sourceData, rowIndex, context); | ||
Object delimiterObject = delimiterParser.parse(sourceData, rowIndex, context); | ||
Object indexObject = indexParser.parse(sourceData, rowIndex, context); | ||
|
||
if (strObject == null || delimiterObject == null || indexObject == null) { | ||
return null; | ||
} | ||
|
||
String str = OperatorTools.parseString(strObject); | ||
String delimiter = OperatorTools.parseString(delimiterObject); | ||
int index = OperatorTools.parseBigDecimal(indexObject).intValue(); | ||
|
||
if (str == null || delimiter == null || index < 0) { | ||
return null; | ||
} | ||
|
||
String[] splitStrings = str.split(delimiter); | ||
if (index >= splitStrings.length) { | ||
return null; | ||
} | ||
|
||
return splitStrings[index]; | ||
} | ||
} |
103 changes: 103 additions & 0 deletions
103
.../java/org/apache/inlong/sdk/transform/process/function/string/TestSplitIndexFunction.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one or more | ||
* contributor license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright ownership. | ||
* The ASF licenses this file to You under the Apache License, Version 2.0 | ||
* (the "License"); you may not use this file except in compliance with | ||
* the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package org.apache.inlong.sdk.transform.process.function.string; | ||
|
||
import org.apache.inlong.sdk.transform.decode.SourceDecoderFactory; | ||
import org.apache.inlong.sdk.transform.encode.SinkEncoderFactory; | ||
import org.apache.inlong.sdk.transform.pojo.TransformConfig; | ||
import org.apache.inlong.sdk.transform.process.TransformProcessor; | ||
|
||
import org.junit.Assert; | ||
import org.junit.Test; | ||
|
||
import java.util.HashMap; | ||
import java.util.List; | ||
|
||
public class TestSplitIndexFunction extends AbstractFunctionStringTestBase { | ||
|
||
@Test | ||
public void testSplitIndexFunction() throws Exception { | ||
String transformSql = "select split_index(string1, string2, numeric1) from source"; | ||
TransformConfig config = new TransformConfig(transformSql); | ||
TransformProcessor<String, String> processor = TransformProcessor | ||
.create(config, SourceDecoderFactory.createCsvDecoder(csvSource), | ||
SinkEncoderFactory.createKvEncoder(kvSink)); | ||
|
||
// case1: split_index('a,b,c', ',', 1) | ||
String data = "a,b,c|,|cloud|1|3|3"; | ||
List<String> output = processor.transform(data, new HashMap<>()); | ||
Assert.assertEquals(1, output.size()); | ||
Assert.assertEquals("result=b", output.get(0)); | ||
|
||
// case2: split_index('a,b,c', ',', -1) | ||
data = "a,b,c|,|cloud|-1|3|3"; | ||
output = processor.transform(data, new HashMap<>()); | ||
Assert.assertEquals(1, output.size()); | ||
Assert.assertEquals("result=null", output.get(0)); | ||
|
||
// case3: split_index('a,b,c', ',', 3) | ||
data = "a,b,c|,|cloud|3|3|3"; | ||
output = processor.transform(data, new HashMap<>()); | ||
Assert.assertEquals(1, output.size()); | ||
Assert.assertEquals("result=null", output.get(0)); | ||
|
||
// case4: split_index(null, ',', 1) | ||
transformSql = "select split_index(xxd, string2, numeric1) from source"; | ||
config = new TransformConfig(transformSql); | ||
processor = TransformProcessor | ||
.create(config, SourceDecoderFactory.createCsvDecoder(csvSource), | ||
SinkEncoderFactory.createKvEncoder(kvSink)); | ||
data = "abc|,|cloud|1|3|3"; | ||
output = processor.transform(data, new HashMap<>()); | ||
Assert.assertEquals(1, output.size()); | ||
Assert.assertEquals("result=null", output.get(0)); | ||
|
||
// case5: split_index('a,b,c', null, 1) | ||
transformSql = "select split_index(string1, xxd, numeric1) from source"; | ||
config = new TransformConfig(transformSql); | ||
processor = TransformProcessor | ||
.create(config, SourceDecoderFactory.createCsvDecoder(csvSource), | ||
SinkEncoderFactory.createKvEncoder(kvSink)); | ||
data = "a,b,c|xxd|cloud|1|3|3"; | ||
output = processor.transform(data, new HashMap<>()); | ||
Assert.assertEquals(1, output.size()); | ||
Assert.assertEquals("result=null", output.get(0)); | ||
|
||
// case6: split_index('a,b,c', ',', null) | ||
transformSql = "select split_index(string1, string2, xxd) from source"; | ||
config = new TransformConfig(transformSql); | ||
processor = TransformProcessor | ||
.create(config, SourceDecoderFactory.createCsvDecoder(csvSource), | ||
SinkEncoderFactory.createKvEncoder(kvSink)); | ||
data = "a,b,c|,|cloud|xxd|3|3"; | ||
output = processor.transform(data, new HashMap<>()); | ||
Assert.assertEquals(1, output.size()); | ||
Assert.assertEquals("result=null", output.get(0)); | ||
|
||
// case7: split_index('', ',', 0) | ||
transformSql = "select split_index(string1, string2, numeric1) from source"; | ||
config = new TransformConfig(transformSql); | ||
processor = TransformProcessor | ||
.create(config, SourceDecoderFactory.createCsvDecoder(csvSource), | ||
SinkEncoderFactory.createKvEncoder(kvSink)); | ||
data = "|,|cloud|0|3|3"; | ||
output = processor.transform(data, new HashMap<>()); | ||
Assert.assertEquals(1, output.size()); | ||
Assert.assertEquals("result=", output.get(0)); | ||
} | ||
} |