Skip to content

Commit

Permalink
[INLONG-11053][SDK] Transform support SPLIT_INDEX function (apache#11054)
Browse files Browse the repository at this point in the history
  • Loading branch information
ying-hua authored and MOONSakura0614 committed Sep 21, 2024
1 parent 639c5f7 commit 647f29d
Show file tree
Hide file tree
Showing 2 changed files with 181 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.inlong.sdk.transform.process.function;

import org.apache.inlong.sdk.transform.decode.SourceData;
import org.apache.inlong.sdk.transform.process.Context;
import org.apache.inlong.sdk.transform.process.operator.OperatorTools;
import org.apache.inlong.sdk.transform.process.parser.ValueParser;

import net.sf.jsqlparser.expression.Expression;
import net.sf.jsqlparser.expression.Function;

import java.util.List;
/**
 * SplitIndexFunction
 * <p>
 * Description:
 * split_index(string1, string2, integer) -> string
 * Splits string1 by the delimiter string2 and returns the piece at the given zero-based index.
 * <ul>
 * <li>The delimiter is matched literally; regex metacharacters such as {@code |} or {@code .}
 * have no special meaning.</li>
 * <li>Empty pieces are preserved, including trailing ones:
 * {@code split_index('a,b,', ',', 2)} yields the empty string.</li>
 * <li>Returns null if the index is negative or any of the arguments is null.</li>
 * <li>Returns null if the index is out of bounds of the split strings.</li>
 * </ul>
 */
@TransformFunction(names = {"split_index", "splitindex"})
public class SplitIndexFunction implements ValueParser {

    private final ValueParser strParser;
    private final ValueParser delimiterParser;
    private final ValueParser indexParser;

    /**
     * Builds the three argument parsers from the parsed SQL function call.
     *
     * @param expr the function expression; its first three parameters are used as
     *             (string, delimiter, index)
     */
    public SplitIndexFunction(Function expr) {
        List<Expression> expressions = expr.getParameters().getExpressions();
        strParser = OperatorTools.buildParser(expressions.get(0));
        delimiterParser = OperatorTools.buildParser(expressions.get(1));
        indexParser = OperatorTools.buildParser(expressions.get(2));
    }

    /**
     * Evaluates the three arguments for the given row and returns the requested piece.
     *
     * @return the piece at the requested index, or null when any argument evaluates to null,
     *         the index is negative, or the index is past the last piece
     */
    @Override
    public Object parse(SourceData sourceData, int rowIndex, Context context) {
        Object strObject = strParser.parse(sourceData, rowIndex, context);
        Object delimiterObject = delimiterParser.parse(sourceData, rowIndex, context);
        Object indexObject = indexParser.parse(sourceData, rowIndex, context);

        if (strObject == null || delimiterObject == null || indexObject == null) {
            return null;
        }

        String str = OperatorTools.parseString(strObject);
        String delimiter = OperatorTools.parseString(delimiterObject);
        int index = OperatorTools.parseBigDecimal(indexObject).intValue();

        if (str == null || delimiter == null || index < 0) {
            return null;
        }

        return pieceAt(str, delimiter, index);
    }

    /**
     * Returns the {@code index}-th piece of {@code str} when cut at every literal occurrence of
     * {@code delimiter}, or null if there are not that many pieces.
     * <p>
     * The delimiter is located with {@link String#indexOf(String, int)} rather than
     * {@link String#split(String)}, so it is never interpreted as a regular expression and
     * trailing empty pieces are not discarded.
     */
    private static String pieceAt(String str, String delimiter, int index) {
        if (delimiter.isEmpty()) {
            // indexOf("") never advances, so the empty delimiter keeps delegating to
            // String.split exactly as before.
            String[] pieces = str.split(delimiter);
            return index < pieces.length ? pieces[index] : null;
        }

        // Skip over the first `index` delimiters; running out of them means out of bounds.
        int start = 0;
        for (int skipped = 0; skipped < index; skipped++) {
            int hit = str.indexOf(delimiter, start);
            if (hit < 0) {
                return null;
            }
            start = hit + delimiter.length();
        }

        // The piece runs up to the next delimiter, or to the end of the string.
        int end = str.indexOf(delimiter, start);
        return end < 0 ? str.substring(start) : str.substring(start, end);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.inlong.sdk.transform.process.function.string;

import org.apache.inlong.sdk.transform.decode.SourceDecoderFactory;
import org.apache.inlong.sdk.transform.encode.SinkEncoderFactory;
import org.apache.inlong.sdk.transform.pojo.TransformConfig;
import org.apache.inlong.sdk.transform.process.TransformProcessor;

import org.junit.Assert;
import org.junit.Test;

import java.util.HashMap;
import java.util.List;

/**
 * Exercises the split_index transform function end to end: CSV rows in, key-value rows out.
 */
public class TestSplitIndexFunction extends AbstractFunctionStringTestBase {

    @Test
    public void testSplitIndexFunction() throws Exception {
        // Cases 1-3 share one processor: all three arguments come from real source fields.
        TransformProcessor<String, String> allFields =
                newProcessor("select split_index(string1, string2, numeric1) from source");

        // case1: split_index('a,b,c', ',', 1)
        expectSingleRow(allFields, "a,b,c|,|cloud|1|3|3", "result=b");

        // case2: split_index('a,b,c', ',', -1)
        expectSingleRow(allFields, "a,b,c|,|cloud|-1|3|3", "result=null");

        // case3: split_index('a,b,c', ',', 3)
        expectSingleRow(allFields, "a,b,c|,|cloud|3|3|3", "result=null");

        // case4: split_index(null, ',', 1)
        TransformProcessor<String, String> nullString =
                newProcessor("select split_index(xxd, string2, numeric1) from source");
        expectSingleRow(nullString, "abc|,|cloud|1|3|3", "result=null");

        // case5: split_index('a,b,c', null, 1)
        TransformProcessor<String, String> nullDelimiter =
                newProcessor("select split_index(string1, xxd, numeric1) from source");
        expectSingleRow(nullDelimiter, "a,b,c|xxd|cloud|1|3|3", "result=null");

        // case6: split_index('a,b,c', ',', null)
        TransformProcessor<String, String> nullIndex =
                newProcessor("select split_index(string1, string2, xxd) from source");
        expectSingleRow(nullIndex, "a,b,c|,|cloud|xxd|3|3", "result=null");

        // case7: split_index('', ',', 0)
        TransformProcessor<String, String> emptyString =
                newProcessor("select split_index(string1, string2, numeric1) from source");
        expectSingleRow(emptyString, "|,|cloud|0|3|3", "result=");
    }

    /** Creates a CSV-to-KV processor for the given transform SQL. */
    private TransformProcessor<String, String> newProcessor(String transformSql) throws Exception {
        TransformConfig config = new TransformConfig(transformSql);
        return TransformProcessor
                .create(config, SourceDecoderFactory.createCsvDecoder(csvSource),
                        SinkEncoderFactory.createKvEncoder(kvSink));
    }

    /** Transforms one input row and asserts that exactly one row with the expected text comes out. */
    private void expectSingleRow(TransformProcessor<String, String> processor, String data, String expected)
            throws Exception {
        List<String> output = processor.transform(data, new HashMap<>());
        Assert.assertEquals(1, output.size());
        Assert.assertEquals(expected, output.get(0));
    }
}

0 comments on commit 647f29d

Please sign in to comment.