diff --git a/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/DecodeFunction.java b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/DecodeFunction.java new file mode 100644 index 00000000000..042e6b4f98d --- /dev/null +++ b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/DecodeFunction.java @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.inlong.sdk.transform.process.function;
+
+import org.apache.inlong.sdk.transform.decode.SourceData;
+import org.apache.inlong.sdk.transform.process.Context;
+import org.apache.inlong.sdk.transform.process.operator.OperatorTools;
+import org.apache.inlong.sdk.transform.process.parser.ValueParser;
+
+import net.sf.jsqlparser.expression.Expression;
+import net.sf.jsqlparser.expression.Function;
+
+import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+/**
+ * DecodeFunction
+ * description: decode(binary, string)
+ * - Decodes binary, written as whitespace-separated decimal byte values (e.g. '72 101 108 108 111'),
+ *   into a string using the supplied character set
+ *   ('US-ASCII', 'ISO-8859-1', 'UTF-8', 'UTF-16BE', 'UTF-16LE', 'UTF-16'; matched case-insensitively).
+ * - Returns NULL if either parameter is NULL.
+ * - Returns an empty string if either parameter is empty or the character set is not supported.
+ */
+@TransformFunction(names = {"decode"})
+public class DecodeFunction implements ValueParser {
+
+    private ValueParser binaryParser;
+
+    private ValueParser characterSetParser;
+
+    private static final Set<String> SUPPORTED_CHARSETS;
+
+    static {
+        Set<String> charsets = new HashSet<>();
+        charsets.add(StandardCharsets.US_ASCII.name());
+        charsets.add(StandardCharsets.ISO_8859_1.name());
+        charsets.add(StandardCharsets.UTF_8.name());
+        charsets.add(StandardCharsets.UTF_16.name());
+        charsets.add(StandardCharsets.UTF_16BE.name());
+        charsets.add(StandardCharsets.UTF_16LE.name());
+        SUPPORTED_CHARSETS = Collections.unmodifiableSet(charsets);
+    }
+
+    /**
+     * Builds the parsers for the two arguments of decode(binary, charset).
+     * Both parsers stay unset when the call does not carry exactly two arguments.
+     *
+     * @param expr the parsed decode(...) call
+     */
+    public DecodeFunction(Function expr) {
+        // Guard against a call without a parameter list before asking for its expressions.
+        List<Expression> expressions = expr.getParameters() == null
+                ? null
+                : expr.getParameters().getExpressions();
+        if (expressions != null && expressions.size() == 2) {
+            binaryParser = OperatorTools.buildParser(expressions.get(0));
+            characterSetParser = OperatorTools.buildParser(expressions.get(1));
+        }
+    }
+
+    /**
+     * Evaluates both arguments for the given row and decodes the byte list.
+     *
+     * @return the decoded string, an empty string for empty/unsupported input,
+     *         or null when an argument is null or the call did not have two arguments
+     */
+    @Override
+    public Object parse(SourceData sourceData, int rowIndex, Context context) {
+        if (binaryParser == null || characterSetParser == null) {
+            // The constructor only accepts exactly two arguments; anything else yields NULL.
+            return null;
+        }
+        Object binaryObj = binaryParser.parse(sourceData, rowIndex, context);
+        Object characterObj = characterSetParser.parse(sourceData, rowIndex, context);
+        if (binaryObj == null || characterObj == null) {
+            return null;
+        }
+        String binaryString = OperatorTools.parseString(binaryObj);
+        String characterSetValue = OperatorTools.parseString(characterObj);
+        return decode(binaryString, characterSetValue);
+    }
+
+    /**
+     * Turns a whitespace-separated list of decimal byte values into a string.
+     *
+     * @param binaryString byte values such as "72 101 108 108 111"; each value is narrowed to a byte
+     * @param charsetName  name of one of the supported character sets, in any letter case
+     * @return the decoded text, or "" when an argument is null/empty or the character set is unsupported
+     * @throws NumberFormatException if a token of binaryString is not an integer
+     */
+    private String decode(String binaryString, String charsetName) {
+        if (binaryString == null || charsetName == null) {
+            return "";
+        }
+        // Validate the character set first so an unsupported name never costs a parse of the bytes.
+        Charset charset = resolveCharset(charsetName);
+        String trimmed = binaryString.trim();
+        if (charset == null || trimmed.isEmpty()) {
+            return "";
+        }
+        // "\\s+" tolerates repeated separators, which split(" ") turned into unparsable empty tokens.
+        String[] byteValues = trimmed.split("\\s+");
+        byte[] byteArray = new byte[byteValues.length];
+        for (int i = 0; i < byteValues.length; i++) {
+            byteArray[i] = (byte) Integer.parseInt(byteValues[i]);
+        }
+        return new String(byteArray, charset);
+    }
+
+    /**
+     * Looks the name up in the whitelist without calling Charset.isSupported on raw input
+     * (which throws IllegalCharsetNameException for illegal names) and without locale-sensitive
+     * case conversion.
+     *
+     * @return the matching charset, or null when the name is not one of the supported ones
+     */
+    private static Charset resolveCharset(String charsetName) {
+        for (String supported : SUPPORTED_CHARSETS) {
+            if (supported.equalsIgnoreCase(charsetName)) {
+                return Charset.forName(supported);
+            }
+        }
+        return null;
+    }
+}
diff --git a/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/EncodeFunction.java b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/EncodeFunction.java
new file mode 100644
index 00000000000..8196c529fcd
--- /dev/null
+++ b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/EncodeFunction.java
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.inlong.sdk.transform.process.function; + +import org.apache.inlong.sdk.transform.decode.SourceData; +import org.apache.inlong.sdk.transform.process.Context; +import org.apache.inlong.sdk.transform.process.operator.OperatorTools; +import org.apache.inlong.sdk.transform.process.parser.ValueParser; + +import net.sf.jsqlparser.expression.Expression; +import net.sf.jsqlparser.expression.Function; + +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +/** + * EncodeFunction + * description: encode(string1, string2) + * Encodes string1 using the provided character set ('US-ASCII', 'ISO-8859-1', 'UTF-8', 'UTF-16BE', 'UTF-16LE', 'UTF-16') and returns the bytes as space-separated decimal values. + * If either parameter is NULL, NULL is returned; if either parameter is empty or the character set is not supported, the result is empty. 
+ */
+@TransformFunction(names = {"encode"})
+public class EncodeFunction implements ValueParser {
+
+    private ValueParser stringParser;
+
+    private ValueParser characterSetParser;
+
+    private static final Set<String> SUPPORTED_CHARSETS;
+
+    static {
+        Set<String> charsets = new HashSet<>();
+        charsets.add(StandardCharsets.US_ASCII.name());
+        charsets.add(StandardCharsets.ISO_8859_1.name());
+        charsets.add(StandardCharsets.UTF_8.name());
+        charsets.add(StandardCharsets.UTF_16.name());
+        charsets.add(StandardCharsets.UTF_16BE.name());
+        charsets.add(StandardCharsets.UTF_16LE.name());
+        SUPPORTED_CHARSETS = Collections.unmodifiableSet(charsets);
+    }
+
+    /**
+     * Builds the parsers for the two arguments of encode(string, charset).
+     * Both parsers stay unset when the call does not carry exactly two arguments.
+     *
+     * @param expr the parsed encode(...) call
+     */
+    public EncodeFunction(Function expr) {
+        // Guard against a call without a parameter list before asking for its expressions.
+        List<Expression> expressions = expr.getParameters() == null
+                ? null
+                : expr.getParameters().getExpressions();
+        if (expressions != null && expressions.size() == 2) {
+            stringParser = OperatorTools.buildParser(expressions.get(0));
+            characterSetParser = OperatorTools.buildParser(expressions.get(1));
+        }
+    }
+
+    /**
+     * Evaluates both arguments for the given row and renders the encoded bytes
+     * as signed decimal values separated by single spaces (e.g. "72 101 108 108 111").
+     *
+     * @return the rendered bytes, an empty string for empty/unsupported input,
+     *         or null when an argument is null or the call did not have two arguments
+     */
+    @Override
+    public Object parse(SourceData sourceData, int rowIndex, Context context) {
+        if (stringParser == null || characterSetParser == null) {
+            // The constructor only accepts exactly two arguments; anything else yields NULL.
+            return null;
+        }
+        Object stringObj = stringParser.parse(sourceData, rowIndex, context);
+        Object characterObj = characterSetParser.parse(sourceData, rowIndex, context);
+        if (stringObj == null || characterObj == null) {
+            return null;
+        }
+        String stringValue = OperatorTools.parseString(stringObj);
+        String characterSetValue = OperatorTools.parseString(characterObj);
+        byte[] encodeBytes = encode(stringValue, characterSetValue);
+        StringBuilder res = new StringBuilder();
+        for (byte encodeByte : encodeBytes) {
+            if (res.length() > 0) {
+                res.append(' ');
+            }
+            res.append((int) encodeByte);
+        }
+        return res.toString();
+    }
+
+    /**
+     * Encodes the text with the named character set.
+     *
+     * @param stringValue       the text to encode
+     * @param characterSetValue name of one of the supported character sets, in any letter case
+     * @return the encoded bytes; an empty array when an argument is null/empty
+     *         or the character set is not supported (never null)
+     */
+    private byte[] encode(String stringValue, String characterSetValue) {
+        if (stringValue == null || stringValue.isEmpty() || characterSetValue == null) {
+            return new byte[0];
+        }
+        Charset charset = resolveCharset(characterSetValue);
+        return charset == null ? new byte[0] : stringValue.getBytes(charset);
+    }
+
+    /**
+     * Looks the name up in the whitelist without calling Charset.isSupported on raw input
+     * (which throws IllegalCharsetNameException for illegal names) and without locale-sensitive
+     * case conversion.
+     *
+     * @return the matching charset, or null when the name is not one of the supported ones
+     */
+    private static Charset resolveCharset(String charsetName) {
+        for (String supported : SUPPORTED_CHARSETS) {
+            if (supported.equalsIgnoreCase(charsetName)) {
+                return Charset.forName(supported);
+            }
+        }
+        return null;
+    }
+}
diff --git a/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/TruncateFunction.java b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/TruncateFunction.java
new file mode 100644
index 00000000000..8267efff552
--- /dev/null
+++ b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/TruncateFunction.java
@@ -0,0 +1,90 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.inlong.sdk.transform.process.function;
+
+import org.apache.inlong.sdk.transform.decode.SourceData;
+import org.apache.inlong.sdk.transform.process.Context;
+import org.apache.inlong.sdk.transform.process.operator.OperatorTools;
+import org.apache.inlong.sdk.transform.process.parser.ValueParser;
+
+import net.sf.jsqlparser.expression.Expression;
+import net.sf.jsqlparser.expression.Function;
+
+import java.math.BigDecimal;
+import java.math.RoundingMode;
+import java.util.List;
+/**
+ * TruncateFunction
+ * description: returns the number that intercepts integer2 decimal places.
+ * If numeric1 or integer2 is NULL, NULL is returned.
+ * If integer2 is 0, the result has no decimal point or fractional part.
+ * integer2 can be negative, making the integer2 digit to the left of the decimal point of the value zero.
+ * This function can also be used by passing only one numeric1 argument without setting Integer2.
+ * If Integer2 is not set, Integer2 is 0
+ * for example: truncate(42.324, 2)--return 42.32
+ *              truncate(42.324, -1)--return 40
+ *              truncate(42.324)--return 42
+ */
+@TransformFunction(names = {"truncate"})
+public class TruncateFunction implements ValueParser {
+
+    private ValueParser bigDecimalParser;
+
+    private ValueParser integerParser;
+
+    /**
+     * Builds the parser for numeric1 and, when present, for integer2.
+     * Both parsers stay unset when the call carries no arguments.
+     *
+     * @param expr the parsed truncate(...) call
+     */
+    public TruncateFunction(Function expr) {
+        // Guard against a call without a parameter list before asking for its expressions.
+        List<Expression> expressions = expr.getParameters() == null
+                ? null
+                : expr.getParameters().getExpressions();
+        if (expressions != null && !expressions.isEmpty()) {
+            bigDecimalParser = OperatorTools.buildParser(expressions.get(0));
+            if (expressions.size() >= 2) {
+                integerParser = OperatorTools.buildParser(expressions.get(1));
+            }
+        }
+    }
+
+    /**
+     * Evaluates the arguments for the given row and truncates numeric1 toward zero.
+     *
+     * @return the truncated value, or null when numeric1 or integer2 evaluates to null
+     */
+    @Override
+    public Object parse(SourceData sourceData, int rowIndex, Context context) {
+        if (bigDecimalParser == null) {
+            return null;
+        }
+        // Nulls are checked on the raw values: how OperatorTools.parseBigDecimal treats null
+        // is not visible here, and the documented contract is "NULL in, NULL out".
+        Object bigDecimalObj = bigDecimalParser.parse(sourceData, rowIndex, context);
+        if (bigDecimalObj == null) {
+            return null;
+        }
+        BigDecimal bigDecimal = OperatorTools.parseBigDecimal(bigDecimalObj);
+        if (integerParser == null) {
+            return truncate(bigDecimal);
+        }
+        Object integerObj = integerParser.parse(sourceData, rowIndex, context);
+        if (integerObj == null) {
+            return null;
+        }
+        BigDecimal places = OperatorTools.parseBigDecimal(integerObj);
+        return truncate(bigDecimal, places == null ? null : places.intValue());
+    }
+
+    /**
+     * Truncates numeric1 toward zero, keeping integer2 decimal places.
+     *
+     * @param numeric1 the value to truncate
+     * @param integer2 digits to keep after the decimal point; a negative value zeroes that many
+     *                 digits to the left of the decimal point
+     * @return the truncated value, or null when either argument is null
+     */
+    private BigDecimal truncate(BigDecimal numeric1, Integer integer2) {
+        if (numeric1 == null || integer2 == null) {
+            return null;
+        }
+        if (integer2 >= 0) {
+            return numeric1.setScale(integer2, RoundingMode.DOWN);
+        }
+        // Shift, cut at scale 0 and shift back, so the result keeps scale 0 and prints as
+        // plain digits (40) instead of the 4E+1 a negative scale would produce.
+        int shift = -integer2;
+        return numeric1.movePointLeft(shift)
+                .setScale(0, RoundingMode.DOWN)
+                .movePointRight(shift);
+    }
+
+    /**
+     * Truncates numeric1 to an integer value (integer2 defaults to 0).
+     *
+     * @return the truncated value, or null when numeric1 is null
+     */
+    private BigDecimal truncate(BigDecimal numeric1) {
+        return truncate(numeric1, 0);
+    }
+
+}
diff --git a/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/arithmetic/TestTruncateFunction.java b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/arithmetic/TestTruncateFunction.java
new file mode 100644
index 00000000000..ddc3cabf7cf
--- /dev/null
+++ b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/arithmetic/TestTruncateFunction.java
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.inlong.sdk.transform.process.function.arithmetic;
+
+import org.apache.inlong.sdk.transform.decode.SourceDecoderFactory;
+import org.apache.inlong.sdk.transform.encode.SinkEncoderFactory;
+import org.apache.inlong.sdk.transform.pojo.TransformConfig;
+import org.apache.inlong.sdk.transform.process.TransformProcessor;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.List;
+
+/**
+ * Tests truncate(numeric1[, integer2]) through the CSV-to-KV transform pipeline.
+ * Assertions use JUnit's (expected, actual) order so failure messages read correctly.
+ */
+public class TestTruncateFunction extends AbstractFunctionArithmeticTestBase {
+
+    @Test
+    public void testTruncateFunction() throws Exception {
+        String transformSql1 = "select truncate(numeric1,numeric2) from source";
+        TransformConfig config1 = new TransformConfig(transformSql1);
+        TransformProcessor<String, String> processor1 = TransformProcessor
+                .create(config1, SourceDecoderFactory.createCsvDecoder(csvSource),
+                        SinkEncoderFactory.createKvEncoder(kvSink));
+
+        // case1: truncate(42.324, 2)
+        List<String> output1 = processor1.transform("42.324|2|6|8");
+        Assert.assertEquals(1, output1.size());
+        Assert.assertEquals("result=42.32", output1.get(0));
+
+        // case2: truncate(42.324, -1)
+        List<String> output2 = processor1.transform("42.324|-1|6|8");
+        Assert.assertEquals(1, output2.size());
+        Assert.assertEquals("result=40", output2.get(0));
+
+        // case3: truncate(12345.6789, -3)
+        List<String> output3 = processor1.transform("12345.6789|-3|6|8");
+        Assert.assertEquals(1, output3.size());
+        Assert.assertEquals("result=12000", output3.get(0));
+
+        String transformSql2 = "select truncate(numeric1) from source";
+        TransformConfig config2 = new TransformConfig(transformSql2);
+        TransformProcessor<String, String> processor2 = TransformProcessor
+                .create(config2, SourceDecoderFactory.createCsvDecoder(csvSource),
+                        SinkEncoderFactory.createKvEncoder(kvSink));
+
+        // case4: truncate(12345.6789) -- only numeric1 is used, integer2 defaults to 0
+        List<String> output4 = processor2.transform("12345.6789|-3|6|8");
+        Assert.assertEquals(1, output4.size());
+        Assert.assertEquals("result=12345", output4.get(0));
+    }
+}
diff --git
a/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestDecodeFunction.java b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestDecodeFunction.java new file mode 100644 index 00000000000..4368334b790 --- /dev/null +++ b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestDecodeFunction.java @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.inlong.sdk.transform.process.function.string;
+
+import org.apache.inlong.sdk.transform.decode.SourceDecoderFactory;
+import org.apache.inlong.sdk.transform.encode.SinkEncoderFactory;
+import org.apache.inlong.sdk.transform.pojo.TransformConfig;
+import org.apache.inlong.sdk.transform.process.TransformProcessor;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.HashMap;
+import java.util.List;
+
+/**
+ * Tests decode(binary, charset) through the CSV-to-KV transform pipeline.
+ * Assertions use JUnit's (expected, actual) order so failure messages read correctly.
+ */
+public class TestDecodeFunction extends AbstractFunctionStringTestBase {
+
+    @Test
+    public void testDecodeFunction() throws Exception {
+        String transformSql = "select decode(string1,string2) from source";
+        TransformConfig config = new TransformConfig(transformSql);
+        TransformProcessor<String, String> processor = TransformProcessor
+                .create(config, SourceDecoderFactory.createCsvDecoder(csvSource),
+                        SinkEncoderFactory.createKvEncoder(kvSink));
+
+        // case1: decode('72 101 108 108 111','UTF-8')
+        List<String> output1 = processor.transform("72 101 108 108 111|UTF-8|banana|cloud|1", new HashMap<>());
+        Assert.assertEquals(1, output1.size());
+        Assert.assertEquals("result=Hello", output1.get(0));
+
+        // case2: decode('72 101 108 108 111','US-ASCII')
+        List<String> output2 = processor.transform("72 101 108 108 111|US-ASCII|banana|cloud|1", new HashMap<>());
+        Assert.assertEquals(1, output2.size());
+        Assert.assertEquals("result=Hello", output2.get(0));
+
+        // case3: decode('72 101 108 108 111','ISO-8859-1')
+        List<String> output3 = processor.transform("72 101 108 108 111|ISO-8859-1|banana|cloud|1", new HashMap<>());
+        Assert.assertEquals(1, output3.size());
+        Assert.assertEquals("result=Hello", output3.get(0));
+
+        // case4: decode('0 72 0 101 0 108 0 108 0 111','UTF-16BE')
+        List<String> output4 =
+                processor.transform("0 72 0 101 0 108 0 108 0 111|UTF-16BE|banana|cloud|1", new HashMap<>());
+        Assert.assertEquals(1, output4.size());
+        Assert.assertEquals("result=Hello", output4.get(0));
+
+        // case5: decode('72 0 101 0 108 0 108 0 111 0','UTf-16LE') -- charset name in mixed case
+        List<String> output5 =
+                processor.transform("72 0 101 0 108 0 108 0 111 0|UTf-16LE|banana|cloud|1", new HashMap<>());
+        Assert.assertEquals(1, output5.size());
+        Assert.assertEquals("result=Hello", output5.get(0));
+
+        // case6: decode('-2 -1 0 72 0 101 0 108 0 108 0 111','UtF-16') -- charset name in mixed case
+        List<String> output6 =
+                processor.transform("-2 -1 0 72 0 101 0 108 0 108 0 111|UtF-16|banana|cloud|1", new HashMap<>());
+        Assert.assertEquals(1, output6.size());
+        Assert.assertEquals("result=Hello", output6.get(0));
+
+        // case7: decode('-2 -1 0 72 0 101 0 108 0 108 0 111','UTF-16--') -- unsupported charset gives an empty result
+        List<String> output7 =
+                processor.transform("-2 -1 0 72 0 101 0 108 0 108 0 111|UTF-16--|banana|cloud|1", new HashMap<>());
+        Assert.assertEquals(1, output7.size());
+        Assert.assertEquals("result=", output7.get(0));
+    }
+}
diff --git a/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestEncodeFunction.java b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestEncodeFunction.java
new file mode 100644
index 00000000000..73ff2f48768
--- /dev/null
+++ b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestEncodeFunction.java
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.inlong.sdk.transform.process.function.string;
+
+import org.apache.inlong.sdk.transform.decode.SourceDecoderFactory;
+import org.apache.inlong.sdk.transform.encode.SinkEncoderFactory;
+import org.apache.inlong.sdk.transform.pojo.TransformConfig;
+import org.apache.inlong.sdk.transform.process.TransformProcessor;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.HashMap;
+import java.util.List;
+
+/**
+ * Tests encode(string, charset) through the CSV-to-KV transform pipeline.
+ * Assertions use JUnit's (expected, actual) order so failure messages read correctly.
+ */
+public class TestEncodeFunction extends AbstractFunctionStringTestBase {
+
+    @Test
+    public void testEncodeFunction() throws Exception {
+        String transformSql = "select encode(string1,string2) from source";
+        TransformConfig config = new TransformConfig(transformSql);
+        TransformProcessor<String, String> processor = TransformProcessor
+                .create(config, SourceDecoderFactory.createCsvDecoder(csvSource),
+                        SinkEncoderFactory.createKvEncoder(kvSink));
+
+        // case1: encode('Hello','UTF-8')
+        List<String> output1 = processor.transform("Hello|UTF-8|banana|cloud|1", new HashMap<>());
+        Assert.assertEquals(1, output1.size());
+        Assert.assertEquals("result=72 101 108 108 111", output1.get(0));
+
+        // case2: encode('Hello','US-ASCII')
+        List<String> output2 = processor.transform("Hello|US-ASCII|banana|cloud|1", new HashMap<>());
+        Assert.assertEquals(1, output2.size());
+        Assert.assertEquals("result=72 101 108 108 111", output2.get(0));
+
+        // case3: encode('Hello','ISO-8859-1')
+        List<String> output3 = processor.transform("Hello|ISO-8859-1|banana|cloud|1", new HashMap<>());
+        Assert.assertEquals(1, output3.size());
+        Assert.assertEquals("result=72 101 108 108 111", output3.get(0));
+
+        // case4: encode('Hello','UTF-16BE')
+        List<String> output4 = processor.transform("Hello|UTF-16BE|banana|cloud|1", new HashMap<>());
+        Assert.assertEquals(1, output4.size());
+        Assert.assertEquals("result=0 72 0 101 0 108 0 108 0 111", output4.get(0));
+
+        // case5: encode('Hello','UTf-16LE') -- charset name in mixed case
+        List<String> output5 = processor.transform("Hello|UTf-16LE|banana|cloud|1", new HashMap<>());
+        Assert.assertEquals(1, output5.size());
+        Assert.assertEquals("result=72 0 101 0 108 0 108 0 111 0", output5.get(0));
+
+        // case6: encode('Hello','UtF-16') -- charset name in mixed case; output starts with the byte-order mark
+        List<String> output6 = processor.transform("Hello|UtF-16|banana|cloud|1", new HashMap<>());
+        Assert.assertEquals(1, output6.size());
+        Assert.assertEquals("result=-2 -1 0 72 0 101 0 108 0 108 0 111", output6.get(0));
+
+        // case7: encode('Hello','UTF-16--') -- unsupported charset gives an empty result
+        List<String> output7 = processor.transform("Hello|UTF-16--|banana|cloud|1", new HashMap<>());
+        Assert.assertEquals(1, output7.size());
+        Assert.assertEquals("result=", output7.get(0));
+    }
+}