From 91582d2180ab0053d1e40e8a6ba713b5f1eb6905 Mon Sep 17 00:00:00 2001
From: emptyOVO
Date: Sat, 7 Sep 2024 00:14:08 +0800
Subject: [PATCH] [INLONG-11037][SDK] Transform support ENCODE() and DECODE() function

---
 .../process/function/DecodeFunction.java      | 101 ++++++++++++++++++++++++
 .../process/function/EncodeFunction.java      | 100 ++++++++++++++++++++++++
 .../function/string/TestDecodeFunction.java   |  90 +++++++++++++++++++++
 .../function/string/TestEncodeFunction.java   |  81 +++++++++++++++++++
 4 files changed, 372 insertions(+)
 create mode 100644 inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/DecodeFunction.java
 create mode 100644 inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/EncodeFunction.java
 create mode 100644 inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestDecodeFunction.java
 create mode 100644 inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestEncodeFunction.java

diff --git a/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/DecodeFunction.java b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/DecodeFunction.java
new file mode 100644
index 00000000000..ad3ab2e0a20
--- /dev/null
+++ b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/DecodeFunction.java
@@ -0,0 +1,101 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.inlong.sdk.transform.process.function;
+
+import org.apache.inlong.sdk.transform.decode.SourceData;
+import org.apache.inlong.sdk.transform.process.Context;
+import org.apache.inlong.sdk.transform.process.operator.OperatorTools;
+import org.apache.inlong.sdk.transform.process.parser.ValueParser;
+
+import net.sf.jsqlparser.expression.Expression;
+import net.sf.jsqlparser.expression.Function;
+
+import java.nio.charset.Charset;
+import java.nio.charset.IllegalCharsetNameException;
+import java.util.List;
+
+/**
+ * DecodeFunction
+ * description: DECODE(binary, charset) - rebuilds a string from a whitespace-separated list of decimal
+ * byte values (the format produced by ENCODE) using the named character set. An empty string is
+ * returned when an argument is missing or empty, the charset is unknown, or a byte value is malformed.
+ */
+@TransformFunction(names = {"decode"})
+public class DecodeFunction implements ValueParser {
+
+    private ValueParser binaryParser;
+
+    private ValueParser characterSetParser;
+
+    public DecodeFunction(Function expr) {
+        // guard against a missing parameter list before dereferencing it
+        if (expr.getParameters() == null) {
+            return;
+        }
+        List<Expression> expressions = expr.getParameters().getExpressions();
+        if (expressions != null && expressions.size() == 2) {
+            binaryParser = OperatorTools.buildParser(expressions.get(0));
+            characterSetParser = OperatorTools.buildParser(expressions.get(1));
+        }
+    }
+
+    @Override
+    public Object parse(SourceData sourceData, int rowIndex, Context context) {
+        // the parsers stay unset unless exactly two arguments were supplied
+        if (binaryParser == null || characterSetParser == null) {
+            return "";
+        }
+        String binaryString = OperatorTools.parseString(binaryParser.parse(sourceData, rowIndex, context));
+        // charset lookup is case-insensitive, so the name is passed through without case conversion
+        String charsetName = OperatorTools.parseString(characterSetParser.parse(sourceData, rowIndex, context));
+        return decode(binaryString, charsetName);
+    }
+
+    /**
+     * Converts decimal byte values into text.
+     *
+     * @param binaryString whitespace-separated decimal values; each one is narrowed to a byte
+     * @param charsetName name of the charset used to interpret the bytes
+     * @return the decoded text, or an empty string if the input cannot be decoded
+     */
+    private String decode(String binaryString, String charsetName) {
+        if (binaryString == null || charsetName == null) {
+            return "";
+        }
+        String trimmedBinary = binaryString.trim();
+        String trimmedCharset = charsetName.trim();
+        if (trimmedBinary.isEmpty() || trimmedCharset.isEmpty()) {
+            return "";
+        }
+        try {
+            if (!Charset.isSupported(trimmedCharset)) {
+                return "";
+            }
+            String[] byteValues = trimmedBinary.split("\\s+");
+            byte[] byteArray = new byte[byteValues.length];
+            for (int i = 0; i < byteValues.length; i++) {
+                byteArray[i] = (byte) Integer.parseInt(byteValues[i]);
+            }
+            return new String(byteArray, Charset.forName(trimmedCharset));
+        } catch (IllegalCharsetNameException | NumberFormatException ignored) {
+            // a syntactically illegal charset name or a non-numeric byte value is treated like any
+            // other undecodable input rather than failing the whole row
+            return "";
+        }
+    }
+}
diff --git a/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/EncodeFunction.java b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/EncodeFunction.java
new file mode 100644
index 00000000000..021f5950794
--- /dev/null
+++ b/inlong-sdk/transform-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/EncodeFunction.java
@@ -0,0 +1,100 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.inlong.sdk.transform.process.function;
+
+import org.apache.inlong.sdk.transform.decode.SourceData;
+import org.apache.inlong.sdk.transform.process.Context;
+import org.apache.inlong.sdk.transform.process.operator.OperatorTools;
+import org.apache.inlong.sdk.transform.process.parser.ValueParser;
+
+import net.sf.jsqlparser.expression.Expression;
+import net.sf.jsqlparser.expression.Function;
+
+import java.nio.charset.Charset;
+import java.nio.charset.IllegalCharsetNameException;
+import java.util.List;
+
+/**
+ * EncodeFunction
+ * description: ENCODE(string, charset) - encodes the string with the named character set and returns
+ * the resulting bytes as signed decimal values separated by single spaces. An empty string is returned
+ * when an argument is missing or empty, or the charset is unknown.
+ */
+@TransformFunction(names = {"encode"})
+public class EncodeFunction implements ValueParser {
+
+    private ValueParser stringParser;
+
+    private ValueParser characterSetParser;
+
+    public EncodeFunction(Function expr) {
+        // guard against a missing parameter list before dereferencing it
+        if (expr.getParameters() == null) {
+            return;
+        }
+        List<Expression> expressions = expr.getParameters().getExpressions();
+        if (expressions != null && expressions.size() == 2) {
+            stringParser = OperatorTools.buildParser(expressions.get(0));
+            characterSetParser = OperatorTools.buildParser(expressions.get(1));
+        }
+    }
+
+    @Override
+    public Object parse(SourceData sourceData, int rowIndex, Context context) {
+        // the parsers stay unset unless exactly two arguments were supplied
+        if (stringParser == null || characterSetParser == null) {
+            return "";
+        }
+        String stringValue = OperatorTools.parseString(stringParser.parse(sourceData, rowIndex, context));
+        // charset lookup is case-insensitive, so the name is passed through without case conversion
+        String charsetName = OperatorTools.parseString(characterSetParser.parse(sourceData, rowIndex, context));
+        StringBuilder res = new StringBuilder();
+        for (byte encodedByte : encode(stringValue, charsetName)) {
+            if (res.length() > 0) {
+                res.append(' ');
+            }
+            res.append((int) encodedByte);
+        }
+        return res.toString();
+    }
+
+    /**
+     * Encodes text with the named charset.
+     *
+     * @param stringValue text to encode
+     * @param charsetName name of the charset used to produce the bytes
+     * @return the encoded bytes; empty (never null) if the input cannot be encoded
+     */
+    private byte[] encode(String stringValue, String charsetName) {
+        if (stringValue == null || stringValue.isEmpty() || charsetName == null) {
+            return new byte[0];
+        }
+        String trimmedCharset = charsetName.trim();
+        if (trimmedCharset.isEmpty()) {
+            return new byte[0];
+        }
+        try {
+            if (Charset.isSupported(trimmedCharset)) {
+                return stringValue.getBytes(Charset.forName(trimmedCharset));
+            }
+        } catch (IllegalCharsetNameException ignored) {
+            // a syntactically illegal charset name is treated like an unknown charset
+        }
+        return new byte[0];
+    }
+}
diff --git a/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestDecodeFunction.java b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestDecodeFunction.java
new file mode 100644
index 00000000000..4368334b790
--- /dev/null
+++ b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestDecodeFunction.java
@@ -0,0 +1,90 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.inlong.sdk.transform.process.function.string;
+
+import org.apache.inlong.sdk.transform.decode.SourceDecoderFactory;
+import org.apache.inlong.sdk.transform.encode.SinkEncoderFactory;
+import org.apache.inlong.sdk.transform.pojo.TransformConfig;
+import org.apache.inlong.sdk.transform.process.TransformProcessor;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.HashMap;
+import java.util.List;
+
+public class TestDecodeFunction extends AbstractFunctionStringTestBase {
+
+    @Test
+    public void testDecodeFunction() throws Exception {
+        String transformSql = "select decode(string1,string2) from source";
+        TransformConfig config = new TransformConfig(transformSql);
+        TransformProcessor<String, String> processor = TransformProcessor
+                .create(config, SourceDecoderFactory.createCsvDecoder(csvSource),
+                        SinkEncoderFactory.createKvEncoder(kvSink));
+
+        // case1: decode('72 101 108 108 111','UTF-8')
+        List<String> output1 = processor.transform("72 101 108 108 111|UTF-8|banana|cloud|1", new HashMap<>());
+        Assert.assertEquals(1, output1.size());
+        Assert.assertEquals("result=Hello", output1.get(0));
+
+        // case2: decode('72 101 108 108 111','US-ASCII')
+        List<String> output2 = processor.transform("72 101 108 108 111|US-ASCII|banana|cloud|1", new HashMap<>());
+        Assert.assertEquals(1, output2.size());
+        Assert.assertEquals("result=Hello", output2.get(0));
+
+        // case3: decode('72 101 108 108 111','ISO-8859-1')
+        List<String> output3 = processor.transform("72 101 108 108 111|ISO-8859-1|banana|cloud|1", new HashMap<>());
+        Assert.assertEquals(1, output3.size());
+        Assert.assertEquals("result=Hello", output3.get(0));
+
+        // case4: decode('0 72 0 101 0 108 0 108 0 111','UTF-16BE')
+        List<String> output4 =
+                processor.transform("0 72 0 101 0 108 0 108 0 111|UTF-16BE|banana|cloud|1", new HashMap<>());
+        Assert.assertEquals(1, output4.size());
+        Assert.assertEquals("result=Hello", output4.get(0));
+
+        // case5: decode('72 0 101 0 108 0 108 0 111 0','UTF-16LE')
+        List<String> output5 =
+                processor.transform("72 0 101 0 108 0 108 0 111 0|UTf-16LE|banana|cloud|1", new HashMap<>());
+        Assert.assertEquals(1, output5.size());
+        Assert.assertEquals("result=Hello", output5.get(0));
+
+        // case6: decode('-2 -1 0 72 0 101 0 108 0 108 0 111','UTF-16')
+        List<String> output6 =
+                processor.transform("-2 -1 0 72 0 101 0 108 0 108 0 111|UtF-16|banana|cloud|1", new HashMap<>());
+        Assert.assertEquals(1, output6.size());
+        Assert.assertEquals("result=Hello", output6.get(0));
+
+        // case7: decode('-2 -1 0 72 0 101 0 108 0 108 0 111','UTF-16--')
+        List<String> output7 =
+                processor.transform("-2 -1 0 72 0 101 0 108 0 108 0 111|UTF-16--|banana|cloud|1", new HashMap<>());
+        Assert.assertEquals(1, output7.size());
+        Assert.assertEquals("result=", output7.get(0));
+
+        // case8: decode('72 101 108 108 111','UTF 8') - syntactically illegal charset name
+        List<String> output8 = processor.transform("72 101 108 108 111|UTF 8|banana|cloud|1", new HashMap<>());
+        Assert.assertEquals(1, output8.size());
+        Assert.assertEquals("result=", output8.get(0));
+
+        // case9: decode('72 abc 108','UTF-8') - non-numeric byte value
+        List<String> output9 = processor.transform("72 abc 108|UTF-8|banana|cloud|1", new HashMap<>());
+        Assert.assertEquals(1, output9.size());
+        Assert.assertEquals("result=", output9.get(0));
+    }
+}
diff --git a/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestEncodeFunction.java b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestEncodeFunction.java
new file mode 100644
index 00000000000..73ff2f48768
--- /dev/null
+++ b/inlong-sdk/transform-sdk/src/test/java/org/apache/inlong/sdk/transform/process/function/string/TestEncodeFunction.java
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.inlong.sdk.transform.process.function.string;
+
+import org.apache.inlong.sdk.transform.decode.SourceDecoderFactory;
+import org.apache.inlong.sdk.transform.encode.SinkEncoderFactory;
+import org.apache.inlong.sdk.transform.pojo.TransformConfig;
+import org.apache.inlong.sdk.transform.process.TransformProcessor;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.HashMap;
+import java.util.List;
+
+public class TestEncodeFunction extends AbstractFunctionStringTestBase {
+
+    @Test
+    public void testEncodeFunction() throws Exception {
+        String transformSql = "select encode(string1,string2) from source";
+        TransformConfig config = new TransformConfig(transformSql);
+        TransformProcessor<String, String> processor = TransformProcessor
+                .create(config, SourceDecoderFactory.createCsvDecoder(csvSource),
+                        SinkEncoderFactory.createKvEncoder(kvSink));
+
+        // case1: encode('Hello','UTF-8')
+        List<String> output1 = processor.transform("Hello|UTF-8|banana|cloud|1", new HashMap<>());
+        Assert.assertEquals(1, output1.size());
+        Assert.assertEquals("result=72 101 108 108 111", output1.get(0));
+
+        // case2: encode('Hello','US-ASCII')
+        List<String> output2 = processor.transform("Hello|US-ASCII|banana|cloud|1", new HashMap<>());
+        Assert.assertEquals(1, output2.size());
+        Assert.assertEquals("result=72 101 108 108 111", output2.get(0));
+
+        // case3: encode('Hello','ISO-8859-1')
+        List<String> output3 = processor.transform("Hello|ISO-8859-1|banana|cloud|1", new HashMap<>());
+        Assert.assertEquals(1, output3.size());
+        Assert.assertEquals("result=72 101 108 108 111", output3.get(0));
+
+        // case4: encode('Hello','UTF-16BE')
+        List<String> output4 = processor.transform("Hello|UTF-16BE|banana|cloud|1", new HashMap<>());
+        Assert.assertEquals(1, output4.size());
+        Assert.assertEquals("result=0 72 0 101 0 108 0 108 0 111", output4.get(0));
+
+        // case5: encode('Hello','UTF-16LE')
+        List<String> output5 = processor.transform("Hello|UTf-16LE|banana|cloud|1", new HashMap<>());
+        Assert.assertEquals(1, output5.size());
+        Assert.assertEquals("result=72 0 101 0 108 0 108 0 111 0", output5.get(0));
+
+        // case6: encode('Hello','UTF-16')
+        List<String> output6 = processor.transform("Hello|UtF-16|banana|cloud|1", new HashMap<>());
+        Assert.assertEquals(1, output6.size());
+        Assert.assertEquals("result=-2 -1 0 72 0 101 0 108 0 108 0 111", output6.get(0));
+
+        // case7: encode('Hello','UTF-16--')
+        List<String> output7 = processor.transform("Hello|UTF-16--|banana|cloud|1", new HashMap<>());
+        Assert.assertEquals(1, output7.size());
+        Assert.assertEquals("result=", output7.get(0));
+
+        // case8: encode('Hello','UTF 8') - syntactically illegal charset name
+        List<String> output8 = processor.transform("Hello|UTF 8|banana|cloud|1", new HashMap<>());
+        Assert.assertEquals(1, output8.size());
+        Assert.assertEquals("result=", output8.get(0));
+    }
+}