Skip to content

Commit

Permalink
[INLONG-11237][SDK] Transform SQL supports CHAR_LENGTH function (#11275)
Browse files Browse the repository at this point in the history
Co-authored-by: ZKpLo <[email protected]>
  • Loading branch information
Zkplo and ZKpLo authored Oct 10, 2024
1 parent 80125f0 commit 4ba289d
Show file tree
Hide file tree
Showing 5 changed files with 187 additions and 24 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.inlong.sdk.transform.process.function;

import org.apache.inlong.sdk.transform.decode.SourceData;
import org.apache.inlong.sdk.transform.process.Context;
import org.apache.inlong.sdk.transform.process.operator.OperatorTools;
import org.apache.inlong.sdk.transform.process.parser.ValueParser;

import net.sf.jsqlparser.expression.Expression;
import net.sf.jsqlparser.expression.Function;

import java.util.List;

/**
 * CharLengthFunction
 * description: char_length(string)
 * - return the character length of the string, counted in Unicode code points
 * - return NULL if the string is NULL
 */
@TransformFunction(names = {"char_length"})
public class CharLengthFunction implements ValueParser {

    /** Parser that evaluates the string argument of char_length. */
    private final ValueParser stringParser;

    /**
     * Builds the parser for the first parameter of the SQL function call.
     *
     * @param expr the parsed char_length(...) call; its first parameter is used as the string expression
     */
    public CharLengthFunction(Function expr) {
        List<Expression> expressions = expr.getParameters().getExpressions();
        stringParser = OperatorTools.buildParser(expressions.get(0));
    }

    /**
     * Evaluates the string argument for the given row and returns its length in characters.
     *
     * @return the number of Unicode code points in the string, or null when the argument evaluates to null
     */
    @Override
    public Object parse(SourceData sourceData, int rowIndex, Context context) {
        Object stringObject = stringParser.parse(sourceData, rowIndex, context);
        if (stringObject == null) {
            return null;
        }
        String str = OperatorTools.parseString(stringObject);
        // String.length() counts UTF-16 code units, so a supplementary character (e.g. an emoji)
        // would be reported as 2. Counting code points makes every character contribute exactly 1.
        return str.codePointCount(0, str.length());
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -22,21 +22,31 @@
import org.apache.inlong.sdk.transform.process.operator.OperatorTools;
import org.apache.inlong.sdk.transform.process.parser.ValueParser;

import net.sf.jsqlparser.expression.Expression;
import net.sf.jsqlparser.expression.Function;

import java.nio.charset.Charset;
import java.util.List;

/**
* LengthFunction
* description: length(string)
* - return the length of the string
* description: length(string,[charsetName])
* - return the byte length of the string
* - return NULL if the string is NULL
*/
@TransformFunction(names = {"length"})
public class LengthFunction implements ValueParser {

private final ValueParser stringParser;
private ValueParser charSetNameParser;
private final Charset DEFAULT_CHARSET = Charset.defaultCharset();

public LengthFunction(Function expr) {
stringParser = OperatorTools.buildParser(expr.getParameters().getExpressions().get(0));
List<Expression> expressions = expr.getParameters().getExpressions();
stringParser = OperatorTools.buildParser(expressions.get(0));
if (expressions.size() > 1) {
charSetNameParser = OperatorTools.buildParser(expressions.get(1));
}
}

@Override
Expand All @@ -45,6 +55,12 @@ public Object parse(SourceData sourceData, int rowIndex, Context context) {
if (stringObject == null) {
return null;
}
return OperatorTools.parseString(stringObject).length();
Charset charset = DEFAULT_CHARSET;
if (charSetNameParser != null) {
charset = Charset.forName(OperatorTools.parseString(
charSetNameParser.parse(sourceData, rowIndex, context)));
}
String str = OperatorTools.parseString(stringObject);
return str.getBytes(charset).length;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.inlong.sdk.transform.process.function.string;

import org.apache.inlong.sdk.transform.decode.SourceDecoderFactory;
import org.apache.inlong.sdk.transform.encode.SinkEncoderFactory;
import org.apache.inlong.sdk.transform.pojo.TransformConfig;
import org.apache.inlong.sdk.transform.process.TransformProcessor;

import org.junit.Assert;
import org.junit.Test;

import java.util.HashMap;
import java.util.List;

public class TestCharLengthFunction extends AbstractFunctionStringTestBase {

    /**
     * Runs char_length through the transform pipeline for an ASCII value, a two-character
     * CJK value, and the column name xxd, for which the test expects an empty result.
     */
    @Test
    public void testCharLengthFunction() throws Exception {
        TransformProcessor<String, String> columnProcessor =
                newCharLengthProcessor("select char_length(string1) from source");

        // case1: char_length('hello world')
        List<String> asciiResult = columnProcessor.transform("hello world|", new HashMap<>());
        Assert.assertEquals(1, asciiResult.size());
        Assert.assertEquals("result=11", asciiResult.get(0));

        // case2: char_length('应龙') - two CJK characters
        List<String> cjkResult = columnProcessor.transform("应龙|", new HashMap<>());
        Assert.assertEquals(1, cjkResult.size());
        Assert.assertEquals("result=2", cjkResult.get(0));

        TransformProcessor<String, String> nullProcessor =
                newCharLengthProcessor("select char_length(xxd) from source");

        // case3: char_length(null)
        List<String> nullResult = nullProcessor.transform("hello world|apple|cloud|2|1|3", new HashMap<>());
        Assert.assertEquals(1, nullResult.size());
        Assert.assertEquals("result=", nullResult.get(0));
    }

    /**
     * Creates a processor for the given SQL using the CSV source decoder and KV sink encoder
     * built from the csvSource and kvSink fixtures.
     */
    private TransformProcessor<String, String> newCharLengthProcessor(String transformSql) throws Exception {
        TransformConfig config = new TransformConfig(transformSql);
        return TransformProcessor.create(
                config,
                SourceDecoderFactory.createCsvDecoder(csvSource),
                SinkEncoderFactory.createKvEncoder(kvSink));
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -32,52 +32,52 @@ public class TestCompressFunction extends AbstractFunctionStringTestBase {

@Test
public void testCompressFunction() throws Exception {
String transformSql = "select length(compress(replicate(string1,100))) from source";
String transformSql = "select length(compress(replicate(string1,100)),'ISO_8859_1') from source";
TransformConfig config = new TransformConfig(transformSql);
TransformProcessor<String, String> processor1 = TransformProcessor
.create(config, SourceDecoderFactory.createCsvDecoder(csvSource),
SinkEncoderFactory.createKvEncoder(kvSink));
// case1: length(compress(replicate(string1,100)))
// case1: length(compress(replicate(string1,100)),'ISO_8859_1')
List<String> output1 = processor1.transform("abcdefghijk|apple|cloud|2|1|3", new HashMap<>());
Assert.assertEquals(1, output1.size());
Assert.assertEquals("result=33", output1.get(0));

transformSql = "select length(compress(string1)) from source";
transformSql = "select length(compress(string1),'ISO_8859_1') from source";
config = new TransformConfig(transformSql);
processor1 = TransformProcessor
.create(config, SourceDecoderFactory.createCsvDecoder(csvSource),
SinkEncoderFactory.createKvEncoder(kvSink));
// case2: length(compress(''))
// case2: length(compress(''),'ISO_8859_1')
output1 = processor1.transform("|apple|cloud|2|1|3", new HashMap<>());
Assert.assertEquals(1, output1.size());
Assert.assertEquals("result=0", output1.get(0));

transformSql = "select length(compress(xxd)) from source";
transformSql = "select length(compress(xxd),'ISO_8859_1') from source";
config = new TransformConfig(transformSql);
processor1 = TransformProcessor
.create(config, SourceDecoderFactory.createCsvDecoder(csvSource),
SinkEncoderFactory.createKvEncoder(kvSink));
// case3: length(compress(null))
// case3: length(compress(null),'ISO_8859_1')
output1 = processor1.transform("hello world|apple|cloud|2|1|3", new HashMap<>());
Assert.assertEquals(1, output1.size());
Assert.assertEquals("result=", output1.get(0));

transformSql = "select length(compress(string1,string2)) from source";
transformSql = "select length(compress(string1,string2),'ISO_8859_1') from source";
config = new TransformConfig(transformSql);
processor1 = TransformProcessor
.create(config, SourceDecoderFactory.createCsvDecoder(csvSource),
SinkEncoderFactory.createKvEncoder(kvSink));
// case4: length(compress('hello world','Gzip'))
// case4: length(compress('hello world','Gzip'),'ISO_8859_1')
output1 = processor1.transform("hello world|Gzip|cloud|2|1|3", new HashMap<>());
Assert.assertEquals(1, output1.size());
Assert.assertEquals("result=35", output1.get(0));

// case5: length(compress('hello world','zip'))
// case5: length(compress('hello world','zip'),'ISO_8859_1')
output1 = processor1.transform("hello world|zip|cloud|2|1|3", new HashMap<>());
Assert.assertEquals(1, output1.size());
Assert.assertEquals("result=131", output1.get(0));

// case5: length(compress('hello world','undefinedType'))
// case5: length(compress('hello world','undefinedType'),'ISO_8859_1')
output1 = processor1.transform("hello world|undefinedType|cloud|2|1|3", new HashMap<>());
Assert.assertEquals(1, output1.size());
Assert.assertEquals("result=", output1.get(0));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,24 +32,48 @@ public class TestLengthFunction extends AbstractFunctionStringTestBase {

@Test
public void testLengthFunction() throws Exception {
String transformSql = "select length(string1) from source";
TransformConfig config = new TransformConfig(transformSql);
TransformProcessor<String, String> processor1 = TransformProcessor
String transformSql = null, data = null;
TransformConfig config = null;
TransformProcessor<String, String> processor = null;
List<String> output = null;

transformSql = "select length(string1) from source";
config = new TransformConfig(transformSql);
processor = TransformProcessor
.create(config, SourceDecoderFactory.createCsvDecoder(csvSource),
SinkEncoderFactory.createKvEncoder(kvSink));
// case1: length('hello world')
List<String> output1 = processor1.transform("hello world|apple|cloud|2|1|3", new HashMap<>());
Assert.assertEquals(1, output1.size());
Assert.assertEquals("result=11", output1.get(0));
data = "hello world|apple|cloud|2|1|3";
output = processor.transform(data, new HashMap<>());
Assert.assertEquals(1, output.size());
Assert.assertEquals("result=11", output.get(0));

transformSql = "select length(xxd) from source";
config = new TransformConfig(transformSql);
processor1 = TransformProcessor
processor = TransformProcessor
.create(config, SourceDecoderFactory.createCsvDecoder(csvSource),
SinkEncoderFactory.createKvEncoder(kvSink));
// case2: length(null)
output1 = processor1.transform("hello world|apple|cloud|2|1|3", new HashMap<>());
Assert.assertEquals(1, output1.size());
Assert.assertEquals("result=", output1.get(0));
data = "hello world|apple|cloud|2|1|3";
output = processor.transform(data, new HashMap<>());
Assert.assertEquals(1, output.size());
Assert.assertEquals("result=", output.get(0));

transformSql = "select length(string1,string2) from source";
config = new TransformConfig(transformSql);
processor = TransformProcessor
.create(config, SourceDecoderFactory.createCsvDecoder(csvSource),
SinkEncoderFactory.createKvEncoder(kvSink));
// case3: length(应龙, utf-8)
data = "应龙|utf-8|cloud|2|1|3";
output = processor.transform(data, new HashMap<>());
Assert.assertEquals(1, output.size());
Assert.assertEquals("result=6", output.get(0));

// case4: length(应龙, gbk)
data = "应龙|gbk|cloud|2|1|3";
output = processor.transform(data, new HashMap<>());
Assert.assertEquals(1, output.size());
Assert.assertEquals("result=4", output.get(0));
}
}

0 comments on commit 4ba289d

Please sign in to comment.