Skip to content

Commit

Permalink
[INLONG-10832][SDK] Transform SQL support Translate function (apache#…
Browse files Browse the repository at this point in the history
…10863)

Co-authored-by: AloysZhang <[email protected]>
  • Loading branch information
lianghuan-xatu and aloyszhang authored Aug 28, 2024
1 parent 1394dcd commit 04971c3
Show file tree
Hide file tree
Showing 3 changed files with 139 additions and 1 deletion.
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.inlong.sdk.transform.process.function;

import org.apache.inlong.sdk.transform.decode.SourceData;
import org.apache.inlong.sdk.transform.process.Context;
import org.apache.inlong.sdk.transform.process.operator.OperatorTools;
import org.apache.inlong.sdk.transform.process.parser.ValueParser;

import net.sf.jsqlparser.expression.Expression;
import net.sf.jsqlparser.expression.Function;

import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * TranslateFunction
 * translate(expression, find_chars, replace_chars)
 * Description:
 * For a given expression, replaces all occurrences of specified characters with specified substitutes.
 * Existing characters are mapped to replacement characters by their positions in the find_chars and
 * replace_chars arguments. If more characters are specified in the find_chars argument than in the
 * replace_chars argument, the extra characters from the find_chars argument are omitted in the return value.
 * If a character occurs more than once in find_chars, the mapping of its first occurrence is used.
 *
 * Translate function is similar to the replace function and the regexp_replace function,
 * except that replace substitutes one entire string with another string and regexp_replace lets you
 * search a string for a regular expression pattern, while translate makes multiple single-character
 * substitutions.
 *
 * Arguments:
 * expression: The expression to be translated.
 * find_chars: A string containing the characters to be replaced.
 * replace_chars: A string containing the characters to substitute.
 *
 * Null handling (as implemented here):
 * - a null expression yields an empty string;
 * - a null find_chars or a null replace_chars leaves the expression unchanged.
 *
 * Matching is done per Java {@code char} (UTF-16 code unit).
 *
 * examples:
 * case1: translate(email, '@', '.') -> original_expression: harry@inlong.com target_expression: harry.inlong.com
 * case2: translate('hello WorD', 'WD', 'wd') -> original_expression: hello WorD target_expression: hello word
 * case3: translate('a-b-c', '-', '') -> original_expression: a-b-c target_expression: abc
 */
public class TranslateFunction implements ValueParser {

    private final ValueParser originalStrParser;

    private final ValueParser findCharsParser;

    private final ValueParser replaceCharsParser;

    /**
     * Builds the parsers for the three arguments of the translate call.
     *
     * @param expr the parsed SQL function; its first three parameters are used as
     *             expression, find_chars and replace_chars, in that order
     */
    public TranslateFunction(Function expr) {
        List<Expression> expressions = expr.getParameters().getExpressions();
        originalStrParser = OperatorTools.buildParser(expressions.get(0));
        findCharsParser = OperatorTools.buildParser(expressions.get(1));
        replaceCharsParser = OperatorTools.buildParser(expressions.get(2));
    }

    /**
     * Evaluates the three arguments for the given row and applies the character translation.
     *
     * @return the translated string; an empty string when the expression evaluates to null
     */
    @Override
    public Object parse(SourceData sourceData, int rowIndex, Context context) {
        Object originalStrObject = originalStrParser.parse(sourceData, rowIndex, context);
        Object findCharsObject = findCharsParser.parse(sourceData, rowIndex, context);
        Object replaceCharsObject = replaceCharsParser.parse(sourceData, rowIndex, context);
        String originalStr = OperatorTools.parseString(originalStrObject);
        String findChars = OperatorTools.parseString(findCharsObject);
        String replaceChars = OperatorTools.parseString(replaceCharsObject);

        if (originalStr == null) {
            return "";
        }
        return translate(originalStr, findChars, replaceChars);
    }

    /**
     * Replaces every char of {@code originalStr} that occurs in {@code findChars} with the char at the
     * same position in {@code replaceChars}, or drops it when {@code replaceChars} is too short.
     */
    private static String translate(String originalStr, String findChars, String replaceChars) {
        // A missing argument means "nothing to translate": keep the input as it is.
        if (findChars == null || replaceChars == null || findChars.isEmpty()) {
            return originalStr;
        }
        final int replaceSize = replaceChars.length();
        // Position of the FIRST occurrence of each find char; putIfAbsent keeps the earliest mapping
        // when find_chars contains duplicates.
        Map<Character, Integer> firstIndexByChar = new HashMap<>();
        for (int i = 0, findSize = findChars.length(); i < findSize; i++) {
            firstIndexByChar.putIfAbsent(findChars.charAt(i), i);
        }
        // The builder is created lazily so that an input without any match is returned as-is.
        StringBuilder builder = null;
        for (int i = 0, size = originalStr.length(); i < size; i++) {
            char ch = originalStr.charAt(i);
            Integer index = firstIndexByChar.get(ch);
            if (index == null) {
                // Not a find char: copy through once we have started building a new string.
                if (builder != null) {
                    builder.append(ch);
                }
                continue;
            }
            if (builder == null) {
                builder = new StringBuilder(size);
                builder.append(originalStr, 0, i);
            }
            if (index < replaceSize) {
                builder.append(replaceChars.charAt(index));
            }
            // else: the find char has no counterpart in replace_chars, so it is omitted.
        }
        return builder == null ? originalStr : builder.toString();
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@
import org.apache.inlong.sdk.transform.process.function.ToBase64Function;
import org.apache.inlong.sdk.transform.process.function.ToDateFunction;
import org.apache.inlong.sdk.transform.process.function.ToTimestampFunction;
import org.apache.inlong.sdk.transform.process.function.TranslateFunction;
import org.apache.inlong.sdk.transform.process.function.TrimFunction;
import org.apache.inlong.sdk.transform.process.function.UnixTimestampFunction;
import org.apache.inlong.sdk.transform.process.function.UpperFunction;
Expand Down Expand Up @@ -111,7 +112,7 @@

/**
* OperatorTools
*
*
*/
public class OperatorTools {

Expand Down Expand Up @@ -179,6 +180,7 @@ public class OperatorTools {
functionMap.put("right", RightFunction::new);
functionMap.put("timestampadd", TimestampAddFunction::new);
functionMap.put("md5", Md5Function::new);
functionMap.put("translate", TranslateFunction::new);
}

public static ExpressionOperator buildOperator(Expression expr) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -442,4 +442,31 @@ public void testLeftFunction() throws Exception {
Assert.assertEquals(1, output1.size());
Assert.assertEquals("result=null", output1.get(0));
}

/**
 * Verifies the translate(expression, find_chars, replace_chars) SQL function, both with the
 * arguments taken from the source columns in natural order and in a reordered form.
 * NOTE(review): csvSource and kvSink are fixtures defined outside this method; the column
 * layout string1|string2|string3|... is assumed from the inputs used below.
 */
@Test
public void testTranslateFunction() throws Exception {
    String transformSql1 = "select translate(string1, string2, string3) from source";
    TransformConfig config1 = new TransformConfig(transformSql1);
    TransformProcessor<String, String> processor1 = TransformProcessor
            .create(config1, SourceDecoderFactory.createCsvDecoder(csvSource),
                    SinkEncoderFactory.createKvEncoder(kvSink));
    // case1: translate("hello word!", "el", "EL")
    List<String> output1 = processor1.transform("hello word!|el|EL|2|1|3", new HashMap<>());
    Assert.assertEquals(1, output1.size());
    Assert.assertEquals("result=hELLo word!", output1.get(0));

    String transformSql2 = "select translate(string3, string1, string2) from source";
    TransformConfig config2 = new TransformConfig(transformSql2);
    TransformProcessor<String, String> processor2 = TransformProcessor
            .create(config2, SourceDecoderFactory.createCsvDecoder(csvSource),
                    SinkEncoderFactory.createKvEncoder(kvSink));
    // case2: same translation as case1, but with the expression read from string3
    // and find/replace chars read from string1/string2
    List<String> output2 = processor2.transform("el|EL|hello word!|1|1|3", new HashMap<>());
    Assert.assertEquals(1, output2.size());
    Assert.assertEquals("result=hELLo word!", output2.get(0));
    // case3: translate("ApaCHe Inlong", "CH", "ch")
    List<String> output3 = processor2.transform("CH|ch|ApaCHe Inlong|2|1|9", new HashMap<>());
    Assert.assertEquals(1, output3.size());
    Assert.assertEquals("result=Apache Inlong", output3.get(0));
}

}

0 comments on commit 04971c3

Please sign in to comment.