forked from apache/inlong
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[INLONG-11060][SDK] Transform support REGEXP_...() related functions
- Loading branch information
Showing
14 changed files
with
1,075 additions
and
0 deletions.
There are no files selected for viewing
71 changes: 71 additions & 0 deletions
71
...k/src/main/java/org/apache/inlong/sdk/transform/process/function/RegexpCountFunction.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one or more | ||
* contributor license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright ownership. | ||
* The ASF licenses this file to You under the Apache License, Version 2.0 | ||
* (the "License"); you may not use this file except in compliance with | ||
* the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package org.apache.inlong.sdk.transform.process.function; | ||
|
||
import org.apache.inlong.sdk.transform.decode.SourceData; | ||
import org.apache.inlong.sdk.transform.process.Context; | ||
import org.apache.inlong.sdk.transform.process.operator.OperatorTools; | ||
import org.apache.inlong.sdk.transform.process.parser.ValueParser; | ||
|
||
import net.sf.jsqlparser.expression.Expression; | ||
import net.sf.jsqlparser.expression.Function; | ||
|
||
import java.util.List; | ||
import java.util.regex.Matcher; | ||
import java.util.regex.Pattern; | ||
|
||
/** | ||
* RegexpCountFunction | ||
* description: REGEXP_COUNT(str, regexp)--Returns the number of times str matches the regexp pattern. | ||
* regexp must be a Java regular expression. | ||
* Returns an INTEGER representation of the number of matches. | ||
* NULL if any of the arguments are NULL or regexp is invalid. | ||
*/ | ||
@TransformFunction(names = {"regexp_count"}) | ||
public class RegexpCountFunction implements ValueParser { | ||
|
||
private ValueParser inputStringParser; | ||
|
||
private ValueParser patternStringParser; | ||
|
||
public RegexpCountFunction(Function expr) { | ||
if (expr.getParameters() != null) { | ||
List<Expression> expressions = expr.getParameters().getExpressions(); | ||
if (expressions != null && expressions.size() >= 2) { | ||
inputStringParser = OperatorTools.buildParser(expressions.get(0)); | ||
patternStringParser = OperatorTools.buildParser(expressions.get(1)); | ||
} | ||
} | ||
} | ||
|
||
@Override | ||
public Object parse(SourceData sourceData, int rowIndex, Context context) { | ||
if (inputStringParser == null || patternStringParser == null) { | ||
return null; | ||
} | ||
String inputString = OperatorTools.parseString(inputStringParser.parse(sourceData, rowIndex, context)); | ||
String patternString = OperatorTools.parseString(patternStringParser.parse(sourceData, rowIndex, context)); | ||
Pattern pattern = Pattern.compile(patternString); | ||
Matcher matcher = pattern.matcher(inputString); | ||
int count = 0; | ||
while (matcher.find()) { | ||
count++; | ||
} | ||
return count; | ||
} | ||
} |
94 changes: 94 additions & 0 deletions
94
.../main/java/org/apache/inlong/sdk/transform/process/function/RegexpExtractAllFunction.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,94 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one or more | ||
* contributor license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright ownership. | ||
* The ASF licenses this file to You under the Apache License, Version 2.0 | ||
* (the "License"); you may not use this file except in compliance with | ||
* the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package org.apache.inlong.sdk.transform.process.function; | ||
|
||
import org.apache.inlong.sdk.transform.decode.SourceData; | ||
import org.apache.inlong.sdk.transform.process.Context; | ||
import org.apache.inlong.sdk.transform.process.operator.OperatorTools; | ||
import org.apache.inlong.sdk.transform.process.parser.ValueParser; | ||
|
||
import net.sf.jsqlparser.expression.Expression; | ||
import net.sf.jsqlparser.expression.Function; | ||
|
||
import java.util.ArrayList; | ||
import java.util.List; | ||
import java.util.regex.Matcher; | ||
import java.util.regex.Pattern; | ||
|
||
/** | ||
* RegexpExtractAllFunction | ||
* description: REGEXP_EXTRACT_ALL(str, regexp[, extractIndex])--Returns an ARRAY representation of all the matched substrings. | ||
* NULL if any of the arguments are NULL or invalid.Extracts all the substrings in str that match the regexp | ||
* expression and correspond to the regexp group extractIndex. regexp may contain multiple groups. extractIndex | ||
* indicates which regexp group to extract and starts from 1, also the default value if not specified. | ||
* 0 means matching the entire regular expression. | ||
* for example: REGEXP_EXTRACT_ALL("abc123def456ghi789", "(\\d+)", 0)--return [123, 456, 789] | ||
* REGEXP_EXTRACT_ALL("Name: John, Age: 25, Location: NY", "Name: (\\w+), Age: (\\d+), Location: (\\w+)", 1)--return [John] | ||
* REGEXP_EXTRACT_ALL("Name: John, Age: 25, Location: NY", "Name: (\\w+), Age: (\\d+), Location: (\\w+)", 0)--return [Name: John, Age: 25, Location: NY] | ||
*/ | ||
@TransformFunction(names = {"regexp_extract_all"}) | ||
public class RegexpExtractAllFunction implements ValueParser { | ||
|
||
private ValueParser inputStringParser; | ||
|
||
private ValueParser patternStringParser; | ||
|
||
private ValueParser indexIntegerParser; | ||
|
||
public RegexpExtractAllFunction(Function expr) { | ||
if (expr.getParameters() != null) { | ||
List<Expression> expressions = expr.getParameters().getExpressions(); | ||
if (expressions != null && expressions.size() >= 2) { | ||
inputStringParser = OperatorTools.buildParser(expressions.get(0)); | ||
patternStringParser = OperatorTools.buildParser(expressions.get(1)); | ||
if (expressions.size() >= 3) { | ||
indexIntegerParser = OperatorTools.buildParser(expressions.get(2)); | ||
} | ||
} | ||
} | ||
} | ||
|
||
@Override | ||
public Object parse(SourceData sourceData, int rowIndex, Context context) { | ||
if (inputStringParser == null || patternStringParser == null) { | ||
return null; | ||
} | ||
String inputString = OperatorTools.parseString(inputStringParser.parse(sourceData, rowIndex, context)); | ||
String patternString = OperatorTools.parseString(patternStringParser.parse(sourceData, rowIndex, context)); | ||
int index = 0; | ||
if (indexIntegerParser != null) { | ||
index = OperatorTools.parseBigDecimal(indexIntegerParser.parse(sourceData, rowIndex, context)).intValue(); | ||
} | ||
if (index < 0) { | ||
return null; | ||
} | ||
List<String> resultList = new ArrayList<>(); | ||
|
||
Pattern pattern = Pattern.compile(patternString); | ||
Matcher matcher = pattern.matcher(inputString); | ||
while (matcher.find()) { | ||
if (index <= matcher.groupCount()) { | ||
resultList.add(matcher.group(index)); | ||
} else { | ||
return null; | ||
} | ||
} | ||
|
||
return resultList.isEmpty() ? null : resultList; | ||
} | ||
} |
84 changes: 84 additions & 0 deletions
84
...src/main/java/org/apache/inlong/sdk/transform/process/function/RegexpExtractFunction.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one or more | ||
* contributor license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright ownership. | ||
* The ASF licenses this file to You under the Apache License, Version 2.0 | ||
* (the "License"); you may not use this file except in compliance with | ||
* the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package org.apache.inlong.sdk.transform.process.function; | ||
|
||
import org.apache.inlong.sdk.transform.decode.SourceData; | ||
import org.apache.inlong.sdk.transform.process.Context; | ||
import org.apache.inlong.sdk.transform.process.operator.OperatorTools; | ||
import org.apache.inlong.sdk.transform.process.parser.ValueParser; | ||
|
||
import net.sf.jsqlparser.expression.Expression; | ||
import net.sf.jsqlparser.expression.Function; | ||
|
||
import java.util.List; | ||
import java.util.regex.Matcher; | ||
import java.util.regex.Pattern; | ||
|
||
/** | ||
* RegexpExtractFunction | ||
* description: REGEXP_EXTRACT(string1, string2[, integer])--Returns a string from string1 which extracted with a specified | ||
* regular expression string2 and a regexp match group index integer.The regexp match group index starts | ||
* from 1 and 0 means matching the whole regexp. In addition, the regexp match group index should not exceed | ||
* the number of the defined groups. | ||
* for example: REGEXP_EXTRACT("abc123def", "(\\d+)", 1)--return 123 | ||
* REGEXP_EXTRACT("Name: John, Age: 25, Location: NY", "Name: (\\w+), Age: (\\d+), Location: (\\w+)", 2)--return 25 | ||
* REGEXP_EXTRACT("abc123def", "(\\d+)", 2)--return null | ||
* REGEXP_EXTRACT("abc123def", "abcdef", 1)--return null | ||
*/ | ||
@TransformFunction(names = {"regexp_extract"}) | ||
public class RegexpExtractFunction implements ValueParser { | ||
|
||
private ValueParser inputStringParser; | ||
|
||
private ValueParser patternStringParser; | ||
|
||
private ValueParser indexIntegerParser; | ||
|
||
public RegexpExtractFunction(Function expr) { | ||
if (expr.getParameters() != null) { | ||
List<Expression> expressions = expr.getParameters().getExpressions(); | ||
if (expressions != null && expressions.size() >= 3) { | ||
inputStringParser = OperatorTools.buildParser(expressions.get(0)); | ||
patternStringParser = OperatorTools.buildParser(expressions.get(1)); | ||
indexIntegerParser = OperatorTools.buildParser(expressions.get(2)); | ||
} | ||
} | ||
} | ||
|
||
@Override | ||
public Object parse(SourceData sourceData, int rowIndex, Context context) { | ||
if (inputStringParser == null || patternStringParser == null || indexIntegerParser == null) { | ||
return null; | ||
} | ||
String inputString = OperatorTools.parseString(inputStringParser.parse(sourceData, rowIndex, context)); | ||
String patternString = OperatorTools.parseString(patternStringParser.parse(sourceData, rowIndex, context)); | ||
int indexInteger = | ||
OperatorTools.parseBigDecimal(indexIntegerParser.parse(sourceData, rowIndex, context)).intValue(); | ||
if (indexInteger < 0) { | ||
return null; | ||
} | ||
Pattern pattern = Pattern.compile(patternString); | ||
Matcher matcher = pattern.matcher(inputString); | ||
if (matcher.find()) { | ||
if (indexInteger <= matcher.groupCount()) { | ||
return matcher.group(indexInteger); | ||
} | ||
} | ||
return null; | ||
} | ||
} |
64 changes: 64 additions & 0 deletions
64
...rm-sdk/src/main/java/org/apache/inlong/sdk/transform/process/function/RegexpFunction.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one or more | ||
* contributor license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright ownership. | ||
* The ASF licenses this file to You under the Apache License, Version 2.0 | ||
* (the "License"); you may not use this file except in compliance with | ||
* the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package org.apache.inlong.sdk.transform.process.function; | ||
|
||
import org.apache.inlong.sdk.transform.decode.SourceData; | ||
import org.apache.inlong.sdk.transform.process.Context; | ||
import org.apache.inlong.sdk.transform.process.operator.OperatorTools; | ||
import org.apache.inlong.sdk.transform.process.parser.ValueParser; | ||
|
||
import net.sf.jsqlparser.expression.Expression; | ||
import net.sf.jsqlparser.expression.Function; | ||
|
||
import java.util.List; | ||
import java.util.regex.Matcher; | ||
import java.util.regex.Pattern; | ||
/** | ||
* RegexpFunction | ||
* description: REGEX(string1, string2)--Returns TRUE if any (possibly empty) substring of string1 matches the Java | ||
* regular expression string2, otherwise FALSE. Returns NULL if any of arguments is NULL. | ||
*/ | ||
@TransformFunction(names = {"regex"}) | ||
public class RegexpFunction implements ValueParser { | ||
|
||
private ValueParser inputParser; | ||
|
||
private ValueParser patternParser; | ||
|
||
public RegexpFunction(Function expr) { | ||
if (expr.getParameters() != null) { | ||
List<Expression> expressions = expr.getParameters().getExpressions(); | ||
if (expressions != null && expressions.size() == 2) { | ||
inputParser = OperatorTools.buildParser(expressions.get(0)); | ||
patternParser = OperatorTools.buildParser(expressions.get(1)); | ||
} | ||
} | ||
} | ||
|
||
@Override | ||
public Object parse(SourceData sourceData, int rowIndex, Context context) { | ||
if (inputParser == null || patternParser == null) { | ||
return null; | ||
} | ||
String inputString = OperatorTools.parseString(inputParser.parse(sourceData, rowIndex, context)); | ||
String patternString = OperatorTools.parseString(patternParser.parse(sourceData, rowIndex, context)); | ||
Pattern pattern = Pattern.compile(patternString); | ||
Matcher matcher = pattern.matcher(inputString); | ||
return matcher.find(); | ||
} | ||
} |
71 changes: 71 additions & 0 deletions
71
...k/src/main/java/org/apache/inlong/sdk/transform/process/function/RegexpInstrFunction.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one or more | ||
* contributor license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright ownership. | ||
* The ASF licenses this file to You under the Apache License, Version 2.0 | ||
* (the "License"); you may not use this file except in compliance with | ||
* the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package org.apache.inlong.sdk.transform.process.function; | ||
|
||
import org.apache.inlong.sdk.transform.decode.SourceData; | ||
import org.apache.inlong.sdk.transform.process.Context; | ||
import org.apache.inlong.sdk.transform.process.operator.OperatorTools; | ||
import org.apache.inlong.sdk.transform.process.parser.ValueParser; | ||
|
||
import net.sf.jsqlparser.expression.Expression; | ||
import net.sf.jsqlparser.expression.Function; | ||
|
||
import java.util.List; | ||
import java.util.regex.Matcher; | ||
import java.util.regex.Pattern; | ||
|
||
/** | ||
* RegexpInstrFunction | ||
* description: REGEXP_INSTR(str, regexp)--Returns the position of the first substring in str that matches regexp. | ||
* Result indexes begin at 1, 0 if there is no match. | ||
* Returns an INTEGER representation of the first matched substring index. | ||
* NULL if any of the arguments are NULL or regexp is invalid. | ||
*/ | ||
@TransformFunction(names = {"regexp_instr"}) | ||
public class RegexpInstrFunction implements ValueParser { | ||
|
||
private ValueParser inputStringParser; | ||
|
||
private ValueParser patternStringParser; | ||
|
||
public RegexpInstrFunction(Function expr) { | ||
if (expr.getParameters() != null) { | ||
List<Expression> expressions = expr.getParameters().getExpressions(); | ||
if (expressions != null && expressions.size() >= 2) { | ||
inputStringParser = OperatorTools.buildParser(expressions.get(0)); | ||
patternStringParser = OperatorTools.buildParser(expressions.get(1)); | ||
} | ||
} | ||
} | ||
|
||
@Override | ||
public Object parse(SourceData sourceData, int rowIndex, Context context) { | ||
if (inputStringParser == null || patternStringParser == null) { | ||
return null; | ||
} | ||
String inputString = OperatorTools.parseString(inputStringParser.parse(sourceData, rowIndex, context)); | ||
String patternString = OperatorTools.parseString(patternStringParser.parse(sourceData, rowIndex, context)); | ||
Pattern pattern = Pattern.compile(patternString); | ||
Matcher matcher = pattern.matcher(inputString); | ||
if (matcher.find()) { | ||
return matcher.start() + 1; | ||
} else { | ||
return 0; | ||
} | ||
} | ||
} |
Oops, something went wrong.