Skip to content

Commit

Permalink
[INLONG-11005][SDK] Add YAML formatted data source for Transform
Browse files Browse the repository at this point in the history
  • Loading branch information
emptyOVO committed Sep 3, 2024
1 parent e394d74 commit 85773c4
Show file tree
Hide file tree
Showing 6 changed files with 352 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import org.apache.inlong.sdk.transform.pojo.JsonSourceInfo;
import org.apache.inlong.sdk.transform.pojo.KvSourceInfo;
import org.apache.inlong.sdk.transform.pojo.PbSourceInfo;
import org.apache.inlong.sdk.transform.pojo.YamlSourceInfo;

public class SourceDecoderFactory {

Expand All @@ -39,4 +40,9 @@ public static JsonSourceDecoder createJsonDecoder(JsonSourceInfo sourceInfo) {
/**
 * Builds a {@link PbSourceDecoder} configured by the given source info.
 *
 * @param sourceInfo the source description handed to the decoder's constructor
 * @return a newly constructed decoder
 */
public static PbSourceDecoder createPbDecoder(PbSourceInfo sourceInfo) {
    final PbSourceDecoder decoder = new PbSourceDecoder(sourceInfo);
    return decoder;
}

/**
 * Builds a {@link YamlSourceDecoder} configured by the given source info.
 *
 * @param sourceInfo the source description handed to the decoder's constructor
 * @return a newly constructed decoder
 */
public static YamlSourceDecoder createYamlDecoder(YamlSourceInfo sourceInfo) {
    final YamlSourceDecoder decoder = new YamlSourceDecoder(sourceInfo);
    return decoder;
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
package org.apache.inlong.sdk.transform.decode;

import lombok.Data;

/**
 * A named value holder: pairs a {@code name} with an arbitrary {@code value}.
 * Accessors, {@code equals}/{@code hashCode} and {@code toString} are generated by Lombok's
 * {@code @Data}.
 */
@Data
public class YamlNode {

    /** Name carried by this node. */
    private String name;

    /** Payload of this node; its concrete type is not constrained by this class. */
    private Object value;

    /**
     * Creates a node with the given name and value.
     *
     * @param name the node name
     * @param value the node value
     */
    public YamlNode(String name, Object value) {
        this.name = name;
        this.value = value;
    }

    /**
     * Creates an empty node; both {@code name} and {@code value} are {@code null}.
     */
    public YamlNode() {
        this(null, null);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
package org.apache.inlong.sdk.transform.decode;

import java.util.List;
import java.util.Map;

/**
 * {@link SourceData} view over a decoded YAML document.
 *
 * <p>Fields are addressed with dot-separated paths whose first segment is either
 * {@link #ROOT_KEY} (resolve against the root node) or {@link #CHILD_KEY} (resolve against
 * the child-list element of the requested row). A segment may carry a list index suffix in
 * parentheses, e.g. {@code $child.items(2).name}; without a suffix, index {@code 0} is used
 * when a list is encountered.
 */
public class YamlSourceData implements SourceData {

    public static final String ROOT_KEY = "$root";

    public static final String CHILD_KEY = "$child";

    private final YamlNode root;

    private final YamlNode childRoot;

    /**
     * @param root node the {@code $root} paths are resolved against
     * @param childRoot node whose value, when it is a {@link List}, supplies one element per
     *        row; may be {@code null}
     */
    public YamlSourceData(YamlNode root, YamlNode childRoot) {
        this.root = root;
        this.childRoot = childRoot;
    }

    /**
     * Returns the number of rows: the size of the child list when {@code childRoot} holds a
     * {@link List}, otherwise {@code 1}.
     */
    @Override
    public int getRowCount() {
        if (this.childRoot != null && this.childRoot.getValue() instanceof List) {
            return ((List<?>) this.childRoot.getValue()).size();
        }
        return 1;
    }

    /**
     * Resolves a path against the root node or the child element of row {@code rowNum}.
     *
     * <p>Resolution is best-effort: any failure (unknown prefix, missing child list, row or
     * list index out of range, malformed index, unexpected element type) yields an empty
     * string instead of an exception.
     *
     * @param rowNum zero-based row used for {@code $child} paths
     * @param fieldName dot-separated path, e.g. {@code $root.sid} or {@code $child.items(2).name}
     * @return {@code toString()} of the resolved value, or {@code ""} when nothing is resolved
     */
    @Override
    @SuppressWarnings("unchecked")
    public String getField(int rowNum, String fieldName) {
        try {
            String[] path = fieldName.split("\\.");
            Object cur = null;
            Object last = null;

            if (ROOT_KEY.equals(path[0])) {
                cur = root;
            } else if (CHILD_KEY.equals(path[0])) {
                cur = ((List<YamlNode>) childRoot.getValue()).get(rowNum);
            }

            // The cursor only moves to the next segment when the current value is a leaf or a
            // lookup missed; unwrapping a node, indexing a list or reading a map key all stay
            // on the same segment so that one segment can peel several layers.
            int i = 1;
            while (i < path.length) {
                if (cur == null) {
                    // The lookup missed: fall back to the container we were in and try the
                    // next segment against it.
                    cur = last;
                    i++;
                    continue;
                }
                last = cur;
                String segment = path[i];
                int open = segment.indexOf('(');
                if (cur instanceof List) {
                    int idx = 0;
                    if (open != -1) {
                        // Read the index from the segment being processed. The previous code
                        // always read it from path[1], which broke indexed segments deeper in
                        // the path.
                        idx = Integer.parseInt(segment.substring(open + 1, segment.indexOf(')')));
                    }
                    cur = ((List<YamlNode>) cur).get(idx).getValue();
                } else if (cur instanceof Map) {
                    // Strip the "(n)" suffix, if any, to obtain the map key.
                    String key = open != -1 ? segment.substring(0, open) : segment;
                    cur = ((Map<String, YamlNode>) cur).get(key);
                } else if (cur instanceof YamlNode) {
                    cur = ((YamlNode) cur).getValue();
                } else {
                    // Leaf value: nothing more to unwrap for this segment.
                    i++;
                }
            }
            return cur == null ? "" : cur.toString();
        } catch (Exception e) {
            // Deliberately best-effort: an unresolvable path is reported as an empty field.
            return "";
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
package org.apache.inlong.sdk.transform.decode;

import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.apache.flink.shaded.jackson2.org.yaml.snakeyaml.Yaml;
import org.apache.inlong.sdk.transform.pojo.YamlSourceInfo;
import org.apache.inlong.sdk.transform.process.Context;

import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

@Slf4j
public class YamlSourceDecoder implements SourceDecoder<String> {

protected YamlSourceInfo sourceInfo;
private Charset srcCharset = Charset.defaultCharset();
private String rowsNodePath;
private List<String> childNodes;

public YamlSourceDecoder(YamlSourceInfo sourceInfo) {
this.sourceInfo = sourceInfo;
if (!StringUtils.isBlank(sourceInfo.getCharset())) {
this.srcCharset = Charset.forName(sourceInfo.getCharset());
}
this.rowsNodePath = sourceInfo.getRowsNodePath();
if (!StringUtils.isBlank(rowsNodePath)) {
this.childNodes = new ArrayList<>();
String[] nodeStrings = this.rowsNodePath.split("\\.");
childNodes.addAll(Arrays.asList(nodeStrings));
}
}
@Override
public SourceData decode(byte[] srcBytes, Context context) {
String srcString = new String(srcBytes, srcCharset);
return this.decode(srcString, context);
}

@Override
public SourceData decode(String srcString, Context context) {
try {
Yaml yaml = new Yaml();
Map<String, Object> yamlData = yaml.load(srcString);
if (yamlData == null || yamlData.isEmpty()) {
log.error("YAML data is empty or null.");
return null;
}
Map<String, YamlNode> rootMap = new HashMap<>();
List<YamlNode> childList = new ArrayList<>();

for (Map.Entry<String, Object> entry : yamlData.entrySet()) {
String key = entry.getKey();
Object value = entry.getValue();

if (value instanceof Map) {
// 递归处理子Map,并将其作为根节点的一部分
Map<String, YamlNode> childNodes = parser((Map<String, Object>) value);
rootMap.put(key, new YamlNode(key, childNodes));
} else if (value instanceof List) {
// 处理列表,并将其作为子节点的一部分
for (Object item : (List<?>) value) {
if (item instanceof Map) {
Map<String, YamlNode> childNodes = parser((Map<String, Object>) item);
childList.add(new YamlNode(key, childNodes));
} else {
childList.add(new YamlNode(key, item));
}
}
rootMap.put(key, new YamlNode(key, value));
} else {
// 处理简单类型,将其作为根节点的一部分
rootMap.put(key, new YamlNode(key, value));
}
}
YamlNode root = new YamlNode(YamlSourceData.ROOT_KEY, rootMap.isEmpty() ? null : rootMap);
YamlNode childRoot = new YamlNode(YamlSourceData.CHILD_KEY, childList.isEmpty() ? null : childList);
return new YamlSourceData(root,childRoot);
} catch (Exception e) {
log.error("Data parsing failed", e);
return null;
}
}

private static Map<String, YamlNode> parser(Map<String, Object> yamlData) {
Map<String, YamlNode> yamlNodes = new HashMap<>();
for (Map.Entry<String, Object> entry : yamlData.entrySet()) {
String key = entry.getKey();
Object value = entry.getValue();

/*if (value instanceof Map) {
yamlNodes.put(key, new YamlNode(key, parser((Map<String, Object>) value)));
} else */if (value instanceof List) {
List<YamlNode> list = new ArrayList<>();
for (Object item : (List<?>) value) {
if (item instanceof Map) {
list.add(new YamlNode(key, parser((Map<String, Object>) item)));
} else {
list.add(new YamlNode(key, item));
}
}
yamlNodes.put(key, new YamlNode(key, list));
} else {
yamlNodes.put(key, new YamlNode(key, value));
}
}
return yamlNodes;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
package org.apache.inlong.sdk.transform.pojo;

import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import com.fasterxml.jackson.annotation.JsonProperty;

/**
 * Source description for YAML input: the charset inherited from {@link SourceInfo} plus the
 * path of the node that holds the rows. Unknown JSON properties are ignored on deserialization.
 */
@JsonIgnoreProperties(ignoreUnknown = true)
public class YamlSourceInfo extends SourceInfo {

    /** Path of the rows node, as supplied by the {@code rowsNodePath} property. */
    private String rowsNodePath;

    /**
     * Creates the source info from its JSON properties.
     *
     * @param charset value of the {@code charset} property, forwarded to {@link SourceInfo}
     * @param rowsNodePath value of the {@code rowsNodePath} property
     */
    @JsonCreator
    public YamlSourceInfo(
            @JsonProperty("charset") final String charset,
            @JsonProperty("rowsNodePath") final String rowsNodePath) {
        super(charset);
        this.rowsNodePath = rowsNodePath;
    }

    /**
     * Returns the rows node path.
     *
     * @return the current rows node path, possibly {@code null}
     */
    @JsonProperty("rowsNodePath")
    public String getRowsNodePath() {
        return this.rowsNodePath;
    }

    /**
     * Replaces the rows node path.
     *
     * @param rowsNodePath the new rows node path
     */
    public void setRowsNodePath(final String rowsNodePath) {
        this.rowsNodePath = rowsNodePath;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import org.apache.inlong.sdk.transform.pojo.PbSourceInfo;
import org.apache.inlong.sdk.transform.pojo.TransformConfig;

import org.apache.inlong.sdk.transform.pojo.YamlSourceInfo;
import org.junit.Assert;
import org.junit.Test;

Expand Down Expand Up @@ -350,4 +351,103 @@ public void testPb2CsvForNow() throws Exception {
List<String> output = processor.transform(srcBytes, new HashMap<>());
Assert.assertEquals(2, output.size());
}

/**
 * Transforms YAML documents with a top-level list into CSV rows, one row per list element,
 * mixing {@code $root} fields with {@code $child} fields (including an indexed nested list).
 */
@Test
public void testYaml2Csv() throws Exception {
    List<FieldInfo> fields = null;
    YamlSourceInfo yamlSource = null;
    CsvSinkInfo csvSink = null;
    String transformSql = null;
    TransformConfig config = null;
    TransformProcessor<String, String> processor = null;
    String srcString = null;
    List<String> output = null;

    // case1: flat child elements
    fields = this.getTestFieldList("sid", "packageID", "msgTime", "msg");
    yamlSource = new YamlSourceInfo("UTF-8", "msgs");
    csvSink = new CsvSinkInfo("UTF-8", '|', '\\', fields);
    transformSql = "select $root.sid,$root.packageID,$child.data,$child.msgTime from source";
    config = new TransformConfig(transformSql);
    processor = TransformProcessor
            .create(config, SourceDecoderFactory.createYamlDecoder(yamlSource),
                    SinkEncoderFactory.createCsvEncoder(csvSink));
    // Nested entries must be indented deeper than their parent to be valid YAML.
    srcString = "sid: sid1\n" +
            "packageID: pid1\n" +
            "msgs:\n" +
            "  - data: value1\n" +
            "    msgTime: Time1\n" +
            "  - data: value2\n" +
            "    msgTime: Time2\n";
    output = processor.transform(srcString, new HashMap<>());
    Assert.assertEquals(2, output.size());
    // JUnit's contract is assertEquals(expected, actual).
    Assert.assertEquals("sid1|pid1|value1|Time1", output.get(0));
    Assert.assertEquals("sid1|pid1|value2|Time2", output.get(1));

    // case2: child elements that contain an indexed nested list
    yamlSource = new YamlSourceInfo("UTF-8", "Persons");
    csvSink = new CsvSinkInfo("UTF-8", '|', '\\', fields);
    transformSql = "select $root.sid,$root.packageID,$child.data,$child.habbies(2).name from source";
    config = new TransformConfig(transformSql);
    processor = TransformProcessor
            .create(config, SourceDecoderFactory.createYamlDecoder(yamlSource),
                    SinkEncoderFactory.createCsvEncoder(csvSink));
    srcString = "sid: sid\n" +
            "packageID: pid\n" +
            "Persons:\n" +
            "  - data: value1\n" +
            "    habbies:\n" +
            "      - index: 1\n" +
            "        name: sing1\n" +
            "      - index: 2\n" +
            "        name: dance1\n" +
            "      - index: 3\n" +
            "        name: rap1\n" +
            "  - data: value2\n" +
            "    habbies:\n" +
            "      - index: 1\n" +
            "        name: sing2\n" +
            "      - index: 2\n" +
            "        name: dance2\n" +
            "      - index: 3\n" +
            "        name: rap2\n";
    output = processor.transform(srcString, new HashMap<>());
    Assert.assertEquals(2, output.size());
    Assert.assertEquals("sid|pid|value1|rap1", output.get(0));
    Assert.assertEquals("sid|pid|value2|rap2", output.get(1));
}

/**
 * Addresses elements of a top-level list directly through {@code $root} with an explicit
 * index, without configuring a rows node path.
 */
@Test
public void testYaml2CsvForOne() throws Exception {
    List<FieldInfo> fields = null;
    YamlSourceInfo yamlSource = null;
    CsvSinkInfo csvSink = null;
    String transformSql = null;
    TransformConfig config = null;
    TransformProcessor<String, String> processor = null;
    String srcString = null;
    List<String> output = null;

    // case1
    fields = this.getTestFieldList();
    yamlSource = new YamlSourceInfo("UTF-8", "");
    csvSink = new CsvSinkInfo("UTF-8", '|', '\\', fields);
    transformSql = "select $root.msgs(1).msgTime,$root.msgs(0).data from source";
    config = new TransformConfig(transformSql);
    processor = TransformProcessor
            .create(config, SourceDecoderFactory.createYamlDecoder(yamlSource),
                    SinkEncoderFactory.createCsvEncoder(csvSink));
    // Nested entries must be indented deeper than their parent to be valid YAML.
    srcString = "msgs:\n" +
            "  - data: value1\n" +
            "    msgTime: Time1\n" +
            "  - data: value2\n" +
            "    msgTime: Time2\n";
    output = processor.transform(srcString, new HashMap<>());
    Assert.assertEquals(2, output.size());
    // JUnit's contract is assertEquals(expected, actual).
    Assert.assertEquals("Time2|value1", output.get(0));
}

}

0 comments on commit 85773c4

Please sign in to comment.