-
Notifications
You must be signed in to change notification settings - Fork 23
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #66 from codefuse-ai/xxh_dev
[Feat]Add xml extractor source code
- Loading branch information
Showing
46 changed files
with
2,288 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
# Introduction | ||
The codefuse-query XML extractor transforms the source code of XML files into standardized coref-xml data, which codefuse-query then uses for further analysis. | ||
|
||
# Quick Start | ||
1. Set `JAVA_HOME`. Execute `echo $JAVA_HOME` to display its current setting. If the output is empty, `JAVA_HOME` has not been configured yet. | ||
2. Build. Execute `mvn clean install`. | ||
3. Run. Execute `java -jar target/xml-extractor-1.0-SNAPSHOT-jar-with-dependencies.jar ${YOUR_REPO} ./db`. | ||
|
||
After execution, a file named coref_xml_src.db will be generated in the ./db directory. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
# 简介 | ||
Codefuse-query XML 提取器将 XML 文件的源代码转换为标准化的 coref-xml 数据,这些数据用于 codefuse-query 进行进一步分析。 | ||
|
||
# 快速开始 | ||
1. 设置 JAVA_HOME。执行 echo $JAVA_HOME 来显示当前的设置。如果显示为空,则表示尚未配置。 | ||
2. 构建。执行 mvn clean install。 | ||
3. 运行。执行 java -jar target/xml-extractor-1.0-SNAPSHOT-jar-with-dependencies.jar ${YOUR_REPO} ./db。 | ||
|
||
执行后,一个名为 coref_xml_src.db 的文件将生成在 ./db 目录下。 |
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,170 @@ | ||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <!-- Build descriptor for the XML extractor; produces an executable jar-with-dependencies. -->
    <groupId>com.alipay.codequery</groupId>
    <artifactId>xml-extractor</artifactId>
    <version>1.0-SNAPSHOT</version>

    <packaging>jar</packaging>

    <name>xml-extractor</name>
    <url>http://maven.apache.org</url>

    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <!-- Single source of truth for all Log4j artifacts. 2.14.1 is affected by
             CVE-2021-44228 (Log4Shell); 2.17.1 is a patched release that still runs on Java 8. -->
        <log4j.version>2.17.1</log4j.version>
    </properties>

    <dependencies>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.12</version>
            <scope>test</scope>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.apache.commons/commons-lang3 -->
        <dependency>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-lang3</artifactId>
            <version>3.11</version>
        </dependency>

        <!-- https://mvnrepository.com/artifact/stax/stax-api -->
        <dependency>
            <groupId>stax</groupId>
            <artifactId>stax-api</artifactId>
            <version>1.0.1</version>
        </dependency>

        <dependency>
            <groupId>org.codehaus.woodstox</groupId>
            <artifactId>stax2-api</artifactId>
            <version>4.2</version>
        </dependency>

        <!-- Woodstox is resolved from a jar checked in under lib/ rather than from a repository.
             NOTE(review): system-scoped dependencies are normally not bundled by the
             jar-with-dependencies assembly; verify that the assembled jar contains these classes. -->
        <dependency>
            <groupId>com.fasterxml.woodstox</groupId>
            <artifactId>woodstox-core</artifactId>
            <version>6.4.1-SNAPSHOT</version>
            <scope>system</scope>
            <systemPath>${project.basedir}/lib/woodstox-core-6.4.1-SNAPSHOT.jar</systemPath>
        </dependency>

        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <version>1.18.16</version>
            <scope>provided</scope>
        </dependency>

        <dependency>
            <groupId>org.xerial</groupId>
            <artifactId>sqlite-jdbc</artifactId>
            <version>3.36.0.2</version>
        </dependency>

        <dependency>
            <groupId>org.mybatis</groupId>
            <artifactId>mybatis</artifactId>
            <version>3.5.6</version>
        </dependency>

        <dependency>
            <groupId>tk.mybatis</groupId>
            <artifactId>mapper</artifactId>
            <!-- Using the latest version is recommended; look it up on the project home page. -->
            <version>4.1.5</version>
        </dependency>

        <!-- Logging: the three Log4j artifacts must stay on the same version. -->
        <dependency>
            <groupId>org.apache.logging.log4j</groupId>
            <artifactId>log4j-core</artifactId>
            <version>${log4j.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.logging.log4j</groupId>
            <artifactId>log4j-api</artifactId>
            <version>${log4j.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.logging.log4j</groupId>
            <artifactId>log4j-slf4j-impl</artifactId>
            <version>${log4j.version}</version>
        </dependency>

    </dependencies>
    <build>
        <plugins>
            <!-- Compile for Java 8. -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <configuration>
                    <source>8</source>
                    <target>8</target>
                </configuration>
            </plugin>
            <!-- Unit tests are skipped during the build. -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-surefire-plugin</artifactId>
                <version>2.4.2</version>
                <configuration>
                    <skipTests>true</skipTests>
                </configuration>
            </plugin>
            <!-- MyBatis generator; overwrites previously generated artifacts when run. -->
            <plugin>
                <groupId>org.mybatis.generator</groupId>
                <artifactId>mybatis-generator-maven-plugin</artifactId>
                <version>1.3.7</version>
                <configuration>
                    <verbose>true</verbose>
                    <overwrite>true</overwrite>
                </configuration>
                <dependencies>
                    <dependency>
                        <groupId>org.xerial</groupId>
                        <artifactId>sqlite-jdbc</artifactId>
                        <version>3.36.0.2</version>
                    </dependency>
                    <dependency>
                        <groupId>tk.mybatis</groupId>
                        <artifactId>mapper</artifactId>
                        <version>4.1.5</version>
                    </dependency>
                </dependencies>
                <executions>
                    <execution>
                        <id>Generate MyBatis Artifacts</id>
                        <goals>
                            <goal>generate</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
            <!-- Builds the executable *-jar-with-dependencies.jar during the package phase. -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-assembly-plugin</artifactId>
                <version>2.5.5</version>
                <configuration>
                    <archive>
                        <manifest>
                            <mainClass>com.alipay.codequery.Extractor</mainClass>
                        </manifest>
                    </archive>
                    <descriptorRefs>
                        <descriptorRef>jar-with-dependencies</descriptorRef>
                    </descriptorRefs>
                </configuration>
                <executions>
                    <execution>
                        <id>make-assembly</id>
                        <phase>package</phase>
                        <goals>
                            <goal>single</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>

        </plugins>
    </build>
</project>
68 changes: 68 additions & 0 deletions
68
language/xml/extractor/src/main/java/com/alipay/codequery/Extractor.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
|
||
package com.alipay.codequery; | ||
import com.alipay.codequery.stax.StaxCorefExtractor; | ||
import com.alipay.codequery.util.CorefStorage; | ||
import com.alipay.codequery.util.LoggerUtil; | ||
import org.apache.logging.log4j.Level; | ||
import org.apache.logging.log4j.LogManager; | ||
import org.apache.logging.log4j.Logger; | ||
|
||
import javax.xml.stream.XMLStreamException; | ||
import java.io.File; | ||
import java.io.IOException; | ||
|
||
public class Extractor { | ||
private static final Logger logger = LogManager.getLogger(Extractor.class); | ||
public static final String XML_EXT = ".xml"; | ||
public static final String AXML_EXT = ".axml"; | ||
public static final String[] FILE_EXT_ARRAY = { | ||
XML_EXT, | ||
AXML_EXT, | ||
}; | ||
|
||
public static void main(String[] args) throws IOException, XMLStreamException { | ||
LoggerUtil.initLogger(Level.INFO); | ||
|
||
long start = System.currentTimeMillis(); | ||
// repoDir和destDir是设置的本地测试目录,在生产中会被替换掉 | ||
String repoDir = ""; | ||
String destDir = ""; | ||
if (args.length > 0) { | ||
repoDir = args[0]; | ||
} | ||
if (args.length > 1) { | ||
destDir = args[1]; | ||
} | ||
if (!destDir.endsWith(File.separator)) { | ||
destDir += File.separator; | ||
} | ||
CorefStorage corefStorage = new CorefStorage(destDir); | ||
File sourceDir = new File(repoDir); | ||
parse(sourceDir, sourceDir, corefStorage); | ||
logger.info("Time to completion (TTC): " + (System.currentTimeMillis() - start)); | ||
} | ||
|
||
private static void parse(File sourceDir, File rootDir, CorefStorage corefStorage) { | ||
File[] files = rootDir.listFiles(); | ||
if (files == null) { | ||
return; | ||
} | ||
for (File file: files) { | ||
if (file.isDirectory()) { | ||
parse(sourceDir, file, corefStorage); | ||
} else { | ||
for (String fileExt: FILE_EXT_ARRAY) { | ||
if (file.getName().endsWith(fileExt)) { | ||
logger.info("Start Extracting xml file: {}", file.getAbsolutePath()); | ||
try { | ||
StaxCorefExtractor extractor = new StaxCorefExtractor(file, corefStorage, sourceDir.getAbsolutePath()); | ||
extractor.parse(); | ||
} catch (Exception e) { | ||
logger.error("Extraction failed, error message:{} on file {}", e.getMessage(), file.getAbsolutePath()); | ||
} | ||
} | ||
} | ||
} | ||
} | ||
} | ||
} |
119 changes: 119 additions & 0 deletions
119
...age/xml/extractor/src/main/java/com/alipay/codequery/dal/mybatis/domain/XmlAttribute.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,119 @@ | ||
package com.alipay.codequery.dal.mybatis.domain; | ||
|
||
import javax.persistence.*; | ||
|
||
@Table(name = "xml_attribute") | ||
public class XmlAttribute { | ||
@Id | ||
private Integer id; | ||
|
||
@Column(name = "element_id") | ||
private Integer elementId; | ||
|
||
private String name; | ||
|
||
private String value; | ||
|
||
@Column(name = "index_order") | ||
private Integer indexOrder; | ||
|
||
@Column(name = "location_id") | ||
private Integer locationId; | ||
|
||
public XmlAttribute(Integer id, Integer elementId, String name, String value, Integer indexOrder, Integer locationId) { | ||
this.id = id; | ||
this.elementId = elementId; | ||
this.name = name; | ||
this.value = value; | ||
this.indexOrder = indexOrder; | ||
this.locationId = locationId; | ||
} | ||
|
||
public XmlAttribute() { | ||
super(); | ||
} | ||
|
||
/** | ||
* @return id | ||
*/ | ||
public Integer getId() { | ||
return id; | ||
} | ||
|
||
/** | ||
* @param id | ||
*/ | ||
public void setId(Integer id) { | ||
this.id = id; | ||
} | ||
|
||
/** | ||
* @return element_id | ||
*/ | ||
public Integer getElementId() { | ||
return elementId; | ||
} | ||
|
||
/** | ||
* @param elementId | ||
*/ | ||
public void setElementId(Integer elementId) { | ||
this.elementId = elementId; | ||
} | ||
|
||
/** | ||
* @return name | ||
*/ | ||
public String getName() { | ||
return name; | ||
} | ||
|
||
/** | ||
* @param name | ||
*/ | ||
public void setName(String name) { | ||
this.name = name == null ? null : name.trim(); | ||
} | ||
|
||
/** | ||
* @return value | ||
*/ | ||
public String getValue() { | ||
return value; | ||
} | ||
|
||
/** | ||
* @param value | ||
*/ | ||
public void setValue(String value) { | ||
this.value = value == null ? null : value.trim(); | ||
} | ||
|
||
/** | ||
* @return index_order | ||
*/ | ||
public Integer getIndexOrder() { | ||
return indexOrder; | ||
} | ||
|
||
/** | ||
* @param indexOrder | ||
*/ | ||
public void setIndexOrder(Integer indexOrder) { | ||
this.indexOrder = indexOrder; | ||
} | ||
|
||
/** | ||
* @return location_id | ||
*/ | ||
public Integer getLocationId() { | ||
return locationId; | ||
} | ||
|
||
/** | ||
* @param locationId | ||
*/ | ||
public void setLocationId(Integer locationId) { | ||
this.locationId = locationId; | ||
} | ||
} |
Oops, something went wrong.