Skip to content

Commit

Permalink
Merge pull request #66 from codefuse-ai/xxh_dev
Browse files Browse the repository at this point in the history
[Feat]Add xml extractor source code
  • Loading branch information
zhouang777 authored Aug 14, 2024
2 parents 711d834 + cd74b47 commit 1250196
Show file tree
Hide file tree
Showing 46 changed files with 2,288 additions and 0 deletions.
9 changes: 9 additions & 0 deletions language/xml/extractor/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Introduction
The codefuse-query XML extractor transforms XML source files into standardized coref-xml data, which codefuse-query then uses for further analysis.

# Quick Start
1. Set `JAVA_HOME`. Execute `echo $JAVA_HOME` to display its current value. If the output is empty, `JAVA_HOME` has not been configured yet.
2. Build. Execute `mvn clean install`.
3. Run. Execute `java -jar target/xml-extractor-1.0-SNAPSHOT-jar-with-dependencies.jar ${YOUR_REPO} ./db`.

After execution, a file named `coref_xml_src.db` will be generated in the `./db` directory.
9 changes: 9 additions & 0 deletions language/xml/extractor/README_cn.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# 简介
Codefuse-query XML 提取器将 XML 文件的源代码转换为标准化的 coref-xml 数据,这些数据用于 codefuse-query 进行进一步分析。

# 快速开始
1. 设置 JAVA_HOME。执行 echo $JAVA_HOME 来显示当前的设置。如果显示为空,则表示尚未配置。
2. 构建。执行 mvn clean install。
3. 运行。执行 java -jar target/xml-extractor-1.0-SNAPSHOT-jar-with-dependencies.jar ${YOUR_REPO} ./db。

执行后,一个名为 coref_xml_src.db 的文件将生成在 ./db 目录下。
Binary file not shown.
170 changes: 170 additions & 0 deletions language/xml/extractor/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <!-- Maven build descriptor for the codefuse-query XML extractor. -->
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.alipay.codequery</groupId>
    <artifactId>xml-extractor</artifactId>
    <version>1.0-SNAPSHOT</version>

    <packaging>jar</packaging>

    <name>xml-extractor</name>
    <url>http://maven.apache.org</url>

    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <!--
          Single source of truth for the three Log4j artifacts below.
          2.17.1 still runs on Java 8 and contains the fixes for CVE-2021-44228 (Log4Shell),
          CVE-2021-45046, CVE-2021-45105 and CVE-2021-44832, all of which affect 2.14.1.
        -->
        <log4j.version>2.17.1</log4j.version>
    </properties>

    <dependencies>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.12</version>
            <scope>test</scope>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.apache.commons/commons-lang3 -->
        <dependency>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-lang3</artifactId>
            <version>3.11</version>
        </dependency>

        <!-- https://mvnrepository.com/artifact/stax/stax-api -->
        <dependency>
            <groupId>stax</groupId>
            <artifactId>stax-api</artifactId>
            <version>1.0.1</version>
        </dependency>

        <dependency>
            <groupId>org.codehaus.woodstox</groupId>
            <artifactId>stax2-api</artifactId>
            <version>4.2</version>
        </dependency>

        <!--
          Woodstox is resolved from a jar checked into lib/ rather than from a repository.
          NOTE(review): confirm that this system-scoped jar actually ends up inside the
          jar-with-dependencies assembly built below.
        -->
        <dependency>
            <groupId>com.fasterxml.woodstox</groupId>
            <artifactId>woodstox-core</artifactId>
            <version>6.4.1-SNAPSHOT</version>
            <scope>system</scope>
            <systemPath>${project.basedir}/lib/woodstox-core-6.4.1-SNAPSHOT.jar</systemPath>
        </dependency>

        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <version>1.18.16</version>
            <scope>provided</scope>
        </dependency>

        <dependency>
            <groupId>org.xerial</groupId>
            <artifactId>sqlite-jdbc</artifactId>
            <version>3.36.0.2</version>
        </dependency>

        <dependency>
            <groupId>org.mybatis</groupId>
            <artifactId>mybatis</artifactId>
            <version>3.5.6</version>
        </dependency>

        <dependency>
            <groupId>tk.mybatis</groupId>
            <artifactId>mapper</artifactId>
            <!-- Using the latest version is recommended; look it up on the project home page. -->
            <version>4.1.5</version>
        </dependency>

        <!-- Logging: all Log4j artifacts share ${log4j.version} so they cannot drift apart. -->
        <dependency>
            <groupId>org.apache.logging.log4j</groupId>
            <artifactId>log4j-core</artifactId>
            <version>${log4j.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.logging.log4j</groupId>
            <artifactId>log4j-api</artifactId>
            <version>${log4j.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.logging.log4j</groupId>
            <artifactId>log4j-slf4j-impl</artifactId>
            <version>${log4j.version}</version>
        </dependency>

    </dependencies>
    <build>
        <plugins>
            <!-- Compile for Java 8 source and bytecode level. -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <configuration>
                    <source>8</source>
                    <target>8</target>
                </configuration>
            </plugin>
            <!-- Tests are skipped unconditionally during the build. -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-surefire-plugin</artifactId>
                <version>2.4.2</version>
                <configuration>
                    <skipTests>true</skipTests>
                </configuration>
            </plugin>
            <!-- MyBatis Generator; overwrites previously generated artifacts on each run. -->
            <plugin>
                <groupId>org.mybatis.generator</groupId>
                <artifactId>mybatis-generator-maven-plugin</artifactId>
                <version>1.3.7</version>
                <configuration>
                    <verbose>true</verbose>
                    <overwrite>true</overwrite>
                </configuration>
                <dependencies>
                    <dependency>
                        <groupId>org.xerial</groupId>
                        <artifactId>sqlite-jdbc</artifactId>
                        <version>3.36.0.2</version>
                    </dependency>
                    <dependency>
                        <groupId>tk.mybatis</groupId>
                        <artifactId>mapper</artifactId>
                        <version>4.1.5</version>
                    </dependency>
                </dependencies>
                <executions>
                    <execution>
                        <id>Generate MyBatis Artifacts</id>
                        <goals>
                            <goal>generate</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
            <!-- Builds the runnable *-jar-with-dependencies.jar during the package phase. -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-assembly-plugin</artifactId>
                <version>2.5.5</version>
                <configuration>
                    <archive>
                        <manifest>
                            <mainClass>com.alipay.codequery.Extractor</mainClass>
                        </manifest>
                    </archive>
                    <descriptorRefs>
                        <descriptorRef>jar-with-dependencies</descriptorRef>
                    </descriptorRefs>
                </configuration>
                <executions>
                    <execution>
                        <id>make-assembly</id>
                        <phase>package</phase>
                        <goals>
                            <goal>single</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>

        </plugins>
    </build>
</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@

package com.alipay.codequery;
import com.alipay.codequery.stax.StaxCorefExtractor;
import com.alipay.codequery.util.CorefStorage;
import com.alipay.codequery.util.LoggerUtil;
import org.apache.logging.log4j.Level;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import javax.xml.stream.XMLStreamException;
import java.io.File;
import java.io.IOException;

/**
 * Command-line entry point of the XML extractor.
 *
 * <p>Walks a repository directory recursively and runs a {@link StaxCorefExtractor} on every
 * file whose name ends with one of {@link #FILE_EXT_ARRAY}, handing each extractor the shared
 * {@link CorefStorage} created for the destination directory.
 */
public class Extractor {
    private static final Logger logger = LogManager.getLogger(Extractor.class);
    public static final String XML_EXT = ".xml";
    public static final String AXML_EXT = ".axml";
    public static final String[] FILE_EXT_ARRAY = {
        XML_EXT,
        AXML_EXT,
    };

    /**
     * Runs the extraction.
     *
     * @param args {@code args[0]} is the repository directory to scan and {@code args[1]} is the
     *             destination directory passed to {@link CorefStorage}; both default to the
     *             empty string when absent
     * @throws IOException        declared for the storage setup
     * @throws XMLStreamException declared for the storage setup
     */
    public static void main(String[] args) throws IOException, XMLStreamException {
        LoggerUtil.initLogger(Level.INFO);

        long start = System.currentTimeMillis();
        // repoDir and destDir are placeholders for local test directories;
        // in production they are replaced by the command-line arguments.
        // NOTE(review): with no destination argument destDir becomes File.separator,
        // i.e. the filesystem root on Unix-like systems; confirm this is intended.
        String repoDir = args.length > 0 ? args[0] : "";
        String destDir = args.length > 1 ? args[1] : "";
        if (!destDir.endsWith(File.separator)) {
            destDir += File.separator;
        }
        CorefStorage corefStorage = new CorefStorage(destDir);
        File sourceDir = new File(repoDir);
        parse(sourceDir, sourceDir, corefStorage);
        logger.info("Time to completion (TTC): {}", System.currentTimeMillis() - start);
    }

    /**
     * Recursively extracts every supported file below {@code rootDir}.
     *
     * @param sourceDir    repository root; its absolute path is passed to each extractor
     * @param rootDir      directory currently being traversed
     * @param corefStorage storage shared by all extractors
     */
    private static void parse(File sourceDir, File rootDir, CorefStorage corefStorage) {
        File[] files = rootDir.listFiles();
        if (files == null) {
            // Not a directory, or it could not be listed.
            return;
        }
        for (File file : files) {
            if (file.isDirectory()) {
                parse(sourceDir, file, corefStorage);
            } else if (hasSupportedExtension(file.getName())) {
                extract(sourceDir, file, corefStorage);
            }
        }
    }

    /**
     * Tells whether {@code fileName} ends with any extension in {@link #FILE_EXT_ARRAY}.
     *
     * <p>The answer is a single boolean on purpose: a name such as {@code a.axml} ends with both
     * {@code ".axml"} and {@code ".xml"}, and must still be extracted only once.
     */
    private static boolean hasSupportedExtension(String fileName) {
        for (String fileExt : FILE_EXT_ARRAY) {
            if (fileName.endsWith(fileExt)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Extracts a single file. Failures are logged and swallowed so that one broken file does not
     * abort the traversal of the remaining files.
     */
    private static void extract(File sourceDir, File file, CorefStorage corefStorage) {
        logger.info("Start Extracting xml file: {}", file.getAbsolutePath());
        try {
            StaxCorefExtractor extractor =
                new StaxCorefExtractor(file, corefStorage, sourceDir.getAbsolutePath());
            extractor.parse();
        } catch (Exception e) {
            // The trailing throwable has no placeholder, so Log4j logs it with its stack trace.
            logger.error("Extraction failed, error message:{} on file {}",
                e.getMessage(), file.getAbsolutePath(), e);
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
package com.alipay.codequery.dal.mybatis.domain;

import javax.persistence.*;

/**
 * Persistence entity mapped to the {@code xml_attribute} table.
 *
 * <p>The string setters store a trimmed copy of their argument; the all-arguments constructor
 * stores every value exactly as given.
 */
@Table(name = "xml_attribute")
public class XmlAttribute {
    /** Primary key. */
    @Id
    private Integer id;

    /** Mapped to column {@code element_id}. */
    @Column(name = "element_id")
    private Integer elementId;

    /** Attribute name. */
    private String name;

    /** Attribute value. */
    private String value;

    /** Mapped to column {@code index_order}. */
    @Column(name = "index_order")
    private Integer indexOrder;

    /** Mapped to column {@code location_id}. */
    @Column(name = "location_id")
    private Integer locationId;

    /** Creates an entity with every field left {@code null}. */
    public XmlAttribute() {
    }

    /**
     * Creates a fully populated entity; no value is trimmed or otherwise altered.
     */
    public XmlAttribute(Integer id, Integer elementId, String name, String value, Integer indexOrder, Integer locationId) {
        this.id = id;
        this.elementId = elementId;
        this.name = name;
        this.value = value;
        this.indexOrder = indexOrder;
        this.locationId = locationId;
    }

    /** Returns {@code text} without leading/trailing whitespace, or {@code null} for {@code null}. */
    private static String trimOrNull(String text) {
        if (text == null) {
            return null;
        }
        return text.trim();
    }

    /**
     * @return the value of column {@code id}
     */
    public Integer getId() {
        return this.id;
    }

    /**
     * @param id the new value of column {@code id}
     */
    public void setId(Integer id) {
        this.id = id;
    }

    /**
     * @return the value of column {@code element_id}
     */
    public Integer getElementId() {
        return this.elementId;
    }

    /**
     * @param elementId the new value of column {@code element_id}
     */
    public void setElementId(Integer elementId) {
        this.elementId = elementId;
    }

    /**
     * @return the value of column {@code name}
     */
    public String getName() {
        return this.name;
    }

    /**
     * @param name the new name; stored trimmed, {@code null} is kept as {@code null}
     */
    public void setName(String name) {
        this.name = trimOrNull(name);
    }

    /**
     * @return the value of column {@code value}
     */
    public String getValue() {
        return this.value;
    }

    /**
     * @param value the new value; stored trimmed, {@code null} is kept as {@code null}
     */
    public void setValue(String value) {
        this.value = trimOrNull(value);
    }

    /**
     * @return the value of column {@code index_order}
     */
    public Integer getIndexOrder() {
        return this.indexOrder;
    }

    /**
     * @param indexOrder the new value of column {@code index_order}
     */
    public void setIndexOrder(Integer indexOrder) {
        this.indexOrder = indexOrder;
    }

    /**
     * @return the value of column {@code location_id}
     */
    public Integer getLocationId() {
        return this.locationId;
    }

    /**
     * @param locationId the new value of column {@code location_id}
     */
    public void setLocationId(Integer locationId) {
        this.locationId = locationId;
    }
}
Loading

0 comments on commit 1250196

Please sign in to comment.