diff --git a/language/xml/extractor/README.md b/language/xml/extractor/README.md new file mode 100644 index 00000000..a76dddb3 --- /dev/null +++ b/language/xml/extractor/README.md @@ -0,0 +1,9 @@ +# Introduction +The codefuse-query XML extractor transforms the source code of XML files into standardized coref-xml data, which codefuse-query then uses for further analysis. + +# Quick Start +1. Set `JAVA_HOME`. Execute `echo $JAVA_HOME` to display its current setting. If the output is empty, `JAVA_HOME` has not been configured yet. +2. Build. Execute `mvn clean install`. +3. Run. Execute `java -jar target/xml-extractor-1.0-SNAPSHOT-jar-with-dependencies.jar ${YOUR_REPO} ./db`. + +After execution, a file named `coref_xml_src.db` is generated in the `./db` directory. diff --git a/language/xml/extractor/README_cn.md b/language/xml/extractor/README_cn.md new file mode 100644 index 00000000..47113e72 --- /dev/null +++ b/language/xml/extractor/README_cn.md @@ -0,0 +1,9 @@ +# 简介 +Codefuse-query XML 提取器将 XML 文件的源代码转换为标准化的 coref-xml 数据,这些数据用于 codefuse-query 进行进一步分析。 + +# 快速开始 +1. 设置 JAVA_HOME。执行 echo $JAVA_HOME 来显示当前的设置。如果显示为空,则表示尚未配置。 +2. 构建。执行 mvn clean install。 +3. 
运行。执行 java -jar target/xml-extractor-1.0-SNAPSHOT-jar-with-dependencies.jar ${YOUR_REPO} ./db。 + +执行后,一个名为 coref_xml_src.db 的文件将生成在 ./db 目录下。 \ No newline at end of file diff --git a/language/xml/extractor/lib/woodstox-core-6.4.1-SNAPSHOT.jar b/language/xml/extractor/lib/woodstox-core-6.4.1-SNAPSHOT.jar new file mode 100644 index 00000000..591abd44 Binary files /dev/null and b/language/xml/extractor/lib/woodstox-core-6.4.1-SNAPSHOT.jar differ diff --git a/language/xml/extractor/pom.xml b/language/xml/extractor/pom.xml new file mode 100644 index 00000000..2d8d63ce --- /dev/null +++ b/language/xml/extractor/pom.xml @@ -0,0 +1,170 @@ + + 4.0.0 + + com.alipay.codequery + xml-extractor + 1.0-SNAPSHOT + + jar + + xml-extractor + http://maven.apache.org + + + UTF-8 + + + + + junit + junit + 4.12 + test + + + + org.apache.commons + commons-lang3 + 3.11 + + + + + stax + stax-api + 1.0.1 + + + + org.codehaus.woodstox + stax2-api + 4.2 + + + + com.fasterxml.woodstox + woodstox-core + 6.4.1-SNAPSHOT + system + ${project.basedir}/lib/woodstox-core-6.4.1-SNAPSHOT.jar + + + + org.projectlombok + lombok + 1.18.16 + provided + + + + org.xerial + sqlite-jdbc + 3.36.0.2 + + + + org.mybatis + mybatis + 3.5.6 + + + + tk.mybatis + mapper + + 4.1.5 + + + + org.apache.logging.log4j + log4j-core + 2.14.1 + + + org.apache.logging.log4j + log4j-api + 2.14.1 + + + org.apache.logging.log4j + log4j-slf4j-impl + 2.14.1 + + + + + + + org.apache.maven.plugins + maven-compiler-plugin + + 8 + 8 + + + + org.apache.maven.plugins + maven-surefire-plugin + 2.4.2 + + true + + + + org.mybatis.generator + mybatis-generator-maven-plugin + 1.3.7 + + true + true + + + + org.xerial + sqlite-jdbc + 3.36.0.2 + + + tk.mybatis + mapper + 4.1.5 + + + + + Generate MyBatis Artifacts + + generate + + + + + + org.apache.maven.plugins + maven-assembly-plugin + 2.5.5 + + + + com.alipay.codequery.Extractor + + + + jar-with-dependencies + + + + + make-assembly + package + + single + + + + + + + + diff --git 
a/language/xml/extractor/src/main/java/com/alipay/codequery/Extractor.java b/language/xml/extractor/src/main/java/com/alipay/codequery/Extractor.java new file mode 100644 index 00000000..12e6d0b6 --- /dev/null +++ b/language/xml/extractor/src/main/java/com/alipay/codequery/Extractor.java @@ -0,0 +1,88 @@ + +package com.alipay.codequery; +import com.alipay.codequery.stax.StaxCorefExtractor; +import com.alipay.codequery.util.CorefStorage; +import com.alipay.codequery.util.LoggerUtil; +import org.apache.logging.log4j.Level; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +import javax.xml.stream.XMLStreamException; +import java.io.File; +import java.io.IOException; + +/** + * Command-line entry point of the XML extractor: walks a source directory and runs + * {@link StaxCorefExtractor} on every file whose name ends with a suffix in {@link #FILE_EXT_ARRAY}. + */ +public class Extractor { + private static final Logger logger = LogManager.getLogger(Extractor.class); + public static final String XML_EXT = ".xml"; + public static final String AXML_EXT = ".axml"; + /** File-name suffixes that select a file for extraction. */ + public static final String[] FILE_EXT_ARRAY = { + XML_EXT, + AXML_EXT, + }; + + /** + * Extracts every matching file under the source directory into a {@link CorefStorage}. + * + * @param args {@code args[0]} is the directory to scan and {@code args[1]} is the destination + * directory passed to {@link CorefStorage}; each defaults to the empty string when absent + */ + public static void main(String[] args) throws IOException, XMLStreamException { + LoggerUtil.initLogger(Level.INFO); + + long start = System.currentTimeMillis(); + // repoDir and destDir are placeholder local test directories; they are replaced in production + String repoDir = ""; + String destDir = ""; + if (args.length > 0) { + repoDir = args[0]; + } + if (args.length > 1) { + destDir = args[1]; + } + if (!destDir.endsWith(File.separator)) { // NOTE(review): an empty destDir becomes the bare separator (filesystem root on POSIX) - confirm this is intended + destDir += File.separator; + } + CorefStorage corefStorage = new CorefStorage(destDir); + File sourceDir = new File(repoDir); + parse(sourceDir, sourceDir, corefStorage); + logger.info("Time to completion (TTC): " + (System.currentTimeMillis() - start)); + } + + /** + * Recursively lists {@code rootDir} and extracts every non-directory entry whose name ends with a + * suffix in {@link #FILE_EXT_ARRAY}. A failure on one file is logged and the walk continues. + * + * @param sourceDir scan root; its absolute path is passed to each {@link StaxCorefExtractor} + * @param rootDir directory listed by this call + * @param corefStorage storage passed to each {@link StaxCorefExtractor} + */ + private static void parse(File sourceDir, File rootDir, CorefStorage corefStorage) { + File[] files = rootDir.listFiles(); + if (files == null) { // listFiles() returns null when rootDir is not a directory or cannot be read + return; + } + for (File file: files) { + if (file.isDirectory()) { + parse(sourceDir, file, corefStorage); + } else { + for (String fileExt: FILE_EXT_ARRAY) { + if (file.getName().endsWith(fileExt)) 
{ + logger.info("Start Extracting xml file: {}", file.getAbsolutePath()); + try { + StaxCorefExtractor extractor = new StaxCorefExtractor(file, corefStorage, sourceDir.getAbsolutePath()); + extractor.parse(); + } catch (Exception e) { + // Passing the exception as the final argument, beyond the two {} placeholders, makes Log4j record its stack trace. + logger.error("Extraction failed, error message:{} on file {}", e.getMessage(), file.getAbsolutePath(), e); + } + } + } + } + } + } +} diff --git a/language/xml/extractor/src/main/java/com/alipay/codequery/dal/mybatis/domain/XmlAttribute.java b/language/xml/extractor/src/main/java/com/alipay/codequery/dal/mybatis/domain/XmlAttribute.java new file mode 100644 index 00000000..4c8574d9 --- /dev/null +++ b/language/xml/extractor/src/main/java/com/alipay/codequery/dal/mybatis/domain/XmlAttribute.java @@ -0,0 +1,119 @@ +package com.alipay.codequery.dal.mybatis.domain; + +import javax.persistence.*; + +@Table(name = "xml_attribute") +public class XmlAttribute { + @Id + private Integer id; + + @Column(name = "element_id") + private Integer elementId; + + private String name; + + private String value; + + @Column(name = "index_order") + private Integer indexOrder; + + @Column(name = "location_id") + private Integer locationId; + + public XmlAttribute(Integer id, Integer elementId, String name, String value, Integer indexOrder, Integer locationId) { + this.id = id; + this.elementId = elementId; + this.name = name; + this.value = value; + this.indexOrder = indexOrder; + this.locationId = locationId; + } + + public XmlAttribute() { + super(); + } + + /** + * @return id + */ + public Integer getId() { + return id; + } + + /** + * @param id + */ + public void setId(Integer id) { + this.id = id; + } + + /** + * @return element_id + */ + public Integer getElementId() { + return elementId; + } + + /** + * @param elementId + */ + public void setElementId(Integer elementId) { + this.elementId = elementId; + } + + /** + * @return name + */ + public String getName() { + return name; + } + + /** + * @param name + */ + public void setName(String name) { + 
this.name = name == null ? null : name.trim(); + } + + /** + * @return value + */ + public String getValue() { + return value; + } + + /** + * @param value + */ + public void setValue(String value) { + this.value = value == null ? null : value.trim(); + } + + /** + * @return index_order + */ + public Integer getIndexOrder() { + return indexOrder; + } + + /** + * @param indexOrder + */ + public void setIndexOrder(Integer indexOrder) { + this.indexOrder = indexOrder; + } + + /** + * @return location_id + */ + public Integer getLocationId() { + return locationId; + } + + /** + * @param locationId + */ + public void setLocationId(Integer locationId) { + this.locationId = locationId; + } +} \ No newline at end of file diff --git a/language/xml/extractor/src/main/java/com/alipay/codequery/dal/mybatis/domain/XmlCharacter.java b/language/xml/extractor/src/main/java/com/alipay/codequery/dal/mybatis/domain/XmlCharacter.java new file mode 100644 index 00000000..7f10d890 --- /dev/null +++ b/language/xml/extractor/src/main/java/com/alipay/codequery/dal/mybatis/domain/XmlCharacter.java @@ -0,0 +1,119 @@ +package com.alipay.codequery.dal.mybatis.domain; + +import javax.persistence.*; + +@Table(name = "xml_character") +public class XmlCharacter { + @Id + private Integer id; + + private String text; + + private Integer parentid; + + @Column(name = "index_order") + private Integer indexOrder; + + @Column(name = "is_cdata") + private Integer isCdata; + + @Column(name = "location_id") + private Integer locationId; + + public XmlCharacter(Integer id, String text, Integer parentid, Integer indexOrder, Integer isCdata, Integer locationId) { + this.id = id; + this.text = text; + this.parentid = parentid; + this.indexOrder = indexOrder; + this.isCdata = isCdata; + this.locationId = locationId; + } + + public XmlCharacter() { + super(); + } + + /** + * @return id + */ + public Integer getId() { + return id; + } + + /** + * @param id + */ + public void setId(Integer id) { + this.id = id; + } 
+ + /** + * @return text + */ + public String getText() { + return text; + } + + /** + * @param text + */ + public void setText(String text) { + this.text = text == null ? null : text.trim(); + } + + /** + * @return parentid + */ + public Integer getParentid() { + return parentid; + } + + /** + * @param parentid + */ + public void setParentid(Integer parentid) { + this.parentid = parentid; + } + + /** + * @return index_order + */ + public Integer getIndexOrder() { + return indexOrder; + } + + /** + * @param indexOrder + */ + public void setIndexOrder(Integer indexOrder) { + this.indexOrder = indexOrder; + } + + /** + * @return is_cdata + */ + public Integer getIsCdata() { + return isCdata; + } + + /** + * @param isCdata + */ + public void setIsCdata(Integer isCdata) { + this.isCdata = isCdata; + } + + /** + * @return location_id + */ + public Integer getLocationId() { + return locationId; + } + + /** + * @param locationId + */ + public void setLocationId(Integer locationId) { + this.locationId = locationId; + } +} \ No newline at end of file diff --git a/language/xml/extractor/src/main/java/com/alipay/codequery/dal/mybatis/domain/XmlComment.java b/language/xml/extractor/src/main/java/com/alipay/codequery/dal/mybatis/domain/XmlComment.java new file mode 100644 index 00000000..123763c2 --- /dev/null +++ b/language/xml/extractor/src/main/java/com/alipay/codequery/dal/mybatis/domain/XmlComment.java @@ -0,0 +1,84 @@ +package com.alipay.codequery.dal.mybatis.domain; + +import javax.persistence.*; + +@Table(name = "xml_comment") +public class XmlComment { + @Id + private Integer id; + + private String text; + + @Column(name = "parent_id") + private Integer parentId; + + @Column(name = "location_id") + private Integer locationId; + + public XmlComment(Integer id, String text, Integer parentId, Integer locationId) { + this.id = id; + this.text = text; + this.parentId = parentId; + this.locationId = locationId; + } + + public XmlComment() { + super(); + } + + /** + * @return 
id + */ + public Integer getId() { + return id; + } + + /** + * @param id + */ + public void setId(Integer id) { + this.id = id; + } + + /** + * @return text + */ + public String getText() { + return text; + } + + /** + * @param text + */ + public void setText(String text) { + this.text = text == null ? null : text.trim(); + } + + /** + * @return parent_id + */ + public Integer getParentId() { + return parentId; + } + + /** + * @param parentId + */ + public void setParentId(Integer parentId) { + this.parentId = parentId; + } + + /** + * @return location_id + */ + public Integer getLocationId() { + return locationId; + } + + /** + * @param locationId + */ + public void setLocationId(Integer locationId) { + this.locationId = locationId; + } +} \ No newline at end of file diff --git a/language/xml/extractor/src/main/java/com/alipay/codequery/dal/mybatis/domain/XmlDtd.java b/language/xml/extractor/src/main/java/com/alipay/codequery/dal/mybatis/domain/XmlDtd.java new file mode 100644 index 00000000..d265721b --- /dev/null +++ b/language/xml/extractor/src/main/java/com/alipay/codequery/dal/mybatis/domain/XmlDtd.java @@ -0,0 +1,102 @@ +package com.alipay.codequery.dal.mybatis.domain; + +import javax.persistence.*; + +@Table(name = "xml_dtd") +public class XmlDtd { + @Id + private Integer id; + + private String root; + + @Column(name = "public_id") + private String publicId; + + @Column(name = "system_id") + private String systemId; + + @Column(name = "location_id") + private Integer locationId; + + public XmlDtd(Integer id, String root, String publicId, String systemId, Integer locationId) { + this.id = id; + this.root = root; + this.publicId = publicId; + this.systemId = systemId; + this.locationId = locationId; + } + + public XmlDtd() { + super(); + } + + /** + * @return id + */ + public Integer getId() { + return id; + } + + /** + * @param id + */ + public void setId(Integer id) { + this.id = id; + } + + /** + * @return root + */ + public String getRoot() { + return 
root; + } + + /** + * @param root + */ + public void setRoot(String root) { + this.root = root == null ? null : root.trim(); + } + + /** + * @return public_id + */ + public String getPublicId() { + return publicId; + } + + /** + * @param publicId + */ + public void setPublicId(String publicId) { + this.publicId = publicId == null ? null : publicId.trim(); + } + + /** + * @return system_id + */ + public String getSystemId() { + return systemId; + } + + /** + * @param systemId + */ + public void setSystemId(String systemId) { + this.systemId = systemId == null ? null : systemId.trim(); + } + + /** + * @return location_id + */ + public Integer getLocationId() { + return locationId; + } + + /** + * @param locationId + */ + public void setLocationId(Integer locationId) { + this.locationId = locationId; + } +} \ No newline at end of file diff --git a/language/xml/extractor/src/main/java/com/alipay/codequery/dal/mybatis/domain/XmlElement.java b/language/xml/extractor/src/main/java/com/alipay/codequery/dal/mybatis/domain/XmlElement.java new file mode 100644 index 00000000..600e6acf --- /dev/null +++ b/language/xml/extractor/src/main/java/com/alipay/codequery/dal/mybatis/domain/XmlElement.java @@ -0,0 +1,85 @@ +package com.alipay.codequery.dal.mybatis.domain; + +import javax.persistence.*; + +@Table(name = "xml_element") +public class XmlElement { + @Id + private Integer id; + + @Column(name = "parent_id") + private Integer parentId; + + @Column(name = "index_order") + private Integer indexOrder; + + @Column(name = "location_id") + private Integer locationId; + + public XmlElement(Integer id, Integer parentId, Integer indexOrder, Integer locationId) { + this.id = id; + this.parentId = parentId; + this.indexOrder = indexOrder; + this.locationId = locationId; + } + + public XmlElement() { + super(); + } + + /** + * @return id + */ + public Integer getId() { + return id; + } + + /** + * @param id + */ + public void setId(Integer id) { + this.id = id; + } + + /** + * @return 
parent_id + */ + public Integer getParentId() { + return parentId; + } + + /** + * @param parentId + */ + public void setParentId(Integer parentId) { + this.parentId = parentId; + } + + /** + * @return index_order + */ + public Integer getIndexOrder() { + return indexOrder; + } + + /** + * @param indexOrder + */ + public void setIndexOrder(Integer indexOrder) { + this.indexOrder = indexOrder; + } + + /** + * @return location_id + */ + public Integer getLocationId() { + return locationId; + } + + /** + * @param locationId + */ + public void setLocationId(Integer locationId) { + this.locationId = locationId; + } +} \ No newline at end of file diff --git a/language/xml/extractor/src/main/java/com/alipay/codequery/dal/mybatis/domain/XmlElementName.java b/language/xml/extractor/src/main/java/com/alipay/codequery/dal/mybatis/domain/XmlElementName.java new file mode 100644 index 00000000..ecfca0bd --- /dev/null +++ b/language/xml/extractor/src/main/java/com/alipay/codequery/dal/mybatis/domain/XmlElementName.java @@ -0,0 +1,84 @@ +package com.alipay.codequery.dal.mybatis.domain; + +import javax.persistence.*; + +@Table(name = "xml_element_name") +public class XmlElementName { + @Id + private Integer id; + + private String name; + + @Column(name = "parent_id") + private Integer parentId; + + @Column(name = "location_id") + private Integer locationId; + + public XmlElementName(Integer id, String name, Integer parentId, Integer locationId) { + this.id = id; + this.name = name; + this.parentId = parentId; + this.locationId = locationId; + } + + public XmlElementName() { + super(); + } + + /** + * @return id + */ + public Integer getId() { + return id; + } + + /** + * @param id + */ + public void setId(Integer id) { + this.id = id; + } + + /** + * @return name + */ + public String getName() { + return name; + } + + /** + * @param name + */ + public void setName(String name) { + this.name = name == null ? 
null : name.trim(); + } + + /** + * @return parent_id + */ + public Integer getParentId() { + return parentId; + } + + /** + * @param parentId + */ + public void setParentId(Integer parentId) { + this.parentId = parentId; + } + + /** + * @return location_id + */ + public Integer getLocationId() { + return locationId; + } + + /** + * @param locationId + */ + public void setLocationId(Integer locationId) { + this.locationId = locationId; + } +} \ No newline at end of file diff --git a/language/xml/extractor/src/main/java/com/alipay/codequery/dal/mybatis/domain/XmlElementPrefix.java b/language/xml/extractor/src/main/java/com/alipay/codequery/dal/mybatis/domain/XmlElementPrefix.java new file mode 100644 index 00000000..715401ce --- /dev/null +++ b/language/xml/extractor/src/main/java/com/alipay/codequery/dal/mybatis/domain/XmlElementPrefix.java @@ -0,0 +1,84 @@ +package com.alipay.codequery.dal.mybatis.domain; + +import javax.persistence.*; + +@Table(name = "xml_element_prefix") +public class XmlElementPrefix { + @Id + private Integer id; + + private String name; + + @Column(name = "parent_id") + private Integer parentId; + + @Column(name = "location_id") + private Integer locationId; + + public XmlElementPrefix(Integer id, String name, Integer parentId, Integer locationId) { + this.id = id; + this.name = name; + this.parentId = parentId; + this.locationId = locationId; + } + + public XmlElementPrefix() { + super(); + } + + /** + * @return id + */ + public Integer getId() { + return id; + } + + /** + * @param id + */ + public void setId(Integer id) { + this.id = id; + } + + /** + * @return name + */ + public String getName() { + return name; + } + + /** + * @param name + */ + public void setName(String name) { + this.name = name == null ? 
null : name.trim(); + } + + /** + * @return parent_id + */ + public Integer getParentId() { + return parentId; + } + + /** + * @param parentId + */ + public void setParentId(Integer parentId) { + this.parentId = parentId; + } + + /** + * @return location_id + */ + public Integer getLocationId() { + return locationId; + } + + /** + * @param locationId + */ + public void setLocationId(Integer locationId) { + this.locationId = locationId; + } +} \ No newline at end of file diff --git a/language/xml/extractor/src/main/java/com/alipay/codequery/dal/mybatis/domain/XmlEncoding.java b/language/xml/extractor/src/main/java/com/alipay/codequery/dal/mybatis/domain/XmlEncoding.java new file mode 100644 index 00000000..0261dab4 --- /dev/null +++ b/language/xml/extractor/src/main/java/com/alipay/codequery/dal/mybatis/domain/XmlEncoding.java @@ -0,0 +1,48 @@ +package com.alipay.codequery.dal.mybatis.domain; + +import javax.persistence.*; + +@Table(name = "xml_encoding") +public class XmlEncoding { + @Id + private Integer id; + + private String encoding; + + public XmlEncoding(Integer id, String encoding) { + this.id = id; + this.encoding = encoding; + } + + public XmlEncoding() { + super(); + } + + /** + * @return id + */ + public Integer getId() { + return id; + } + + /** + * @param id + */ + public void setId(Integer id) { + this.id = id; + } + + /** + * @return encoding + */ + public String getEncoding() { + return encoding; + } + + /** + * @param encoding + */ + public void setEncoding(String encoding) { + this.encoding = encoding == null ? 
null : encoding.trim(); + } +} \ No newline at end of file diff --git a/language/xml/extractor/src/main/java/com/alipay/codequery/dal/mybatis/domain/XmlFile.java b/language/xml/extractor/src/main/java/com/alipay/codequery/dal/mybatis/domain/XmlFile.java new file mode 100644 index 00000000..b82d4f15 --- /dev/null +++ b/language/xml/extractor/src/main/java/com/alipay/codequery/dal/mybatis/domain/XmlFile.java @@ -0,0 +1,67 @@ +package com.alipay.codequery.dal.mybatis.domain; + +import javax.persistence.*; + +@Table(name = "xml_file") +public class XmlFile { + @Id + private Integer id; + + @Column(name = "file_name") + private String fileName; + + @Column(name = "relative_path") + private String relativePath; + + public XmlFile(Integer id, String fileName, String relativePath) { + this.id = id; + this.fileName = fileName; + this.relativePath = relativePath; + } + + public XmlFile() { + super(); + } + + /** + * @return id + */ + public Integer getId() { + return id; + } + + /** + * @param id + */ + public void setId(Integer id) { + this.id = id; + } + + /** + * @return file_name + */ + public String getFileName() { + return fileName; + } + + /** + * @param fileName + */ + public void setFileName(String fileName) { + this.fileName = fileName == null ? null : fileName.trim(); + } + + /** + * @return relative_path + */ + public String getRelativePath() { + return relativePath; + } + + /** + * @param relativePath + */ + public void setRelativePath(String relativePath) { + this.relativePath = relativePath == null ? 
null : relativePath.trim(); + } +} \ No newline at end of file diff --git a/language/xml/extractor/src/main/java/com/alipay/codequery/dal/mybatis/domain/XmlHasNamespace.java b/language/xml/extractor/src/main/java/com/alipay/codequery/dal/mybatis/domain/XmlHasNamespace.java new file mode 100644 index 00000000..5e1ce548 --- /dev/null +++ b/language/xml/extractor/src/main/java/com/alipay/codequery/dal/mybatis/domain/XmlHasNamespace.java @@ -0,0 +1,85 @@ +package com.alipay.codequery.dal.mybatis.domain; + +import javax.persistence.*; + +@Table(name = "xml_has_namespace") +public class XmlHasNamespace { + @Id + private Integer id; + + @Column(name = "element_id") + private Integer elementId; + + @Column(name = "namespace_id") + private Integer namespaceId; + + @Column(name = "container_id") + private Integer containerId; + + public XmlHasNamespace(Integer id, Integer elementId, Integer namespaceId, Integer containerId) { + this.id = id; + this.elementId = elementId; + this.namespaceId = namespaceId; + this.containerId = containerId; + } + + public XmlHasNamespace() { + super(); + } + + /** + * @return id + */ + public Integer getId() { + return id; + } + + /** + * @param id + */ + public void setId(Integer id) { + this.id = id; + } + + /** + * @return element_id + */ + public Integer getElementId() { + return elementId; + } + + /** + * @param elementId + */ + public void setElementId(Integer elementId) { + this.elementId = elementId; + } + + /** + * @return namespace_id + */ + public Integer getNamespaceId() { + return namespaceId; + } + + /** + * @param namespaceId + */ + public void setNamespaceId(Integer namespaceId) { + this.namespaceId = namespaceId; + } + + /** + * @return container_id + */ + public Integer getContainerId() { + return containerId; + } + + /** + * @param containerId + */ + public void setContainerId(Integer containerId) { + this.containerId = containerId; + } +} \ No newline at end of file diff --git 
a/language/xml/extractor/src/main/java/com/alipay/codequery/dal/mybatis/domain/XmlLocation.java b/language/xml/extractor/src/main/java/com/alipay/codequery/dal/mybatis/domain/XmlLocation.java new file mode 100644 index 00000000..4cc66307 --- /dev/null +++ b/language/xml/extractor/src/main/java/com/alipay/codequery/dal/mybatis/domain/XmlLocation.java @@ -0,0 +1,121 @@ +package com.alipay.codequery.dal.mybatis.domain; + +import javax.persistence.*; + +@Table(name = "xml_location") +public class XmlLocation { + @Id + private Integer id; + + @Column(name = "start_line_number") + private Integer startLineNumber; + + @Column(name = "start_column_number") + private Integer startColumnNumber; + + @Column(name = "end_line_number") + private Integer endLineNumber; + + @Column(name = "end_column_number") + private Integer endColumnNumber; + + @Column(name = "file_id") + private Integer fileId; + + public XmlLocation(Integer id, Integer startLineNumber, Integer startColumnNumber, Integer endLineNumber, Integer endColumnNumber, Integer fileId) { + this.id = id; + this.startLineNumber = startLineNumber; + this.startColumnNumber = startColumnNumber; + this.endLineNumber = endLineNumber; + this.endColumnNumber = endColumnNumber; + this.fileId = fileId; + } + + public XmlLocation() { + super(); + } + + /** + * @return id + */ + public Integer getId() { + return id; + } + + /** + * @param id + */ + public void setId(Integer id) { + this.id = id; + } + + /** + * @return start_line_number + */ + public Integer getStartLineNumber() { + return startLineNumber; + } + + /** + * @param startLineNumber + */ + public void setStartLineNumber(Integer startLineNumber) { + this.startLineNumber = startLineNumber; + } + + /** + * @return start_column_number + */ + public Integer getStartColumnNumber() { + return startColumnNumber; + } + + /** + * @param startColumnNumber + */ + public void setStartColumnNumber(Integer startColumnNumber) { + this.startColumnNumber = startColumnNumber; + } + + /** + 
* @return end_line_number + */ + public Integer getEndLineNumber() { + return endLineNumber; + } + + /** + * @param endLineNumber + */ + public void setEndLineNumber(Integer endLineNumber) { + this.endLineNumber = endLineNumber; + } + + /** + * @return end_column_number + */ + public Integer getEndColumnNumber() { + return endColumnNumber; + } + + /** + * @param endColumnNumber + */ + public void setEndColumnNumber(Integer endColumnNumber) { + this.endColumnNumber = endColumnNumber; + } + + /** + * @return file_id + */ + public Integer getFileId() { + return fileId; + } + + /** + * @param fileId + */ + public void setFileId(Integer fileId) { + this.fileId = fileId; + } +} \ No newline at end of file diff --git a/language/xml/extractor/src/main/java/com/alipay/codequery/dal/mybatis/domain/XmlNamespace.java b/language/xml/extractor/src/main/java/com/alipay/codequery/dal/mybatis/domain/XmlNamespace.java new file mode 100644 index 00000000..aaac7837 --- /dev/null +++ b/language/xml/extractor/src/main/java/com/alipay/codequery/dal/mybatis/domain/XmlNamespace.java @@ -0,0 +1,84 @@ +package com.alipay.codequery.dal.mybatis.domain; + +import javax.persistence.*; + +@Table(name = "xml_namespace") +public class XmlNamespace { + @Id + private Integer id; + + @Column(name = "prefix_name") + private String prefixName; + + private String url; + + @Column(name = "location_id") + private Integer locationId; + + public XmlNamespace(Integer id, String prefixName, String url, Integer locationId) { + this.id = id; + this.prefixName = prefixName; + this.url = url; + this.locationId = locationId; + } + + public XmlNamespace() { + super(); + } + + /** + * @return id + */ + public Integer getId() { + return id; + } + + /** + * @param id + */ + public void setId(Integer id) { + this.id = id; + } + + /** + * @return prefix_name + */ + public String getPrefixName() { + return prefixName; + } + + /** + * @param prefixName + */ + public void setPrefixName(String prefixName) { + this.prefixName 
= prefixName == null ? null : prefixName.trim(); + } + + /** + * @return url + */ + public String getUrl() { + return url; + } + + /** + * @param url + */ + public void setUrl(String url) { + this.url = url == null ? null : url.trim(); + } + + /** + * @return location_id + */ + public Integer getLocationId() { + return locationId; + } + + /** + * @param locationId + */ + public void setLocationId(Integer locationId) { + this.locationId = locationId; + } +} \ No newline at end of file diff --git a/language/xml/extractor/src/main/java/com/alipay/codequery/dal/mybatis/mapper/XmlAttributeMapper.java b/language/xml/extractor/src/main/java/com/alipay/codequery/dal/mybatis/mapper/XmlAttributeMapper.java new file mode 100644 index 00000000..bc572b34 --- /dev/null +++ b/language/xml/extractor/src/main/java/com/alipay/codequery/dal/mybatis/mapper/XmlAttributeMapper.java @@ -0,0 +1,8 @@ +package com.alipay.codequery.dal.mybatis.mapper; + +import com.alipay.codequery.dal.mybatis.domain.XmlAttribute; +import org.apache.ibatis.annotations.Select; +import tk.mybatis.mapper.common.Mapper; + +public interface XmlAttributeMapper extends Mapper { +} \ No newline at end of file diff --git a/language/xml/extractor/src/main/java/com/alipay/codequery/dal/mybatis/mapper/XmlCharacterMapper.java b/language/xml/extractor/src/main/java/com/alipay/codequery/dal/mybatis/mapper/XmlCharacterMapper.java new file mode 100644 index 00000000..f4dabee2 --- /dev/null +++ b/language/xml/extractor/src/main/java/com/alipay/codequery/dal/mybatis/mapper/XmlCharacterMapper.java @@ -0,0 +1,8 @@ +package com.alipay.codequery.dal.mybatis.mapper; + +import com.alipay.codequery.dal.mybatis.domain.XmlCharacter; +import org.apache.ibatis.annotations.Select; +import tk.mybatis.mapper.common.Mapper; + +public interface XmlCharacterMapper extends Mapper { +} \ No newline at end of file diff --git a/language/xml/extractor/src/main/java/com/alipay/codequery/dal/mybatis/mapper/XmlCommentMapper.java 
b/language/xml/extractor/src/main/java/com/alipay/codequery/dal/mybatis/mapper/XmlCommentMapper.java new file mode 100644 index 00000000..c79bc889 --- /dev/null +++ b/language/xml/extractor/src/main/java/com/alipay/codequery/dal/mybatis/mapper/XmlCommentMapper.java @@ -0,0 +1,8 @@ +package com.alipay.codequery.dal.mybatis.mapper; + +import com.alipay.codequery.dal.mybatis.domain.XmlComment; +import org.apache.ibatis.annotations.Select; +import tk.mybatis.mapper.common.Mapper; + +public interface XmlCommentMapper extends Mapper { +} \ No newline at end of file diff --git a/language/xml/extractor/src/main/java/com/alipay/codequery/dal/mybatis/mapper/XmlDtdMapper.java b/language/xml/extractor/src/main/java/com/alipay/codequery/dal/mybatis/mapper/XmlDtdMapper.java new file mode 100644 index 00000000..6d64dd96 --- /dev/null +++ b/language/xml/extractor/src/main/java/com/alipay/codequery/dal/mybatis/mapper/XmlDtdMapper.java @@ -0,0 +1,8 @@ +package com.alipay.codequery.dal.mybatis.mapper; + +import com.alipay.codequery.dal.mybatis.domain.XmlDtd; +import org.apache.ibatis.annotations.Select; +import tk.mybatis.mapper.common.Mapper; + +public interface XmlDtdMapper extends Mapper { +} \ No newline at end of file diff --git a/language/xml/extractor/src/main/java/com/alipay/codequery/dal/mybatis/mapper/XmlElementMapper.java b/language/xml/extractor/src/main/java/com/alipay/codequery/dal/mybatis/mapper/XmlElementMapper.java new file mode 100644 index 00000000..71d9b210 --- /dev/null +++ b/language/xml/extractor/src/main/java/com/alipay/codequery/dal/mybatis/mapper/XmlElementMapper.java @@ -0,0 +1,8 @@ +package com.alipay.codequery.dal.mybatis.mapper; + +import com.alipay.codequery.dal.mybatis.domain.XmlElement; +import org.apache.ibatis.annotations.Select; +import tk.mybatis.mapper.common.Mapper; + +public interface XmlElementMapper extends Mapper { +} \ No newline at end of file diff --git 
a/language/xml/extractor/src/main/java/com/alipay/codequery/dal/mybatis/mapper/XmlElementNameMapper.java b/language/xml/extractor/src/main/java/com/alipay/codequery/dal/mybatis/mapper/XmlElementNameMapper.java new file mode 100644 index 00000000..63eae5d6 --- /dev/null +++ b/language/xml/extractor/src/main/java/com/alipay/codequery/dal/mybatis/mapper/XmlElementNameMapper.java @@ -0,0 +1,8 @@ +package com.alipay.codequery.dal.mybatis.mapper; + +import com.alipay.codequery.dal.mybatis.domain.XmlElementName; +import org.apache.ibatis.annotations.Select; +import tk.mybatis.mapper.common.Mapper; + +public interface XmlElementNameMapper extends Mapper { +} \ No newline at end of file diff --git a/language/xml/extractor/src/main/java/com/alipay/codequery/dal/mybatis/mapper/XmlElementPrefixMapper.java b/language/xml/extractor/src/main/java/com/alipay/codequery/dal/mybatis/mapper/XmlElementPrefixMapper.java new file mode 100644 index 00000000..2be46c5e --- /dev/null +++ b/language/xml/extractor/src/main/java/com/alipay/codequery/dal/mybatis/mapper/XmlElementPrefixMapper.java @@ -0,0 +1,8 @@ +package com.alipay.codequery.dal.mybatis.mapper; + +import com.alipay.codequery.dal.mybatis.domain.XmlElementPrefix; +import org.apache.ibatis.annotations.Select; +import tk.mybatis.mapper.common.Mapper; + +public interface XmlElementPrefixMapper extends Mapper { +} \ No newline at end of file diff --git a/language/xml/extractor/src/main/java/com/alipay/codequery/dal/mybatis/mapper/XmlEncodingMapper.java b/language/xml/extractor/src/main/java/com/alipay/codequery/dal/mybatis/mapper/XmlEncodingMapper.java new file mode 100644 index 00000000..606a1fc1 --- /dev/null +++ b/language/xml/extractor/src/main/java/com/alipay/codequery/dal/mybatis/mapper/XmlEncodingMapper.java @@ -0,0 +1,8 @@ +package com.alipay.codequery.dal.mybatis.mapper; + +import com.alipay.codequery.dal.mybatis.domain.XmlEncoding; +import org.apache.ibatis.annotations.Select; +import tk.mybatis.mapper.common.Mapper; + 
+public interface XmlEncodingMapper extends Mapper { +} \ No newline at end of file diff --git a/language/xml/extractor/src/main/java/com/alipay/codequery/dal/mybatis/mapper/XmlFileMapper.java b/language/xml/extractor/src/main/java/com/alipay/codequery/dal/mybatis/mapper/XmlFileMapper.java new file mode 100644 index 00000000..fcb046fe --- /dev/null +++ b/language/xml/extractor/src/main/java/com/alipay/codequery/dal/mybatis/mapper/XmlFileMapper.java @@ -0,0 +1,8 @@ +package com.alipay.codequery.dal.mybatis.mapper; + +import com.alipay.codequery.dal.mybatis.domain.XmlFile; +import org.apache.ibatis.annotations.Select; +import tk.mybatis.mapper.common.Mapper; + +public interface XmlFileMapper extends Mapper { +} \ No newline at end of file diff --git a/language/xml/extractor/src/main/java/com/alipay/codequery/dal/mybatis/mapper/XmlHasNamespaceMapper.java b/language/xml/extractor/src/main/java/com/alipay/codequery/dal/mybatis/mapper/XmlHasNamespaceMapper.java new file mode 100644 index 00000000..764bb294 --- /dev/null +++ b/language/xml/extractor/src/main/java/com/alipay/codequery/dal/mybatis/mapper/XmlHasNamespaceMapper.java @@ -0,0 +1,8 @@ +package com.alipay.codequery.dal.mybatis.mapper; + +import com.alipay.codequery.dal.mybatis.domain.XmlHasNamespace; +import org.apache.ibatis.annotations.Select; +import tk.mybatis.mapper.common.Mapper; + +public interface XmlHasNamespaceMapper extends Mapper { +} \ No newline at end of file diff --git a/language/xml/extractor/src/main/java/com/alipay/codequery/dal/mybatis/mapper/XmlLocationMapper.java b/language/xml/extractor/src/main/java/com/alipay/codequery/dal/mybatis/mapper/XmlLocationMapper.java new file mode 100644 index 00000000..c1a02b3f --- /dev/null +++ b/language/xml/extractor/src/main/java/com/alipay/codequery/dal/mybatis/mapper/XmlLocationMapper.java @@ -0,0 +1,8 @@ +package com.alipay.codequery.dal.mybatis.mapper; + +import com.alipay.codequery.dal.mybatis.domain.XmlLocation; +import 
org.apache.ibatis.annotations.Select; +import tk.mybatis.mapper.common.Mapper; + +public interface XmlLocationMapper extends Mapper { +} \ No newline at end of file diff --git a/language/xml/extractor/src/main/java/com/alipay/codequery/dal/mybatis/mapper/XmlNamespaceMapper.java b/language/xml/extractor/src/main/java/com/alipay/codequery/dal/mybatis/mapper/XmlNamespaceMapper.java new file mode 100644 index 00000000..d200678a --- /dev/null +++ b/language/xml/extractor/src/main/java/com/alipay/codequery/dal/mybatis/mapper/XmlNamespaceMapper.java @@ -0,0 +1,8 @@ +package com.alipay.codequery.dal.mybatis.mapper; + +import com.alipay.codequery.dal.mybatis.domain.XmlNamespace; +import org.apache.ibatis.annotations.Select; +import tk.mybatis.mapper.common.Mapper; + +public interface XmlNamespaceMapper extends Mapper { +} \ No newline at end of file diff --git a/language/xml/extractor/src/main/java/com/alipay/codequery/model/XmlAttributeModel.java b/language/xml/extractor/src/main/java/com/alipay/codequery/model/XmlAttributeModel.java new file mode 100644 index 00000000..de1bd150 --- /dev/null +++ b/language/xml/extractor/src/main/java/com/alipay/codequery/model/XmlAttributeModel.java @@ -0,0 +1,34 @@ + +package com.alipay.codequery.model; + +import com.alipay.codequery.dal.mybatis.domain.XmlAttribute; +import lombok.AllArgsConstructor; +import lombok.Data; +import lombok.ToString; + + +@Data +@ToString +@AllArgsConstructor +public class XmlAttributeModel extends XmlLocatable { + private int id; + private int elementId; + private String name; + private String value; + private int index; + private int fileId; + + private XmlLocationModel location; + + public XmlAttribute extractAttr() { + XmlAttribute attribute = new XmlAttribute(); + attribute.setElementId(this.elementId); + attribute.setId(this.id); + attribute.setIndexOrder(this.index); + attribute.setLocationId(getLocation().getId()); + attribute.setName(this.name); + attribute.setValue(this.value); + return attribute; + 
} + +} \ No newline at end of file diff --git a/language/xml/extractor/src/main/java/com/alipay/codequery/model/XmlCharacterModel.java b/language/xml/extractor/src/main/java/com/alipay/codequery/model/XmlCharacterModel.java new file mode 100644 index 00000000..4f11e618 --- /dev/null +++ b/language/xml/extractor/src/main/java/com/alipay/codequery/model/XmlCharacterModel.java @@ -0,0 +1,27 @@ + +package com.alipay.codequery.model; + +import com.alipay.codequery.dal.mybatis.domain.XmlCharacter; +import lombok.AllArgsConstructor; +import lombok.Data; +import lombok.ToString; + + +@Data +@ToString +@AllArgsConstructor +public class XmlCharacterModel extends XmlLocatable { + private int id; + private String text; + private int parentId; + private int index; + private int isCDATA; + private int fileId; + private XmlLocationModel location; + + public XmlCharacter extractCharacter() { + return new XmlCharacter(this.id, this.text, this.parentId, this.index, this.isCDATA, getLocation().getId()); + + } + +} \ No newline at end of file diff --git a/language/xml/extractor/src/main/java/com/alipay/codequery/model/XmlCommentModel.java b/language/xml/extractor/src/main/java/com/alipay/codequery/model/XmlCommentModel.java new file mode 100644 index 00000000..8a297286 --- /dev/null +++ b/language/xml/extractor/src/main/java/com/alipay/codequery/model/XmlCommentModel.java @@ -0,0 +1,24 @@ + +package com.alipay.codequery.model; + +import com.alipay.codequery.dal.mybatis.domain.XmlComment; +import lombok.AllArgsConstructor; +import lombok.Data; +import lombok.ToString; + + +@Data +@AllArgsConstructor +@ToString +public class XmlCommentModel extends XmlLocatable { + private int id; + private String text; + private int parentId; + private int fileId; + private XmlLocationModel location; + + public XmlComment extractXmlComment() { + return new XmlComment(this.id, this.text, this.parentId, getLocation().getId()); + } + +} \ No newline at end of file diff --git 
a/language/xml/extractor/src/main/java/com/alipay/codequery/model/XmlDTDModel.java b/language/xml/extractor/src/main/java/com/alipay/codequery/model/XmlDTDModel.java new file mode 100644 index 00000000..2e4d7f78 --- /dev/null +++ b/language/xml/extractor/src/main/java/com/alipay/codequery/model/XmlDTDModel.java @@ -0,0 +1,25 @@ + +package com.alipay.codequery.model; + +import com.alipay.codequery.dal.mybatis.domain.XmlDtd; +import lombok.AllArgsConstructor; +import lombok.Data; +import lombok.ToString; + + +@ToString +@Data +@AllArgsConstructor +public class XmlDTDModel extends XmlLocatable { + private int id; + private String root; + private String publicId; + private String systemId; + + private XmlLocationModel location; + + public XmlDtd extractXmlDtd() { + return new XmlDtd(this.id, this.root, this.publicId, this.systemId, getLocation().getId()); + } + +} \ No newline at end of file diff --git a/language/xml/extractor/src/main/java/com/alipay/codequery/model/XmlDeclarationModel.java b/language/xml/extractor/src/main/java/com/alipay/codequery/model/XmlDeclarationModel.java new file mode 100644 index 00000000..fd483cf9 --- /dev/null +++ b/language/xml/extractor/src/main/java/com/alipay/codequery/model/XmlDeclarationModel.java @@ -0,0 +1,24 @@ + +package com.alipay.codequery.model; + +import com.alipay.codequery.dal.mybatis.domain.XmlEncoding; +import lombok.AllArgsConstructor; +import lombok.Data; +import lombok.ToString; + + +@ToString +@AllArgsConstructor +@Data +public class XmlDeclarationModel extends XmlLocatable { + + private int id; + private String version; + private String encoding; + private XmlLocationModel location; + + public XmlEncoding extraceXmlEncoding() { + return new XmlEncoding(this.id, this.encoding); + } + +} \ No newline at end of file diff --git a/language/xml/extractor/src/main/java/com/alipay/codequery/model/XmlElementModel.java b/language/xml/extractor/src/main/java/com/alipay/codequery/model/XmlElementModel.java new file mode 100644 
index 00000000..16b4d95a --- /dev/null +++ b/language/xml/extractor/src/main/java/com/alipay/codequery/model/XmlElementModel.java @@ -0,0 +1,63 @@ + +package com.alipay.codequery.model; + +import com.alipay.codequery.dal.mybatis.domain.XmlElement; +import com.alipay.codequery.dal.mybatis.domain.XmlElementName; +import com.alipay.codequery.dal.mybatis.domain.XmlElementPrefix; +import com.alipay.codequery.util.IdGenerator; +import lombok.Data; +import lombok.NonNull; +import lombok.RequiredArgsConstructor; +import lombok.ToString; +import org.apache.commons.lang3.StringUtils; + +import java.util.List; + + +@RequiredArgsConstructor +@ToString +@Data +public class XmlElementModel extends XmlLocatable { + @NonNull + private Integer id; + @NonNull + private String prefix; + @NonNull + private String name; + @NonNull + private Integer parentId; + @NonNull + private Integer index; + @NonNull + private Integer fileId; + @NonNull + private XmlLocationModel location; + + @NonNull + private List attributes; + @NonNull + private List nsList; + + public XmlElement extractElement() { + return new XmlElement(this.id, this.parentId, this.index, getLocation().getId()); + } + + public XmlElementName extractElementName(){ + return new XmlElementName(IdGenerator.nextID(),this.name,this.id,getLocation().getId()); + } + + public XmlElementPrefix extractElementPrefix(){ + if(StringUtils.isNotEmpty(this.prefix)){ + return new XmlElementPrefix(IdGenerator.nextID(),this.prefix,this.id,getLocation().getId()); + }else{ + return null; + } + + } + + private int childIndexCount = 0; + + public int nextIndex() { + return childIndexCount++; + } +} \ No newline at end of file diff --git a/language/xml/extractor/src/main/java/com/alipay/codequery/model/XmlHasNamespace.java b/language/xml/extractor/src/main/java/com/alipay/codequery/model/XmlHasNamespace.java new file mode 100644 index 00000000..43a1ccc4 --- /dev/null +++ 
b/language/xml/extractor/src/main/java/com/alipay/codequery/model/XmlHasNamespace.java @@ -0,0 +1,22 @@ + +package com.alipay.codequery.model; + +import lombok.AllArgsConstructor; +import lombok.Data; +import lombok.ToString; + + +@Data +@AllArgsConstructor +@ToString +public class XmlHasNamespace { + private int id; + private int elementId; + private int namespaceId; + private int containerId; + + + public XmlHasNamespace extractXmlHasNameSpace() { + return new XmlHasNamespace(this.id, this.elementId, this.namespaceId, this.containerId); + } +} diff --git a/language/xml/extractor/src/main/java/com/alipay/codequery/model/XmlLocatable.java b/language/xml/extractor/src/main/java/com/alipay/codequery/model/XmlLocatable.java new file mode 100644 index 00000000..28f22357 --- /dev/null +++ b/language/xml/extractor/src/main/java/com/alipay/codequery/model/XmlLocatable.java @@ -0,0 +1,13 @@ + +package com.alipay.codequery.model; +import com.alipay.codequery.dal.mybatis.domain.XmlLocation; + + +public abstract class XmlLocatable { + public abstract XmlLocationModel getLocation(); + + public XmlLocation extractLocation() { + return getLocation().extractLocation(); + } + +} \ No newline at end of file diff --git a/language/xml/extractor/src/main/java/com/alipay/codequery/model/XmlLocationModel.java b/language/xml/extractor/src/main/java/com/alipay/codequery/model/XmlLocationModel.java new file mode 100644 index 00000000..c2759893 --- /dev/null +++ b/language/xml/extractor/src/main/java/com/alipay/codequery/model/XmlLocationModel.java @@ -0,0 +1,34 @@ + +package com.alipay.codequery.model; + +import com.alipay.codequery.dal.mybatis.domain.XmlLocation; +import lombok.AllArgsConstructor; +import lombok.Data; +import lombok.NoArgsConstructor; +import lombok.ToString; + + +@ToString +@Data +@AllArgsConstructor +@NoArgsConstructor +public class XmlLocationModel { + private int id; + private int startLineNumber; + private int startColumnNumber; + private int endLineNumber; + private 
int endColumnNumber; + private int fileId; + + public XmlLocation extractLocation() { + XmlLocation location = new XmlLocation(); + location.setId(this.id); + location.setFileId(fileId); + location.setStartLineNumber(this.startLineNumber); + location.setStartColumnNumber(this.startColumnNumber); + location.setEndLineNumber(this.endLineNumber); + location.setEndColumnNumber(this.endColumnNumber); + return location; + } + +} \ No newline at end of file diff --git a/language/xml/extractor/src/main/java/com/alipay/codequery/model/XmlNamespaceModel.java b/language/xml/extractor/src/main/java/com/alipay/codequery/model/XmlNamespaceModel.java new file mode 100644 index 00000000..6f1093b8 --- /dev/null +++ b/language/xml/extractor/src/main/java/com/alipay/codequery/model/XmlNamespaceModel.java @@ -0,0 +1,28 @@ + +package com.alipay.codequery.model; + +import com.alipay.codequery.dal.mybatis.domain.XmlNamespace; +import lombok.AllArgsConstructor; +import lombok.Data; +import lombok.ToString; + + + +@Data +@AllArgsConstructor +@ToString +public class XmlNamespaceModel extends XmlLocatable { + private int id; + private String prefixName; + private String URI; + private int fileId; + private int elementId; + private XmlLocationModel location; + private XmlHasNamespace xmlHasNamespace; + + public XmlNamespace extractNs() { + return new XmlNamespace(this.id, this.prefixName, this.URI, getLocation().getId()); + } + + +} diff --git a/language/xml/extractor/src/main/java/com/alipay/codequery/stax/StaxCorefExtractor.java b/language/xml/extractor/src/main/java/com/alipay/codequery/stax/StaxCorefExtractor.java new file mode 100644 index 00000000..db99483a --- /dev/null +++ b/language/xml/extractor/src/main/java/com/alipay/codequery/stax/StaxCorefExtractor.java @@ -0,0 +1,222 @@ +package com.alipay.codequery.stax; + +import com.alipay.codequery.Extractor; +import com.alipay.codequery.dal.mybatis.domain.XmlFile; +import com.alipay.codequery.model.*; +import 
com.alipay.codequery.util.CorefStorage; + +import com.alipay.codequery.util.IdGenerator; +import com.ctc.wstx.api.WstxInputProperties; +import com.ctc.wstx.exc.WstxEOFException; +import com.ctc.wstx.sr.BasicStreamReader; +import com.ctc.wstx.stax.WstxInputFactory; +import com.sun.org.apache.xerces.internal.impl.Constants; +import lombok.SneakyThrows; +import org.apache.commons.lang3.StringUtils; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.codehaus.stax2.XMLStreamLocation2; + +import javax.xml.stream.*; +import java.io.*; +import java.util.ArrayList; +import java.util.List; +import java.util.Stack; + + +public class StaxCorefExtractor { + private static final Logger logger = LogManager.getLogger(StaxCorefExtractor.class); + private int idcount = 0; + private Stack elementStack = new Stack(); + private BasicStreamReader reader; + private CorefStorage corefStorage; + private File file; + private Integer fileId; + private String root; + + @SneakyThrows + public StaxCorefExtractor(File file, CorefStorage corefStorage, String rootPath) { + WstxInputFactory factory = new WstxInputFactory(); + factory.setProperty(Constants.ZEPHYR_PROPERTY_PREFIX + Constants.STAX_REPORT_CDATA_EVENT, Boolean.TRUE); + factory.setProperty(XMLInputFactory.SUPPORT_DTD, false); + if (file.getName().endsWith(Extractor.AXML_EXT)) { + configParserForAXML(factory); + } + this.corefStorage = corefStorage; + this.reader = (BasicStreamReader) factory.createXMLStreamReader(new FileReader(file)); + this.file = file; + this.root = rootPath; + reader.getInputElementStack().addNsBinding("a", "my"); + } + + public void configParserForAXML(WstxInputFactory factory) { + factory.getConfig().doSupportAliXMLExt(true); + factory.getConfig().doSupportNamespaces(false); + factory.getConfig().setInputParsingMode(WstxInputProperties.PARSING_MODE_DOCUMENTS); + factory.getConfig().doReplaceEntityRefs(false); + } + + public void parse() throws XMLStreamException { + 
this.fileId = IdGenerator.nextID(); + handleXmlFile(file); + boolean haveException = false; + while (reader.hasNext()) { + switch (reader.getEventType()) { + case XMLStreamConstants.START_ELEMENT: + handleStartElement(); + break; + case XMLStreamConstants.END_ELEMENT: + handleEndElement(); + break; + case XMLStreamConstants.SPACE: + break; + case XMLStreamConstants.CHARACTERS: + // Fixed: when parsing with unexpected character, like '&', add catch clause to catch the exception, so the parsing will continue. + try { + handleChar(false); + }catch (Exception e) { + logger.error("Extraction failed, error message:{} on file {}", e.getMessage(), file.getAbsolutePath()); + } + break; + case XMLStreamConstants.PROCESSING_INSTRUCTION: + break; + case XMLStreamConstants.CDATA: + handleChar(true); + break; + case XMLStreamConstants.COMMENT: + handleComment(); + break; + case XMLStreamConstants.ENTITY_REFERENCE: + break; + case XMLStreamConstants.START_DOCUMENT: + handleStartDocument(); + break; + case XMLStreamConstants.DTD: + hanldeDTD(); + break; + case XMLStreamConstants.NAMESPACE: + break; + case XMLStreamConstants.END_DOCUMENT: + break; + default: + throw new IllegalStateException("Unexpected value: " + reader.getEventType()); + } + try { + reader.next(); + } catch (Exception e) { + haveException = true; + logger.error("error message:{} on file {}", e.getMessage(), file.getAbsolutePath()); + if (e instanceof WstxEOFException) { + break; + } + } + } + corefStorage.store(); + if (haveException) { + throw new RuntimeException("Parse error occurred"); + } + } + + private void handleXmlFile(File file) { + XmlFile xmlFile = new XmlFile(); + xmlFile.setId(this.fileId); + xmlFile.setFileName(file.getName()); + xmlFile.setRelativePath(file.getAbsolutePath().substring(root.length() + 1)); + corefStorage.addFile(xmlFile); + } + + private void handleChar(boolean isCDATA) throws XMLStreamException { + int start = reader.getTextStart(); + int length = reader.getTextLength(); + String 
text = new String(reader.getTextCharacters(), start, length); + if (StringUtils.isNotBlank(text)) { + + XMLStreamLocation2 startLocation = reader.getStartLocation(); + XMLStreamLocation2 endLocation = reader.getEndLocation(); + XmlLocationModel location = new XmlLocationModel(IdGenerator.nextID(), startLocation.getLineNumber(), + startLocation.getLineNumber(), + endLocation.getLineNumber(), endLocation.getLineNumber(), this.fileId); + + XmlCharacterModel xmlCharacterModel = new XmlCharacterModel(IdGenerator.nextID(), text, this.elementStack.peek().getId(), -1, + isCDATA ? 1 : 0, -1, location); + corefStorage.addCharacter(xmlCharacterModel); + } + } + + private void handleComment() throws XMLStreamException { + String text = reader.getText(); + + int parentId = this.elementStack.isEmpty() ? -1 : this.elementStack.peek().getId(); + XmlLocationModel location = new XmlLocationModel(IdGenerator.nextID(), reader.getStartLocation().getLineNumber(), + reader.getStartLocation().getColumnNumber(), + reader.getEndLocation().getLineNumber(), reader.getEndLocation().getColumnNumber(), this.fileId); + XmlCommentModel comment = new XmlCommentModel(IdGenerator.nextID(), text, parentId, -1, location); + corefStorage.addComment(comment); + } + + private void hanldeDTD() throws XMLStreamException { + XmlLocationModel location = new XmlLocationModel(IdGenerator.nextID(), reader.getStartLocation().getLineNumber(), + reader.getStartLocation().getColumnNumber() + , reader.getEndLocation().getLineNumber(), reader.getEndLocation().getColumnNumber(), this.fileId); + XmlDTDModel dtd = new XmlDTDModel(IdGenerator.nextID(), reader.getDTDRootName(), reader.getDTDPublicId(), reader.getDTDSystemId(), + location); + corefStorage.addDTD(dtd); + } + + private void handleStartElement() { + XmlLocationModel location = new XmlLocationModel(); + XMLStreamLocation2 startLocation = ((BasicStreamReader) reader).getStartLocation(); + location.setId(IdGenerator.nextID()); + 
location.setStartLineNumber(startLocation.getLineNumber()); + String qName = reader.getLocalName(); + location.setStartColumnNumber(startLocation.getColumnNumber()); + String prefix = reader.getPrefix(); + int elementId = IdGenerator.nextID(); + int parentId = this.elementStack.isEmpty() ? -1 : this.elementStack.peek().getId(); + int idx = this.elementStack.isEmpty() ? 0 : this.elementStack.peek().nextIndex(); + location.setFileId(this.fileId); + List nsList = new ArrayList<>(); + for (int i = 0; i < reader.getNamespaceCount(); i++) { + String namespacePrefix = reader.getNamespacePrefix(i).isEmpty() ? "-1" : reader.getNamespacePrefix(i); + String namespaceURI = reader.getNamespaceURI(i); + int namespaceId = IdGenerator.nextID(); + XmlHasNamespace hasNamespace = new XmlHasNamespace(IdGenerator.nextID(), elementId, namespaceId, this.fileId); + XmlNamespaceModel ns = new XmlNamespaceModel(namespaceId, namespacePrefix, namespaceURI, -1, elementId, location, hasNamespace); + nsList.add(ns); + } + List list = new ArrayList<>(); + for (int i = 0; i < reader.getAttributeCount(); i++) { + String attrLocalName = reader.getAttributeLocalName(i); + String attrPrefix = reader.getAttributePrefix(i); + String attrName = attrPrefix.isEmpty() ? 
attrLocalName : attrPrefix.concat(":").concat(attrLocalName); + String attrValue = reader.getAttributeValue(i); + + XmlAttributeModel attribute = new XmlAttributeModel(IdGenerator.nextID(), elementId, attrName, attrValue, i, -1, location); + list.add(attribute); + } + XmlElementModel element = new XmlElementModel(elementId, prefix, qName, parentId, idx, -1, location, list, nsList); + this.elementStack.push(element); + } + + private void handleEndElement() throws XMLStreamException { + XmlElementModel elementModel = this.elementStack.pop(); + XMLStreamLocation2 endLocation = ((BasicStreamReader) reader).getEndLocation(); + elementModel.getLocation().setEndLineNumber(endLocation.getLineNumber()); + elementModel.getLocation().setEndColumnNumber(endLocation.getColumnNumber()); + corefStorage.addElement(elementModel); + + //XmlElementMapper mapper = sqlSession.getMapper(XmlElementMapper.class); + //mapper.insert(elementModel.extractElement()); + } + + private void handleStartDocument() throws XMLStreamException { + String version = reader.getVersion(); + String encoding = reader.getCharacterEncodingScheme(); + XmlLocationModel location = new XmlLocationModel(IdGenerator.nextID(), reader.getStartLocation().getLineNumber(), + reader.getStartLocation().getColumnNumber() + , reader.getEndLocation().getLineNumber(), reader.getEndLocation().getColumnNumber(), this.fileId); + XmlDeclarationModel dtd = new XmlDeclarationModel(IdGenerator.nextID(), version, encoding, location); + corefStorage.addXmlEncoding(dtd); + + } +} diff --git a/language/xml/extractor/src/main/java/com/alipay/codequery/util/CorefStorage.java b/language/xml/extractor/src/main/java/com/alipay/codequery/util/CorefStorage.java new file mode 100644 index 00000000..51bbef89 --- /dev/null +++ b/language/xml/extractor/src/main/java/com/alipay/codequery/util/CorefStorage.java @@ -0,0 +1,142 @@ +package com.alipay.codequery.util; + +import com.alipay.codequery.dal.mybatis.domain.*; +import 
com.alipay.codequery.dal.mybatis.mapper.*; +import com.alipay.codequery.model.*; +import com.alipay.codequery.model.XmlNamespaceModel; +import org.apache.ibatis.datasource.DataSourceFactory; +import org.apache.ibatis.datasource.pooled.PooledDataSourceFactory; +import org.apache.ibatis.mapping.Environment; +import org.apache.ibatis.session.Configuration; +import org.apache.ibatis.session.SqlSession; +import org.apache.ibatis.session.SqlSessionFactory; +import org.apache.ibatis.session.SqlSessionFactoryBuilder; +import org.apache.ibatis.transaction.TransactionFactory; +import org.apache.ibatis.transaction.jdbc.JdbcTransactionFactory; +import tk.mybatis.mapper.common.BaseMapper; +import tk.mybatis.mapper.entity.Config; +import tk.mybatis.mapper.mapperhelper.MapperHelper; + +import javax.sql.DataSource; +import java.io.*; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.Properties; + + +public class CorefStorage { + + private final SqlSession session; + public String dbDir; + + public CorefStorage(String dbDir) throws IOException { + this.dbDir = dbDir.endsWith(File.separator) ? 
dbDir : dbDir + File.separator; + Properties properties = new Properties(); + properties.setProperty("driver", "org.sqlite.JDBC"); + copyDBFiles(dbDir); + + properties.setProperty("url", "jdbc:sqlite:" + this.dbDir + "coref_xml_src.db"); + properties.setProperty("username", ""); + properties.setProperty("password", ""); + DataSourceFactory factory = new PooledDataSourceFactory(); + DataSource dataSource = factory.getDataSource(); + factory.setProperties(properties); + TransactionFactory transactionFactory = new JdbcTransactionFactory(); + Environment environment = new Environment("development", transactionFactory, dataSource); + Configuration configuration = new Configuration(environment); + configuration.addMappers("com.alipay.codequery.dal.mybatis.mapper"); + MapperHelper mapperHelper = new MapperHelper(); + Config config = new Config(); + config.setIDENTITY("MYSQL"); + config.setEnableMethodAnnotation(true); + config.setNotEmpty(true); + config.setCheckExampleEntityClass(true); + config.setUseSimpleType(true); + config.setEnumAsSimpleType(true); + config.setWrapKeyword("`{0}`"); + mapperHelper.setConfig(config); + + SqlSessionFactory sqlSessionFactory = new SqlSessionFactoryBuilder().build(configuration); + session = sqlSessionFactory.openSession(); + mapperHelper.processConfiguration(session.getConfiguration()); + + } + + private void copyDBFiles(String dbDir) throws IOException { + Path dir = Files.createDirectories(Paths.get(dbDir)); + Path destPath = Paths.get(dir.toFile() + File.separator + "coref_xml_src.db"); + if (destPath.toFile().exists()) { + Files.delete(destPath); + } + File destFile = Files.createFile(destPath).toFile(); + InputStream in = this.getClass().getClassLoader().getResourceAsStream("coref_xml_src.db"); + + OutputStream os = new FileOutputStream(destFile); + int bytesRead = 0; + byte[] buffer = new byte[8192]; + while ((bytesRead = in.read(buffer, 0, 8192)) != -1) { + os.write(buffer, 0, bytesRead); + } + os.close(); + in.close(); + } + + 
public void addFile(XmlFile xmlFile) { // Inserts the given XmlFile through XmlFileMapper.
+        insert(XmlFileMapper.class, xmlFile);
+    }
+    /** Inserts an element with its location, name, optional prefix, attributes and namespaces. */
+    public void addElement(XmlElementModel elementModel) {
+        // Order as written: element, location, name, then the prefix only when it is non-null.
+        insert(XmlElementMapper.class, elementModel.extractElement());
+        insertLocation(elementModel.extractLocation());
+        insert(XmlElementNameMapper.class, elementModel.extractElementName());
+        if (elementModel.extractElementPrefix() != null) {
+            insert(XmlElementPrefixMapper.class, elementModel.extractElementPrefix());
+        }
+        // One insert per attribute returned by getAttributes().
+        for (XmlAttributeModel attribute : elementModel.getAttributes()) {
+            insert(XmlAttributeMapper.class, attribute.extractAttr());
+        }
+        // Each namespace is inserted together with the record extracted from its XmlHasNamespace.
+        for (XmlNamespaceModel xmlNamespaceModel : elementModel.getNsList()) {
+            insert(XmlNamespaceMapper.class, xmlNamespaceModel.extractNs());
+            insert(XmlHasNamespaceMapper.class, xmlNamespaceModel.getXmlHasNamespace().extractXmlHasNameSpace());
+        }
+
+    }
+    /** Inserts the comment extracted from the model, then its location. */
+    public void addComment(XmlCommentModel xmlCommentModel) {
+        insert(XmlCommentMapper.class, xmlCommentModel.extractXmlComment());
+        insertLocation(xmlCommentModel.extractLocation());
+    }
+    /** Inserts the record extracted from an XmlCharacterModel, then its location. */
+    public void addCharacter(XmlCharacterModel character) {
+        insert(XmlCharacterMapper.class, character.extractCharacter());
+        insertLocation(character.extractLocation());
+    }
+    /** Inserts the DTD record extracted from the model, then its location. */
+    public void addDTD(XmlDTDModel dtd) {
+        insert(XmlDtdMapper.class, dtd.extractXmlDtd());
+        insertLocation(dtd.extractLocation());
+    }
+    /** Inserts the encoding record via extraceXmlEncoding() (model API spelling); no location is inserted. */
+    public void addXmlEncoding(XmlDeclarationModel encoding) {
+        insert(XmlEncodingMapper.class, encoding.extraceXmlEncoding());
+
+    }
+    /** Looks up mapperClass on the session, casts the mapper to BaseMapper and calls insert(object). */
+    private void insert(Class mapperClass, Object object) {
+        BaseMapper mapper = (BaseMapper) session.getMapper(mapperClass);
+        mapper.insert(object);
+    }
+    /** Commits the session. NOTE(review): presumably what makes earlier inserts durable; confirm auto-commit is off. */
+    public void store() {
+        session.commit();
+    }
+    /** Inserts the given location through XmlLocationMapper. */
+    private void insertLocation(XmlLocation location) {
+        insert(XmlLocationMapper.class, location);
+    }
+
+}
diff --git a/language/xml/extractor/src/main/java/com/alipay/codequery/util/IdGenerator.java b/language/xml/extractor/src/main/java/com/alipay/codequery/util/IdGenerator.java
new file
mode 100644
index 00000000..68a12fe7
--- /dev/null
+++ b/language/xml/extractor/src/main/java/com/alipay/codequery/util/IdGenerator.java
@@ -0,0 +1,10 @@
+package com.alipay.codequery.util;
+
+/** Static source of sequential int ids, backed by a single static counter with no synchronization. */
+public class IdGenerator {
+    private static int idCount = 0;
+    /** Returns the current counter value and then increments it; the first call returns 0. */
+    public static int nextID() {
+        return idCount++;
+    }
+}
\ No newline at end of file
diff --git a/language/xml/extractor/src/main/java/com/alipay/codequery/util/LoggerUtil.java b/language/xml/extractor/src/main/java/com/alipay/codequery/util/LoggerUtil.java
new file mode 100644
index 00000000..4cfffdac
--- /dev/null
+++ b/language/xml/extractor/src/main/java/com/alipay/codequery/util/LoggerUtil.java
@@ -0,0 +1,37 @@
+package com.alipay.codequery.util;
+
+import org.apache.logging.log4j.Level;
+import org.apache.logging.log4j.core.appender.ConsoleAppender;
+import org.apache.logging.log4j.core.config.Configurator;
+import org.apache.logging.log4j.core.config.builder.api.AppenderComponentBuilder;
+import org.apache.logging.log4j.core.config.builder.api.ConfigurationBuilder;
+import org.apache.logging.log4j.core.config.builder.api.ConfigurationBuilderFactory;
+import org.apache.logging.log4j.core.config.builder.api.RootLoggerComponentBuilder;
+import org.apache.logging.log4j.core.config.builder.impl.BuiltConfiguration;
+/** Builds a log4j2 configuration in code (one console appender on the root logger) and applies it. */
+public class LoggerUtil {
+    public static void initLogger(Level level, String pattern) { // level: root + status level; pattern: layout pattern
+        ConfigurationBuilder builder = ConfigurationBuilderFactory.newConfigurationBuilder();
+        // The status level is set to the same level that is later given to the root logger.
+        builder.setStatusLevel(level);
+        // name the configuration being built
+        builder.setConfigurationName("DefaultLogger");
+
+        // create a console appender named "Console" that writes to SYSTEM_OUT
+        AppenderComponentBuilder appenderBuilder = builder.newAppender("Console", "CONSOLE")
+                .addAttribute("target", ConsoleAppender.Target.SYSTEM_OUT);
+        // attach a PatternLayout that uses the caller-supplied pattern
+        appenderBuilder.add(builder.newLayout("PatternLayout")
+                .addAttribute("pattern", pattern));
+        RootLoggerComponentBuilder rootLogger = builder.newRootLogger(level);
+        rootLogger.add(builder.newAppenderRef("Console"));
+        // Register the appender and root logger, then reconfigure log4j with the built configuration.
+        builder.add(appenderBuilder);
+        builder.add(rootLogger);
+        Configurator.reconfigure(builder.build());
+    }
+    /** Calls initLogger(logLevel, pattern) with the fixed pattern shown in the body. */
+    public static void initLogger(Level logLevel) {
+        initLogger(logLevel, "%d %p %c [%t] (%F:%L) %m%n");
+    }
+}
diff --git a/language/xml/extractor/src/main/resources/coref_xml_src.db b/language/xml/extractor/src/main/resources/coref_xml_src.db
new file mode 100644
index 00000000..7c268674
Binary files /dev/null and b/language/xml/extractor/src/main/resources/coref_xml_src.db differ
diff --git a/language/xml/extractor/src/main/resources/dbschema.sql b/language/xml/extractor/src/main/resources/dbschema.sql
new file mode 100644
index 00000000..1a24391e
--- /dev/null
+++ b/language/xml/extractor/src/main/resources/dbschema.sql
@@ -0,0 +1,78 @@
+CREATE TABLE location (
+    id INTEGER NOT NULL PRIMARY KEY ,
+    start_line_number integer(10),
+    start_column_number integer(10),
+    end_line_number integer(10),
+    end_column_number integer(10),
+    containerid integer(10) NOT NULL);
+CREATE TABLE xml_attribute (
+    id INTEGER NOT NULL PRIMARY KEY ,
+    element_id integer(10) NOT NULL,
+    name varchar(55),
+    value varchar(55),
+    index_order integer(10),
+    location_id integer(10) NOT NULL);
+CREATE TABLE xml_character (
+    id INTEGER NOT NULL PRIMARY KEY ,
+    text varchar(255),
+    parentid integer(10) NOT NULL,
+    index_order integer(10),
+    is_cdata integer(10),
+    location_id integer(10) NOT NULL);
+CREATE TABLE xml_comment (
+    id INTEGER NOT NULL PRIMARY KEY ,
+    text varchar(255),
+    parent_id integer(10) NOT NULL,
+    location_id integer(10) NOT NULL);
+CREATE TABLE xml_dtd (
+    id INTEGER NOT NULL PRIMARY KEY ,
+    root varchar(55),
+    public_id varchar(55),
+    system_id varchar(55),
+    location_id integer(10) NOT NULL);
+CREATE TABLE xml_element (
+    id INTEGER NOT NULL PRIMARY KEY ,
+    name blob,
+    parent_id integer(10) NOT NULL,
+    index_order integer(10),
+    location_id integer(10) NOT NULL);
+CREATE TABLE xml_encoding (
+    id integer(10)
NOT NULL PRIMARY KEY, + encoding varchar(50) + ); +CREATE TABLE xml_has_namespace ( + id INTEGER NOT NULL PRIMARY KEY, + element_id integer(10) NOT NULL, + namespace_id integer(10) NOT NULL, + container_id integer(10) NOT NULL + ); +CREATE TABLE xml_namespace ( + id INTEGER NOT NULL PRIMARY KEY , + prefix_name varchar(55), + url varchar(55), + location_id integer(10) NOT NULL); + +CREATE TABLE Container ( + id INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT, + fileid integer(10), + folderid integer(10), + programid integer(10) NOT NULL); +CREATE TABLE "File" ( + id INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT, + abspath varchar(255), + relpath varchar(255), + extension varchar(40), + fileidentifier varchar(50), + Folderid integer(10) NOT NULL, + Containerid integer(10) NOT NULL); +CREATE TABLE Folder ( + id INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT, + abspath varchar(255), + folderidentifier varchar(50)); +CREATE TABLE Folder_Container ( + Folderid integer(10) NOT NULL, + Containerid integer(10) NOT NULL, + PRIMARY KEY (Folderid, + Containerid), + FOREIGN KEY(Containerid) REFERENCES Container(id), + FOREIGN KEY(Folderid) REFERENCES Folder(id)); \ No newline at end of file diff --git a/language/xml/extractor/src/main/resources/generatorConfig.xml b/language/xml/extractor/src/main/resources/generatorConfig.xml new file mode 100644 index 00000000..634a1914 --- /dev/null +++ b/language/xml/extractor/src/main/resources/generatorConfig.xml @@ -0,0 +1,71 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+
+
+
+
+
+
+
+
+
+
+ + + \ No newline at end of file