diff --git a/.gitignore b/.gitignore
index cb67ab2..2e90d6f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,12 +1,12 @@
# Models
-models/*
+**/models/*
# Datasets
-java_files/*
-text_arff/*
+**/java_files/*
+**/text_arff/*
# Weka files
-weka_files/*
+**/weka_files/*
# Keep readme files
!**/README.md
diff --git a/JavaExtractor/JPredict/target/JavaExtractor-0.0.1-SNAPSHOT.jar b/JavaExtractor/JPredict/target/JavaExtractor-0.0.1-SNAPSHOT.jar
deleted file mode 100644
index 2b6ac72..0000000
Binary files a/JavaExtractor/JPredict/target/JavaExtractor-0.0.1-SNAPSHOT.jar and /dev/null differ
diff --git a/JavaExtractor/JPredict/target/classes/JavaExtractor/App.class b/JavaExtractor/JPredict/target/classes/JavaExtractor/App.class
deleted file mode 100644
index b75a6b8..0000000
Binary files a/JavaExtractor/JPredict/target/classes/JavaExtractor/App.class and /dev/null differ
diff --git a/JavaExtractor/JPredict/target/classes/JavaExtractor/Common/CommandLineValues.class b/JavaExtractor/JPredict/target/classes/JavaExtractor/Common/CommandLineValues.class
deleted file mode 100644
index ef75349..0000000
Binary files a/JavaExtractor/JPredict/target/classes/JavaExtractor/Common/CommandLineValues.class and /dev/null differ
diff --git a/JavaExtractor/JPredict/target/classes/JavaExtractor/Common/Common$1.class b/JavaExtractor/JPredict/target/classes/JavaExtractor/Common/Common$1.class
deleted file mode 100644
index d96d9df..0000000
Binary files a/JavaExtractor/JPredict/target/classes/JavaExtractor/Common/Common$1.class and /dev/null differ
diff --git a/JavaExtractor/JPredict/target/classes/JavaExtractor/Common/Common$2.class b/JavaExtractor/JPredict/target/classes/JavaExtractor/Common/Common$2.class
deleted file mode 100644
index 7c0940c..0000000
Binary files a/JavaExtractor/JPredict/target/classes/JavaExtractor/Common/Common$2.class and /dev/null differ
diff --git a/JavaExtractor/JPredict/target/classes/JavaExtractor/Common/Common$3.class b/JavaExtractor/JPredict/target/classes/JavaExtractor/Common/Common$3.class
deleted file mode 100644
index 7cddbd9..0000000
Binary files a/JavaExtractor/JPredict/target/classes/JavaExtractor/Common/Common$3.class and /dev/null differ
diff --git a/JavaExtractor/JPredict/target/classes/JavaExtractor/Common/Common.class b/JavaExtractor/JPredict/target/classes/JavaExtractor/Common/Common.class
deleted file mode 100644
index c61e9b6..0000000
Binary files a/JavaExtractor/JPredict/target/classes/JavaExtractor/Common/Common.class and /dev/null differ
diff --git a/JavaExtractor/JPredict/target/classes/JavaExtractor/Common/MethodContent.class b/JavaExtractor/JPredict/target/classes/JavaExtractor/Common/MethodContent.class
deleted file mode 100644
index 8c5a874..0000000
Binary files a/JavaExtractor/JPredict/target/classes/JavaExtractor/Common/MethodContent.class and /dev/null differ
diff --git a/JavaExtractor/JPredict/target/classes/JavaExtractor/ExtractFeaturesTask.class b/JavaExtractor/JPredict/target/classes/JavaExtractor/ExtractFeaturesTask.class
deleted file mode 100644
index 3468735..0000000
Binary files a/JavaExtractor/JPredict/target/classes/JavaExtractor/ExtractFeaturesTask.class and /dev/null differ
diff --git a/JavaExtractor/JPredict/target/classes/JavaExtractor/FeatureExtractor.class b/JavaExtractor/JPredict/target/classes/JavaExtractor/FeatureExtractor.class
deleted file mode 100644
index 7e6dbe6..0000000
Binary files a/JavaExtractor/JPredict/target/classes/JavaExtractor/FeatureExtractor.class and /dev/null differ
diff --git a/JavaExtractor/JPredict/target/classes/JavaExtractor/FeaturesEntities/ProgramFeatures.class b/JavaExtractor/JPredict/target/classes/JavaExtractor/FeaturesEntities/ProgramFeatures.class
deleted file mode 100644
index 026ea7a..0000000
Binary files a/JavaExtractor/JPredict/target/classes/JavaExtractor/FeaturesEntities/ProgramFeatures.class and /dev/null differ
diff --git a/JavaExtractor/JPredict/target/classes/JavaExtractor/FeaturesEntities/ProgramNode.class b/JavaExtractor/JPredict/target/classes/JavaExtractor/FeaturesEntities/ProgramNode.class
deleted file mode 100644
index 54e83de..0000000
Binary files a/JavaExtractor/JPredict/target/classes/JavaExtractor/FeaturesEntities/ProgramNode.class and /dev/null differ
diff --git a/JavaExtractor/JPredict/target/classes/JavaExtractor/FeaturesEntities/ProgramRelation.class b/JavaExtractor/JPredict/target/classes/JavaExtractor/FeaturesEntities/ProgramRelation.class
deleted file mode 100644
index 2eb40ba..0000000
Binary files a/JavaExtractor/JPredict/target/classes/JavaExtractor/FeaturesEntities/ProgramRelation.class and /dev/null differ
diff --git a/JavaExtractor/JPredict/target/classes/JavaExtractor/FeaturesEntities/Property.class b/JavaExtractor/JPredict/target/classes/JavaExtractor/FeaturesEntities/Property.class
deleted file mode 100644
index e7a1b6a..0000000
Binary files a/JavaExtractor/JPredict/target/classes/JavaExtractor/FeaturesEntities/Property.class and /dev/null differ
diff --git a/JavaExtractor/JPredict/target/classes/JavaExtractor/Visitors/FunctionVisitor.class b/JavaExtractor/JPredict/target/classes/JavaExtractor/Visitors/FunctionVisitor.class
deleted file mode 100644
index 70c6d53..0000000
Binary files a/JavaExtractor/JPredict/target/classes/JavaExtractor/Visitors/FunctionVisitor.class and /dev/null differ
diff --git a/JavaExtractor/JPredict/target/classes/JavaExtractor/Visitors/LeavesCollectorVisitor.class b/JavaExtractor/JPredict/target/classes/JavaExtractor/Visitors/LeavesCollectorVisitor.class
deleted file mode 100644
index ec51674..0000000
Binary files a/JavaExtractor/JPredict/target/classes/JavaExtractor/Visitors/LeavesCollectorVisitor.class and /dev/null differ
diff --git a/README.md b/README.md
index a7a2e56..7c46715 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# Obsucated code2vec: Improving Generalisation by Hiding Information
+# Obfuscated code2vec: Reducing Model Bias by Hiding Information
![Overall project view](img/overall.png)
@@ -11,18 +11,37 @@ All of the model-related code (`common.py`, `model.py`, `PathContextReader.py`)
All models/datasets are on the paper google drive folder
https://drive.google.com/drive/u/1/folders/1CXgSXKf292BTlryASui2kBvYvJSvFnWN
+## Requirements
+- Java 8+
+- Python 3
+
+## Usage - Obfuscator
+These steps should all be run from within the `java-obfuscator/` directory.
+1. Locate a folder of `.java` files (e.g., from the [code2seq](https://github.com/tech-srl/code2seq) repository)
+2. Set the input and output directories in `obfs-script.sh`, as well as the number of threads available on your machine. If you're running this on a particularly large folder (e.g., millions of files), you may need to increase `NUM_PARTITIONS` to 3 or 4; otherwise memory issues can occur, slowing the obfuscator to a near halt.
+3. Run `obfs-script.sh`, i.e. `$ source obfs-script.sh`
+
+This produces a new folder of obfuscated `.java` files that can be used to train a new obfuscated code2vec model (or any other model that learns from source code).
+
## Usage - Dataset Pipeline
![Dataset Pipeline View](img/pipeline.png)
+These steps will convert a dataset of `.java` files into a numerical form (`.arff` by default), which can then be used with any standard WEKA classifier.
+
+These steps should all be run from within the `pipeline/` directory of this repository.
To run the dataset pipeline and create class-level embeddings for a dataset of Java files:
+1. `cd pipeline`
+2. `pip install -r requirements.txt`
1. Download a `.java` dataset (from the datasets supplied or your own) and put in the `java_files/` directory
2. Download a code2vec model checkpoint and put the checkpoint folder in the `models/` directory
-3. Change the paths and definitions in `model_defs.py` and number of models in `create_datasets.sh` to match your setup
-4. Run `create_datasets.sh`. This will loop through each model and create class-level embeddings for the supplied datasets. The resulting datasets will be in `.arff` format in the `weka_files/` folder
+3. Change the paths and definitions in `model_defs.py` and number of models in `scripts/create_datasets.sh` to match your setup
+4. Run `create_datasets.sh` (`source scripts/create_datasets.sh`). This will loop through each model and create class-level embeddings for the supplied datasets. The resulting datasets will be in `.arff` format in the `weka_files/` folder.
+
+You can now perform class-level classification on the dataset using any off-the-shelf classifier.
### Config
-By default the pipeline will use the full range of values for each parameter, which creates a huge number of resulting `.arff` datasets (>1000). To reduce the number of these, remove (or comment out) some of the items in the arrays in `reduction_methods.py` and `selection_methods.py` (at the end of the file). Our experiments showed that the `SelectAll` selection method and `NoReduction` reduction method performed best in most cases so you may want to keep only these.
+By default the pipeline will use the full range of values for each parameter, which creates a huge number of resulting `.arff` datasets (>1000). To reduce the number of these, remove (or comment out) some of the items in the arrays in `reduction_methods.py` and `selection_methods.py` (at the end of the file). Our experiments showed that the `SelectAll` selection method and `NoReduction` reduction method performed best in most cases so you may want to just keep these.
## Datasets
@@ -52,6 +71,8 @@ The `.java` files are all [available for download](https://drive.google.com/driv
13 categories, 1062 instances
+This dataset was collected using the [github-scraper](https://github.com/basedrhys/github-scraper) Python tool, which makes it easy to download specific types of files from GitHub repos (`.java` files in this case).
+
[Google Drive Link](https://drive.google.com/open?id=1IC0Nxeew73p9yvfhKcKH-6mxW8nHGyfn)
[Embedding Visualisation](http://projector.tensorflow.org/?config=https://gist.githubusercontent.com/basedrhys/36fcd8653f2d759a8f1b03e56502a58e/raw/7d2ddef1c219d4fad7a49cc2c978d1ff4e25e5f1/author_config.json)
diff --git a/java-tool.jar b/java-tool.jar
deleted file mode 100644
index 0d58838..0000000
Binary files a/java-tool.jar and /dev/null differ
diff --git a/ClassPreprocessor.py b/pipeline/ClassPreprocessor.py
similarity index 100%
rename from ClassPreprocessor.py
rename to pipeline/ClassPreprocessor.py
diff --git a/JavaExtractor/JPredict/.classpath b/pipeline/JavaExtractor/JPredict/.classpath
old mode 100755
new mode 100644
similarity index 100%
rename from JavaExtractor/JPredict/.classpath
rename to pipeline/JavaExtractor/JPredict/.classpath
diff --git a/JavaExtractor/JPredict/.gitignore b/pipeline/JavaExtractor/JPredict/.gitignore
old mode 100755
new mode 100644
similarity index 100%
rename from JavaExtractor/JPredict/.gitignore
rename to pipeline/JavaExtractor/JPredict/.gitignore
diff --git a/JavaExtractor/JPredict/.project b/pipeline/JavaExtractor/JPredict/.project
old mode 100755
new mode 100644
similarity index 95%
rename from JavaExtractor/JPredict/.project
rename to pipeline/JavaExtractor/JPredict/.project
index fee6c60..9ca9d79
--- a/JavaExtractor/JPredict/.project
+++ b/pipeline/JavaExtractor/JPredict/.project
@@ -1,23 +1,23 @@
-
-
- JavaExtractor
-
-
-
-
-
- org.eclipse.jdt.core.javabuilder
-
-
-
-
- org.eclipse.m2e.core.maven2Builder
-
-
-
-
-
- org.eclipse.jdt.core.javanature
- org.eclipse.m2e.core.maven2Nature
-
-
+
+
+ JavaExtractor
+
+
+
+
+
+ org.eclipse.jdt.core.javabuilder
+
+
+
+
+ org.eclipse.m2e.core.maven2Builder
+
+
+
+
+
+ org.eclipse.jdt.core.javanature
+ org.eclipse.m2e.core.maven2Nature
+
+
diff --git a/JavaExtractor/JPredict/.settings/org.eclipse.core.resources.prefs b/pipeline/JavaExtractor/JPredict/.settings/org.eclipse.core.resources.prefs
old mode 100755
new mode 100644
similarity index 96%
rename from JavaExtractor/JPredict/.settings/org.eclipse.core.resources.prefs
rename to pipeline/JavaExtractor/JPredict/.settings/org.eclipse.core.resources.prefs
index 654c175..e9441bb
--- a/JavaExtractor/JPredict/.settings/org.eclipse.core.resources.prefs
+++ b/pipeline/JavaExtractor/JPredict/.settings/org.eclipse.core.resources.prefs
@@ -1,3 +1,3 @@
-eclipse.preferences.version=1
-encoding//src/main/java=UTF-8
-encoding/=UTF-8
+eclipse.preferences.version=1
+encoding//src/main/java=UTF-8
+encoding/=UTF-8
diff --git a/JavaExtractor/JPredict/.settings/org.eclipse.jdt.apt.core.prefs b/pipeline/JavaExtractor/JPredict/.settings/org.eclipse.jdt.apt.core.prefs
similarity index 100%
rename from JavaExtractor/JPredict/.settings/org.eclipse.jdt.apt.core.prefs
rename to pipeline/JavaExtractor/JPredict/.settings/org.eclipse.jdt.apt.core.prefs
diff --git a/JavaExtractor/JPredict/.settings/org.eclipse.jdt.core.prefs b/pipeline/JavaExtractor/JPredict/.settings/org.eclipse.jdt.core.prefs
old mode 100755
new mode 100644
similarity index 98%
rename from JavaExtractor/JPredict/.settings/org.eclipse.jdt.core.prefs
rename to pipeline/JavaExtractor/JPredict/.settings/org.eclipse.jdt.core.prefs
index c79b505..ea7a397
--- a/JavaExtractor/JPredict/.settings/org.eclipse.jdt.core.prefs
+++ b/pipeline/JavaExtractor/JPredict/.settings/org.eclipse.jdt.core.prefs
@@ -1,16 +1,16 @@
-eclipse.preferences.version=1
-org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
-org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8
-org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve
-org.eclipse.jdt.core.compiler.compliance=1.8
-org.eclipse.jdt.core.compiler.debug.lineNumber=generate
-org.eclipse.jdt.core.compiler.debug.localVariable=generate
-org.eclipse.jdt.core.compiler.debug.sourceFile=generate
-org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
-org.eclipse.jdt.core.compiler.problem.enablePreviewFeatures=disabled
-org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
-org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
-org.eclipse.jdt.core.compiler.problem.reportPreviewFeatures=ignore
-org.eclipse.jdt.core.compiler.processAnnotations=disabled
-org.eclipse.jdt.core.compiler.release=disabled
-org.eclipse.jdt.core.compiler.source=1.8
+eclipse.preferences.version=1
+org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
+org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8
+org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve
+org.eclipse.jdt.core.compiler.compliance=1.8
+org.eclipse.jdt.core.compiler.debug.lineNumber=generate
+org.eclipse.jdt.core.compiler.debug.localVariable=generate
+org.eclipse.jdt.core.compiler.debug.sourceFile=generate
+org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
+org.eclipse.jdt.core.compiler.problem.enablePreviewFeatures=disabled
+org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
+org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
+org.eclipse.jdt.core.compiler.problem.reportPreviewFeatures=ignore
+org.eclipse.jdt.core.compiler.processAnnotations=disabled
+org.eclipse.jdt.core.compiler.release=disabled
+org.eclipse.jdt.core.compiler.source=1.8
diff --git a/JavaExtractor/JPredict/.settings/org.eclipse.m2e.core.prefs b/pipeline/JavaExtractor/JPredict/.settings/org.eclipse.m2e.core.prefs
old mode 100755
new mode 100644
similarity index 95%
rename from JavaExtractor/JPredict/.settings/org.eclipse.m2e.core.prefs
rename to pipeline/JavaExtractor/JPredict/.settings/org.eclipse.m2e.core.prefs
index 14b697b..f897a7f
--- a/JavaExtractor/JPredict/.settings/org.eclipse.m2e.core.prefs
+++ b/pipeline/JavaExtractor/JPredict/.settings/org.eclipse.m2e.core.prefs
@@ -1,4 +1,4 @@
-activeProfiles=
-eclipse.preferences.version=1
-resolveWorkspaceProjects=true
-version=1
+activeProfiles=
+eclipse.preferences.version=1
+resolveWorkspaceProjects=true
+version=1
diff --git a/JavaExtractor/JPredict/pom.xml b/pipeline/JavaExtractor/JPredict/pom.xml
similarity index 96%
rename from JavaExtractor/JPredict/pom.xml
rename to pipeline/JavaExtractor/JPredict/pom.xml
index 6e3b71d..91af533 100644
--- a/JavaExtractor/JPredict/pom.xml
+++ b/pipeline/JavaExtractor/JPredict/pom.xml
@@ -1,75 +1,75 @@
-
-
- 4.0.0
- JavaExtractor
- JavaExtractor
- JPredict
- 0.0.1-SNAPSHOT
- http://maven.apache.org
-
-
-
- maven-compiler-plugin
- 3.2
-
-
- 1.8
-
- Test.java
-
-
-
-
- maven-shade-plugin
- 2.1
-
-
- package
-
- shade
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- com.github.javaparser
- javaparser-core
- 3.0.0-alpha.4
-
-
- commons-io
- commons-io
- 1.3.2
- compile
-
-
- com.fasterxml.jackson.core
- jackson-databind
- 2.9.10.1
-
-
- args4j
- args4j
- 2.33
-
-
- org.apache.commons
- commons-lang3
- 3.5
-
-
-
- UTF-8
-
-
-
+
+
+ 4.0.0
+ JavaExtractor
+ JavaExtractor
+ JPredict
+ 0.0.1-SNAPSHOT
+ http://maven.apache.org
+
+
+
+ maven-compiler-plugin
+ 3.2
+
+
+ 1.8
+
+ Test.java
+
+
+
+
+ maven-shade-plugin
+ 2.1
+
+
+ package
+
+ shade
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ com.github.javaparser
+ javaparser-core
+ 3.0.0-alpha.4
+
+
+ commons-io
+ commons-io
+ 1.3.2
+ compile
+
+
+ com.fasterxml.jackson.core
+ jackson-databind
+ 2.9.10.1
+
+
+ args4j
+ args4j
+ 2.33
+
+
+ org.apache.commons
+ commons-lang3
+ 3.5
+
+
+
+ UTF-8
+
+
+
diff --git a/JavaExtractor/JPredict/src/main/java/JavaExtractor/App.java b/pipeline/JavaExtractor/JPredict/src/main/java/JavaExtractor/App.java
old mode 100755
new mode 100644
similarity index 96%
rename from JavaExtractor/JPredict/src/main/java/JavaExtractor/App.java
rename to pipeline/JavaExtractor/JPredict/src/main/java/JavaExtractor/App.java
index 778680c..e96b75c
--- a/JavaExtractor/JPredict/src/main/java/JavaExtractor/App.java
+++ b/pipeline/JavaExtractor/JPredict/src/main/java/JavaExtractor/App.java
@@ -1,60 +1,60 @@
-package JavaExtractor;
-
-import java.io.IOException;
-import java.nio.file.Files;
-import java.nio.file.Paths;
-import java.util.LinkedList;
-import java.util.concurrent.Executors;
-import java.util.concurrent.ThreadPoolExecutor;
-
-import org.kohsuke.args4j.CmdLineException;
-
-import JavaExtractor.Common.CommandLineValues;
-import JavaExtractor.FeaturesEntities.ProgramRelation;
-
-public class App {
- private static CommandLineValues s_CommandLineValues;
-
- public static void main(String[] args) {
- try {
- s_CommandLineValues = new CommandLineValues(args);
- } catch (CmdLineException e) {
- e.printStackTrace();
- return;
- }
-
- if (s_CommandLineValues.NoHash) {
- ProgramRelation.setNoHash();
- }
-
- if (s_CommandLineValues.File != null) {
- ExtractFeaturesTask extractFeaturesTask = new ExtractFeaturesTask(s_CommandLineValues,
- s_CommandLineValues.File.toPath());
- extractFeaturesTask.processFile();
- } else if (s_CommandLineValues.Dir != null) {
- extractDir();
- }
- }
-
- private static void extractDir() {
- ThreadPoolExecutor executor = (ThreadPoolExecutor) Executors.newFixedThreadPool(s_CommandLineValues.NumThreads);
- LinkedList tasks = new LinkedList<>();
- try {
- Files.walk(Paths.get(s_CommandLineValues.Dir)).filter(Files::isRegularFile)
- .filter(p -> p.toString().toLowerCase().endsWith(".java")).forEach(f -> {
- ExtractFeaturesTask task = new ExtractFeaturesTask(s_CommandLineValues, f);
- tasks.add(task);
- });
- } catch (IOException e) {
- e.printStackTrace();
- return;
- }
- try {
- executor.invokeAll(tasks);
- } catch (InterruptedException e) {
- e.printStackTrace();
- } finally {
- executor.shutdown();
- }
- }
-}
+package JavaExtractor;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+import java.util.LinkedList;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ThreadPoolExecutor;
+
+import org.kohsuke.args4j.CmdLineException;
+
+import JavaExtractor.Common.CommandLineValues;
+import JavaExtractor.FeaturesEntities.ProgramRelation;
+
+public class App {
+ private static CommandLineValues s_CommandLineValues;
+
+ public static void main(String[] args) {
+ try {
+ s_CommandLineValues = new CommandLineValues(args);
+ } catch (CmdLineException e) {
+ e.printStackTrace();
+ return;
+ }
+
+ if (s_CommandLineValues.NoHash) {
+ ProgramRelation.setNoHash();
+ }
+
+ if (s_CommandLineValues.File != null) {
+ ExtractFeaturesTask extractFeaturesTask = new ExtractFeaturesTask(s_CommandLineValues,
+ s_CommandLineValues.File.toPath());
+ extractFeaturesTask.processFile();
+ } else if (s_CommandLineValues.Dir != null) {
+ extractDir();
+ }
+ }
+
+ private static void extractDir() {
+ ThreadPoolExecutor executor = (ThreadPoolExecutor) Executors.newFixedThreadPool(s_CommandLineValues.NumThreads);
+ LinkedList tasks = new LinkedList<>();
+ try {
+ Files.walk(Paths.get(s_CommandLineValues.Dir)).filter(Files::isRegularFile)
+ .filter(p -> p.toString().toLowerCase().endsWith(".java")).forEach(f -> {
+ ExtractFeaturesTask task = new ExtractFeaturesTask(s_CommandLineValues, f);
+ tasks.add(task);
+ });
+ } catch (IOException e) {
+ e.printStackTrace();
+ return;
+ }
+ try {
+ executor.invokeAll(tasks);
+ } catch (InterruptedException e) {
+ e.printStackTrace();
+ } finally {
+ executor.shutdown();
+ }
+ }
+}
diff --git a/JavaExtractor/JPredict/src/main/java/JavaExtractor/Common/CommandLineValues.java b/pipeline/JavaExtractor/JPredict/src/main/java/JavaExtractor/Common/CommandLineValues.java
old mode 100755
new mode 100644
similarity index 96%
rename from JavaExtractor/JPredict/src/main/java/JavaExtractor/Common/CommandLineValues.java
rename to pipeline/JavaExtractor/JPredict/src/main/java/JavaExtractor/Common/CommandLineValues.java
index 3cd3ff5..c5b3cdb
--- a/JavaExtractor/JPredict/src/main/java/JavaExtractor/Common/CommandLineValues.java
+++ b/pipeline/JavaExtractor/JPredict/src/main/java/JavaExtractor/Common/CommandLineValues.java
@@ -1,56 +1,56 @@
-package JavaExtractor.Common;
-
-import java.io.File;
-import org.kohsuke.args4j.CmdLineException;
-import org.kohsuke.args4j.CmdLineParser;
-import org.kohsuke.args4j.Option;
-
-/**
- * This class handles the programs arguments.
- */
-public class CommandLineValues {
- @Option(name = "--file", required = false)
- public File File = null;
-
- @Option(name = "--dir", required = false, forbids = "--file")
- public String Dir = null;
-
- @Option(name = "--max_path_length", required = true)
- public int MaxPathLength;
-
- @Option(name = "--max_path_width", required = true)
- public int MaxPathWidth;
-
- @Option(name = "--no_hash", required = false)
- public boolean NoHash = false;
-
- @Option(name = "--num_threads", required = false)
- public int NumThreads = 32;
-
- @Option(name = "--min_code_len", required = false)
- public int MinCodeLength = 1;
-
- @Option(name = "--max_code_len", required = false)
- public int MaxCodeLength = 10000;
-
- @Option(name = "--pretty_print", required = false)
- public boolean PrettyPrint = false;
-
- @Option(name = "--max_child_id", required = false)
- public int MaxChildId = Integer.MAX_VALUE;
-
- public CommandLineValues(String... args) throws CmdLineException {
- CmdLineParser parser = new CmdLineParser(this);
- try {
- parser.parseArgument(args);
- } catch (CmdLineException e) {
- System.err.println(e.getMessage());
- parser.printUsage(System.err);
- throw e;
- }
- }
-
- public CommandLineValues() {
-
- }
+package JavaExtractor.Common;
+
+import java.io.File;
+import org.kohsuke.args4j.CmdLineException;
+import org.kohsuke.args4j.CmdLineParser;
+import org.kohsuke.args4j.Option;
+
+/**
+ * This class handles the programs arguments.
+ */
+public class CommandLineValues {
+ @Option(name = "--file", required = false)
+ public File File = null;
+
+ @Option(name = "--dir", required = false, forbids = "--file")
+ public String Dir = null;
+
+ @Option(name = "--max_path_length", required = true)
+ public int MaxPathLength;
+
+ @Option(name = "--max_path_width", required = true)
+ public int MaxPathWidth;
+
+ @Option(name = "--no_hash", required = false)
+ public boolean NoHash = false;
+
+ @Option(name = "--num_threads", required = false)
+ public int NumThreads = 32;
+
+ @Option(name = "--min_code_len", required = false)
+ public int MinCodeLength = 1;
+
+ @Option(name = "--max_code_len", required = false)
+ public int MaxCodeLength = 10000;
+
+ @Option(name = "--pretty_print", required = false)
+ public boolean PrettyPrint = false;
+
+ @Option(name = "--max_child_id", required = false)
+ public int MaxChildId = Integer.MAX_VALUE;
+
+ public CommandLineValues(String... args) throws CmdLineException {
+ CmdLineParser parser = new CmdLineParser(this);
+ try {
+ parser.parseArgument(args);
+ } catch (CmdLineException e) {
+ System.err.println(e.getMessage());
+ parser.printUsage(System.err);
+ throw e;
+ }
+ }
+
+ public CommandLineValues() {
+
+ }
}
\ No newline at end of file
diff --git a/JavaExtractor/JPredict/src/main/java/JavaExtractor/Common/Common.java b/pipeline/JavaExtractor/JPredict/src/main/java/JavaExtractor/Common/Common.java
old mode 100755
new mode 100644
similarity index 97%
rename from JavaExtractor/JPredict/src/main/java/JavaExtractor/Common/Common.java
rename to pipeline/JavaExtractor/JPredict/src/main/java/JavaExtractor/Common/Common.java
index 7b2e3d6..8f5acfd
--- a/JavaExtractor/JPredict/src/main/java/JavaExtractor/Common/Common.java
+++ b/pipeline/JavaExtractor/JPredict/src/main/java/JavaExtractor/Common/Common.java
@@ -1,77 +1,77 @@
-package JavaExtractor.Common;
-
-import java.util.ArrayList;
-import java.util.stream.Collectors;
-import java.util.stream.Stream;
-
-import com.github.javaparser.ast.Node;
-import com.github.javaparser.ast.UserDataKey;
-
-import JavaExtractor.FeaturesEntities.ProgramNode;
-import JavaExtractor.FeaturesEntities.Property;
-
-public final class Common {
- public static final UserDataKey PropertyKey = new UserDataKey() {
- };
- public static final UserDataKey ProgramNodeKey = new UserDataKey() {
- };
- public static final UserDataKey ChildId = new UserDataKey() {
- };
- public static final String EmptyString = "";
- public static final String UTF8 = "UTF-8";
- public static final String EvaluateTempDir = "EvalTemp";
-
- public static final String FieldAccessExpr = "FieldAccessExpr";
- public static final String ClassOrInterfaceType = "ClassOrInterfaceType";
- public static final String MethodDeclaration = "MethodDeclaration";
- public static final String NameExpr = "NameExpr";
- public static final String MethodCallExpr = "MethodCallExpr";
- public static final String DummyNode = "DummyNode";
- public static final String BlankWord = "BLANK";
-
- public static final int c_MaxLabelLength = 50;
- public static final String methodName = "METHOD_NAME";
- public static final String internalSeparator = "|";
-
- public static String normalizeName(String original, String defaultString) {
- original = original.toLowerCase().replaceAll("\\\\n", "") // escaped new
- // lines
- .replaceAll("//s+", "") // whitespaces
- .replaceAll("[\"',]", "") // quotes, apostrophies, commas
- .replaceAll("\\P{Print}", ""); // unicode weird characters
- String stripped = original.replaceAll("[^A-Za-z]", "");
- if (stripped.length() == 0) {
- String carefulStripped = original.replaceAll(" ", "_");
- if (carefulStripped.length() == 0) {
- return defaultString;
- } else {
- return carefulStripped;
- }
- } else {
- return stripped;
- }
- }
-
- public static boolean isMethod(Node node) {
- String type = node.getUserData(Common.PropertyKey).getType();
-
- return isMethod(node, type);
- }
-
- public static boolean isMethod(Node node, String type) {
- Property parentProperty = node.getParentNode().getUserData(Common.PropertyKey);
- if (parentProperty == null) {
- return false;
- }
-
- String parentType = parentProperty.getType();
- return Common.NameExpr.equals(type) && Common.MethodDeclaration.equals(parentType);
- }
-
- public static ArrayList splitToSubtokens(String str1) {
- String str2 = str1.trim();
- return Stream.of(str2.split("(?<=[a-z])(?=[A-Z])|_|[0-9]|(?<=[A-Z])(?=[A-Z][a-z])|\\s+"))
- .filter(s -> s.length() > 0).map(s -> Common.normalizeName(s, Common.EmptyString))
- .filter(s -> s.length() > 0).collect(Collectors.toCollection(ArrayList::new));
- }
-}
+package JavaExtractor.Common;
+
+import java.util.ArrayList;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+
+import com.github.javaparser.ast.Node;
+import com.github.javaparser.ast.UserDataKey;
+
+import JavaExtractor.FeaturesEntities.ProgramNode;
+import JavaExtractor.FeaturesEntities.Property;
+
+public final class Common {
+ public static final UserDataKey PropertyKey = new UserDataKey() {
+ };
+ public static final UserDataKey ProgramNodeKey = new UserDataKey() {
+ };
+ public static final UserDataKey ChildId = new UserDataKey() {
+ };
+ public static final String EmptyString = "";
+ public static final String UTF8 = "UTF-8";
+ public static final String EvaluateTempDir = "EvalTemp";
+
+ public static final String FieldAccessExpr = "FieldAccessExpr";
+ public static final String ClassOrInterfaceType = "ClassOrInterfaceType";
+ public static final String MethodDeclaration = "MethodDeclaration";
+ public static final String NameExpr = "NameExpr";
+ public static final String MethodCallExpr = "MethodCallExpr";
+ public static final String DummyNode = "DummyNode";
+ public static final String BlankWord = "BLANK";
+
+ public static final int c_MaxLabelLength = 50;
+ public static final String methodName = "METHOD_NAME";
+ public static final String internalSeparator = "|";
+
+ public static String normalizeName(String original, String defaultString) {
+ original = original.toLowerCase().replaceAll("\\\\n", "") // escaped new
+ // lines
+ .replaceAll("//s+", "") // whitespaces
+ .replaceAll("[\"',]", "") // quotes, apostrophies, commas
+ .replaceAll("\\P{Print}", ""); // unicode weird characters
+ String stripped = original.replaceAll("[^A-Za-z]", "");
+ if (stripped.length() == 0) {
+ String carefulStripped = original.replaceAll(" ", "_");
+ if (carefulStripped.length() == 0) {
+ return defaultString;
+ } else {
+ return carefulStripped;
+ }
+ } else {
+ return stripped;
+ }
+ }
+
+ public static boolean isMethod(Node node) {
+ String type = node.getUserData(Common.PropertyKey).getType();
+
+ return isMethod(node, type);
+ }
+
+ public static boolean isMethod(Node node, String type) {
+ Property parentProperty = node.getParentNode().getUserData(Common.PropertyKey);
+ if (parentProperty == null) {
+ return false;
+ }
+
+ String parentType = parentProperty.getType();
+ return Common.NameExpr.equals(type) && Common.MethodDeclaration.equals(parentType);
+ }
+
+ public static ArrayList splitToSubtokens(String str1) {
+ String str2 = str1.trim();
+ return Stream.of(str2.split("(?<=[a-z])(?=[A-Z])|_|[0-9]|(?<=[A-Z])(?=[A-Z][a-z])|\\s+"))
+ .filter(s -> s.length() > 0).map(s -> Common.normalizeName(s, Common.EmptyString))
+ .filter(s -> s.length() > 0).collect(Collectors.toCollection(ArrayList::new));
+ }
+}
diff --git a/JavaExtractor/JPredict/src/main/java/JavaExtractor/Common/MethodContent.java b/pipeline/JavaExtractor/JPredict/src/main/java/JavaExtractor/Common/MethodContent.java
old mode 100755
new mode 100644
similarity index 94%
rename from JavaExtractor/JPredict/src/main/java/JavaExtractor/Common/MethodContent.java
rename to pipeline/JavaExtractor/JPredict/src/main/java/JavaExtractor/Common/MethodContent.java
index 4df3cfd..81b5f89
--- a/JavaExtractor/JPredict/src/main/java/JavaExtractor/Common/MethodContent.java
+++ b/pipeline/JavaExtractor/JPredict/src/main/java/JavaExtractor/Common/MethodContent.java
@@ -1,29 +1,29 @@
-package JavaExtractor.Common;
-
-import java.util.ArrayList;
-import com.github.javaparser.ast.Node;
-
-public class MethodContent {
- private ArrayList leaves;
- private String name;
- private long length;
-
- public MethodContent(ArrayList leaves, String name, long length) {
- this.leaves = leaves;
- this.name = name;
- this.length = length;
- }
-
- public ArrayList getLeaves() {
- return leaves;
- }
-
- public String getName() {
- return name;
- }
-
- public long getLength() {
- return length;
- }
-
-}
+package JavaExtractor.Common;
+
+import java.util.ArrayList;
+import com.github.javaparser.ast.Node;
+
+public class MethodContent {
+ private ArrayList leaves;
+ private String name;
+ private long length;
+
+ public MethodContent(ArrayList leaves, String name, long length) {
+ this.leaves = leaves;
+ this.name = name;
+ this.length = length;
+ }
+
+ public ArrayList getLeaves() {
+ return leaves;
+ }
+
+ public String getName() {
+ return name;
+ }
+
+ public long getLength() {
+ return length;
+ }
+
+}
diff --git a/JavaExtractor/JPredict/src/main/java/JavaExtractor/ExtractFeaturesTask.java b/pipeline/JavaExtractor/JPredict/src/main/java/JavaExtractor/ExtractFeaturesTask.java
old mode 100755
new mode 100644
similarity index 96%
rename from JavaExtractor/JPredict/src/main/java/JavaExtractor/ExtractFeaturesTask.java
rename to pipeline/JavaExtractor/JPredict/src/main/java/JavaExtractor/ExtractFeaturesTask.java
index 3eaf2a7..41512c8
--- a/JavaExtractor/JPredict/src/main/java/JavaExtractor/ExtractFeaturesTask.java
+++ b/pipeline/JavaExtractor/JPredict/src/main/java/JavaExtractor/ExtractFeaturesTask.java
@@ -1,92 +1,92 @@
-package JavaExtractor;
-
-import java.io.IOException;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.concurrent.Callable;
-
-import org.apache.commons.lang3.StringUtils;
-
-import com.github.javaparser.ParseException;
-import com.github.javaparser.ast.CompilationUnit;
-
-import JavaExtractor.Common.CommandLineValues;
-import JavaExtractor.Common.Common;
-import JavaExtractor.FeaturesEntities.ProgramFeatures;
-
-public class ExtractFeaturesTask implements Callable {
- CommandLineValues m_CommandLineValues;
- Path filePath;
-
- public ExtractFeaturesTask(CommandLineValues commandLineValues, Path path) {
- m_CommandLineValues = commandLineValues;
- this.filePath = path;
- }
-
- @Override
- public Void call() throws Exception {
- //System.err.println("Extracting file: " + filePath);
- processFile();
- //System.err.println("Done with file: " + filePath);
- return null;
- }
-
- public void processFile() {
- ArrayList features;
- try {
- features = extractSingleFile();
- } catch (ParseException | IOException e) {
- e.printStackTrace();
- return;
- }
- if (features == null) {
- return;
- }
-
- String toPrint = featuresToString(features);
- if (toPrint.length() > 0) {
- System.out.println(toPrint);
- }
- }
-
- public ArrayList extractSingleFile() throws ParseException, IOException {
- String code = null;
- try {
- code = new String(Files.readAllBytes(this.filePath));
- } catch (IOException e) {
- e.printStackTrace();
- code = Common.EmptyString;
- }
- FeatureExtractor featureExtractor = new FeatureExtractor(m_CommandLineValues);
-
- ArrayList features = featureExtractor.extractFeatures(code);
-
- return features;
- }
-
- public String featuresToString(ArrayList features) {
- if (features == null || features.isEmpty()) {
- return Common.EmptyString;
- }
-
- List methodsOutputs = new ArrayList<>();
-
- for (ProgramFeatures singleMethodfeatures : features) {
- StringBuilder builder = new StringBuilder();
-
- String toPrint = Common.EmptyString;
- toPrint = singleMethodfeatures.toString();
- if (m_CommandLineValues.PrettyPrint) {
- toPrint = toPrint.replace(" ", "\n\t");
- }
- builder.append(toPrint);
-
-
- methodsOutputs.add(builder.toString());
-
- }
- return StringUtils.join(methodsOutputs, "\n");
- }
-}
+package JavaExtractor;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.Callable;
+
+import org.apache.commons.lang3.StringUtils;
+
+import com.github.javaparser.ParseException;
+import com.github.javaparser.ast.CompilationUnit;
+
+import JavaExtractor.Common.CommandLineValues;
+import JavaExtractor.Common.Common;
+import JavaExtractor.FeaturesEntities.ProgramFeatures;
+
+public class ExtractFeaturesTask implements Callable {
+ CommandLineValues m_CommandLineValues;
+ Path filePath;
+
+ public ExtractFeaturesTask(CommandLineValues commandLineValues, Path path) {
+ m_CommandLineValues = commandLineValues;
+ this.filePath = path;
+ }
+
+ @Override
+ public Void call() throws Exception {
+ //System.err.println("Extracting file: " + filePath);
+ processFile();
+ //System.err.println("Done with file: " + filePath);
+ return null;
+ }
+
+ public void processFile() {
+ ArrayList features;
+ try {
+ features = extractSingleFile();
+ } catch (ParseException | IOException e) {
+ e.printStackTrace();
+ return;
+ }
+ if (features == null) {
+ return;
+ }
+
+ String toPrint = featuresToString(features);
+ if (toPrint.length() > 0) {
+ System.out.println(toPrint);
+ }
+ }
+
+ public ArrayList extractSingleFile() throws ParseException, IOException {
+ String code = null;
+ try {
+ code = new String(Files.readAllBytes(this.filePath));
+ } catch (IOException e) {
+ e.printStackTrace();
+ code = Common.EmptyString;
+ }
+ FeatureExtractor featureExtractor = new FeatureExtractor(m_CommandLineValues);
+
+ ArrayList features = featureExtractor.extractFeatures(code);
+
+ return features;
+ }
+
+ public String featuresToString(ArrayList features) {
+ if (features == null || features.isEmpty()) {
+ return Common.EmptyString;
+ }
+
+ List methodsOutputs = new ArrayList<>();
+
+ for (ProgramFeatures singleMethodfeatures : features) {
+ StringBuilder builder = new StringBuilder();
+
+ String toPrint = Common.EmptyString;
+ toPrint = singleMethodfeatures.toString();
+ if (m_CommandLineValues.PrettyPrint) {
+ toPrint = toPrint.replace(" ", "\n\t");
+ }
+ builder.append(toPrint);
+
+
+ methodsOutputs.add(builder.toString());
+
+ }
+ return StringUtils.join(methodsOutputs, "\n");
+ }
+}
diff --git a/JavaExtractor/JPredict/src/main/java/JavaExtractor/FeatureExtractor.java b/pipeline/JavaExtractor/JPredict/src/main/java/JavaExtractor/FeatureExtractor.java
old mode 100755
new mode 100644
similarity index 97%
rename from JavaExtractor/JPredict/src/main/java/JavaExtractor/FeatureExtractor.java
rename to pipeline/JavaExtractor/JPredict/src/main/java/JavaExtractor/FeatureExtractor.java
index 1a0e1d5..626bda2
--- a/JavaExtractor/JPredict/src/main/java/JavaExtractor/FeatureExtractor.java
+++ b/pipeline/JavaExtractor/JPredict/src/main/java/JavaExtractor/FeatureExtractor.java
@@ -1,196 +1,196 @@
-package JavaExtractor;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.HashSet;
-import java.util.Set;
-import java.util.StringJoiner;
-import java.util.stream.Collectors;
-import java.util.stream.Stream;
-
-import com.github.javaparser.JavaParser;
-import com.github.javaparser.ParseException;
-import com.github.javaparser.ParseProblemException;
-import com.github.javaparser.ast.CompilationUnit;
-import com.github.javaparser.ast.Node;
-import JavaExtractor.Common.CommandLineValues;
-import JavaExtractor.Common.Common;
-import JavaExtractor.Common.MethodContent;
-import JavaExtractor.FeaturesEntities.ProgramFeatures;
-import JavaExtractor.FeaturesEntities.Property;
-import JavaExtractor.Visitors.FunctionVisitor;
-
-@SuppressWarnings("StringEquality")
-public class FeatureExtractor {
- private CommandLineValues m_CommandLineValues;
- private static Set s_ParentTypeToAddChildId = Stream
- .of("AssignExpr", "ArrayAccessExpr", "FieldAccessExpr", "MethodCallExpr")
- .collect(Collectors.toCollection(HashSet::new));
-
- final static String lparen = "(";
- final static String rparen = ")";
- final static String upSymbol = "^";
- final static String downSymbol = "_";
-
- public FeatureExtractor(CommandLineValues commandLineValues) {
- this.m_CommandLineValues = commandLineValues;
- }
-
- public ArrayList extractFeatures(String code) throws ParseException, IOException {
- CompilationUnit compilationUnit = parseFileWithRetries(code);
- FunctionVisitor functionVisitor = new FunctionVisitor();
-
- functionVisitor.visit(compilationUnit, null);
-
- ArrayList methods = functionVisitor.getMethodContents();
- ArrayList programs = generatePathFeatures(methods);
-
- return programs;
- }
-
- private CompilationUnit parseFileWithRetries(String code) throws IOException {
- final String classPrefix = "public class Test {";
- final String classSuffix = "}";
- final String methodPrefix = "SomeUnknownReturnType f() {";
- final String methodSuffix = "return noSuchReturnValue; }";
-
- String originalContent = code;
- String content = originalContent;
- CompilationUnit parsed = null;
- try {
- parsed = JavaParser.parse(content);
- } catch (ParseProblemException e1) {
- // Wrap with a class and method
- try {
- content = classPrefix + methodPrefix + originalContent + methodSuffix + classSuffix;
- parsed = JavaParser.parse(content);
- } catch (ParseProblemException e2) {
- // Wrap with a class only
- content = classPrefix + originalContent + classSuffix;
- parsed = JavaParser.parse(content);
- }
- }
-
- return parsed;
- }
-
- public ArrayList generatePathFeatures(ArrayList methods) {
- ArrayList methodsFeatures = new ArrayList<>();
- for (MethodContent content : methods) {
- if (content.getLength() < m_CommandLineValues.MinCodeLength
- || content.getLength() > m_CommandLineValues.MaxCodeLength)
- continue;
- ProgramFeatures singleMethodFeatures = generatePathFeaturesForFunction(content);
- if (!singleMethodFeatures.isEmpty()) {
- methodsFeatures.add(singleMethodFeatures);
- }
- }
- return methodsFeatures;
- }
-
- private ProgramFeatures generatePathFeaturesForFunction(MethodContent methodContent) {
- ArrayList functionLeaves = methodContent.getLeaves();
- ProgramFeatures programFeatures = new ProgramFeatures(methodContent.getName());
-
- for (int i = 0; i < functionLeaves.size(); i++) {
- for (int j = i + 1; j < functionLeaves.size(); j++) {
- String separator = Common.EmptyString;
-
- String path = generatePath(functionLeaves.get(i), functionLeaves.get(j), separator);
- if (path != Common.EmptyString) {
- Property source = functionLeaves.get(i).getUserData(Common.PropertyKey);
- Property target = functionLeaves.get(j).getUserData(Common.PropertyKey);
- programFeatures.addFeature(source, path, target);
- }
- }
- }
- return programFeatures;
- }
-
- private static ArrayList getTreeStack(Node node) {
- ArrayList upStack = new ArrayList<>();
- Node current = node;
- while (current != null) {
- upStack.add(current);
- current = current.getParentNode();
- }
- return upStack;
- }
-
- private String generatePath(Node source, Node target, String separator) {
- String down = downSymbol;
- String up = upSymbol;
- String startSymbol = lparen;
- String endSymbol = rparen;
-
- StringJoiner stringBuilder = new StringJoiner(separator);
- ArrayList sourceStack = getTreeStack(source);
- ArrayList targetStack = getTreeStack(target);
-
- int commonPrefix = 0;
- int currentSourceAncestorIndex = sourceStack.size() - 1;
- int currentTargetAncestorIndex = targetStack.size() - 1;
- while (currentSourceAncestorIndex >= 0 && currentTargetAncestorIndex >= 0
- && sourceStack.get(currentSourceAncestorIndex) == targetStack.get(currentTargetAncestorIndex)) {
- commonPrefix++;
- currentSourceAncestorIndex--;
- currentTargetAncestorIndex--;
- }
-
- int pathLength = sourceStack.size() + targetStack.size() - 2 * commonPrefix;
- if (pathLength > m_CommandLineValues.MaxPathLength) {
- return Common.EmptyString;
- }
-
- if (currentSourceAncestorIndex >= 0 && currentTargetAncestorIndex >= 0) {
- int pathWidth = targetStack.get(currentTargetAncestorIndex).getUserData(Common.ChildId)
- - sourceStack.get(currentSourceAncestorIndex).getUserData(Common.ChildId);
- if (pathWidth > m_CommandLineValues.MaxPathWidth) {
- return Common.EmptyString;
- }
- }
-
- for (int i = 0; i < sourceStack.size() - commonPrefix; i++) {
- Node currentNode = sourceStack.get(i);
- String childId = Common.EmptyString;
- String parentRawType = currentNode.getParentNode().getUserData(Common.PropertyKey).getRawType();
- if (i == 0 || s_ParentTypeToAddChildId.contains(parentRawType)) {
- childId = saturateChildId(currentNode.getUserData(Common.ChildId))
- .toString();
- }
- stringBuilder.add(String.format("%s%s%s%s%s", startSymbol,
- currentNode.getUserData(Common.PropertyKey).getType(), childId, endSymbol, up));
- }
-
- Node commonNode = sourceStack.get(sourceStack.size() - commonPrefix);
- String commonNodeChildId = Common.EmptyString;
- Property parentNodeProperty = commonNode.getParentNode().getUserData(Common.PropertyKey);
- String commonNodeParentRawType = Common.EmptyString;
- if (parentNodeProperty != null) {
- commonNodeParentRawType = parentNodeProperty.getRawType();
- }
- if (s_ParentTypeToAddChildId.contains(commonNodeParentRawType)) {
- commonNodeChildId = saturateChildId(commonNode.getUserData(Common.ChildId))
- .toString();
- }
- stringBuilder.add(String.format("%s%s%s%s", startSymbol,
- commonNode.getUserData(Common.PropertyKey).getType(), commonNodeChildId, endSymbol));
-
- for (int i = targetStack.size() - commonPrefix - 1; i >= 0; i--) {
- Node currentNode = targetStack.get(i);
- String childId = Common.EmptyString;
- if (i == 0 || s_ParentTypeToAddChildId.contains(currentNode.getUserData(Common.PropertyKey).getRawType())) {
- childId = saturateChildId(currentNode.getUserData(Common.ChildId))
- .toString();
- }
- stringBuilder.add(String.format("%s%s%s%s%s", down, startSymbol,
- currentNode.getUserData(Common.PropertyKey).getType(), childId, endSymbol));
- }
-
- return stringBuilder.toString();
- }
-
- private Integer saturateChildId(int childId) {
- return Math.min(childId, m_CommandLineValues.MaxChildId);
- }
-}
+package JavaExtractor;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.Set;
+import java.util.StringJoiner;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+
+import com.github.javaparser.JavaParser;
+import com.github.javaparser.ParseException;
+import com.github.javaparser.ParseProblemException;
+import com.github.javaparser.ast.CompilationUnit;
+import com.github.javaparser.ast.Node;
+import JavaExtractor.Common.CommandLineValues;
+import JavaExtractor.Common.Common;
+import JavaExtractor.Common.MethodContent;
+import JavaExtractor.FeaturesEntities.ProgramFeatures;
+import JavaExtractor.FeaturesEntities.Property;
+import JavaExtractor.Visitors.FunctionVisitor;
+
+@SuppressWarnings("StringEquality")
+public class FeatureExtractor {
+ private CommandLineValues m_CommandLineValues;
+ private static Set s_ParentTypeToAddChildId = Stream
+ .of("AssignExpr", "ArrayAccessExpr", "FieldAccessExpr", "MethodCallExpr")
+ .collect(Collectors.toCollection(HashSet::new));
+
+ final static String lparen = "(";
+ final static String rparen = ")";
+ final static String upSymbol = "^";
+ final static String downSymbol = "_";
+
+ public FeatureExtractor(CommandLineValues commandLineValues) {
+ this.m_CommandLineValues = commandLineValues;
+ }
+
+ public ArrayList extractFeatures(String code) throws ParseException, IOException {
+ CompilationUnit compilationUnit = parseFileWithRetries(code);
+ FunctionVisitor functionVisitor = new FunctionVisitor();
+
+ functionVisitor.visit(compilationUnit, null);
+
+ ArrayList methods = functionVisitor.getMethodContents();
+ ArrayList programs = generatePathFeatures(methods);
+
+ return programs;
+ }
+
+ private CompilationUnit parseFileWithRetries(String code) throws IOException {
+ final String classPrefix = "public class Test {";
+ final String classSuffix = "}";
+ final String methodPrefix = "SomeUnknownReturnType f() {";
+ final String methodSuffix = "return noSuchReturnValue; }";
+
+ String originalContent = code;
+ String content = originalContent;
+ CompilationUnit parsed = null;
+ try {
+ parsed = JavaParser.parse(content);
+ } catch (ParseProblemException e1) {
+ // Wrap with a class and method
+ try {
+ content = classPrefix + methodPrefix + originalContent + methodSuffix + classSuffix;
+ parsed = JavaParser.parse(content);
+ } catch (ParseProblemException e2) {
+ // Wrap with a class only
+ content = classPrefix + originalContent + classSuffix;
+ parsed = JavaParser.parse(content);
+ }
+ }
+
+ return parsed;
+ }
+
+ public ArrayList generatePathFeatures(ArrayList methods) {
+ ArrayList methodsFeatures = new ArrayList<>();
+ for (MethodContent content : methods) {
+ if (content.getLength() < m_CommandLineValues.MinCodeLength
+ || content.getLength() > m_CommandLineValues.MaxCodeLength)
+ continue;
+ ProgramFeatures singleMethodFeatures = generatePathFeaturesForFunction(content);
+ if (!singleMethodFeatures.isEmpty()) {
+ methodsFeatures.add(singleMethodFeatures);
+ }
+ }
+ return methodsFeatures;
+ }
+
+ private ProgramFeatures generatePathFeaturesForFunction(MethodContent methodContent) {
+ ArrayList functionLeaves = methodContent.getLeaves();
+ ProgramFeatures programFeatures = new ProgramFeatures(methodContent.getName());
+
+ for (int i = 0; i < functionLeaves.size(); i++) {
+ for (int j = i + 1; j < functionLeaves.size(); j++) {
+ String separator = Common.EmptyString;
+
+ String path = generatePath(functionLeaves.get(i), functionLeaves.get(j), separator);
+ if (path != Common.EmptyString) {
+ Property source = functionLeaves.get(i).getUserData(Common.PropertyKey);
+ Property target = functionLeaves.get(j).getUserData(Common.PropertyKey);
+ programFeatures.addFeature(source, path, target);
+ }
+ }
+ }
+ return programFeatures;
+ }
+
+ private static ArrayList getTreeStack(Node node) {
+ ArrayList upStack = new ArrayList<>();
+ Node current = node;
+ while (current != null) {
+ upStack.add(current);
+ current = current.getParentNode();
+ }
+ return upStack;
+ }
+
+ private String generatePath(Node source, Node target, String separator) {
+ String down = downSymbol;
+ String up = upSymbol;
+ String startSymbol = lparen;
+ String endSymbol = rparen;
+
+ StringJoiner stringBuilder = new StringJoiner(separator);
+ ArrayList sourceStack = getTreeStack(source);
+ ArrayList targetStack = getTreeStack(target);
+
+ int commonPrefix = 0;
+ int currentSourceAncestorIndex = sourceStack.size() - 1;
+ int currentTargetAncestorIndex = targetStack.size() - 1;
+ while (currentSourceAncestorIndex >= 0 && currentTargetAncestorIndex >= 0
+ && sourceStack.get(currentSourceAncestorIndex) == targetStack.get(currentTargetAncestorIndex)) {
+ commonPrefix++;
+ currentSourceAncestorIndex--;
+ currentTargetAncestorIndex--;
+ }
+
+ int pathLength = sourceStack.size() + targetStack.size() - 2 * commonPrefix;
+ if (pathLength > m_CommandLineValues.MaxPathLength) {
+ return Common.EmptyString;
+ }
+
+ if (currentSourceAncestorIndex >= 0 && currentTargetAncestorIndex >= 0) {
+ int pathWidth = targetStack.get(currentTargetAncestorIndex).getUserData(Common.ChildId)
+ - sourceStack.get(currentSourceAncestorIndex).getUserData(Common.ChildId);
+ if (pathWidth > m_CommandLineValues.MaxPathWidth) {
+ return Common.EmptyString;
+ }
+ }
+
+ for (int i = 0; i < sourceStack.size() - commonPrefix; i++) {
+ Node currentNode = sourceStack.get(i);
+ String childId = Common.EmptyString;
+ String parentRawType = currentNode.getParentNode().getUserData(Common.PropertyKey).getRawType();
+ if (i == 0 || s_ParentTypeToAddChildId.contains(parentRawType)) {
+ childId = saturateChildId(currentNode.getUserData(Common.ChildId))
+ .toString();
+ }
+ stringBuilder.add(String.format("%s%s%s%s%s", startSymbol,
+ currentNode.getUserData(Common.PropertyKey).getType(), childId, endSymbol, up));
+ }
+
+ Node commonNode = sourceStack.get(sourceStack.size() - commonPrefix);
+ String commonNodeChildId = Common.EmptyString;
+ Property parentNodeProperty = commonNode.getParentNode().getUserData(Common.PropertyKey);
+ String commonNodeParentRawType = Common.EmptyString;
+ if (parentNodeProperty != null) {
+ commonNodeParentRawType = parentNodeProperty.getRawType();
+ }
+ if (s_ParentTypeToAddChildId.contains(commonNodeParentRawType)) {
+ commonNodeChildId = saturateChildId(commonNode.getUserData(Common.ChildId))
+ .toString();
+ }
+ stringBuilder.add(String.format("%s%s%s%s", startSymbol,
+ commonNode.getUserData(Common.PropertyKey).getType(), commonNodeChildId, endSymbol));
+
+ for (int i = targetStack.size() - commonPrefix - 1; i >= 0; i--) {
+ Node currentNode = targetStack.get(i);
+ String childId = Common.EmptyString;
+ if (i == 0 || s_ParentTypeToAddChildId.contains(currentNode.getUserData(Common.PropertyKey).getRawType())) {
+ childId = saturateChildId(currentNode.getUserData(Common.ChildId))
+ .toString();
+ }
+ stringBuilder.add(String.format("%s%s%s%s%s", down, startSymbol,
+ currentNode.getUserData(Common.PropertyKey).getType(), childId, endSymbol));
+ }
+
+ return stringBuilder.toString();
+ }
+
+ private Integer saturateChildId(int childId) {
+ return Math.min(childId, m_CommandLineValues.MaxChildId);
+ }
+}
diff --git a/JavaExtractor/JPredict/src/main/java/JavaExtractor/FeaturesEntities/ProgramFeatures.java b/pipeline/JavaExtractor/JPredict/src/main/java/JavaExtractor/FeaturesEntities/ProgramFeatures.java
old mode 100755
new mode 100644
similarity index 95%
rename from JavaExtractor/JPredict/src/main/java/JavaExtractor/FeaturesEntities/ProgramFeatures.java
rename to pipeline/JavaExtractor/JPredict/src/main/java/JavaExtractor/FeaturesEntities/ProgramFeatures.java
index 755c2ce..92c708f
--- a/JavaExtractor/JPredict/src/main/java/JavaExtractor/FeaturesEntities/ProgramFeatures.java
+++ b/pipeline/JavaExtractor/JPredict/src/main/java/JavaExtractor/FeaturesEntities/ProgramFeatures.java
@@ -1,49 +1,49 @@
-package JavaExtractor.FeaturesEntities;
-
-import java.util.ArrayList;
-import java.util.stream.Collectors;
-
-import com.fasterxml.jackson.annotation.JsonIgnore;
-
-public class ProgramFeatures {
- private String name;
-
- private ArrayList features = new ArrayList<>();
-
- public ProgramFeatures(String name) {
- this.name = name;
- }
-
- @SuppressWarnings("StringBufferReplaceableByString")
- @Override
- public String toString() {
- StringBuilder stringBuilder = new StringBuilder();
- stringBuilder.append(name).append(" ");
- stringBuilder.append(features.stream().map(ProgramRelation::toString).collect(Collectors.joining(" ")));
-
- return stringBuilder.toString();
- }
-
- public void addFeature(Property source, String path, Property target) {
- ProgramRelation newRelation = new ProgramRelation(source, target, path);
- features.add(newRelation);
- }
-
- @JsonIgnore
- public boolean isEmpty() {
- return features.isEmpty();
- }
-
- public void deleteAllPaths() {
- features.clear();
- }
-
- public String getName() {
- return name;
- }
-
- public ArrayList getFeatures() {
- return features;
- }
-
-}
+package JavaExtractor.FeaturesEntities;
+
+import java.util.ArrayList;
+import java.util.stream.Collectors;
+
+import com.fasterxml.jackson.annotation.JsonIgnore;
+
+public class ProgramFeatures {
+ private String name;
+
+ private ArrayList features = new ArrayList<>();
+
+ public ProgramFeatures(String name) {
+ this.name = name;
+ }
+
+ @SuppressWarnings("StringBufferReplaceableByString")
+ @Override
+ public String toString() {
+ StringBuilder stringBuilder = new StringBuilder();
+ stringBuilder.append(name).append(" ");
+ stringBuilder.append(features.stream().map(ProgramRelation::toString).collect(Collectors.joining(" ")));
+
+ return stringBuilder.toString();
+ }
+
+ public void addFeature(Property source, String path, Property target) {
+ ProgramRelation newRelation = new ProgramRelation(source, target, path);
+ features.add(newRelation);
+ }
+
+ @JsonIgnore
+ public boolean isEmpty() {
+ return features.isEmpty();
+ }
+
+ public void deleteAllPaths() {
+ features.clear();
+ }
+
+ public String getName() {
+ return name;
+ }
+
+ public ArrayList getFeatures() {
+ return features;
+ }
+
+}
diff --git a/JavaExtractor/JPredict/src/main/java/JavaExtractor/FeaturesEntities/ProgramNode.java b/pipeline/JavaExtractor/JPredict/src/main/java/JavaExtractor/FeaturesEntities/ProgramNode.java
old mode 100755
new mode 100644
similarity index 95%
rename from JavaExtractor/JPredict/src/main/java/JavaExtractor/FeaturesEntities/ProgramNode.java
rename to pipeline/JavaExtractor/JPredict/src/main/java/JavaExtractor/FeaturesEntities/ProgramNode.java
index 998bd90..7cf13c0
--- a/JavaExtractor/JPredict/src/main/java/JavaExtractor/FeaturesEntities/ProgramNode.java
+++ b/pipeline/JavaExtractor/JPredict/src/main/java/JavaExtractor/FeaturesEntities/ProgramNode.java
@@ -1,21 +1,21 @@
-package JavaExtractor.FeaturesEntities;
-
-import java.io.UnsupportedEncodingException;
-import java.net.URLEncoder;
-import JavaExtractor.Common.Common;
-
-public class ProgramNode {
- public int Id;
- public String Type;
- public String Name;
- public boolean IsMethodDeclarationName;
-
- public ProgramNode(String name) {
- Name = name;
- try {
- Name = URLEncoder.encode(name, Common.UTF8);
- } catch (UnsupportedEncodingException e) {
- e.printStackTrace();
- }
- }
-}
+package JavaExtractor.FeaturesEntities;
+
+import java.io.UnsupportedEncodingException;
+import java.net.URLEncoder;
+import JavaExtractor.Common.Common;
+
+public class ProgramNode {
+ public int Id;
+ public String Type;
+ public String Name;
+ public boolean IsMethodDeclarationName;
+
+ public ProgramNode(String name) {
+ Name = name;
+ try {
+ Name = URLEncoder.encode(name, Common.UTF8);
+ } catch (UnsupportedEncodingException e) {
+ e.printStackTrace();
+ }
+ }
+}
diff --git a/JavaExtractor/JPredict/src/main/java/JavaExtractor/FeaturesEntities/ProgramRelation.java b/pipeline/JavaExtractor/JPredict/src/main/java/JavaExtractor/FeaturesEntities/ProgramRelation.java
old mode 100755
new mode 100644
similarity index 95%
rename from JavaExtractor/JPredict/src/main/java/JavaExtractor/FeaturesEntities/ProgramRelation.java
rename to pipeline/JavaExtractor/JPredict/src/main/java/JavaExtractor/FeaturesEntities/ProgramRelation.java
index b168d62..47d543f
--- a/JavaExtractor/JPredict/src/main/java/JavaExtractor/FeaturesEntities/ProgramRelation.java
+++ b/pipeline/JavaExtractor/JPredict/src/main/java/JavaExtractor/FeaturesEntities/ProgramRelation.java
@@ -1,56 +1,56 @@
-package JavaExtractor.FeaturesEntities;
-
-import java.util.ArrayList;
-import java.util.function.Function;
-
-import com.fasterxml.jackson.annotation.JsonIgnore;
-import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
-import com.fasterxml.jackson.annotation.JsonPropertyDescription;
-
-public class ProgramRelation {
- private Property m_Source;
- private Property m_Target;
- private String m_HashedPath;
- private String m_Path;
- @SuppressWarnings("FieldCanBeLocal")
- @JsonPropertyDescription
- private ArrayList result;
- public static Function s_Hasher = (s) -> Integer.toString(s.hashCode());
-
- public ProgramRelation(Property sourceName, Property targetName, String path) {
- m_Source = sourceName;
- m_Target = targetName;
- m_Path = path;
- m_HashedPath = s_Hasher.apply(path);
- }
-
- public static void setNoHash() {
- s_Hasher = (s) -> s;
- }
-
- public String toString() {
- return String.format("%s,%s,%s", m_Source.getName(), m_HashedPath,
- m_Target.getName());
- }
-
- @JsonIgnore
- public String getPath() {
- return m_Path;
- }
-
- @JsonIgnore
- public Property getSource() {
- return m_Source;
- }
-
- @JsonIgnoreProperties
- public Property getTarget() {
- return m_Target;
- }
-
- @JsonIgnore
- public String getHashedPath() {
- return m_HashedPath;
-
- }
-}
+package JavaExtractor.FeaturesEntities;
+
+import java.util.ArrayList;
+import java.util.function.Function;
+
+import com.fasterxml.jackson.annotation.JsonIgnore;
+import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
+import com.fasterxml.jackson.annotation.JsonPropertyDescription;
+
+public class ProgramRelation {
+ private Property m_Source;
+ private Property m_Target;
+ private String m_HashedPath;
+ private String m_Path;
+ @SuppressWarnings("FieldCanBeLocal")
+ @JsonPropertyDescription
+ private ArrayList result;
+ public static Function s_Hasher = (s) -> Integer.toString(s.hashCode());
+
+ public ProgramRelation(Property sourceName, Property targetName, String path) {
+ m_Source = sourceName;
+ m_Target = targetName;
+ m_Path = path;
+ m_HashedPath = s_Hasher.apply(path);
+ }
+
+ public static void setNoHash() {
+ s_Hasher = (s) -> s;
+ }
+
+ public String toString() {
+ return String.format("%s,%s,%s", m_Source.getName(), m_HashedPath,
+ m_Target.getName());
+ }
+
+ @JsonIgnore
+ public String getPath() {
+ return m_Path;
+ }
+
+ @JsonIgnore
+ public Property getSource() {
+ return m_Source;
+ }
+
+ @JsonIgnoreProperties
+ public Property getTarget() {
+ return m_Target;
+ }
+
+ @JsonIgnore
+ public String getHashedPath() {
+ return m_HashedPath;
+
+ }
+}
diff --git a/JavaExtractor/JPredict/src/main/java/JavaExtractor/FeaturesEntities/Property.java b/pipeline/JavaExtractor/JPredict/src/main/java/JavaExtractor/FeaturesEntities/Property.java
old mode 100755
new mode 100644
similarity index 96%
rename from JavaExtractor/JPredict/src/main/java/JavaExtractor/FeaturesEntities/Property.java
rename to pipeline/JavaExtractor/JPredict/src/main/java/JavaExtractor/FeaturesEntities/Property.java
index 697acc7..3718341
--- a/JavaExtractor/JPredict/src/main/java/JavaExtractor/FeaturesEntities/Property.java
+++ b/pipeline/JavaExtractor/JPredict/src/main/java/JavaExtractor/FeaturesEntities/Property.java
@@ -1,90 +1,90 @@
-package JavaExtractor.FeaturesEntities;
-
-import java.util.ArrayList;
-import java.util.HashSet;
-import java.util.stream.Collectors;
-import java.util.stream.Stream;
-
-import com.github.javaparser.ast.Node;
-import com.github.javaparser.ast.expr.AssignExpr;
-import com.github.javaparser.ast.expr.BinaryExpr;
-import com.github.javaparser.ast.expr.IntegerLiteralExpr;
-import com.github.javaparser.ast.expr.UnaryExpr;
-import com.github.javaparser.ast.type.ClassOrInterfaceType;
-
-import JavaExtractor.Common.Common;
-
-public class Property {
- private String RawType;
- private String Type;
- private String Name;
- private String SplitName;
- private String Operator;
- public static final HashSet NumericalKeepValues = Stream.of("0", "1", "32", "64")
- .collect(Collectors.toCollection(HashSet::new));
-
- public Property(Node node, boolean isLeaf, boolean isGenericParent, int id) {
- Class> nodeClass = node.getClass();
- RawType = Type = nodeClass.getSimpleName();
- if (node instanceof ClassOrInterfaceType && ((ClassOrInterfaceType) node).isBoxedType()) {
- Type = "PrimitiveType";
- }
- Operator = "";
- if (node instanceof BinaryExpr) {
- Operator = ((BinaryExpr) node).getOperator().toString();
- } else if (node instanceof UnaryExpr) {
- Operator = ((UnaryExpr) node).getOperator().toString();
- } else if (node instanceof AssignExpr) {
- Operator = ((AssignExpr) node).getOperator().toString();
- }
- if (Operator.length() > 0) {
- Type += ":" + Operator;
- }
-
- String nameToSplit = node.toString();
- if (isGenericParent) {
- nameToSplit = ((ClassOrInterfaceType) node).getName();
- if (isLeaf) {
- // if it is a generic parent which counts as a leaf, then when
- // it is participating in a path
- // as a parent, it should be GenericClass and not a simple
- // ClassOrInterfaceType.
- Type = "GenericClass";
- }
- }
- ArrayList splitNameParts = Common.splitToSubtokens(nameToSplit);
- SplitName = splitNameParts.stream().collect(Collectors.joining(Common.internalSeparator));
-
- node.toString();
- Name = Common.normalizeName(node.toString(), Common.BlankWord);
- if (Name.length() > Common.c_MaxLabelLength) {
- Name = Name.substring(0, Common.c_MaxLabelLength);
- } else if (node instanceof ClassOrInterfaceType && ((ClassOrInterfaceType) node).isBoxedType()) {
- Name = ((ClassOrInterfaceType) node).toUnboxedType().toString();
- }
-
- if (Common.isMethod(node, Type)) {
- Name = SplitName = Common.methodName;
- }
-
- if (SplitName.length() == 0) {
- SplitName = Name;
- if (node instanceof IntegerLiteralExpr && !NumericalKeepValues.contains(SplitName)) {
- // This is a numeric literal, but not in our white list
- SplitName = "";
- }
- }
- }
-
- public String getRawType() {
- return RawType;
- }
-
- public String getType() {
- return Type;
- }
-
- public String getName() {
- return Name;
- }
-}
+package JavaExtractor.FeaturesEntities;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+
+import com.github.javaparser.ast.Node;
+import com.github.javaparser.ast.expr.AssignExpr;
+import com.github.javaparser.ast.expr.BinaryExpr;
+import com.github.javaparser.ast.expr.IntegerLiteralExpr;
+import com.github.javaparser.ast.expr.UnaryExpr;
+import com.github.javaparser.ast.type.ClassOrInterfaceType;
+
+import JavaExtractor.Common.Common;
+
+public class Property {
+ private String RawType;
+ private String Type;
+ private String Name;
+ private String SplitName;
+ private String Operator;
+ public static final HashSet NumericalKeepValues = Stream.of("0", "1", "32", "64")
+ .collect(Collectors.toCollection(HashSet::new));
+
+ public Property(Node node, boolean isLeaf, boolean isGenericParent, int id) {
+ Class> nodeClass = node.getClass();
+ RawType = Type = nodeClass.getSimpleName();
+ if (node instanceof ClassOrInterfaceType && ((ClassOrInterfaceType) node).isBoxedType()) {
+ Type = "PrimitiveType";
+ }
+ Operator = "";
+ if (node instanceof BinaryExpr) {
+ Operator = ((BinaryExpr) node).getOperator().toString();
+ } else if (node instanceof UnaryExpr) {
+ Operator = ((UnaryExpr) node).getOperator().toString();
+ } else if (node instanceof AssignExpr) {
+ Operator = ((AssignExpr) node).getOperator().toString();
+ }
+ if (Operator.length() > 0) {
+ Type += ":" + Operator;
+ }
+
+ String nameToSplit = node.toString();
+ if (isGenericParent) {
+ nameToSplit = ((ClassOrInterfaceType) node).getName();
+ if (isLeaf) {
+ // if it is a generic parent which counts as a leaf, then when
+ // it is participating in a path
+ // as a parent, it should be GenericClass and not a simple
+ // ClassOrInterfaceType.
+ Type = "GenericClass";
+ }
+ }
+ ArrayList splitNameParts = Common.splitToSubtokens(nameToSplit);
+ SplitName = splitNameParts.stream().collect(Collectors.joining(Common.internalSeparator));
+
+ node.toString();
+ Name = Common.normalizeName(node.toString(), Common.BlankWord);
+ if (Name.length() > Common.c_MaxLabelLength) {
+ Name = Name.substring(0, Common.c_MaxLabelLength);
+ } else if (node instanceof ClassOrInterfaceType && ((ClassOrInterfaceType) node).isBoxedType()) {
+ Name = ((ClassOrInterfaceType) node).toUnboxedType().toString();
+ }
+
+ if (Common.isMethod(node, Type)) {
+ Name = SplitName = Common.methodName;
+ }
+
+ if (SplitName.length() == 0) {
+ SplitName = Name;
+ if (node instanceof IntegerLiteralExpr && !NumericalKeepValues.contains(SplitName)) {
+ // This is a numeric literal, but not in our white list
+ SplitName = "";
+ }
+ }
+ }
+
+ public String getRawType() {
+ return RawType;
+ }
+
+ public String getType() {
+ return Type;
+ }
+
+ public String getName() {
+ return Name;
+ }
+}
diff --git a/JavaExtractor/JPredict/src/main/java/JavaExtractor/Visitors/FunctionVisitor.java b/pipeline/JavaExtractor/JPredict/src/main/java/JavaExtractor/Visitors/FunctionVisitor.java
old mode 100755
new mode 100644
similarity index 97%
rename from JavaExtractor/JPredict/src/main/java/JavaExtractor/Visitors/FunctionVisitor.java
rename to pipeline/JavaExtractor/JPredict/src/main/java/JavaExtractor/Visitors/FunctionVisitor.java
index e706e25..5703e0e
--- a/JavaExtractor/JPredict/src/main/java/JavaExtractor/Visitors/FunctionVisitor.java
+++ b/pipeline/JavaExtractor/JPredict/src/main/java/JavaExtractor/Visitors/FunctionVisitor.java
@@ -1,60 +1,60 @@
-package JavaExtractor.Visitors;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.stream.Collectors;
-
-import com.github.javaparser.ast.Node;
-import com.github.javaparser.ast.body.MethodDeclaration;
-import com.github.javaparser.ast.visitor.VoidVisitorAdapter;
-
-import JavaExtractor.Common.Common;
-import JavaExtractor.Common.MethodContent;
-
-@SuppressWarnings("StringEquality")
-public class FunctionVisitor extends VoidVisitorAdapter