Skip to content

Commit

Permalink
some refactorings for convenience
Browse files Browse the repository at this point in the history
  • Loading branch information
Luca Liechti committed Feb 3, 2017
1 parent 6f7d585 commit 746d40d
Show file tree
Hide file tree
Showing 15 changed files with 94 additions and 58 deletions.
Binary file modified bin/datastructures/FormalContext.class
Binary file not shown.
Binary file modified bin/driver/Driver.class
Binary file not shown.
Binary file modified bin/parsers/BibTexParser.class
Binary file not shown.
Binary file modified bin/parsers/JSONParser.class
Binary file not shown.
Binary file modified bin/parsers/NoSQLParser.class
Binary file not shown.
Binary file modified bin/parsers/XMLParser.class
Binary file not shown.
6 changes: 5 additions & 1 deletion src/datastructures/FormalContext.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ public FormalContext() {
}

//the BitSet is created at the time of the object being added to the context
public void addObject(FormalObject object){
public void createAndAddObject(FormalObject object){
BitSet intent = new BitSet();
for(String attribute : object.getAttributes()){
if(!dic.containsAttribute(attribute))
Expand All @@ -36,6 +36,10 @@ public void addObject(FormalObject object){
objects.add(object);
}

/**
 * Adds an already prepared object to this context's object collection.
 * Only the add itself is performed here; no other state of the context is touched.
 *
 * @param obj the formal object to add
 */
public void addObject(FormalObject obj) {
    this.objects.add(obj);
}

private void countAttribute(String attribute) {
if(!attributeSupport.containsKey(attribute))
attributeSupport.put(attribute, 1);
Expand Down
32 changes: 28 additions & 4 deletions src/datastructures/Lattice.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ public class Lattice {
private ArrayList<LatticeNode> nodes;
private ArrayList<LatticeEdge> edges;
private HashMap<Integer, ArrayList<LatticeNode>> nodesByLevel;
private FormalContext context;
private int currentNodeNumber;
private Dictionary dic;
private BitSet lastMergedInto; //used to keep track of which node has last been merged into in the tinker algorithm
Expand All @@ -26,9 +27,10 @@ public class Lattice {
private ContextCleanser cc;
private long time;

public Lattice(Dictionary _dic) {
public Lattice(Dictionary _dic, FormalContext _context) {
this.nodes = new ArrayList<LatticeNode>();
this.edges = new ArrayList<LatticeEdge>();
this.context = _context;
this.currentNodeNumber = 0;
this.dic = _dic;
this.nodesByLevel = new HashMap<Integer, ArrayList<LatticeNode>>();
Expand All @@ -49,8 +51,9 @@ public void clear() {
public String latticeStats() {
// return "Nodes: " + nodes.size() + "\twith own objects: " + nodesWithOwnObjects() + "\tedges: " + edges.size()
// + "\tclusterIndex: " + String.format("%.3f", clusterIndex()) + "\tcleanliness: " + String.format("%.1f", cleanliness()) + "%";
return numberOfAttributes() + "\t" + nodes.size() + "\t" + nodesWithOwnObjects() + "\t" + edges.size() + "\t" + String.format("%.3f", clusterIndex()) + "\t" + String.format("%.1f", cleanliness())
+ "\t" + String.format("%.1f", nullPercentage()) + "\t" + String.format("%.1f", legacyPercentage()) + "\t" + time;
return context.getObjects().size() + "\t" + types() + "\t" + numberOfAttributes() + "\t" + nodes.size() + "\t" + nodesWithOwnObjects()
+ "\t" + edges.size() + "\t" + String.format("%.3f", clusterIndex()) + "\t" + String.format("%.1f", inMajority())
+ "\t" + String.format("%.1f", inCleanNodes()) + "\t" + String.format("%.1f", nullPercentage()) + "\t" + String.format("%.1f", legacyPercentage()) + "\t" + time;
}

private int numberOfAttributes() {
Expand Down Expand Up @@ -315,7 +318,7 @@ public void setLastMergedInto(BitSet intent) {
this.lastMergedInto = intent;
}

public double cleanliness() {
public double inMajority() {
int majority = 0;
int total = 0;
for(LatticeNode node : nodes) {
Expand All @@ -327,6 +330,19 @@ public double cleanliness() {
return ((double)majority/(double)total)*100;
}

/**
 * Computes the percentage of own objects that sit in "clean" nodes, i.e. nodes whose
 * type summary (as produced by typesOfFormalObjects) starts with "100%".
 * NOTE(review): presumably a summary starting with "100%" means that all own objects
 * of the node share a single type -- confirm against LatticeNode.typesOfFormalObjects.
 *
 * @return a value between 0 and 100; 0 if no node has any own objects
 */
public double inCleanNodes() {
    int inClean = 0;
    int total = 0;
    for(LatticeNode node : nodes) {
        if(!node.hasOwnObjects())
            continue;
        int ownObjects = node.numberOfOwnObjects();
        //startsWith cannot throw on a summary shorter than four characters, unlike substring(0,4)
        if(node.typesOfFormalObjects(node.ownObjects()).startsWith("100%"))
            inClean += ownObjects;
        total += ownObjects;
    }
    //without any own objects the division would be 0.0/0.0 and print as NaN in the stats table
    if(total == 0)
        return 0d;
    return ((double)inClean/(double)total)*100;
}

/**
 * Tells whether the bookkeeping structure is still unset.
 *
 * @return TRUE if bookkeeping is null, FALSE otherwise
 */
public Boolean bookkeepingIsNull() {
    if(bookkeeping == null) {
        return Boolean.TRUE;
    }
    return Boolean.FALSE;
}
Expand Down Expand Up @@ -436,6 +452,7 @@ public void retrofitSingletons() {
bestFit.addToOwnObjects(single);
//update the bookkeeping datastructure, ie. add the formalObject to the hash of the closest node. Do not re-compute anything.
bookkeeping.get(cc.bitsetHash(bestFit.getIntent())).add(single);
context.addObject(single);
}
}

Expand All @@ -459,6 +476,13 @@ private LatticeNode findBestNodeFit(FormalObject single) {
// System.out.println("Retrofitting " + single.getIntent() + " into " + bestFit.getIntent() + " (score = " + bestFitScore + ", own = " + bestFitOwnObjects + ")");
return bestFit;
}

/**
 * Counts how many distinct names occur among the objects of the context.
 * NOTE(review): the method name suggests that an object's name is its type -- confirm.
 *
 * @return the number of different values returned by getName() over all context objects
 */
public int types() {
    HashSet<String> distinctNames = new HashSet<String>();
    for(FormalObject formalObject : context.getObjects()) {
        String name = formalObject.getName();
        distinctNames.add(name);
    }
    return distinctNames.size();
}

public void setTime(long timeElapsed) {
this.time = timeElapsed;
Expand Down
2 changes: 1 addition & 1 deletion src/datastructures/LatticeNode.java
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ public int majority() {
return counts.get(majorityType);
}

private String typesOfFormalObjects(HashSet<FormalObject> set) {
protected String typesOfFormalObjects(HashSet<FormalObject> set) {
if(set.size() > 0) {
HashMap<String, Integer> counts = countObjectTypes(set);
if(counts.keySet().size() == 1)
Expand Down
71 changes: 33 additions & 38 deletions src/driver/Driver.java
Original file line number Diff line number Diff line change
Expand Up @@ -11,69 +11,64 @@

public class Driver {
public static void main(String[] args){
String folder = "C:\\Users\\Luca Liechti\\Desktop\\IESL";
String folder200 = "C:\\Users\\Luca Liechti\\Desktop\\IESL200";
String folder2000 = "C:\\Users\\Luca Liechti\\Desktop\\IESL2000";
String repoFolder = "C:\\Users\\Luca Liechti\\Dropbox\\Uni\\!BSc\\NoSQL repos\\";
String ieslFolder = "C:\\Users\\Luca Liechti\\Desktop\\IESL\\";
String outputFolder = "C:\\Users\\Luca Liechti\\Dropbox\\Uni\\!BSc\\context files\\";
String graphvizFolder = "C:\\Users\\Luca Liechti\\Dropbox\\Uni\\!BSc\\graphviz files\\";
ArrayList<String> docs = new ArrayList<String>();
ArrayList<String> docs200 = new ArrayList<String>();
ArrayList<String> docs2000 = new ArrayList<String>();
ParserFactory factory = new ParserFactory();

//CONFIGURE HERE
double mergeStop = 0d;
Boolean deleteRareAttributes = true;
Boolean retroFitSingletons = true;
Boolean deleteRareAttributes = false;
Boolean retroFitSingletons = false;

// //add XML repos
// docs.add(repoFolder + "XML\\mondial.xml");
// docs.add(repoFolder + "XML\\SigmodRecord.xml");
// docs.add(repoFolder + "XML\\ebay.xml");
docs.add(repoFolder + "XML\\DBLP\\1000Lattice.xml"); //
docs.add(repoFolder + "XML\\DBLP\\316NoSql.xml"); //
docs.add(repoFolder + "XML\\DBLP\\1000FCA.xml"); //
docs.add(repoFolder + "XML\\DBLP\\1000Schema.xml"); //

//add IESL repos
// docs.add(ieslFolder + "gp-bibliography.bib");
// docs.add(ieslFolder + "visinfo.zib.de#EVlib#Bibliography#EVL-1998.bib");
// docs2000.add(repoFolder + "XML\\DBLP\\1000Lattice.xml");
// docs2000.add(repoFolder + "XML\\DBLP\\316NoSql.xml");
// docs2000.add(repoFolder + "XML\\DBLP\\1000FCA.xml");
// docs2000.add(repoFolder + "XML\\DBLP\\1000Schema.xml");

//add BibTex repos
// docs.add(repoFolder + "BibTex\\BordatTest.bib");
// docs.add(repoFolder + "BibTex\\Test2.bib");
docs.add(repoFolder + "BibTex\\scg.bib"); //
docs.add(repoFolder + "BibTex\\listb.bib"); //
docs.add(repoFolder + "BibTex\\zbMATH\\100Lattice.bib"); //
docs.add(repoFolder + "BibTex\\zbMATH\\100Schema.bib"); //
docs.add(repoFolder + "BibTex\\zbMATH\\100Algebra.bib"); //
docs.add(repoFolder + "BibTex\\zbMATH\\100Groups.bib"); //
// docs200.add(repoFolder + "BibTex\\scg.bib");
// docs200.add(repoFolder + "BibTex\\listb.bib");
// docs2000.add(repoFolder + "BibTex\\zbMATH\\100Lattice.bib");
// docs2000.add(repoFolder + "BibTex\\zbMATH\\100Schema.bib");
// docs2000.add(repoFolder + "BibTex\\zbMATH\\100Algebra.bib");
// docs2000.add(repoFolder + "BibTex\\zbMATH\\100Groups.bib");

// //add JSON repos
docs.add(repoFolder + "JSON\\SIRA\\alle.js"); //
docs2000.add(repoFolder + "JSON\\SIRA\\alle.js");

//PARSING SINGLE FILES
for(String doc : docs)
parseDocument(doc, outputFolder, graphvizFolder, factory.makeParser(doc), retroFitSingletons, deleteRareAttributes, mergeStop);
for(String doc : docs200)
parseDocument(doc, outputFolder, graphvizFolder, factory.makeParser(doc), retroFitSingletons, deleteRareAttributes, mergeStop, 200);
for(String doc : docs2000)
parseDocument(doc, outputFolder, graphvizFolder, factory.makeParser(doc), retroFitSingletons, deleteRareAttributes, mergeStop, 2000);

//PARSING ALL FILES IN FOLDER
parseFolder(folder, outputFolder, graphvizFolder, factory, retroFitSingletons, deleteRareAttributes, mergeStop);
// parseFolder(folder200, outputFolder, graphvizFolder, factory, retroFitSingletons, deleteRareAttributes, mergeStop, 200);
// parseFolder(folder2000, outputFolder, graphvizFolder, factory, retroFitSingletons, deleteRareAttributes, mergeStop, 2000);

System.out.println("All done.");
}

private static void parseDocument(String doc, String outputFolder, String graphvizFolder, NoSQLParser parser, Boolean retroFitSingletons, Boolean deleteRareAttributes, double mergeStop){
private static void parseDocument(String doc, String outputFolder, String graphvizFolder, NoSQLParser parser, Boolean retroFitSingletons, Boolean deleteRareAttributes, double mergeStop, int obj){
System.out.println("Parsing file " + doc);
ArrayList<FormalObject> importedContext = parser.parseFile(doc);
ArrayList<FormalObject> importedContext = parser.parseFile(doc, obj);
FormalContext fc = new FormalContext();
for(FormalObject object : importedContext)
fc.addObject(object);
fc.createAndAddObject(object);
fc.exportContextToFile(outputFolder + parser.getTargetContextFilename(doc));

LatticeBuilder lb = new LatticeBuilder(fc);
Lattice lattice = lb.buildLattice();
lattice.exportLatticeToFile(graphvizFolder + "0a_original_" + parser.getTargetLatticeFilename(doc));

System.out.println("\nNr\tScore\tAttr\tNodes\tWithOwn\tedges\tindex\tclean\tnull\tleg\ttime");
System.out.println("-----------------------------------------------------------------------------------");
System.out.println("\nNr\tScore\tObjects\tTypes\tAttr\tNodes\tWithOwn\tedges\tindex\tmajor\tinClean\tnull\tleg\ttime");
System.out.println("------------------------------------------------------------------------------------------------------------");
System.out.println("orig\t---" + "\t" + lattice.latticeStats());
ContextCleanser cc = new ContextCleanser(fc, lattice);

Expand Down Expand Up @@ -102,11 +97,11 @@ private static void parseDocument(String doc, String outputFolder, String graphv
while(score > mergeStop) {
lattice.clear();
lattice = lb.buildLattice();
System.out.println(i + "\t" + String.format("%.2f", score) + "\t" + lattice.latticeStats());
// System.out.println(i + "\t" + String.format("%.2f", score) + "\t" + lattice.latticeStats());
lattice.exportLatticeToFile(graphvizFolder + (i++) + "_" + parser.getTargetLatticeFilename(doc));
score = cc.tinker();
}
System.out.println("final (" + (--i) + ")\t" + lattice.latticeStats());
if(!retroFitSingletons) System.out.println("final (" + (--i) + ")\t" + lattice.latticeStats());

///SINGLETONS PT. 2///
if(retroFitSingletons) {
Expand All @@ -115,14 +110,14 @@ private static void parseDocument(String doc, String outputFolder, String graphv
lattice.exportLatticeToFile(graphvizFolder + i + "_retroFit_" + parser.getTargetLatticeFilename(doc));
}

System.out.println("-----------------------------------------------------------------------------------\n\n");
System.out.println("------------------------------------------------------------------------------------------------------------\n\n");
}

private static void parseFolder(String inFolder, String outFolder, String gvFolder, ParserFactory fac, Boolean retroFitSingletons, Boolean deleteRareAttributes, double mergeStop) {
private static void parseFolder(String inFolder, String outFolder, String gvFolder, ParserFactory fac, Boolean retroFitSingletons, Boolean deleteRareAttributes, double mergeStop, int obj) {
File fold = new File(inFolder);
assert(fold.isDirectory());
String[] inFiles = fold.list();
for(int i = 0; i < inFiles.length; i++)
parseDocument(inFolder + "\\" + inFiles[i], outFolder, gvFolder, fac.makeParser(inFiles[i]), retroFitSingletons, deleteRareAttributes, mergeStop);
parseDocument(inFolder + "\\" + inFiles[i], outFolder, gvFolder, fac.makeParser(inFiles[i]), retroFitSingletons, deleteRareAttributes, mergeStop, obj);
}
}
7 changes: 6 additions & 1 deletion src/driver/LatticeBuilder.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ public class LatticeBuilder {

public LatticeBuilder(FormalContext _context) {
this.context = _context;
this.lattice = new Lattice(_context.getDictionary());
this.lattice = new Lattice(_context.getDictionary(), _context);
this.alreadyAddedObjects = new ArrayList<FormalObject>();
this.maximalConcept = null;
}
Expand All @@ -30,10 +30,15 @@ public Lattice buildLattice(){
//Norris algorithm
for(FormalObject g : context.getObjects()) add(g);
lattice.setTime(timer.timeElapsed());
// System.out.println("Added all objects in " + timer.timeElapsed() + " ms."); timer.reset();
addNodeWithAllAttributes();
// System.out.println("Added node with all attributes in " + timer.timeElapsed() + " ms."); timer.reset();
computeExtents();
// System.out.println("Computed extents in " + timer.timeElapsed() + " ms."); timer.reset();
lattice.computeEdges();
// System.out.println("Computed edges in " + timer.timeElapsed() + " ms."); timer.reset();
lattice.computeAttributes();
// System.out.println("Computed which attribute enters where in " + timer.timeElapsed() + " ms.");
if(lattice.bookkeepingIsNull()) lattice.initialiseBookkeeping();
alreadyAddedObjects.clear();
return lattice;
Expand Down
9 changes: 5 additions & 4 deletions src/parsers/BibTexParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@

public class BibTexParser implements NoSQLParser {

public ArrayList<FormalObject> parseFile(String file){
public ArrayList<FormalObject> parseFile(String file, int MAX_OBJECTS){
ArrayList<String> splitObjects = splitFile(file); //split the input file
return createFormalObjects(splitObjects); //extract attributes from split objects
return createFormalObjects(splitObjects, MAX_OBJECTS); //extract attributes from split objects
}

private ArrayList<String> splitFile(String file) {
Expand Down Expand Up @@ -45,7 +45,7 @@ private ArrayList<String> splitFile(String file) {
return splitString;
}

private ArrayList<FormalObject> createFormalObjects(ArrayList<String> splitObjects) {
private ArrayList<FormalObject> createFormalObjects(ArrayList<String> splitObjects, int MAX_OBJECTS) {
ArrayList<FormalObject> parsedObjects = new ArrayList<FormalObject>();
System.out.print("Parsing objects to context... ");
//extract the attributes from each object
Expand All @@ -67,10 +67,11 @@ else if(lines[i].matches("@.*\\{.*")){
}
currentObject.setAttributes(attributes);
currentObject.setName(name);
if(++k <= 200) //Comment in/out to look at all/n objects
if(MAX_OBJECTS == 0 || ++k <= MAX_OBJECTS)
parsedObjects.add(currentObject);
}
System.out.println("done.");
//MAX_OBJECTS == 0 means "no limit", so the size bound only applies to a positive limit
assert (MAX_OBJECTS == 0 || parsedObjects.size() <= MAX_OBJECTS);
return parsedObjects;
}

Expand Down
10 changes: 6 additions & 4 deletions src/parsers/JSONParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,9 @@ public class JSONParser implements NoSQLParser {
private String nameAttribute = "file";

@Override
public ArrayList<FormalObject> parseFile(String file) {
public ArrayList<FormalObject> parseFile(String file, int MAX_OBJECTS) {
JSONArray jarray = extractJSONarray(file);
return(createFormalObjects(jarray));
return(createFormalObjects(jarray, MAX_OBJECTS));
}

private JSONArray extractJSONarray(String file) {
Expand All @@ -34,11 +34,12 @@ private JSONArray extractJSONarray(String file) {
return array;
}

private ArrayList<FormalObject> createFormalObjects(JSONArray jarray) {
private ArrayList<FormalObject> createFormalObjects(JSONArray jarray, int MAX_OBJECTS) {
ArrayList<FormalObject> parsedObjects = new ArrayList<FormalObject>();
System.out.print("Parsing objects to context... ");
//declare how many objects we want. If all (0), or more than the array holds, just parse the whole array so that getJSONObject(i) stays in range
if(MAX_OBJECTS == 0 || MAX_OBJECTS > jarray.length()) MAX_OBJECTS = jarray.length();
try{
for(int i = 0; i < jarray.length(); i++) {
for(int i = 0; i < MAX_OBJECTS; i++) {
FormalObject formalObj = new FormalObject();
ArrayList<String> formalAttr = new ArrayList<String>();
JSONObject obj = jarray.getJSONObject(i);
Expand All @@ -53,6 +54,7 @@ private ArrayList<FormalObject> createFormalObjects(JSONArray jarray) {
}
catch(JSONException jsone) { jsone.printStackTrace(); }
System.out.print("Done.");
assert (parsedObjects.size() <= MAX_OBJECTS);
return parsedObjects;
}

Expand Down
2 changes: 1 addition & 1 deletion src/parsers/NoSQLParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

public interface NoSQLParser {

public ArrayList<FormalObject> parseFile(String file);
public ArrayList<FormalObject> parseFile(String file, int nrObj);

public String getTargetContextFilename(String doc);

Expand Down
13 changes: 9 additions & 4 deletions src/parsers/XMLParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,15 @@ public class XMLParser implements NoSQLParser {
private String wantedObjects = "info";
private String nameAttribute = "type";

public ArrayList<FormalObject> parseFile(String file){
ArrayList<Element> wantedElements = extractElements(file); //split the input file
public ArrayList<FormalObject> parseFile(String file, int MAX_OBJECTS){
ArrayList<Element> wantedElements = extractElements(file, MAX_OBJECTS); //split the input file
return createFormalObjects(wantedElements); //extract attributes from split objects
}

private ArrayList<Element> extractElements(String file) {
private ArrayList<Element> extractElements(String file, int MAX_OBJECTS) {
ArrayList<Element> wantedElements = new ArrayList<Element>();
ElementFilter ef = new ElementFilter();
int numberOfParsedObjects = 0;
try {
File inputFile = new File(file);
SAXBuilder saxBuilder = new SAXBuilder();
Expand All @@ -35,11 +36,15 @@ private ArrayList<Element> extractElements(String file) {
Iterator<Element> allElementsIterator = rootElement.getDescendants(ef);
//collect wanted elements until the limit is reached; MAX_OBJECTS == 0 means "no limit"
//(the limit check must be parsed < MAX_OBJECTS -- the reverse comparison never admits a first element)
while(allElementsIterator.hasNext() && (MAX_OBJECTS == 0 || numberOfParsedObjects < MAX_OBJECTS)) {
    Element currentElement = allElementsIterator.next();
    if(currentElement.getName().equals(wantedObjects)) {
        wantedElements.add(currentElement);
        numberOfParsedObjects++;
    }
}
}
catch (JDOMException e) { e.printStackTrace(); }
catch (IOException e) { e.printStackTrace(); }
//both bounds only apply to a positive limit; MAX_OBJECTS == 0 means "no limit"
assert (MAX_OBJECTS == 0 || (numberOfParsedObjects <= MAX_OBJECTS && wantedElements.size() <= MAX_OBJECTS));
return wantedElements;
}

Expand Down

0 comments on commit 746d40d

Please sign in to comment.