Skip to content

Commit

Permalink
measuring time, counting attributes, numerous fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
Luca Liechti committed Jan 31, 2017
1 parent e4bf946 commit 6f7d585
Show file tree
Hide file tree
Showing 6 changed files with 92 additions and 29 deletions.
2 changes: 2 additions & 0 deletions bin/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,5 @@
/driver/
/parsers/
/tests/
/timer/
/tools/
Binary file modified bin/driver/Driver.class
Binary file not shown.
44 changes: 33 additions & 11 deletions src/datastructures/Lattice.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ public class Lattice {
private HashMap<String, ArrayList<FormalObject>> bookkeeping; //used to calculate the number of NULLs and legacy values
private ArrayList<FormalObject> removedSingletons;
private ContextCleanser cc;
private long time;

public Lattice(Dictionary _dic) {
this.nodes = new ArrayList<LatticeNode>();
Expand All @@ -35,6 +36,7 @@ public Lattice(Dictionary _dic) {
this.bookkeeping = null;
this.removedSingletons = new ArrayList<FormalObject>();
this.cc = new ContextCleanser(_dic);
this.time = 0;
}

public void clear() {
Expand All @@ -47,10 +49,20 @@ public void clear() {
public String latticeStats() {
// return "Nodes: " + nodes.size() + "\twith own objects: " + nodesWithOwnObjects() + "\tedges: " + edges.size()
// + "\tclusterIndex: " + String.format("%.3f", clusterIndex()) + "\tcleanliness: " + String.format("%.1f", cleanliness()) + "%";
return nodes.size() + "\t" + nodesWithOwnObjects() + "\t" + edges.size() + "\t" + String.format("%.3f", clusterIndex()) + "\t" + String.format("%.1f", cleanliness())
+ "\t" + String.format("%.1f", nullPercentage()) + "\t" + String.format("%.1f", legacyPercentage());
return numberOfAttributes() + "\t" + nodes.size() + "\t" + nodesWithOwnObjects() + "\t" + edges.size() + "\t" + String.format("%.3f", clusterIndex()) + "\t" + String.format("%.1f", cleanliness())
+ "\t" + String.format("%.1f", nullPercentage()) + "\t" + String.format("%.1f", legacyPercentage()) + "\t" + time;
}

/**
 * Counts the bits that are set in at least one intent among the own
 * objects of this lattice's nodes.
 *
 * Every own object's intent is OR-ed into a single accumulator bit set
 * (initially sized from the dictionary), and the number of set bits in
 * that accumulator is returned. Presumably each bit stands for one
 * attribute of the dictionary -- confirm against FormalObject.
 *
 * @return the cardinality of the union of all own objects' intents
 */
private int numberOfAttributes() {
	BitSet union = new BitSet(dic.getSize());
	for(int idx = 0; idx < nodes.size(); idx++) {
		LatticeNode current = nodes.get(idx);
		for(FormalObject own : current.ownObjects()) {
			union.or(own.getIntent());
		}
	}
	return union.cardinality();
}

public void addNode(LatticeNode node) {
node.setNodeNumber(++currentNodeNumber);
nodes.add(node);
Expand Down Expand Up @@ -362,7 +374,7 @@ private int nulls() {
int nulls = 0;
for(String hash : bookkeeping.keySet()){
ArrayList<FormalObject> nodeObjects = bookkeeping.get(hash);
BitSet archetype = findArchetype(hash, nodeObjects);
BitSet archetype = bitsetFromHash(hash/*, nodeObjects*/);
// System.out.println(archetype);
for(FormalObject comp : nodeObjects){
BitSet nullSet = (BitSet)archetype.clone();
Expand All @@ -378,7 +390,7 @@ private int legacies() {
int legacies = 0;
for(String hash : bookkeeping.keySet()){
ArrayList<FormalObject> nodeObjects = bookkeeping.get(hash);
BitSet archetype = findArchetype(hash, nodeObjects);
BitSet archetype = bitsetFromHash(hash/*, nodeObjects*/);
for(FormalObject comp : nodeObjects){
BitSet legSet = (BitSet)archetype.clone();
legSet.xor(comp.getIntent());
Expand All @@ -389,12 +401,18 @@ private int legacies() {
return legacies;
}

private BitSet findArchetype(String hash, ArrayList<FormalObject> objectArray) {
for(FormalObject obj : objectArray){
if(cc.bitsetHash(obj.getIntent()).equals(hash))
return (BitSet)obj.getIntent().clone();
private BitSet bitsetFromHash(String hash/*, ArrayList<FormalObject> objectArray*/) {
// for(FormalObject obj : objectArray){
// if(cc.bitsetHash(obj.getIntent()).equals(hash))
// return (BitSet)obj.getIntent().clone();
// }
// return null;
BitSet set = new BitSet(hash.length());
for(int i = 0; i < hash.length(); i++){
if(hash.charAt(i) == '1')
set.set(i);
}
return null;
return set;
}

private double nullPercentage() {
Expand All @@ -413,11 +431,11 @@ public void retrofitSingletons() {
for(FormalObject single : removedSingletons) {
LatticeNode bestFit = findBestNodeFit(single);//find suitable latticeNode WITH own objects for each formalObject in singleton array.
//add the objects to those nodes. TODO: Does this really have to require two function calls?
single.setIntent(bestFit.getIntent());/////////////////////////////////////////////////////
bestFit.addObject(single);
bestFit.addToOwnObjects(single);
//update the bookkeeping datastructure, ie. add the formalObject to the hash of the closest node. Do not re-compute anything.
try{bookkeeping.get(cc.bitsetHash(bestFit.getIntent())).add(single);}
catch (NullPointerException npe) {System.out.println("best Fit for " + single.getIntent() + " (" + cc.bitsetHash(single.getIntent()) + ": " + bestFit.getIntent() + "(" + cc.bitsetHash(bestFit.getIntent()) + ")");}
bookkeeping.get(cc.bitsetHash(bestFit.getIntent())).add(single);
}
}

Expand All @@ -441,4 +459,8 @@ private LatticeNode findBestNodeFit(FormalObject single) {
// System.out.println("Retrofitting " + single.getIntent() + " into " + bestFit.getIntent() + " (score = " + bestFitScore + ", own = " + bestFitOwnObjects + ")");
return bestFit;
}

/**
 * Stores the elapsed time that latticeStats() appends as the last
 * column of its output.
 *
 * @param timeElapsed the duration to record; the unit is whatever the
 *                    caller measured (LatticeBuilder passes the value
 *                    of Timer.timeElapsed())
 */
public void setTime(long timeElapsed) {
	time = timeElapsed;
}
}
37 changes: 19 additions & 18 deletions src/driver/Driver.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,17 +21,17 @@ public static void main(String[] args){

//CONFIGURE HERE
double mergeStop = 0d;
Boolean retroFitSingletons = false;
Boolean deleteRareAttributes = true;
Boolean retroFitSingletons = true;

// //add XML repos
// docs.add(repoFolder + "XML\\mondial.xml");
// docs.add(repoFolder + "XML\\SigmodRecord.xml");
// docs.add(repoFolder + "XML\\ebay.xml");
// docs.add(repoFolder + "XML\\DBLP\\1000Lattice.xml"); //
// docs.add(repoFolder + "XML\\DBLP\\316NoSql.xml");
// docs.add(repoFolder + "XML\\DBLP\\1000FCA.xml"); //
// docs.add(repoFolder + "XML\\DBLP\\1000Schema.xml"); //
docs.add(repoFolder + "XML\\DBLP\\1000Lattice.xml"); //
docs.add(repoFolder + "XML\\DBLP\\316NoSql.xml"); //
docs.add(repoFolder + "XML\\DBLP\\1000FCA.xml"); //
docs.add(repoFolder + "XML\\DBLP\\1000Schema.xml"); //

//add IESL repos
// docs.add(ieslFolder + "gp-bibliography.bib");
Expand All @@ -40,19 +40,19 @@ public static void main(String[] args){
//add BibTex repos
// docs.add(repoFolder + "BibTex\\BordatTest.bib");
// docs.add(repoFolder + "BibTex\\Test2.bib");
// docs.add(repoFolder + "BibTex\\scg.bib"); //
// docs.add(repoFolder + "BibTex\\listb.bib"); //
// docs.add(repoFolder + "BibTex\\zbMATH\\100Lattice.bib"); //
// docs.add(repoFolder + "BibTex\\zbMATH\\100Schema.bib");
// docs.add(repoFolder + "BibTex\\zbMATH\\100Algebra.bib");
// docs.add(repoFolder + "BibTex\\zbMATH\\100Groups.bib");
docs.add(repoFolder + "BibTex\\scg.bib"); //
docs.add(repoFolder + "BibTex\\listb.bib"); //
docs.add(repoFolder + "BibTex\\zbMATH\\100Lattice.bib"); //
docs.add(repoFolder + "BibTex\\zbMATH\\100Schema.bib"); //
docs.add(repoFolder + "BibTex\\zbMATH\\100Algebra.bib"); //
docs.add(repoFolder + "BibTex\\zbMATH\\100Groups.bib"); //

// //add JSON repos
// docs.add(repoFolder + "JSON\\SIRA\\alle.js"); //
docs.add(repoFolder + "JSON\\SIRA\\alle.js"); //

//PARSING SINGLE FILES
// for(String doc : docs)
// parseDocument(doc, outputFolder, graphvizFolder, factory.makeParser(doc), retroFitSingletons, deleteRareAttributes, mergeStop);
for(String doc : docs)
parseDocument(doc, outputFolder, graphvizFolder, factory.makeParser(doc), retroFitSingletons, deleteRareAttributes, mergeStop);

//PARSING ALL FILES IN FOLDER
parseFolder(folder, outputFolder, graphvizFolder, factory, retroFitSingletons, deleteRareAttributes, mergeStop);
Expand All @@ -72,8 +72,8 @@ private static void parseDocument(String doc, String outputFolder, String graphv
Lattice lattice = lb.buildLattice();
lattice.exportLatticeToFile(graphvizFolder + "0a_original_" + parser.getTargetLatticeFilename(doc));

System.out.println("\nNr\tScore\tNodes\tWithOwn\tedges\tindex\tclean\tnull\tleg");
System.out.println("-------------------------------------------------------------------");
System.out.println("\nNr\tScore\tAttr\tNodes\tWithOwn\tedges\tindex\tclean\tnull\tleg\ttime");
System.out.println("-----------------------------------------------------------------------------------");
System.out.println("orig\t---" + "\t" + lattice.latticeStats());
ContextCleanser cc = new ContextCleanser(fc, lattice);

Expand All @@ -98,14 +98,15 @@ private static void parseDocument(String doc, String outputFolder, String graphv

///TINKER///
double score = cc.tinker();
int i = 0;
int i = 1;
while(score > mergeStop) {
lattice.clear();
lattice = lb.buildLattice();
System.out.println(i + "\t" + String.format("%.2f", score) + "\t" + lattice.latticeStats());
lattice.exportLatticeToFile(graphvizFolder + (i++) + "_" + parser.getTargetLatticeFilename(doc));
score = cc.tinker();
}
System.out.println("final (" + (--i) + ")\t" + lattice.latticeStats());

///SINGLETONS PT. 2///
if(retroFitSingletons) {
Expand All @@ -114,7 +115,7 @@ private static void parseDocument(String doc, String outputFolder, String graphv
lattice.exportLatticeToFile(graphvizFolder + i + "_retroFit_" + parser.getTargetLatticeFilename(doc));
}

System.out.println("-------------------------------------------------------------------\n\n");
System.out.println("-----------------------------------------------------------------------------------\n\n");
}

private static void parseFolder(String inFolder, String outFolder, String gvFolder, ParserFactory fac, Boolean retroFitSingletons, Boolean deleteRareAttributes, double mergeStop) {
Expand Down
3 changes: 3 additions & 0 deletions src/driver/LatticeBuilder.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import datastructures.FormalObject;
import datastructures.Lattice;
import datastructures.LatticeNode;
import tools.Timer;

public class LatticeBuilder {

Expand All @@ -25,8 +26,10 @@ public LatticeBuilder(FormalContext _context) {
}

public Lattice buildLattice(){
Timer timer = new Timer();
//Norris algorithm
for(FormalObject g : context.getObjects()) add(g);
lattice.setTime(timer.timeElapsed());
addNodeWithAllAttributes();
computeExtents();
lattice.computeEdges();
Expand Down
35 changes: 35 additions & 0 deletions src/tools/Timer.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
package tools;
import java.util.Date;

/**
 * A utility class to help us run benchmarks.
 * <p>
 * Durations are taken from the JVM's monotonic time source
 * ({@link System#nanoTime()}) rather than from the wall clock, so an
 * adjustment of the system time between {@link #reset()} and
 * {@link #timeElapsed()} cannot yield a negative or inflated result.
 *
 * @author [email protected]
 * @version 1.0 1998-11-25
 */
public class Timer {
	/** Value of {@link #timeNow()} at the most recent {@link #reset()}, in milliseconds. */
	long _startTime;

	/**
	 * You can either create a new instance whenever
	 * you want to time something, or you can reset()
	 * an existing instance.
	 */
	public Timer() { this.reset(); }

	/**
	 * Restarts the measurement: subsequent calls to
	 * {@link #timeElapsed()} are relative to this moment.
	 */
	public void reset() {
		_startTime = this.timeNow();
	}

	/**
	 * How many milliseconds have elapsed since
	 * the last reset()? NB: does not reset the timer!
	 *
	 * @return elapsed milliseconds since the last {@link #reset()}
	 */
	public long timeElapsed() {
		return this.timeNow() - _startTime;
	}

	/**
	 * Reads the current time in milliseconds.
	 * <p>
	 * The value has an arbitrary origin and is only meaningful as a
	 * difference between two readings; it is not a calendar timestamp.
	 *
	 * @return the current reading of the monotonic clock, in milliseconds
	 */
	protected long timeNow() {
		// nanoTime() is monotonic and allocation-free, unlike new Date().getTime().
		return System.nanoTime() / 1000000L;
	}
}

0 comments on commit 6f7d585

Please sign in to comment.