Skip to content

Commit

Permalink
calculating NULLs and legacy values
Browse files Browse the repository at this point in the history
  • Loading branch information
Luca Liechti committed Jan 23, 2017
1 parent 2abb517 commit 92b9bbd
Show file tree
Hide file tree
Showing 5 changed files with 107 additions and 16 deletions.
Binary file modified bin/driver/Driver.class
Binary file not shown.
95 changes: 92 additions & 3 deletions src/datastructures/Lattice.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ public class Lattice {
private int currentNodeNumber;
private Dictionary dic;
private BitSet lastMergedInto; //used to keep track of which node has last been merged into in the tinker algorithm
private HashMap<Integer, ArrayList<FormalObject>> bookkeeping; //used to calculate the number of NULLs and legacy values

public Lattice(Dictionary _dic) {
this.nodes = new ArrayList<LatticeNode>();
Expand All @@ -28,6 +29,7 @@ public Lattice(Dictionary _dic) {
this.dic = _dic;
this.nodesByLevel = new HashMap<Integer, ArrayList<LatticeNode>>();
this.lastMergedInto = null;
this.bookkeeping = null;
}

public void clear() {
Expand All @@ -40,7 +42,8 @@ public void clear() {
public String latticeStats() {
// return "Nodes: " + nodes.size() + "\twith own objects: " + nodesWithOwnObjects() + "\tedges: " + edges.size()
// + "\tclusterIndex: " + String.format("%.3f", clusterIndex()) + "\tcleanliness: " + String.format("%.1f", cleanliness()) + "%";
return nodes.size() + "\t" + nodesWithOwnObjects() + "\t" + edges.size() + "\t" + String.format("%.3f", clusterIndex()) + "\t" + String.format("%.1f", cleanliness());
return nodes.size() + "\t" + nodesWithOwnObjects() + "\t" + edges.size() + "\t" + String.format("%.3f", clusterIndex()) + "\t" + String.format("%.3f", cleanliness())
+ "\t" + String.format("%.3f", nullPercentage()) + "\t" + String.format("%.3f", legacyPercentage());
}

public void addNode(LatticeNode node) {
Expand All @@ -65,8 +68,8 @@ public void exportLatticeToFile(String outputFile){
latticeString += "digraph d{\n";
for(LatticeNode node : nodes)
latticeString += node.getNodeNumber()
+ " [label=\"" + node.getNiceAttributes() + node.getIntent()
+ "\next.: " + node.numberOfObjects() + " (" + node.typesOfExtent() + ") "
+ " [label=\"" + node.getNiceAttributes() //+ node.getIntent() ---------------------excluding intent for the moment
+ "ext.: " + node.numberOfObjects() + " (" + node.typesOfExtent() + ") "
+ "\nown: " + node.numberOfOwnObjects() + " (" + node.typesOfOwnObjects() + ") "
// + "\n merges into : " + node.mergesInto()
+ "\"" + peripheries(node) + color(node) + "]\n";
Expand Down Expand Up @@ -306,4 +309,90 @@ public double cleanliness() {
}
return ((double)majority/(double)total)*100;
}

/**
 * Lazily builds the bookkeeping map that backs the NULL and legacy statistics.
 * Every own object of every node is filed under the hash code of its own intent.
 * If the map already exists the call leaves it untouched.
 */
public void initialiseBookkeeping() {
    if (bookkeeping != null) {
        return; // already built; keep what has been recorded so far
    }
    bookkeeping = new HashMap<Integer, ArrayList<FormalObject>>();
    for (LatticeNode current : nodes) {
        if (!current.hasOwnObjects()) {
            continue;
        }
        for (FormalObject member : current.ownObjects()) {
            int key = member.getIntent().hashCode();
            ArrayList<FormalObject> bucket = bookkeeping.get(key);
            if (bucket == null) {
                // first object seen with this intent hash: open a new bucket
                bucket = new ArrayList<FormalObject>();
                bookkeeping.put(key, bucket);
            }
            bucket.add(member);
        }
    }
}

/**
 * Records that all objects of one node (the mergee) have been merged into
 * another node (the merger). The bookkeeping map is what the NULL and legacy
 * calculations read.
 *
 * Each object recorded for the mergee is copied, with a clone of its intent,
 * into the merger's bucket; the mergee's bucket is then dropped. Buckets are
 * keyed by the hash code of the node intent.
 *
 * @param mergee node whose recorded objects are moved
 * @param merger node that receives them
 * @throws IllegalStateException if there are objects to move but the merger
 *         has no bucket of its own
 */
public void updateBookkeeping(LatticeNode mergee, LatticeNode merger) {
    int mergeeHash = mergee.getIntent().hashCode();
    int mergerHash = merger.getIntent().hashCode();

    ArrayList<FormalObject> mergedObjects = bookkeeping.get(mergeeHash);
    if (mergedObjects == null) {
        // nothing was ever recorded for the mergee, so there is nothing to move
        return;
    }
    if (mergeeHash == mergerHash) {
        // Source and target are the same bucket: copying would append to the
        // list being iterated, and the remove below would erase the merger's
        // own records. The objects are already where they belong.
        return;
    }

    ArrayList<FormalObject> mergerObjects = bookkeeping.get(mergerHash);
    if (mergerObjects == null) {
        // Fail at the cause with a readable message instead of a bare
        // NullPointerException inside the copy loop.
        // NOTE(review): presumably this means the merger had no own objects when
        // the bookkeeping was initialised - confirm whether such merges are expected.
        throw new IllegalStateException(
                "No bookkeeping bucket for merger node with intent hash " + mergerHash);
    }

    for (FormalObject obj : mergedObjects) {
        // store a snapshot: the bucket keeps the intent as it is right now,
        // independent of later changes to the original object
        FormalObject copy = new FormalObject();
        copy.setIntent((BitSet) obj.getIntent().clone());
        mergerObjects.add(copy);
    }
    bookkeeping.remove(mergeeHash);
}

/**
 * Sums, over all nodes, the number of bits set in the node intent multiplied
 * by the node's number of own objects.
 *
 * @return the accumulated product over all nodes
 */
private int totalCardinality() {
    int sum = 0;
    for (int i = 0; i < nodes.size(); i++) {
        LatticeNode node = nodes.get(i);
        int attributeCount = node.getIntent().cardinality();
        sum += attributeCount * node.numberOfOwnObjects();
    }
    return sum;
}

/**
 * Counts NULLs over all bookkeeping buckets: for every recorded object, the
 * number of bits that are set in the bucket's archetype intent but not in the
 * object's own intent.
 *
 * @return the total number of such bits
 */
private int nulls() {
    int missing = 0;
    for (Integer key : bookkeeping.keySet()) {
        ArrayList<FormalObject> bucket = bookkeeping.get(key);
        BitSet archetype = findArchetype(key, bucket);
        for (FormalObject member : bucket) {
            // archetype AND NOT member == (archetype XOR member) AND archetype
            BitSet absent = (BitSet) archetype.clone();
            absent.andNot(member.getIntent());
            missing += absent.cardinality();
        }
    }
    return missing;
}

/**
 * Counts legacy values over all bookkeeping buckets: for every recorded
 * object, the number of bits that are set in the object's own intent but not
 * in the bucket's archetype intent.
 *
 * @return the total number of such bits
 */
private int legacies() {
    int surplusTotal = 0;
    for (Integer key : bookkeeping.keySet()) {
        ArrayList<FormalObject> bucket = bookkeeping.get(key);
        BitSet archetype = findArchetype(key, bucket);
        for (FormalObject member : bucket) {
            // member AND NOT archetype == (archetype XOR member) AND member
            BitSet surplus = (BitSet) member.getIntent().clone();
            surplus.andNot(archetype);
            surplusTotal += surplus.cardinality();
        }
    }
    return surplusTotal;
}

/**
 * Looks for the first object in the list whose intent has the given hash code
 * and returns a copy of that intent.
 *
 * @param hash        hash code the intent must have
 * @param objectArray objects to search, in list order
 * @return a clone of the first matching intent, or null if no object matches
 */
private BitSet findArchetype(int hash, ArrayList<FormalObject> objectArray) {
    BitSet archetype = null;
    for (int i = 0; i < objectArray.size() && archetype == null; i++) {
        BitSet intent = objectArray.get(i).getIntent();
        if (intent.hashCode() == hash) {
            archetype = (BitSet) intent.clone();
        }
    }
    return archetype;
}

/**
 * Expresses the NULL count as a percentage of the total cardinality.
 *
 * @return nulls() divided by totalCardinality(), times 100
 */
private double nullPercentage() {
    double nullCount = nulls();
    double attributeSlots = totalCardinality();
    return nullCount / attributeSlots * 100;
}

/**
 * Expresses the legacy-value count as a percentage of the total cardinality.
 *
 * @return legacies() divided by totalCardinality(), times 100
 */
private double legacyPercentage() {
    double legacyCount = legacies();
    double attributeSlots = totalCardinality();
    return legacyCount / attributeSlots * 100;
}
}
13 changes: 7 additions & 6 deletions src/driver/ContextCleanser.java
Original file line number Diff line number Diff line change
Expand Up @@ -58,13 +58,13 @@ public double tinker() {
mergeCandidates.addAll(node.upperNeighbours());
mergeCandidates.addAll(node.lowerNeighbours());
//nodes from same level with at least one shared parent
for(LatticeNode parent : node.upperNeighbours()){
for(LatticeNode child : parent.lowerNeighbours()){
if(child != node) mergeCandidates.add(child);
}
}
// for(LatticeNode parent : node.upperNeighbours()){
// for(LatticeNode child : parent.lowerNeighbours()){
// if(child != node) mergeCandidates.add(child);
// }
// }
for(LatticeNode candidate : mergeCandidates) {
if(mergeScore(node, candidate) > highScore) {
if(mergeScore(node, candidate) >= highScore) {
highScore = mergeScore(node, candidate);
firstNode = node;
secondNode = candidate;
Expand All @@ -88,6 +88,7 @@ private double mergeScore(LatticeNode node, LatticeNode candidate) {

private void mergeInto(LatticeNode firstNode, LatticeNode secondNode) {
BitSet mergedIntent = (BitSet)secondNode.getIntent().clone();
lattice.updateBookkeeping(firstNode, secondNode);
for(FormalObject obj : firstNode.ownObjects())
obj.setIntent(mergedIntent);
lattice.setLastMergedInto((BitSet)secondNode.getIntent().clone());
Expand Down
14 changes: 7 additions & 7 deletions src/driver/Driver.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,15 +22,15 @@ public static void main(String[] args){
// docs.add(repoFolder + "XML\\ebay.xml");
docs.add(repoFolder + "XML\\DBLP\\1000Lattice.xml"); //
// docs.add(repoFolder + "XML\\DBLP\\316NoSql.xml");
// docs.add(repoFolder + "XML\\DBLP\\1000FCA.xml");
// docs.add(repoFolder + "XML\\DBLP\\1000Schema.xml");
docs.add(repoFolder + "XML\\DBLP\\1000FCA.xml");
docs.add(repoFolder + "XML\\DBLP\\1000Schema.xml");

//add BibTex repos
// docs.add(repoFolder + "BibTex\\BordatTest.bib");
// docs.add(repoFolder + "BibTex\\BordatTest3.bib");
docs.add(repoFolder + "BibTex\\scg.bib");
// docs.add(repoFolder + "BibTex\\listb.bib"); //
// docs.add(repoFolder + "BibTex\\zbMATH\\100Lattice.bib"); //
// docs.add(repoFolder + "BibTex\\scg.bib");
docs.add(repoFolder + "BibTex\\listb.bib"); //
docs.add(repoFolder + "BibTex\\zbMATH\\100Lattice.bib"); //
// docs.add(repoFolder + "BibTex\\zbMATH\\100Schema.bib");
// docs.add(repoFolder + "BibTex\\zbMATH\\100Algebra.bib");
// docs.add(repoFolder + "BibTex\\zbMATH\\100Groups.bib");
Expand All @@ -56,10 +56,10 @@ private static void parseDocument(String doc, String outputFolder, String graphv
Lattice lattice = lb.buildLattice();
lattice.exportLatticeToFile(graphvizFolder + "0a_" + parser.getTargetLatticeFilename(doc));

System.out.println("Nr\tScore\tNodes\tWithOwn\tedges\tindex\tclean");
System.out.println("Nr\tScore\tNodes\tWithOwn\tedges\tindex\tclean\tnull\tleg");
System.out.println("orig\t---" + "\t" + lattice.latticeStats());
ContextCleanser cc = new ContextCleanser(fc, lattice);
cc.removeSingletonObjects();
// cc.removeSingletonObjects();
// cc.removeRareAttributes(0);
lattice.clear();
lattice = lb.buildLattice();
Expand Down
1 change: 1 addition & 0 deletions src/driver/LatticeBuilder.java
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ public Lattice buildLattice(){
computeExtents();
lattice.computeEdges();
lattice.computeAttributes();
lattice.initialiseBookkeeping();
alreadyAddedObjects.clear();
return lattice;
}
Expand Down

0 comments on commit 92b9bbd

Please sign in to comment.