Skip to content

Commit

Permalink
automatizing the merging
Browse files Browse the repository at this point in the history
  • Loading branch information
Luca Liechti committed Jan 13, 2017
1 parent 32c9181 commit fb2be20
Show file tree
Hide file tree
Showing 5 changed files with 46 additions and 23 deletions.
Binary file modified bin/driver/Driver.class
Binary file not shown.
4 changes: 2 additions & 2 deletions src/datastructures/Lattice.java
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ public ArrayList<LatticeNode> getNodes() {
}

public void exportLatticeToFile(String outputFile){
System.out.print("Writing lattice to file... ");
// System.out.print("Writing lattice to file... ");
String latticeString = "";
latticeString += "digraph d{\n";
for(LatticeNode node : nodes)
Expand All @@ -69,7 +69,7 @@ public void exportLatticeToFile(String outputFile){
latticeString += edge.getLowerNodeNumber() + "->" + edge.getUpperNodeNumber() + ";\n";
latticeString += "}";
writeToFile(latticeString, outputFile);
System.out.println("done.");
// System.out.println("done.");
}

private String peripheries(LatticeNode node) {
Expand Down
42 changes: 29 additions & 13 deletions src/driver/ContextCleanser.java
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ public void removeRareAttributes(int treshold) {
}

//changes the intents of objects that are very close to other objects.
//after running this, the lattice has to be recomputed :/
//after running this, the lattice has to be recomputed
public void mergeNodes(int factor, int attrDiff, int percent) {
System.out.println("---BEGIN CLEANSING---");
System.out.println("Merging all nodes with their biggest neighbours if they\n"
Expand Down Expand Up @@ -103,35 +103,45 @@ private Boolean isMergeCandidateFor(LatticeNode mergeCandidate, LatticeNode smal
&& mergeCandidate.getIntent().cardinality() <= (smallNode.getIntent().cardinality() + attributeDifference));
}

public void tinker() {
System.out.println("tinkering...");
public double tinker() {
// System.out.println("tinkering...");
HashMap<Integer, ArrayList<LatticeNode>> latticeLevelNodes = lattice.nodesByLevel();
int[] levelArray = lattice.levelArray();
double highScore = 0.0;
String highScoreMerge = "";
LatticeNode firstNode = null;
LatticeNode secondNode = null;
for(int i = 0; i < levelArray.length; i++) {
ArrayList<LatticeNode> thisLevelNodes = latticeLevelNodes.get(levelArray[i]);
for(LatticeNode node : thisLevelNodes) {
for(LatticeNode upper : node.upperNeighbours()) {
if(editDistance(node, upper) > highScore) {
highScore = editDistance(node, upper);
highScoreMerge = node.getIntent() + " -> " + upper.getIntent() +
" (up, score = " + new DecimalFormat("#.##").format(editDistance(node, upper)) + ")";
if(mergeScore(node, upper) > highScore) {
highScore = mergeScore(node, upper);
highScoreMerge = "merged " /*+ node.getIntent() + " -> " + upper.getIntent()*/ +
" (up, score = " + new DecimalFormat("#.##").format(mergeScore(node, upper)) + ")";
firstNode = node;
secondNode = upper;
}
}
for(LatticeNode lower : node.lowerNeighbours()) {
if(editDistance(node, lower) > highScore) {
highScore = editDistance(node, lower);
highScoreMerge = node.getIntent() + " -> " + lower.getIntent() +
" (down, score = " + new DecimalFormat("#.##").format(editDistance(node, lower)) + ")";
if(mergeScore(node, lower) > highScore) {
highScore = mergeScore(node, lower);
highScoreMerge = "merged " /*+ node.getIntent() + " -> " + lower.getIntent()*/ +
" (down, score = " + new DecimalFormat("#.##").format(mergeScore(node, lower)) + ")";
firstNode = node;
secondNode = lower;
}
}
}
}
System.out.println(highScoreMerge);
if(highScore > 0.0) {
mergeInto(firstNode, secondNode);
System.out.println(highScoreMerge);
}
return highScore;
}

private double editDistance(LatticeNode node, LatticeNode candidate) {
private double mergeScore(LatticeNode node, LatticeNode candidate) {
if(!node.hasOwnObjects() || !candidate.hasOwnObjects() || candidate.numberOfOwnObjects() <= node.numberOfOwnObjects())
return 0.0;
double ownObjectRatio = candidate.numberOfOwnObjects()/(double)node.numberOfOwnObjects();
Expand All @@ -140,4 +150,10 @@ private double editDistance(LatticeNode node, LatticeNode candidate) {
return 2*(ownObjectRatio/percentOfObjects);
return ownObjectRatio/percentOfObjects;
}

/**
 * Merges {@code firstNode} into {@code secondNode} at the context level:
 * every own object of {@code firstNode} gets its intent replaced by a copy
 * of {@code secondNode}'s intent.
 *
 * Only the objects' intents are changed here; the lattice structure itself
 * is not touched by this method.
 *
 * @param firstNode  node whose own objects are re-assigned
 * @param secondNode node whose intent the objects receive
 */
private void mergeInto(LatticeNode firstNode, LatticeNode secondNode) {
    // Snapshot the target intent once so every object receives the same bit pattern.
    BitSet targetIntent = (BitSet) secondNode.getIntent().clone();
    for (FormalObject obj : firstNode.ownObjects()) {
        // BitSet is mutable: hand each object its own copy so that a later
        // in-place change to one object's intent cannot leak into the others.
        obj.setIntent((BitSet) targetIntent.clone());
    }
}
}
22 changes: 14 additions & 8 deletions src/driver/Driver.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,15 @@ public static void main(String[] args){
// docs.add(repoFolder + "XML\\mondial.xml");
// docs.add(repoFolder + "XML\\SigmodRecord.xml");
// docs.add(repoFolder + "XML\\ebay.xml");
docs.add(repoFolder + "XML\\DBLP\\1000Lattice.xml");
// docs.add(repoFolder + "XML\\DBLP\\1000Lattice.xml");
// docs.add(repoFolder + "XML\\DBLP\\316NoSql.xml");
// docs.add(repoFolder + "XML\\DBLP\\1000FCA.xml");
// docs.add(repoFolder + "XML\\DBLP\\1000Schema.xml");

//add BibTex repos
// docs.add(repoFolder + "BibTex\\BordatTest.bib");
// docs.add(repoFolder + "BibTex\\scg.bib");
// docs.add(repoFolder + "BibTex\\listb.bib");
docs.add(repoFolder + "BibTex\\listb.bib");
// docs.add(repoFolder + "BibTex\\zbMATH\\100Lattice.bib");
// docs.add(repoFolder + "BibTex\\zbMATH\\100Schema.bib");
// docs.add(repoFolder + "BibTex\\zbMATH\\100Algebra.bib");
Expand All @@ -53,16 +53,22 @@ private static void parseDocument(String doc, String outputFolder, String graphv

LatticeBuilder lb = new LatticeBuilder(fc);
Lattice lattice = lb.buildLattice();
lattice.exportLatticeToFile(graphvizFolder + parser.getTargetLatticeFilename(doc));
lattice.exportLatticeToFile(graphvizFolder + "0_" + parser.getTargetLatticeFilename(doc));

System.out.println("Lattice stats before:\t" + lattice.latticeStats());
ContextCleanser cc = new ContextCleanser(fc, lattice);
cc.tinker();
double score = 1d;
int i = 1;
while(score > 0d) {
score = cc.tinker();
lattice.clear();
lattice = lb.buildLattice();
System.out.println("Lattice stats after merge " + i + ": " + lattice.latticeStats());
lattice.exportLatticeToFile(graphvizFolder + (i++) + "_" + parser.getTargetLatticeFilename(doc));
}
System.out.println("performed " + (i-2) + " merges in total.");
// cc.mergeNodes(10, 1, 5);
// cc.removeRareAttributes(2);
// LatticeBuilder lb2 = new LatticeBuilder(fc);
// Lattice lattice2 = lb2.buildLattice();
// System.out.println("Lattice stats after:\t" + lattice.latticeStats());
// System.out.println("---END CLEANSING---");
// lattice.exportLatticeToFile(graphvizFolder + "edit_" + parser.getTargetLatticeFilename(doc));
}
}
1 change: 1 addition & 0 deletions src/driver/LatticeBuilder.java
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ public Lattice buildLattice(){
computeExtents();
lattice.computeEdges();
lattice.computeAttributes();
alreadyAddedObjects.clear();
return lattice;
}

Expand Down

0 comments on commit fb2be20

Please sign in to comment.