Skip to content

Commit

Permalink
Minor fixes; re-computing nulls after deleting rare attributes
Browse files Browse the repository at this point in the history
  • Loading branch information
Luca Liechti committed Jan 30, 2017
1 parent cf27b9b commit e4bf946
Show file tree
Hide file tree
Showing 6 changed files with 90 additions and 45 deletions.
Binary file modified bin/driver/Driver.class
Binary file not shown.
27 changes: 15 additions & 12 deletions src/datastructures/Lattice.java
Original file line number Diff line number Diff line change
Expand Up @@ -315,20 +315,22 @@ public double cleanliness() {
return ((double)majority/(double)total)*100;
}

/**
 * Reports whether the bookkeeping map has not been created yet.
 *
 * @return {@code true} if the {@code bookkeeping} field is still {@code null},
 *         {@code false} once it has been assigned
 */
public Boolean bookkeepingIsNull() {
    final boolean uninitialised = (null == bookkeeping);
    return uninitialised;
}

public void initialiseBookkeeping() {
ContextCleanser cc = new ContextCleanser(dic);
if(bookkeeping == null){
bookkeeping = new HashMap<String, ArrayList<FormalObject>>();
for(LatticeNode node : nodes){
if(node.hasOwnObjects()){
for(FormalObject ownObject : node.ownObjects()){
if(bookkeeping.containsKey(cc.bitsetHash(ownObject.getIntent())))
bookkeeping.get(cc.bitsetHash(ownObject.getIntent())).add(ownObject);
else {
ArrayList<FormalObject> newList = new ArrayList<FormalObject>();
newList.add(ownObject);
bookkeeping.put(cc.bitsetHash(ownObject.getIntent()), newList);
}
bookkeeping = new HashMap<String, ArrayList<FormalObject>>();
for(LatticeNode node : nodes){
if(node.hasOwnObjects()){
for(FormalObject ownObject : node.ownObjects()){
if(bookkeeping.containsKey(cc.bitsetHash(ownObject.getIntent())))
bookkeeping.get(cc.bitsetHash(ownObject.getIntent())).add(ownObject);
else {
ArrayList<FormalObject> newList = new ArrayList<FormalObject>();
newList.add(ownObject);
bookkeeping.put(cc.bitsetHash(ownObject.getIntent()), newList);
}
}
}
Expand Down Expand Up @@ -361,6 +363,7 @@ private int nulls() {
for(String hash : bookkeeping.keySet()){
ArrayList<FormalObject> nodeObjects = bookkeeping.get(hash);
BitSet archetype = findArchetype(hash, nodeObjects);
// System.out.println(archetype);
for(FormalObject comp : nodeObjects){
BitSet nullSet = (BitSet)archetype.clone();
nullSet.xor(comp.getIntent());
Expand Down
3 changes: 2 additions & 1 deletion src/driver/ContextCleanser.java
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ public void removeRareAttributes(int treshold) {
Integer[] supportArray = supportSet.toArray(new Integer[supportSet.size()]);
Arrays.sort(supportArray);
final int TRESHOLD = supportArray[treshold];
System.out.print("Deleted attributes: ");
System.out.print("Deleted attributes (Treshold = " + TRESHOLD + "): ");
// System.out.println("Deleting all attributes that occur at most " + TRESHOLD + " times.");
// int deleted = 0;
// System.out.print("Nr of attributes before: " + context.numberOfAttributes() + "\n");
Expand Down Expand Up @@ -104,6 +104,7 @@ private double mergeScore(LatticeNode node, LatticeNode candidate) {
}

private void mergeInto(LatticeNode firstNode, LatticeNode secondNode) {
// System.out.println("\tmerging " + firstNode.getIntent() + " (" + bitsetHash(firstNode.getIntent()) + ") -> " + secondNode.getIntent() + " (" + bitsetHash(secondNode.getIntent()) + ")");
BitSet mergedIntent = (BitSet)secondNode.getIntent().clone();
lattice.updateBookkeeping(firstNode, secondNode);
for(FormalObject obj : firstNode.ownObjects())
Expand Down
65 changes: 38 additions & 27 deletions src/driver/Driver.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,16 +19,20 @@ public static void main(String[] args){
ArrayList<String> docs = new ArrayList<String>();
ParserFactory factory = new ParserFactory();

//CONFIGURE HERE
double mergeStop = 0d;
Boolean retroFitSingletons = false;
Boolean deleteRareAttributes = true;

// //add XML repos
// docs.add(repoFolder + "XML\\mondial.xml");
// docs.add(repoFolder + "XML\\SigmodRecord.xml");
// docs.add(repoFolder + "XML\\ebay.xml");
docs.add(repoFolder + "XML\\DBLP\\1000Lattice.xml"); //
// docs.add(repoFolder + "XML\\DBLP\\1000Lattice.xml"); //
// docs.add(repoFolder + "XML\\DBLP\\316NoSql.xml");
// docs.add(repoFolder + "XML\\DBLP\\1000FCA.xml"); //
// docs.add(repoFolder + "XML\\DBLP\\1000Schema.xml"); //


//add IESL repos
// docs.add(ieslFolder + "gp-bibliography.bib");
// docs.add(ieslFolder + "visinfo.zib.de#EVlib#Bibliography#EVL-1998.bib");
Expand All @@ -47,22 +51,21 @@ public static void main(String[] args){
// docs.add(repoFolder + "JSON\\SIRA\\alle.js"); //

//PARSING SINGLE FILES
for(String doc : docs)
parseDocument(doc, outputFolder, graphvizFolder, factory.makeParser(doc));
// for(String doc : docs)
// parseDocument(doc, outputFolder, graphvizFolder, factory.makeParser(doc), retroFitSingletons, deleteRareAttributes, mergeStop);

//PARSING ALL FILES IN FOLDER
// parseFolder(folder, outputFolder, graphvizFolder, factory);
parseFolder(folder, outputFolder, graphvizFolder, factory, retroFitSingletons, deleteRareAttributes, mergeStop);

System.out.println("All done.");
}

private static void parseDocument(String doc, String outputFolder, String graphvizFolder, NoSQLParser parser){
private static void parseDocument(String doc, String outputFolder, String graphvizFolder, NoSQLParser parser, Boolean retroFitSingletons, Boolean deleteRareAttributes, double mergeStop){
System.out.println("Parsing file " + doc);
ArrayList<FormalObject> importedContext = parser.parseFile(doc);
FormalContext fc = new FormalContext();
for(FormalObject object : importedContext){
for(FormalObject object : importedContext)
fc.addObject(object);
}
fc.exportContextToFile(outputFolder + parser.getTargetContextFilename(doc));

LatticeBuilder lb = new LatticeBuilder(fc);
Expand All @@ -74,24 +77,29 @@ private static void parseDocument(String doc, String outputFolder, String graphv
System.out.println("orig\t---" + "\t" + lattice.latticeStats());
ContextCleanser cc = new ContextCleanser(fc, lattice);

///SINGLETONS PT. 1///
cc.removeSingletonObjects();
lattice.clear();
lattice = lb.buildLattice();
System.out.println("noSing\t---" + "\t" + lattice.latticeStats()); //if we have deleted singleton objects
lattice.exportLatticeToFile(graphvizFolder + "0b_withoutSingletons_" + parser.getTargetLatticeFilename(doc));

///RARE ATTRIBUTES///
// cc.removeRareAttributes(0);
// lattice.clear();
// lattice = lb.buildLattice();
// System.out.println("del\t---" + "\t" + lattice.latticeStats()); //if we have deleted rare attributes
// lattice.exportLatticeToFile(graphvizFolder + "0c_withoutRareAttributes_" + parser.getTargetLatticeFilename(doc));
if(deleteRareAttributes) {
cc.removeRareAttributes(0);
lattice.clear();
lattice = lb.buildLattice();
lattice.initialiseBookkeeping();
System.out.println("del\t---" + "\t" + lattice.latticeStats()); //if we have deleted rare attributes
lattice.exportLatticeToFile(graphvizFolder + "0c_withoutRareAttributes_" + parser.getTargetLatticeFilename(doc));
}

///SINGLETONS PT. 1///
if(retroFitSingletons){
cc.removeSingletonObjects();
lattice.clear();
lattice = lb.buildLattice();
System.out.println("noSing\t---" + "\t" + lattice.latticeStats()); //if we have deleted singleton objects
lattice.exportLatticeToFile(graphvizFolder + "0b_withoutSingletons_" + parser.getTargetLatticeFilename(doc));
}

///TINKER///
double score = cc.tinker();
int i = 1;
while(score > 0d) {
int i = 0;
while(score > mergeStop) {
lattice.clear();
lattice = lb.buildLattice();
System.out.println(i + "\t" + String.format("%.2f", score) + "\t" + lattice.latticeStats());
Expand All @@ -100,17 +108,20 @@ private static void parseDocument(String doc, String outputFolder, String graphv
}

///SINGLETONS PT. 2///
lattice.retrofitSingletons();
System.out.println("retfit\t\t" + lattice.latticeStats());
lattice.exportLatticeToFile(graphvizFolder + i + "_retroFit_" + parser.getTargetLatticeFilename(doc));
if(retroFitSingletons) {
lattice.retrofitSingletons();
System.out.println("retfit\t\t" + lattice.latticeStats());
lattice.exportLatticeToFile(graphvizFolder + i + "_retroFit_" + parser.getTargetLatticeFilename(doc));
}

System.out.println("-------------------------------------------------------------------\n\n");
}

private static void parseFolder(String inFolder, String outFolder, String gvFolder, ParserFactory fac) {
private static void parseFolder(String inFolder, String outFolder, String gvFolder, ParserFactory fac, Boolean retroFitSingletons, Boolean deleteRareAttributes, double mergeStop) {
File fold = new File(inFolder);
assert(fold.isDirectory());
String[] inFiles = fold.list();
for(int i = 0; i < inFiles.length; i++)
parseDocument(inFolder + "\\" + inFiles[i], outFolder, gvFolder, fac.makeParser(inFiles[i]));
parseDocument(inFolder + "\\" + inFiles[i], outFolder, gvFolder, fac.makeParser(inFiles[i]), retroFitSingletons, deleteRareAttributes, mergeStop);
}
}
2 changes: 1 addition & 1 deletion src/driver/LatticeBuilder.java
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ public Lattice buildLattice(){
computeExtents();
lattice.computeEdges();
lattice.computeAttributes();
lattice.initialiseBookkeeping();
if(lattice.bookkeepingIsNull()) lattice.initialiseBookkeeping();
alreadyAddedObjects.clear();
return lattice;
}
Expand Down
38 changes: 34 additions & 4 deletions src/tests/CCTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,49 @@
import java.util.BitSet;
import org.junit.Test;

import datastructures.Dictionary;
import driver.ContextCleanser;

public class CCTest {

@Test
public void CCTest() {
BitSet set = new BitSet(8);
public void CCTest1() {
Dictionary dic = new Dictionary();
dic.addAttribute("0");
dic.addAttribute("1");
dic.addAttribute("2");
dic.addAttribute("3");
dic.addAttribute("4");
dic.addAttribute("5");
dic.addAttribute("6");
dic.addAttribute("7");
BitSet set = new BitSet(dic.getSize());
set.set(0);
set.set(1);
set.set(2);

ContextCleanser cc = new ContextCleanser();
ContextCleanser cc = new ContextCleanser(dic);
String hash = cc.bitsetHash(set);
assertEquals("11100000", hash.substring(0, 8));

assertEquals(dic.getSize(), 8);
assertEquals(hash.length(), dic.getSize());
assertEquals("11100000", hash);
}

/**
 * Checks that hashing an empty intent over a four-attribute dictionary
 * yields a string of four zeros, i.e. one character per dictionary attribute.
 */
@Test
public void CCTest2() {
    Dictionary dic = new Dictionary();
    // Register attributes named "0" .. "3".
    for (int i = 0; i < 4; i++) {
        dic.addAttribute(Integer.toString(i));
    }
    // No bits are set: the intent is empty.
    BitSet set = new BitSet(dic.getSize());

    ContextCleanser cc = new ContextCleanser(dic);
    String hash = cc.bitsetHash(set);

    // JUnit convention is assertEquals(expected, actual); keeping that order
    // makes failure messages report the right value as "expected".
    assertEquals(4, dic.getSize());
    assertEquals(dic.getSize(), hash.length());
    assertEquals("0000", hash);
}
}

0 comments on commit e4bf946

Please sign in to comment.