Skip to content

Commit

Permalink
Merging with siblings; removing singleton objects
Browse files Browse the repository at this point in the history
  • Loading branch information
Luca Liechti committed Jan 22, 2017
1 parent fb2be20 commit 2abb517
Show file tree
Hide file tree
Showing 6 changed files with 137 additions and 115 deletions.
1 change: 1 addition & 0 deletions bin/.gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
/datastructures/
/driver/
/parsers/
Binary file modified bin/driver/Driver.class
Binary file not shown.
36 changes: 32 additions & 4 deletions src/datastructures/Lattice.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,15 @@ public class Lattice {
private HashMap<Integer, ArrayList<LatticeNode>> nodesByLevel;
private int currentNodeNumber;
private Dictionary dic;
private BitSet lastMergedInto; //used to keep track of which node has last been merged into in the tinker algorithm

/**
 * Creates an empty lattice bound to the given dictionary.
 * The node counter starts at zero and no merge target is recorded yet.
 *
 * @param _dic dictionary stored for later use by this lattice
 */
public Lattice(Dictionary _dic) {
    dic = _dic;
    currentNodeNumber = 0;
    lastMergedInto = null;
    nodes = new ArrayList<LatticeNode>();
    edges = new ArrayList<LatticeEdge>();
    nodesByLevel = new HashMap<Integer, ArrayList<LatticeNode>>();
}

public void clear() {
Expand All @@ -36,7 +38,9 @@ public void clear() {
}

/**
 * Summarises the lattice as a single tab-separated line, in this order:
 * node count, number of nodes with own objects, edge count,
 * cluster index (three decimals) and cleanliness (one decimal).
 *
 * The earlier labelled output was followed by this tab-separated return,
 * leaving an unreachable second return statement; only the tab-separated
 * form is kept.
 *
 * @return the tab-separated statistics line
 */
public String latticeStats() {
    return nodes.size()
            + "\t" + nodesWithOwnObjects()
            + "\t" + edges.size()
            + "\t" + String.format("%.3f", clusterIndex())
            + "\t" + String.format("%.1f", cleanliness());
}

public void addNode(LatticeNode node) {
Expand Down Expand Up @@ -64,17 +68,25 @@ public void exportLatticeToFile(String outputFile){
+ " [label=\"" + node.getNiceAttributes() + node.getIntent()
+ "\next.: " + node.numberOfObjects() + " (" + node.typesOfExtent() + ") "
+ "\nown: " + node.numberOfOwnObjects() + " (" + node.typesOfOwnObjects() + ") "
+ "\"" + peripheries(node) + "]\n";
// + "\n merges into : " + node.mergesInto()
+ "\"" + peripheries(node) + color(node) + "]\n";
for(LatticeEdge edge: edges)
latticeString += edge.getLowerNodeNumber() + "->" + edge.getUpperNodeNumber() + ";\n";
latticeString += "}";
writeToFile(latticeString, outputFile);
// System.out.println("done.");
}

/**
 * Builds the extra DOT attributes that paint a node red when its intent
 * equals the intent that was last merged into; any other node gets no
 * extra attributes.
 */
private String color(LatticeNode node) {
    boolean isLastMergeTarget =
            lastMergedInto != null && lastMergedInto.equals(node.getIntent());
    return isLastMergeTarget ? ", style = filled, color = red" : "";
}

/**
 * Builds the extra DOT attribute that draws a double border around nodes
 * that have at least one own object; other nodes get no extra attribute.
 * (The same check used to appear twice in a row; one copy is enough.)
 */
private String peripheries(LatticeNode node) {
    if (node.numberOfOwnObjects() > 0) {
        return ", peripheries = 2";
    }
    return "";
}

Expand Down Expand Up @@ -278,4 +290,20 @@ private int[] numbersOfOwnObjects() {
assert(pos == nodesWithOwnObjects()-1);
return numbers;
}

/**
 * Records the intent of the node that was most recently merged into.
 * The reference is stored as given (no copy is made).
 *
 * @param intent intent of the last merge target
 */
public void setLastMergedInto(BitSet intent) {
    lastMergedInto = intent;
}

/**
 * Computes, as a percentage, the sum of every node's majority count
 * divided by the sum of every node's own-object count. Only nodes that
 * have own objects contribute.
 *
 * When no node has own objects the division is 0.0/0.0 and the result
 * is NaN.
 *
 * @return cleanliness in percent, or NaN if there are no own objects
 */
public double cleanliness() {
    int majoritySum = 0;
    int ownObjectSum = 0;
    for (LatticeNode node : nodes) {
        if (!node.hasOwnObjects()) {
            continue;
        }
        majoritySum += node.majority();
        ownObjectSum += node.numberOfOwnObjects();
    }
    double fraction = (double) majoritySum / (double) ownObjectSum;
    return fraction * 100;
}
}
63 changes: 45 additions & 18 deletions src/datastructures/LatticeNode.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ public class LatticeNode {
private HashSet<LatticeNode> upperNeighbours;
private HashSet<LatticeNode> lowerNeighbours;
private ArrayList<String> ownAttributes;
private Boolean mergedInto;

public LatticeNode(HashSet<FormalObject> hashSet, BitSet intent, Dictionary _dic) {
this.intent = intent;
Expand All @@ -27,6 +28,7 @@ public LatticeNode(HashSet<FormalObject> hashSet, BitSet intent, Dictionary _dic
this.upperNeighbours = new HashSet<LatticeNode>();
this.lowerNeighbours = new HashSet<LatticeNode>();
this.ownAttributes = new ArrayList<String>();
this.mergedInto = false;
}

public BitSet getIntent() {
Expand Down Expand Up @@ -151,32 +153,57 @@ public String typesOfOwnObjects() {
return typesOfFormalObjects(ownObjects);
}

/** Flags this node as having been merged into. */
public void setMergedInto() {
    mergedInto = Boolean.TRUE;
}

/**
 * @return the current value of the merged-into flag
 */
public boolean mergedInto() {
    return mergedInto.booleanValue();
}

/**
 * Counts how many of this node's own objects belong to the most frequent
 * object type (types are grouped by countObjectTypes).
 *
 * Returns 0 when the node has no own objects. Looking up the "most
 * frequent type" of an empty count map used to yield a null count and a
 * NullPointerException on unboxing.
 *
 * @return size of the largest type group among the own objects, or 0
 */
public int majority() {
    int highest = 0;
    for (int count : countObjectTypes(ownObjects).values()) {
        if (count > highest) {
            highest = count;
        }
    }
    return highest;
}

private String typesOfFormalObjects(HashSet<FormalObject> set) {
if(set.size() > 0) {
HashMap<String, Integer> counts = new HashMap<String, Integer>();
String type = "";
for(FormalObject obj : set){
type = obj.getName();
if(!counts.containsKey(type))
counts.put(type, 1);
else
counts.put(type, counts.get(type)+1);
}
HashMap<String, Integer> counts = countObjectTypes(set);
if(counts.keySet().size() == 1)
return "100% " + type;
return "100% " + counts.keySet().toArray()[0];
else {
int highest = 0;
String highestAttr = "";
for(String attr : counts.keySet()) {
if(counts.get(attr) > highest) {
highest = counts.get(attr);
highestAttr = attr;
}
}
String highestAttr = mostFrequentType(counts);
int highest = counts.get(highestAttr);
return (100*highest)/set.size() + "% " + highestAttr;
}
}
else
return "empty";
}

/**
 * Picks the key with the largest count. On ties the key encountered
 * first during iteration wins, because only a strictly larger count
 * replaces the current winner. An empty map yields the empty string.
 *
 * @param counts occurrences per type name
 * @return the type name with the highest count, or "" if there is none
 */
private String mostFrequentType(HashMap<String, Integer> counts) {
    String winner = "";
    int best = 0;
    for (String candidate : counts.keySet()) {
        int occurrences = counts.get(candidate);
        if (occurrences <= best) {
            continue;
        }
        best = occurrences;
        winner = candidate;
    }
    return winner;
}

/**
 * Tallies the objects of a set by the value of their getName().
 *
 * @param set objects to count
 * @return map from name to the number of objects carrying that name
 */
private HashMap<String, Integer> countObjectTypes(HashSet<FormalObject> set) {
    HashMap<String, Integer> tally = new HashMap<String, Integer>();
    for (FormalObject member : set) {
        String name = member.getName();
        Integer seen = tally.get(name);
        tally.put(name, seen == null ? 1 : seen + 1);
    }
    return tally;
}
}
124 changes: 42 additions & 82 deletions src/driver/ContextCleanser.java
Original file line number Diff line number Diff line change
Expand Up @@ -26,117 +26,54 @@ public ContextCleanser(FormalContext _context, Lattice _lattice){
/**
 * Removes rarely occurring attributes from the context.
 *
 * The distinct support values of all attributes are sorted in ascending
 * order; the value at index {@code treshold} becomes the cut-off, and
 * every attribute whose support is less than or equal to that cut-off is
 * removed from the context. Progress information is printed to standard
 * output.
 *
 * NOTE(review): {@code treshold} is used directly as an array index, so a
 * negative value or one that is not smaller than the number of distinct
 * support values throws ArrayIndexOutOfBoundsException.
 *
 * @param treshold zero-based index into the ascending list of distinct
 *                 support values that selects the cut-off
 */
public void removeRareAttributes(int treshold) {
System.out.println("---BEGIN CLEANSING---");
HashMap<String, Integer> attributeSupport = context.getAttributeSupport();
// collapse the support values to distinct ones, then sort ascending
HashSet<Integer> supportSet = new HashSet<Integer>();
supportSet.addAll(attributeSupport.values());
Integer[] supportArray = supportSet.toArray(new Integer[supportSet.size()]);
Arrays.sort(supportArray);
// cut-off support value: attributes at or below it are deleted
final int TRESHOLD = supportArray[treshold];
System.out.println("Deleting all attributes that occur at most " + TRESHOLD + " times.");
int deleted = 0;
System.out.print("Nr of attributes before: " + context.numberOfAttributes() + "\t");
System.out.println("Lattice stats before:\t" + lattice.latticeStats());
// System.out.println("Deleting all attributes that occur at most " + TRESHOLD + " times.");
// int deleted = 0;
// System.out.print("Nr of attributes before: " + context.numberOfAttributes() + "\n");
// NOTE(review): attributes are removed while iterating the dictionary contents;
// presumably getContents() returns a copy or removeAttribute leaves it untouched - confirm.
for(String attr : context.getDictionary().getContents()){
// NOTE(review): unboxing here throws NullPointerException if an attribute has no support entry
if(attributeSupport.get(attr) <= TRESHOLD){
deleted++;
// deleted++;
context.removeAttribute(attr);
}
}
// NOTE(review): subtracts 'deleted' from a count read after the removals; if
// removeAttribute already lowers numberOfAttributes() this under-reports - confirm.
System.out.print("Nr of attributes after: " + (context.numberOfAttributes()-deleted) + "\t");
}

/**
 * Changes the intents of objects that sit in small nodes so that they
 * take on the intent of a much bigger neighbouring node. The lattice has
 * to be recomputed after running this.
 *
 * A node is considered when it has own objects and fewer own objects than
 * {@code percent} percent of all objects in the context (integer
 * arithmetic). Its lower neighbour with the most own objects is tried
 * first; if that one does not qualify, the upper neighbour with the most
 * own objects is tried. Neighbour sets are not recomputed while merging.
 *
 * The {@code factor} argument is now actually applied. Previously it was
 * only printed, while the check itself used a hard-coded factor of 10.
 *
 * @param factor   how many times more own objects the merge target must have
 * @param attrDiff maximum number of additional attributes the merge target may have
 * @param percent  upper bound (exclusive, in percent of all objects) on the
 *                 own-object count of a node that may be merged away
 */
public void mergeNodes(int factor, int attrDiff, int percent) {
    System.out.println("---BEGIN CLEANSING---");
    System.out.println("Merging all nodes with their biggest neighbours if they\n"
            + "\t- have at least " + factor + " times more own objects\n"
            + "\t- have at most " + attrDiff + " more/less attribute(s)\n"
            + "\t- make up at most " + percent + "% of all objects.");
    System.out.println("Lattice stats before:\t" + lattice.latticeStats());
    attributeDifference = attrDiff;
    HashMap<Integer, ArrayList<LatticeNode>> latticeLevelNodes = lattice.nodesByLevel();
    int[] levelArray = lattice.levelArray();
    // the number of objects does not change here (only intents do), so compute the bound once
    int ownObjectLimit = percent * context.getObjects().size() / 100;
    for (int level : levelArray) {
        for (LatticeNode node : latticeLevelNodes.get(level)) {
            if (!node.hasOwnObjects() || node.numberOfOwnObjects() >= ownObjectLimit) {
                continue;
            }
            // prefer the lower neighbour with the most own objects ...
            LatticeNode target = findMergeCandidate(node.lowerNeighbours());
            if (!isMergeCandidateFor(target, node, factor)) {
                // ... and fall back to the biggest upper neighbour
                target = findMergeCandidate(node.upperNeighbours());
                if (!isMergeCandidateFor(target, node, factor)) {
                    continue;
                }
            }
            for (FormalObject obj : node.ownObjects()) {
                // every object gets its own copy of the target intent
                obj.setIntent((BitSet) target.getIntent().clone());
            }
        }
    }
}

/** Checks the merge criteria for a candidate, using the caller-supplied size factor. */
private boolean isMergeCandidateFor(LatticeNode mergeCandidate, LatticeNode smallNode, int factor) {
    return mergeCandidate != null
            && smallNode.numberOfOwnObjects() * factor <= mergeCandidate.numberOfOwnObjects()
            && mergeCandidate.getIntent().cardinality()
                    <= smallNode.getIntent().cardinality() + attributeDifference;
}

/**
 * Selects the neighbour with the most own objects.
 *
 * @param neighbours nodes to choose from
 * @return the neighbour with the highest own-object count, or null when
 *         the set is empty or no neighbour has any own objects
 */
private LatticeNode findMergeCandidate(HashSet<LatticeNode> neighbours) {
    LatticeNode best = null;
    int bestCount = 0;
    for (LatticeNode neighbour : neighbours) {
        if (neighbour.numberOfOwnObjects() <= bestCount) {
            continue;
        }
        bestCount = neighbour.numberOfOwnObjects();
        best = neighbour;
    }
    return best;
}

/**
 * Decides whether a small node may be merged into a candidate node.
 * The candidate must exist, must have at least ten times as many own
 * objects as the small node, and its intent may have at most
 * attributeDifference more set bits than the small node's intent.
 */
private Boolean isMergeCandidateFor(LatticeNode mergeCandidate, LatticeNode smallNode) {
    if (mergeCandidate == null) {
        return false;
    }
    if (smallNode.numberOfOwnObjects() * 10 > mergeCandidate.numberOfOwnObjects()) {
        return false;
    }
    int maxCardinality = smallNode.getIntent().cardinality() + attributeDifference;
    return mergeCandidate.getIntent().cardinality() <= maxCardinality;
}

public double tinker() {
// System.out.println("tinkering...");
HashMap<Integer, ArrayList<LatticeNode>> latticeLevelNodes = lattice.nodesByLevel();
int[] levelArray = lattice.levelArray();
double highScore = 0.0;
String highScoreMerge = "";
LatticeNode firstNode = null;
LatticeNode secondNode = null;
for(int i = 0; i < levelArray.length; i++) {
ArrayList<LatticeNode> thisLevelNodes = latticeLevelNodes.get(levelArray[i]);
for(LatticeNode node : thisLevelNodes) {
for(LatticeNode upper : node.upperNeighbours()) {
if(mergeScore(node, upper) > highScore) {
highScore = mergeScore(node, upper);
highScoreMerge = "merged " /*+ node.getIntent() + " -> " + upper.getIntent()*/ +
" (up, score = " + new DecimalFormat("#.##").format(mergeScore(node, upper)) + ")";
firstNode = node;
secondNode = upper;
//specify which nodes can be merged into
ArrayList<LatticeNode> mergeCandidates = new ArrayList<LatticeNode>();
mergeCandidates.addAll(node.upperNeighbours());
mergeCandidates.addAll(node.lowerNeighbours());
//nodes from same level with at least one shared parent
for(LatticeNode parent : node.upperNeighbours()){
for(LatticeNode child : parent.lowerNeighbours()){
if(child != node) mergeCandidates.add(child);
}
}
for(LatticeNode lower : node.lowerNeighbours()) {
if(mergeScore(node, lower) > highScore) {
highScore = mergeScore(node, lower);
highScoreMerge = "merged " /*+ node.getIntent() + " -> " + lower.getIntent()*/ +
" (down, score = " + new DecimalFormat("#.##").format(mergeScore(node, lower)) + ")";
for(LatticeNode candidate : mergeCandidates) {
if(mergeScore(node, candidate) > highScore) {
highScore = mergeScore(node, candidate);
firstNode = node;
secondNode = lower;
secondNode = candidate;
}
}
}
}
if(highScore > 0.0) {
mergeInto(firstNode, secondNode);
System.out.println(highScoreMerge);
}
return highScore;
}
Expand All @@ -146,14 +83,37 @@ private double mergeScore(LatticeNode node, LatticeNode candidate) {
return 0.0;
double ownObjectRatio = candidate.numberOfOwnObjects()/(double)node.numberOfOwnObjects();
double percentOfObjects = node.numberOfOwnObjects()/(double)context.getObjects().size()*100.0;
if(node.lowerNeighbours().contains(candidate))
return 2*(ownObjectRatio/percentOfObjects);
return ownObjectRatio/percentOfObjects;
}

/**
 * Moves the own objects of firstNode into secondNode by giving each of
 * them secondNode's intent, and records secondNode's intent on the
 * lattice as the last merge target.
 *
 * Every object receives its own copy of the intent, as mergeNodes does.
 * Handing the same mutable BitSet instance to all objects would let a
 * later change to one object's intent silently change the others.
 *
 * @param firstNode  node whose own objects are re-assigned
 * @param secondNode node whose intent the objects take on
 */
private void mergeInto(LatticeNode firstNode, LatticeNode secondNode) {
    BitSet targetIntent = secondNode.getIntent();
    for (FormalObject obj : firstNode.ownObjects()) {
        obj.setIntent((BitSet) targetIntent.clone());
    }
    lattice.setLastMergedInto((BitSet) targetIntent.clone());
}

/**
 * Removes from the context every object whose intent is not shared with
 * any other object, and prints how many objects were removed.
 *
 * Objects are grouped by the intent itself (BitSet equality), not by the
 * intent's hash code. Two different intents can have the same hash code,
 * which would put unrelated objects in one group and keep singletons
 * that should have been removed.
 */
public void removeSingletonObjects() {
    // assumes FormalObject.getIntent() returns a BitSet, as setIntent accepts one
    HashMap<BitSet, ArrayList<FormalObject>> objectsByIntent =
            new HashMap<BitSet, ArrayList<FormalObject>>();
    int total = context.getObjects().size();
    //group objects by intent
    for (FormalObject obj : context.getObjects()) {
        BitSet intent = obj.getIntent();
        ArrayList<FormalObject> group = objectsByIntent.get(intent);
        if (group == null) {
            group = new ArrayList<FormalObject>();
            objectsByIntent.put(intent, group);
        }
        group.add(obj);
    }
    //delete singleton objects from context
    int removed = 0;
    for (ArrayList<FormalObject> group : objectsByIntent.values()) {
        if (group.size() == 1) {
            context.getObjects().remove(group.get(0));
            removed++;
        }
    }
    System.out.println("Removed " + removed + "/" + total + " objects from context.");
}
}
Loading

0 comments on commit 2abb517

Please sign in to comment.