Skip to content

Commit

Permalink
dupe code removed
Browse files Browse the repository at this point in the history
  • Loading branch information
vikasgupta78 committed Aug 12, 2024
1 parent a077c3b commit b81907c
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 11 deletions.
12 changes: 6 additions & 6 deletions common/core/src/main/java/zingg/common/core/block/Block.java
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,13 @@
import zingg.common.client.ZFrame;
import zingg.common.client.ZinggClientException;
import zingg.common.client.util.ListMap;
import zingg.common.core.feature.FeatureFactory;
import zingg.common.core.hash.HashFunction;

public abstract class Block<D,R,C,T> implements Serializable {

private static final long serialVersionUID = 1L;

public static final Log LOG = LogFactory.getLog(Block.class);

protected ZFrame<D,R,C> dupes;
Expand Down Expand Up @@ -117,24 +120,21 @@ public void estimateElimCount(Canopy<R> c, long elimCount) {
c.estimateElimCount();
}

/**
 * Converts the string form of a data type into its {@code T} representation.
 * Within this class the result is used as the key for looking up the
 * applicable hash functions in {@code functionsMap}.
 *
 * @param t the data type as a string, e.g. the value of {@code FieldDefinition.getDataType()}
 * @return the {@code T} value corresponding to {@code t}
 */
public abstract T getDataTypeFromString(String t);

public Canopy<R>getBestNode(Tree<Canopy<R>> tree, Canopy<R>parent, Canopy<R>node,
List<FieldDefinition> fieldsOfInterest) throws Exception {
long least = Long.MAX_VALUE;
int maxElimination = 0;
Canopy<R>best = null;

for (FieldDefinition field : fieldsOfInterest) {
if (LOG.isDebugEnabled()){
LOG.debug("Trying for " + field + " with data type " + field.getDataType() + " and real dt "
+ getDataTypeFromString(field.getDataType()));
+ getFeatureFactory().getDataTypeFromString(field.getDataType()));
}
//Class type = FieldClass.getFieldClassClass(field.getFieldClass());
FieldDefinition context = field;
if (least ==0) break;//how much better can it get?
// applicable functions
List<HashFunction<D,R,C,T>> functions = functionsMap.get(getDataTypeFromString(field.getDataType()));
List<HashFunction<D,R,C,T>> functions = functionsMap.get(getFeatureFactory().getDataTypeFromString(field.getDataType()));
if (LOG.isDebugEnabled()){
LOG.debug("functions are " + functions);
}
Expand Down Expand Up @@ -404,7 +404,7 @@ public void printTree(Tree<Canopy<R>> tree,
}
}


/**
 * Supplies the {@link FeatureFactory} for the data type {@code T}.
 * {@code getBestNode} calls {@code getDataTypeFromString} on the returned
 * factory to turn a field's data type string into the key used to look up
 * hash functions in {@code functionsMap}.
 *
 * @return the feature factory provided by the concrete subclass
 */
public abstract FeatureFactory<T> getFeatureFactory();


}
Expand Down
11 changes: 6 additions & 5 deletions spark/core/src/main/java/zingg/spark/core/block/SparkBlock.java
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@
import zingg.common.client.ZFrame;
import zingg.common.client.util.ListMap;
import zingg.common.core.block.Block;
import zingg.common.core.feature.FeatureFactory;
import zingg.common.core.hash.HashFunction;
import zingg.spark.core.feature.SparkFeatureFactory;

public class SparkBlock extends Block<Dataset<Row>, Row, Column, DataType> {

Expand All @@ -22,11 +24,10 @@ public SparkBlock(ZFrame<Dataset<Row>, Row, Column> training, ZFrame<Dataset<Row
ListMap<DataType, HashFunction<Dataset<Row>, Row, Column, DataType>> functionsMap, long maxSize) {
super(training, dupes, functionsMap, maxSize);
}



@Override
// Turns the data type string t into a DataType by delegating to DataType.fromDDL.
// NOTE(review): DataType is presumably org.apache.spark.sql.types.DataType; its import is not visible in this hunk.
public DataType getDataTypeFromString(String t) {
return DataType.fromDDL(t);
}
// Provides the feature factory for this block implementation.
// A new SparkFeatureFactory is constructed on every call; nothing is cached here.
public FeatureFactory<DataType> getFeatureFactory() {
return new SparkFeatureFactory();
}

}

0 comments on commit b81907c

Please sign in to comment.