Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove duplicate code (issue #598) #882

Merged
merged 1 commit into from
Aug 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions common/core/src/main/java/zingg/common/core/block/Block.java
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,13 @@
import zingg.common.client.ZFrame;
import zingg.common.client.ZinggClientException;
import zingg.common.client.util.ListMap;
import zingg.common.core.feature.FeatureFactory;
import zingg.common.core.hash.HashFunction;

public abstract class Block<D,R,C,T> implements Serializable {

private static final long serialVersionUID = 1L;

public static final Log LOG = LogFactory.getLog(Block.class);

protected ZFrame<D,R,C> dupes;
Expand Down Expand Up @@ -117,24 +120,21 @@ public void estimateElimCount(Canopy<R> c, long elimCount) {
c.estimateElimCount();
}

public abstract T getDataTypeFromString(String t);

public Canopy<R>getBestNode(Tree<Canopy<R>> tree, Canopy<R>parent, Canopy<R>node,
List<FieldDefinition> fieldsOfInterest) throws Exception {
long least = Long.MAX_VALUE;
int maxElimination = 0;
Canopy<R>best = null;

for (FieldDefinition field : fieldsOfInterest) {
if (LOG.isDebugEnabled()){
LOG.debug("Trying for " + field + " with data type " + field.getDataType() + " and real dt "
+ getDataTypeFromString(field.getDataType()));
+ getFeatureFactory().getDataTypeFromString(field.getDataType()));
}
//Class type = FieldClass.getFieldClassClass(field.getFieldClass());
FieldDefinition context = field;
if (least ==0) break;//how much better can it get?
// applicable functions
List<HashFunction<D,R,C,T>> functions = functionsMap.get(getDataTypeFromString(field.getDataType()));
List<HashFunction<D,R,C,T>> functions = functionsMap.get(getFeatureFactory().getDataTypeFromString(field.getDataType()));
if (LOG.isDebugEnabled()){
LOG.debug("functions are " + functions);
}
Expand Down Expand Up @@ -404,7 +404,7 @@ public void printTree(Tree<Canopy<R>> tree,
}
}


/**
 * Returns the feature factory for this block's data type {@code T}.
 *
 * <p>{@code getBestNode} calls {@code getDataTypeFromString} on the returned
 * factory to turn a field's declared data type string into the key used to
 * look up the applicable hash functions in {@code functionsMap}.
 *
 * @return the feature factory provided by the concrete subclass
 */
public abstract FeatureFactory<T> getFeatureFactory();


}
Expand Down
11 changes: 6 additions & 5 deletions spark/core/src/main/java/zingg/spark/core/block/SparkBlock.java
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@
import zingg.common.client.ZFrame;
import zingg.common.client.util.ListMap;
import zingg.common.core.block.Block;
import zingg.common.core.feature.FeatureFactory;
import zingg.common.core.hash.HashFunction;
import zingg.spark.core.feature.SparkFeatureFactory;

public class SparkBlock extends Block<Dataset<Row>, Row, Column, DataType> {

Expand All @@ -22,11 +24,10 @@ public SparkBlock(ZFrame<Dataset<Row>, Row, Column> training, ZFrame<Dataset<Row
ListMap<DataType, HashFunction<Dataset<Row>, Row, Column, DataType>> functionsMap, long maxSize) {
super(training, dupes, functionsMap, maxSize);
}



@Override
public DataType getDataTypeFromString(String t) {
return DataType.fromDDL(t);
}
public FeatureFactory<DataType> getFeatureFactory() {
// Supplies the Spark feature factory; a new SparkFeatureFactory is constructed on every call.
return new SparkFeatureFactory();
}

}
Loading