Skip to content

Commit

Permalink
Merge pull request #875 from vikasgupta78/relate
Browse files Browse the repository at this point in the history
merge main OSS into relate OSS
  • Loading branch information
sonalgoyal authored Jul 31, 2024
2 parents 42c8bff + ddc56d4 commit 7bffa94
Show file tree
Hide file tree
Showing 29 changed files with 253 additions and 70 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import com.fasterxml.jackson.annotation.JsonSetter;

import zingg.common.client.pipe.Pipe;
import zingg.common.client.util.JsonStringify;


/**
Expand Down Expand Up @@ -270,6 +271,10 @@ public void checkNullBlankEmpty(Pipe[] field, String fieldName) throws ZinggClie
}
}

/**
 * Renders this object as a string by delegating to {@code JsonStringify.toString(this)}.
 *
 * @return the value produced by {@code JsonStringify.toString(this)}
 */
@Override
public String toString() {
    final String json = JsonStringify.toString(this);
    return json;
}



Expand Down
4 changes: 4 additions & 0 deletions common/client/src/main/java/zingg/common/client/Samples.java
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@




package zingg.common.client;

import java.io.Serializable;
Expand Down
3 changes: 3 additions & 0 deletions common/client/src/main/java/zingg/common/client/ZFrame.java
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,9 @@ public interface ZFrame<D, R, C> {

// Overloads of repartition; each returns a ZFrame of the same type parameters.
// NOTE(review): presumably `num` is the target number of partitions — confirm against implementations.
public ZFrame<D, R, C> repartition(int num);
// Variant taking a single column-like argument `c` in addition to `num`.
public ZFrame<D, R, C> repartition(int num, C c);
// Variant taking `num` plus a Scala Seq of partition expressions.
public ZFrame<D, R, C> repartition(int num,scala.collection.Seq<C> partitionExprs);
// Variant taking only a Scala Seq of partition expressions (no explicit `num`).
public ZFrame<D, R, C> repartition(scala.collection.Seq<C> partitionExprs);


public ZFrame<D, R, C> sample(boolean repartition, float num);
public ZFrame<D, R, C> sample(boolean repartition, double num);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

import java.util.List;

import zingg.common.client.ZinggClientException;
import zingg.common.client.event.events.IEvent;
import zingg.common.client.util.ListMap;

Expand All @@ -22,11 +21,11 @@ public void addListener(Class<? extends IEvent> eventClass, IEventListener liste
eventListenersList.add(eventClass.getCanonicalName(), listener);
}

/**
 * Fires the given event by handing it to {@code listen(event)}.
 *
 * @param event the event to pass on
 */
public void fireEvent(IEvent event) throws ZinggClientException {
public void fireEvent(IEvent event) {
listen(event);
}

private void listen(IEvent event) throws ZinggClientException {
private void listen(IEvent event) {
Class<? extends IEvent> eventClass = event.getClass();
List<IEventListener> listenerList = eventListenersList.get(eventClass.getCanonicalName());
if (listenerList != null) {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
package zingg.common.client.event.listeners;

import zingg.common.client.ZinggClientException;
import zingg.common.client.event.events.IEvent;

/**
 * Event listener whose {@code listen} method has an empty body in this class.
 * NOTE(review): presumably meant to be subclassed with a real {@code listen} implementation — confirm.
 */
public class IEventListener {

/**
 * Receives an event; the implementation here does nothing.
 *
 * @param event the event being delivered
 */
public void listen(IEvent event) throws ZinggClientException {
public void listen(IEvent event) {

}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
package zingg.common.client.util;

import java.io.IOException;
import java.io.StringWriter;

import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.databind.ObjectMapper;

import zingg.common.client.Arguments;
import zingg.common.client.ArgumentsUtil;

/**
 * Utility that renders an arbitrary object as a JSON string using Jackson.
 */
public class JsonStringify {

    /**
     * Shared mapper. It is fully configured once, before first use, and never
     * reconfigured afterwards, so it can be reused instead of being rebuilt on every call.
     */
    private static final ObjectMapper MAPPER = createMapper();

    /** Builds the mapper with the same configuration that was previously applied per call. */
    private static ObjectMapper createMapper() {
        ObjectMapper mapper = new ObjectMapper();
        mapper.configure(JsonParser.Feature.ALLOW_UNQUOTED_CONTROL_CHARS, true);
        return mapper;
    }

    /**
     * Serializes the given object to a JSON string.
     *
     * @param o the object to serialize
     * @return the JSON text, or {@code null} if serialization fails (the stack trace
     *         is printed in that case)
     */
    public static String toString (Object o){
        try {
            return MAPPER.writeValueAsString(o);
        } catch (IOException e) {
            // Existing callers rely on a null result rather than an exception, so keep that contract.
            e.printStackTrace();
            return null;
        }
    }

}
33 changes: 29 additions & 4 deletions common/client/src/test/java/zingg/common/client/TestArguments.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ public class TestArguments {
private static final String KEY_MODEL_ID = "modelId";

public static final Log LOG = LogFactory.getLog(TestArguments.class);
protected ArgumentsUtil argsUtil = new ArgumentsUtil<Arguments>(Arguments.class);
protected ArgumentsUtil<Arguments> argsUtil = new ArgumentsUtil<Arguments>(Arguments.class);

@Test
public void testSubstituteVariablesWithAllEnvVarSet() {
Expand Down Expand Up @@ -244,8 +244,33 @@ public void testMatchTypeWrong() {


}




/**
 * Checks that arguments survive a JSON round trip: they are loaded from a JSON file,
 * rendered with {@code toString()}, parsed back from that string, and selected
 * properties of the two objects are compared.
 */
@Test
public void testJsonStringify(){
    try{
        //Converting to JSON using toString()
        IArguments argsFromJsonFile = argsUtil.createArgumentsFromJSON(getClass().getResource("../../../testArguments/configWithMultipleMatchTypesUnsupported.json").getFile(), "test");
        String strFromJsonFile = argsFromJsonFile.toString();

        IArguments argsFullCycle = argsUtil.createArgumentsFromJSONString(strFromJsonFile, "");

        // Expected value (loaded from file) first, round-tripped value second,
        // so failure messages label the two sides correctly.
        assertEquals(argsFromJsonFile.getFieldDefinition().get(0), argsFullCycle.getFieldDefinition().get(0));
        assertEquals(argsFromJsonFile.getFieldDefinition().get(2), argsFullCycle.getFieldDefinition().get(2));
        assertEquals(argsFromJsonFile.getModelId(), argsFullCycle.getModelId());
        // assertEquals(argsFromJsonFile.getZinggModelDir(), argsFullCycle.getZinggModelDir());
        assertEquals(argsFromJsonFile.getNumPartitions(), argsFullCycle.getNumPartitions());
        assertEquals(argsFromJsonFile.getLabelDataSampleSize(), argsFullCycle.getLabelDataSampleSize());
        assertEquals(argsFromJsonFile.getTrainingSamples(), argsFullCycle.getTrainingSamples());
        assertEquals(argsFromJsonFile.getOutput(), argsFullCycle.getOutput());
        assertEquals(argsFromJsonFile.getData(), argsFullCycle.getData());
        assertEquals(argsFromJsonFile.getZinggDir(), argsFullCycle.getZinggDir());
        assertEquals(argsFromJsonFile.getJobId(), argsFullCycle.getJobId());

    } catch (Exception | ZinggClientException e) {
        // Previously the exception was only printed, so a load/parse failure let the
        // test pass without running a single assertion. Fail explicitly instead.
        throw new AssertionError("JSON round trip of arguments failed", e);
    }

}

}
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,10 @@
import zingg.common.client.ZFrame;
import zingg.common.client.ZinggClientException;
import zingg.common.client.options.ZinggOptions;
import zingg.common.client.util.ColName;
import zingg.common.core.filter.PredictionFilter;
import zingg.common.core.match.output.IMatchOutputBuilder;
import zingg.common.core.match.output.LinkOutputBuilder;
import zingg.common.core.pairs.IPairBuilder;
import zingg.common.core.pairs.SelfPairBuilder;
import zingg.common.core.pairs.SelfPairBuilderSourceSensitive;


Expand All @@ -31,6 +29,12 @@ public ZFrame<D,R,C> selectColsFromBlocked(ZFrame<D,R,C> blocked) {
return blocked;
}

/**
 * Two-argument form of {@code getActualDupes}: forwards to the five-argument overload
 * with a freshly created {@code PredictionFilter}, the builder returned by
 * {@code getIPairBuilder()}, and {@code null} as the last argument.
 *
 * @param blocked  the blocked frame passed through to the overload
 * @param testData the test data frame passed through to the overload
 * @return whatever the five-argument overload returns
 */
@Override
protected ZFrame<D,R,C> getActualDupes(ZFrame<D,R,C> blocked, ZFrame<D,R,C> testData) throws Exception, ZinggClientException{
    return getActualDupes(blocked, testData, new PredictionFilter<D, R, C>(), getIPairBuilder(), null);
}

@Override
public IMatchOutputBuilder<S,D,R,C> getMatchOutputBuilder(){
if (this.matchOutputBuilder == null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ public ZFrame<D,R,C> getBlocked( ZFrame<D,R,C> testData) throws Exception, Zin
//LOG.debug("Blocking model file location is " + getModelHelper().getBlockFile(args));
Tree<Canopy<R>> tree = getBlockingTreeUtil().readBlockingTree(args, getModelHelper());
ZFrame<D,R,C> blocked = getBlockingTreeUtil().getBlockHashes(testData, tree);
ZFrame<D,R,C> blocked1 = blocked.repartition(args.getNumPartitions(), blocked.col(ColName.HASH_COL)); //.cache();
ZFrame<D,R,C> blocked1 = blocked.repartition(args.getNumPartitions(), blocked.col(ColName.HASH_COL)).cache();
return blocked1;
}

Expand All @@ -127,10 +127,6 @@ public void setIPairbuilder(IPairBuilder<S, D, R, C> p){
this.iPairBuilder = p;
}

/**
 * Builds pairs using the builder returned by {@code getIPairBuilder()}.
 *
 * @param blocked first frame handed to the pair builder
 * @param bAll    second frame handed to the pair builder
 * @return the result of the three-argument {@code getPairs}
 */
public ZFrame<D,R,C> getPairs(ZFrame<D,R,C>blocked, ZFrame<D,R,C>bAll) throws Exception{
    final IPairBuilder<S, D, R, C> builder = getIPairBuilder();
    return getPairs(blocked, bAll, builder);
}

/**
 * Builds pairs by calling {@code getPairs(blocked, bAll)} on the supplied builder.
 *
 * @param blocked      first frame handed to the pair builder
 * @param bAll         second frame handed to the pair builder
 * @param iPairBuilder the builder that produces the pairs
 * @return the frame returned by the builder
 */
public ZFrame<D,R,C> getPairs(ZFrame<D,R,C>blocked, ZFrame<D,R,C>bAll, IPairBuilder<S, D, R, C> iPairBuilder) throws Exception{
    final ZFrame<D,R,C> pairs = iPairBuilder.getPairs(blocked, bAll);
    return pairs;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ public ZFrame<D, R, C> getPairs(ZFrame<D,R,C>blocked, ZFrame<D,R,C>bAll) throws
*/
//joinH.show();
joinH = joinH.filter(joinH.gt(ColName.ID_COL));
LOG.warn("Num comparisons " + joinH.count());
if (LOG.isDebugEnabled()) LOG.debug("Num comparisons " + joinH.count());
joinH = joinH.repartition(args.getNumPartitions(), joinH.col(ColName.ID_COL));
bAll = bAll.repartition(args.getNumPartitions(), bAll.col(ColName.ID_COL));
joinH = joinH.joinOnCol(bAll, ColName.ID_COL);
Expand Down
28 changes: 28 additions & 0 deletions common/core/src/test/java/zingg/common/core/util/CsvReader.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
package zingg.common.core.util;

import java.io.File;
import java.io.FileNotFoundException;
import java.util.ArrayList;
import java.util.List;
import java.util.Scanner;

/**
 * Reads a text file line by line and turns each line into a record by calling
 * {@code fromCsv} on the supplied creator.
 */
public class CsvReader {
    /** Records collected so far; the same list is appended to on every {@code getRecords} call. */
    protected List<? extends IFromCsv> records;
    /** Converts a single line of the file into a record. */
    IFromCsv creator;

    /**
     * @param creator the object whose {@code fromCsv} is used to convert each line
     */
    public CsvReader(IFromCsv creator){
        records = new ArrayList<IFromCsv>();
        this.creator = creator;
    }

    /**
     * Reads the given file and appends one record per line to {@code records}.
     *
     * @param file       path of the file to read
     * @param skipHeader when {@code true}, the first line of the file is discarded
     *                   instead of being converted into a record
     * @return the accumulated list of records
     * @throws FileNotFoundException if the file cannot be opened
     */
    public List<? extends IFromCsv> getRecords(String file, boolean skipHeader) throws FileNotFoundException{
        try (Scanner scanner = new Scanner(new File(file))) {
            // Honour skipHeader: previously the flag was ignored and the header was parsed as data.
            if (skipHeader && scanner.hasNextLine()) {
                scanner.nextLine();
            }
            while (scanner.hasNextLine()) {
                records.add(creator.fromCsv(scanner.nextLine()));
            }
        }
        return records;
    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
package zingg.common.core.util;

/**
 * Contract for objects that can build a value from a single string.
 */
public interface IFromCsv {

/**
 * Creates a value from the given string.
 * NOTE(review): the return type parameter {@code C} is unbounded and chosen by the
 * caller; presumably {@code s} is one CSV line — confirm against implementations.
 *
 * @param s the input string
 * @param <C> the type of value the caller expects back
 * @return the value built from {@code s}
 */
<C> C fromCsv(String s);

}
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package zingg.common.infra.util;
package zingg.common.core.util;

import java.lang.reflect.*;
import java.security.NoSuchAlgorithmException;
Expand Down
Loading

0 comments on commit 7bffa94

Please sign in to comment.