Skip to content

Commit

Permalink
Merge pull request #2342 from IDEMSInternational/fix/data-pipe-merge
Browse files Browse the repository at this point in the history
fix: data pipe merge operator handles case of no input source
  • Loading branch information
chrismclarke authored Jun 25, 2024
2 parents caa16b0 + 6ca0459 commit 3171d7a
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 16 deletions.
8 changes: 4 additions & 4 deletions packages/shared/src/models/dataPipe/operators/filter.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,10 @@ describe("Filter", () => {
expect(outputIDs).toEqual(["id_1", "id_3", "id_4"]);
});
it("Filters with 'this' context", () => {
const nestedData = testData.names.map((entry) => {
entry["nested"] = { first_name: entry.first_name };
return entry;
});
const nestedData = testData.names.map((entry) => ({
...entry,
nested: { first_name: entry.first_name },
}));
const testDf = new DataFrame(nestedData);
const output = new filter(testDf, ["this.nested.first_name === 'Ada'"]).apply();
const outputIDs = output.column("first_name").values;
Expand Down
26 changes: 22 additions & 4 deletions packages/shared/src/models/dataPipe/operators/merge.spec.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import { DataFrame } from "danfojs";
import { DataPipe } from "../pipe";
import testData from "../testData.spec";
import { DataPipe } from "../pipe";
import merge from "./merge";

(testData as any).merge_nationality = [
const nationality_data = [
{
id: "invalid_id",
nationality: "German",
Expand All @@ -19,9 +19,18 @@ import merge from "./merge";
},
];

// Fixture list with a single row (id_1) whose `nested` field holds an object rather than a primitive.
// It is registered on the test pipe under the name "nested_data" and merged in the nested-data test case.
const nested_data = [
{
id: "id_1",
nested: {
value: "test",
},
},
];

describe("Merge Operator", () => {
const testDf = new DataFrame(testData.names);
const testPipe: DataPipe = new DataPipe([], testData);
const testPipe = new DataPipe([], { ...testData, nationality_data, nested_data });

it("Throws on missing list", () => {
// throws on missing list
Expand All @@ -31,7 +40,7 @@ describe("Merge Operator", () => {
});
it("Merges multiple lists", () => {
// merges data - additional nationality column appended for all entries and populated for available
const output = new merge(testDf, ["merge_nationality"], testPipe).apply();
const output = new merge(testDf, ["nationality_data"], testPipe).apply();
expect(output.index).toEqual(["id_1", "id_2", "id_3", "id_4"]);
// merges new nationality column
const expectedNationalities = ["British", "French", undefined, undefined];
Expand All @@ -40,4 +49,13 @@ describe("Merge Operator", () => {
const expectedNames = ["override", "Blaise", "Charles", "Daniel"];
expect(output.column("first_name").values).toEqual(expectedNames);
});
it("Merges multiple lists including list with nested data", () => {
// merges the nested_data list into the names dataframe - a `nested` column is added for all rows
// and is only populated for ids present in nested_data (here just id_1)
const output = new merge(testDf, ["nested_data"], testPipe).apply();
// the original row index is kept unchanged by the merge
expect(output.index).toEqual(["id_1", "id_2", "id_3", "id_4"]);
// merges nested column - values are compared as strings: the object for id_1 appears as JSON text
// and rows without a matching entry appear as the literal string "undefined"
// TODO - danfo stringifies nested data, should it be handled differently?
const expectedNested = ['{"value":"test"}', "undefined", "undefined", "undefined"];
expect(output.column("nested").values).toEqual(expectedNested as any);
});
});
18 changes: 10 additions & 8 deletions packages/shared/src/models/dataPipe/operators/merge.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ class MergeOperator extends BaseOperator {
}

apply() {
if (this.df.shape[0] === 0)
console.error("Merge: No data in base dataframe - an input_source must be provided");
setIndexColumn(this.df, this.indexColumn);
for (const dataList of this.args_list) {
this.df = this.replaceUpdatedValues(dataList);
Expand Down Expand Up @@ -49,20 +51,20 @@ class MergeOperator extends BaseOperator {

/** Replace any values updated from the data in the original dataframe **/
private replaceUpdatedValues(data: any[]) {
const replacments = new DataFrame(data);
setIndexColumn(replacments, this.indexColumn);
const replacements = new DataFrame(data);
setIndexColumn(replacements, this.indexColumn);

// remove any columns that do not exist in left
const droppedColumns = replacments.columns.filter(
const droppedColumns = replacements.columns.filter(
(column) => column !== this.indexColumn && !this.df.columns.includes(column)
);
replacments.drop({ columns: droppedColumns, inplace: true });
replacements.drop({ columns: droppedColumns, inplace: true });
// remove any rows that do not exist in left
const droppedIndexes = replacments.index.filter((i) => !this.df.index.includes(i));
replacments.drop({ index: droppedIndexes, inplace: true });
const droppedIndexes = replacements.index.filter((i) => !this.df.index.includes(i));
replacements.drop({ index: droppedIndexes, inplace: true });

// replace all values in left with values from replacments where defined
const replaceHashmap = arrayToHashmap(toJSON(replacments) as any, this.indexColumn);
// replace all values in left with values from replacements where defined
const replaceHashmap = arrayToHashmap(toJSON(replacements) as any, this.indexColumn);

// handle replacement by looping over all rows and replacing values where override defined
const replaceDf = this.df.apply((row: any[]) => {
Expand Down

0 comments on commit 3171d7a

Please sign in to comment.