diff --git a/src/engine/join-filter.js b/src/engine/join-filter.js index 99475666..e71ca35e 100644 --- a/src/engine/join-filter.js +++ b/src/engine/join-filter.js @@ -1,4 +1,4 @@ -import { singleRowLookup } from './join/lookup'; +import { rowLookup } from './join/lookup'; import BitSet from '../table/bit-set'; import isArray from '../util/is-array'; @@ -18,7 +18,7 @@ export default function(tableL, tableR, predicate, options = {}) { function hashSemiJoin(filter, tableL, tableR, [keyL, keyR]) { // build lookup table - const lut = singleRowLookup(tableR, keyR); + const lut = rowLookup(tableR, keyR); // scan table, update filter with matches tableL.scan((rowL, data) => { diff --git a/src/engine/join.js b/src/engine/join.js index ea68d274..e242402f 100644 --- a/src/engine/join.js +++ b/src/engine/join.js @@ -1,4 +1,4 @@ -import { multiRowLookup } from './join/lookup'; +import { indexLookup } from './join/lookup'; import columnSet from '../table/column-set'; import concat from '../util/concat'; import isArray from '../util/is-array'; @@ -90,7 +90,7 @@ function hashJoin(emit, [keyL, keyR], dataL, dataR, idxL, idxR, hitL, hitR, nL, } // build lookup table - const lut = multiRowLookup(idxHash, dataHash, keyHash); + const lut = indexLookup(idxHash, dataHash, keyHash); // scan other table const m = idxScan.length; diff --git a/src/engine/join/lookup.js b/src/engine/join/lookup.js index 0dbbada8..745a4be0 100644 --- a/src/engine/join/lookup.js +++ b/src/engine/join/lookup.js @@ -1,4 +1,4 @@ -export function singleRowLookup(table, hash) { +export function rowLookup(table, hash) { const lut = new Map(); table.scan((row, data) => { const key = hash(row, data); @@ -9,7 +9,7 @@ export function singleRowLookup(table, hash) { return lut; } -export function multiRowLookup(idx, data, hash) { +export function indexLookup(idx, data, hash) { const lut = new Map(); const n = idx.length; for (let i = 0; i < n; ++i) { @@ -17,8 +17,8 @@ export function multiRowLookup(idx, data, hash) { const key = hash(row, data); if (key != null && key === key) { lut.has(key) - ? lut.get(key).push(row) - : lut.set(key, [row]); + ? lut.get(key).push(i) + : lut.set(key, [i]); } } return lut; diff --git a/src/engine/lookup.js b/src/engine/lookup.js index 519baf1e..ee435580 100644 --- a/src/engine/lookup.js +++ b/src/engine/lookup.js @@ -1,4 +1,4 @@ -import { singleRowLookup } from './join/lookup'; +import { rowLookup } from './join/lookup'; import { aggregateGet } from './reduce/util'; import columnSet from '../table/column-set'; import NULL from '../util/null'; @@ -12,7 +12,7 @@ export default function(tableL, tableR, [keyL, keyR], { names, exprs, ops }) { names.forEach(name => cols.add(name, Array(total).fill(NULL))); // build lookup table - const lut = singleRowLookup(tableR, keyR); + const lut = rowLookup(tableR, keyR); // generate setter function for lookup match const set = unroll( diff --git a/test/verbs/join-test.js b/test/verbs/join-test.js index ad12a88e..c8d7109b 100644 --- a/test/verbs/join-test.js +++ b/test/verbs/join-test.js @@ -124,7 +124,25 @@ tape('join handles filtered tables', t => { key: [ 1, 2, 5 ], value1: [ 1, 2, undefined ], value2: [ 1, 2, 5 ] - }, 'natural left join on filtered data'); + }, 'natural right join on filtered data'); + + const dt = table({ + year: [2017, 2017, 2017, 2018, 2018, 2018], + month: ['01', '02', 'YR', '01', '02', 'YR'], + count: [6074, 7135, 220582, 5761, 6764, 222153] + }); + + const jt = dt + .filter(d => d.month === 'YR') + .select('year', {count: 'total'}) + .join(dt.filter(d => d.month !== 'YR')); + + tableEqual(t, jt, { + total: [ 220582, 220582, 222153, 222153 ], + year: [ 2017, 2017, 2018, 2018 ], + month: [ '01', '02', '01', '02' ], + count: [ 6074, 7135, 5761, 6764 ] + }, 'join of two filtered tables'); t.end(); });