Skip to content

Commit

Permalink
fix: Fix hash-join lookup regression. (Fix #140)
Browse files Browse the repository at this point in the history
  • Loading branch information
jheer committed Apr 2, 2021
1 parent 01093ab commit d31c865
Show file tree
Hide file tree
Showing 5 changed files with 29 additions and 11 deletions.
4 changes: 2 additions & 2 deletions src/engine/join-filter.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { singleRowLookup } from './join/lookup';
import { rowLookup } from './join/lookup';
import BitSet from '../table/bit-set';
import isArray from '../util/is-array';

Expand All @@ -18,7 +18,7 @@ export default function(tableL, tableR, predicate, options = {}) {

function hashSemiJoin(filter, tableL, tableR, [keyL, keyR]) {
// build lookup table
const lut = singleRowLookup(tableR, keyR);
const lut = rowLookup(tableR, keyR);

// scan table, update filter with matches
tableL.scan((rowL, data) => {
Expand Down
4 changes: 2 additions & 2 deletions src/engine/join.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { multiRowLookup } from './join/lookup';
import { indexLookup } from './join/lookup';
import columnSet from '../table/column-set';
import concat from '../util/concat';
import isArray from '../util/is-array';
Expand Down Expand Up @@ -90,7 +90,7 @@ function hashJoin(emit, [keyL, keyR], dataL, dataR, idxL, idxR, hitL, hitR, nL,
}

// build lookup table
const lut = multiRowLookup(idxHash, dataHash, keyHash);
const lut = indexLookup(idxHash, dataHash, keyHash);

// scan other table
const m = idxScan.length;
Expand Down
8 changes: 4 additions & 4 deletions src/engine/join/lookup.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
export function singleRowLookup(table, hash) {
export function rowLookup(table, hash) {
const lut = new Map();
table.scan((row, data) => {
const key = hash(row, data);
Expand All @@ -9,16 +9,16 @@ export function singleRowLookup(table, hash) {
return lut;
}

export function multiRowLookup(idx, data, hash) {
export function indexLookup(idx, data, hash) {
const lut = new Map();
const n = idx.length;
for (let i = 0; i < n; ++i) {
const row = idx[i];
const key = hash(row, data);
if (key != null && key === key) {
lut.has(key)
? lut.get(key).push(row)
: lut.set(key, [row]);
? lut.get(key).push(i)
: lut.set(key, [i]);
}
}
return lut;
Expand Down
4 changes: 2 additions & 2 deletions src/engine/lookup.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { singleRowLookup } from './join/lookup';
import { rowLookup } from './join/lookup';
import { aggregateGet } from './reduce/util';
import columnSet from '../table/column-set';
import NULL from '../util/null';
Expand All @@ -12,7 +12,7 @@ export default function(tableL, tableR, [keyL, keyR], { names, exprs, ops }) {
names.forEach(name => cols.add(name, Array(total).fill(NULL)));

// build lookup table
const lut = singleRowLookup(tableR, keyR);
const lut = rowLookup(tableR, keyR);

// generate setter function for lookup match
const set = unroll(
Expand Down
20 changes: 19 additions & 1 deletion test/verbs/join-test.js
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,25 @@ tape('join handles filtered tables', t => {
key: [ 1, 2, 5 ],
value1: [ 1, 2, undefined ],
value2: [ 1, 2, 5 ]
}, 'natural left join on filtered data');
}, 'natural right join on filtered data');

const dt = table({
year: [2017, 2017, 2017, 2018, 2018, 2018],
month: ['01', '02', 'YR', '01', '02', 'YR'],
count: [6074, 7135, 220582, 5761, 6764, 222153]
});

const jt = dt
.filter(d => d.month === 'YR')
.select('year', {count: 'total'})
.join(dt.filter(d => d.month !== 'YR'));

tableEqual(t, jt, {
total: [ 220582, 220582, 222153, 222153 ],
year: [ 2017, 2017, 2018, 2018 ],
month: [ '01', '02', '01', '02' ],
count: [ 6074, 7135, 5761, 6764 ]
}, 'join of two filtered tables');

t.end();
});
Expand Down

0 comments on commit d31c865

Please sign in to comment.