Skip to content

Commit 54c152c

Browse files
committed
Modify DeduplicatedLoadSubset.loadSubset such that it only loads the missing data
1 parent 0d1f275 commit 54c152c

File tree

2 files changed

+257
-7
lines changed

2 files changed

+257
-7
lines changed

packages/db/src/query/subset-dedupe.ts

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import {
22
isPredicateSubset,
33
isWhereSubset,
4+
minusWherePredicates,
45
unionWherePredicates,
56
} from "./predicate-utils.js"
67
import type { BasicExpression } from "./ir.js"
@@ -105,19 +106,31 @@ export class DeduplicatedLoadSubset {
105106
return matchingInflight.promise
106107
}
107108

108-
// Not covered by existing data - call underlying loadSubset
109-
const resultPromise = this._loadSubset(options)
109+
// Not fully covered by existing data
110+
// Compute the subset of data that is not covered by the existing data
111+
// such that we only have to load that subset of missing data
112+
const clonedOptions = cloneOptions(options)
113+
if (this.unlimitedWhere !== undefined && options.limit === undefined) {
114+
// Compute difference to get only the missing data
115+
// We can only do this for unlimited queries
116+
// and we can only remove data that was loaded from unlimited queries
117+
// because with limited queries we have no way to express that we already loaded part of the matching data
118+
clonedOptions.where =
119+
minusWherePredicates(clonedOptions.where, this.unlimitedWhere) ??
120+
clonedOptions.where
121+
}
122+
123+
// Call underlying loadSubset to load the missing data
124+
const resultPromise = this._loadSubset(clonedOptions)
110125

111126
// Handle both sync (true) and async (Promise<void>) return values
112127
if (resultPromise === true) {
113128
// Sync return - update tracking synchronously
114129
// The options were already cloned above, so the tracked history is protected against caller mutation
115-
this.updateTracking(cloneOptions(options))
130+
this.updateTracking(clonedOptions)
116131
return true
117132
} else {
118133
// Async return - track the promise and update tracking after it resolves
119-
// Clone options BEFORE entering async context to prevent mutation issues
120-
const clonedOptions = cloneOptions(options)
121134

122135
// Capture the current generation - this lets us detect if reset() was called
123136
// while this request was in-flight, so we can skip updating tracking state
@@ -205,7 +218,7 @@ export class DeduplicatedLoadSubset {
205218
* properties like limit or where between calls. Without cloning, our stored history
206219
* would reflect the mutated values rather than what was actually loaded.
207220
*/
208-
function cloneOptions(options: LoadSubsetOptions): LoadSubsetOptions {
221+
export function cloneOptions(options: LoadSubsetOptions): LoadSubsetOptions {
209222
return {
210223
where: options.where,
211224
orderBy: options.orderBy,

packages/db/tests/subset-dedupe.test.ts

Lines changed: 238 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
import { describe, expect, it } from "vitest"
2-
import { DeduplicatedLoadSubset } from "../src/query/subset-dedupe"
2+
import {
3+
DeduplicatedLoadSubset,
4+
cloneOptions,
5+
} from "../src/query/subset-dedupe"
36
import { Func, PropRef, Value } from "../src/query/ir"
7+
import { minusWherePredicates } from "../src/query/predicate-utils"
48
import type { BasicExpression, OrderBy } from "../src/query/ir"
59
import type { LoadSubsetOptions } from "../src/types"
610

@@ -25,6 +29,22 @@ function eq(left: BasicExpression<any>, right: BasicExpression<any>): Func {
2529
return new Func(`eq`, [left, right])
2630
}
2731

32+
function and(...expressions: Array<BasicExpression<boolean>>): Func {
33+
return new Func(`and`, expressions)
34+
}
35+
36+
function inOp(left: BasicExpression<any>, values: Array<any>): Func {
37+
return new Func(`in`, [left, new Value(values)])
38+
}
39+
40+
function lte(left: BasicExpression<any>, right: BasicExpression<any>): Func {
41+
return new Func(`lte`, [left, right])
42+
}
43+
44+
function not(expression: BasicExpression<boolean>): Func {
45+
return new Func(`not`, [expression])
46+
}
47+
2848
describe(`createDeduplicatedLoadSubset`, () => {
2949
it(`should call underlying loadSubset on first call`, async () => {
3050
let callCount = 0
@@ -322,4 +342,221 @@ describe(`createDeduplicatedLoadSubset`, () => {
322342
expect(calls[0]).toEqual({ where: eq(ref(`status`), val(`active`)) })
323343
expect(calls[1]).toEqual({ where: eq(ref(`status`), val(`inactive`)) })
324344
})
345+
346+
describe(`subset deduplication with minusWherePredicates`, () => {
347+
it(`should request only the difference for range predicates`, async () => {
348+
let callCount = 0
349+
const calls: Array<LoadSubsetOptions> = []
350+
const mockLoadSubset = (options: LoadSubsetOptions) => {
351+
callCount++
352+
calls.push(cloneOptions(options))
353+
return Promise.resolve()
354+
}
355+
356+
const deduplicated = new DeduplicatedLoadSubset(mockLoadSubset)
357+
358+
// First call: age > 20 (loads data for age > 20)
359+
await deduplicated.loadSubset({ where: gt(ref(`age`), val(20)) })
360+
expect(callCount).toBe(1)
361+
expect(calls[0]).toEqual({ where: gt(ref(`age`), val(20)) })
362+
363+
// Second call: age > 10 (should request only age > 10 AND age <= 20)
364+
await deduplicated.loadSubset({ where: gt(ref(`age`), val(10)) })
365+
expect(callCount).toBe(2)
366+
expect(calls[1]).toEqual({
367+
where: and(gt(ref(`age`), val(10)), lte(ref(`age`), val(20))),
368+
})
369+
})
370+
371+
it(`should request only the difference for set predicates`, async () => {
372+
let callCount = 0
373+
const calls: Array<LoadSubsetOptions> = []
374+
const mockLoadSubset = (options: LoadSubsetOptions) => {
375+
callCount++
376+
calls.push(cloneOptions(options))
377+
return Promise.resolve()
378+
}
379+
380+
const deduplicated = new DeduplicatedLoadSubset(mockLoadSubset)
381+
382+
// First call: status IN ['B', 'C'] (loads data for B and C)
383+
await deduplicated.loadSubset({
384+
where: inOp(ref(`status`), [`B`, `C`]),
385+
})
386+
expect(callCount).toBe(1)
387+
expect(calls[0]).toEqual({ where: inOp(ref(`status`), [`B`, `C`]) })
388+
389+
// Second call: status IN ['A', 'B', 'C', 'D'] (should request only A and D)
390+
await deduplicated.loadSubset({
391+
where: inOp(ref(`status`), [`A`, `B`, `C`, `D`]),
392+
})
393+
expect(callCount).toBe(2)
394+
expect(calls[1]).toEqual({
395+
where: inOp(ref(`status`), [`A`, `D`]),
396+
})
397+
})
398+
399+
it(`should return true immediately for complete overlap`, async () => {
400+
let callCount = 0
401+
const calls: Array<LoadSubsetOptions> = []
402+
const mockLoadSubset = (options: LoadSubsetOptions) => {
403+
callCount++
404+
calls.push(cloneOptions(options))
405+
return Promise.resolve()
406+
}
407+
408+
const deduplicated = new DeduplicatedLoadSubset(mockLoadSubset)
409+
410+
// First call: age > 10 (loads data for age > 10)
411+
await deduplicated.loadSubset({ where: gt(ref(`age`), val(10)) })
412+
expect(callCount).toBe(1)
413+
414+
// Second call: age > 20 (completely covered by first call)
415+
const result = await deduplicated.loadSubset({
416+
where: gt(ref(`age`), val(20)),
417+
})
418+
expect(result).toBe(true)
419+
expect(callCount).toBe(1) // Should not make additional call
420+
})
421+
422+
it(`should handle complex predicate differences`, async () => {
423+
let callCount = 0
424+
const calls: Array<LoadSubsetOptions> = []
425+
const mockLoadSubset = (options: LoadSubsetOptions) => {
426+
callCount++
427+
calls.push(cloneOptions(options))
428+
return Promise.resolve()
429+
}
430+
431+
const deduplicated = new DeduplicatedLoadSubset(mockLoadSubset)
432+
433+
// First call: age > 20 AND status = 'active'
434+
const firstPredicate = and(
435+
gt(ref(`age`), val(20)),
436+
eq(ref(`status`), val(`active`))
437+
)
438+
await deduplicated.loadSubset({ where: firstPredicate })
439+
expect(callCount).toBe(1)
440+
expect(calls[0]).toEqual({ where: firstPredicate })
441+
442+
// Second call: age > 10 AND status = 'active' (should request only age > 10 AND age <= 20 AND status = 'active')
443+
const secondPredicate = and(
444+
gt(ref(`age`), val(10)),
445+
eq(ref(`status`), val(`active`))
446+
)
447+
448+
const test = minusWherePredicates(secondPredicate, firstPredicate)
449+
console.log(`test`, test)
450+
451+
await deduplicated.loadSubset({ where: secondPredicate })
452+
expect(callCount).toBe(2)
453+
expect(calls[1]).toEqual({
454+
where: and(
455+
eq(ref(`status`), val(`active`)),
456+
gt(ref(`age`), val(10)),
457+
lte(ref(`age`), val(20))
458+
),
459+
})
460+
})
461+
462+
it(`should not apply subset logic to limited calls`, async () => {
463+
let callCount = 0
464+
const calls: Array<LoadSubsetOptions> = []
465+
const mockLoadSubset = (options: LoadSubsetOptions) => {
466+
callCount++
467+
calls.push(cloneOptions(options))
468+
return Promise.resolve()
469+
}
470+
471+
const deduplicated = new DeduplicatedLoadSubset(mockLoadSubset)
472+
473+
const orderBy1: OrderBy = [
474+
{
475+
expression: ref(`age`),
476+
compareOptions: {
477+
direction: `asc`,
478+
nulls: `last`,
479+
stringSort: `lexical`,
480+
},
481+
},
482+
]
483+
484+
// First call: unlimited age > 20
485+
await deduplicated.loadSubset({ where: gt(ref(`age`), val(20)) })
486+
expect(callCount).toBe(1)
487+
488+
// Second call: limited age > 10 with orderBy + limit
489+
// Should request the full predicate, not the difference, because it's limited
490+
await deduplicated.loadSubset({
491+
where: gt(ref(`age`), val(10)),
492+
orderBy: orderBy1,
493+
limit: 10,
494+
})
495+
expect(callCount).toBe(2)
496+
expect(calls[1]).toEqual({
497+
where: gt(ref(`age`), val(10)),
498+
orderBy: orderBy1,
499+
limit: 10,
500+
})
501+
})
502+
503+
it(`should handle undefined where clauses in subset logic`, async () => {
504+
let callCount = 0
505+
const calls: Array<LoadSubsetOptions> = []
506+
const mockLoadSubset = (options: LoadSubsetOptions) => {
507+
callCount++
508+
calls.push(cloneOptions(options))
509+
return Promise.resolve()
510+
}
511+
512+
const deduplicated = new DeduplicatedLoadSubset(mockLoadSubset)
513+
514+
// First call: age > 20
515+
await deduplicated.loadSubset({ where: gt(ref(`age`), val(20)) })
516+
expect(callCount).toBe(1)
517+
518+
// Second call: no where clause (all data)
519+
// Should request all data except what we already loaded
520+
// i.e. should request NOT (age > 20)
521+
await deduplicated.loadSubset({})
522+
expect(callCount).toBe(2)
523+
expect(calls[1]).toEqual({ where: not(gt(ref(`age`), val(20))) }) // Should request all data except what we already loaded
524+
})
525+
526+
it(`should handle multiple overlapping unlimited calls`, async () => {
527+
let callCount = 0
528+
const calls: Array<LoadSubsetOptions> = []
529+
const mockLoadSubset = (options: LoadSubsetOptions) => {
530+
callCount++
531+
calls.push(cloneOptions(options))
532+
return Promise.resolve()
533+
}
534+
535+
const deduplicated = new DeduplicatedLoadSubset(mockLoadSubset)
536+
537+
// First call: age > 20
538+
await deduplicated.loadSubset({ where: gt(ref(`age`), val(20)) })
539+
expect(callCount).toBe(1)
540+
541+
// Second call: age < 10 (different range)
542+
await deduplicated.loadSubset({ where: lt(ref(`age`), val(10)) })
543+
expect(callCount).toBe(2)
544+
545+
// Third call: age > 5 (overlaps both earlier ranges; only age >= 10 AND age <= 20 is actually missing)
546+
await deduplicated.loadSubset({ where: gt(ref(`age`), val(5)) })
547+
expect(callCount).toBe(3)
548+
549+
// Ideally this would be reduced to a request for only age >= 10 AND age <= 20, since age < 10 and age > 20 are already covered
550+
// However, that optimization is not implemented yet, so the original query is executed unchanged
551+
expect(calls[2]).toEqual({
552+
where: gt(ref(`age`), val(5)),
553+
})
554+
555+
/*
556+
expect(calls[2]).toEqual({
557+
where: and(gte(ref(`age`), val(10)), lte(ref(`age`), val(20))),
558+
})
559+
*/
560+
})
561+
})
325562
})

0 commit comments

Comments
 (0)