Skip to content

Commit

Permalink
Merge pull request #868 from concord-consortium/185745420-same-lvl-gr…
Browse files Browse the repository at this point in the history
…oups

feat: implement handling of same-level grouping in aggregate formulas [PT-185745420]
  • Loading branch information
pjanik authored Sep 4, 2023
2 parents 005ef28 + e2e37d3 commit 5d34434
Show file tree
Hide file tree
Showing 3 changed files with 71 additions and 44 deletions.
56 changes: 37 additions & 19 deletions v3/src/models/data/formula-manager.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
import { makeObservable, observable, reaction } from "mobx"
import { FormulaMathJsScope } from "./formula-mathjs-scope"
import { CaseGroup, ICase } from "./data-set-types"
import { CaseGroup, ICase, IGroupedCase, symParent } from "./data-set-types"
import { onAnyAction } from "../../utilities/mst-utils"
import { getFormulaDependencies } from "./formula-utils"
import {
DisplayNameMap, IFormulaDependency, GLOBAL_VALUE, LOCAL_ATTR, ILocalAttributeDependency, IGlobalValueDependency,
ILookupDependency
ILookupDependency, NO_PARENT_KEY
} from "./formula-types"
import { math } from "./formula-fn-registry"
import { IDataSet } from "./data-set"
Expand Down Expand Up @@ -64,18 +64,41 @@ export class FormulaManager {
const { formula, attributeId, dataSet } = this.getFormulaContext(formulaId)

const collectionId = dataSet.getCollectionForAttribute(attributeId)?.id
const collectionGroup = dataSet.getCollectionGroupForAttributes([attributeId])
let sameLevelCaseIds: string[] = []
const caseIdToCaseGroup: Record<string, CaseGroup> = {}
if (collectionGroup) {
collectionGroup.groups.forEach((group: CaseGroup) => {
sameLevelCaseIds.push(group.pseudoCase.__id__)
caseIdToCaseGroup[group.pseudoCase.__id__] = group
})
} else {
sameLevelCaseIds = dataSet.childCases().map(c => c.__id__)
const collectionIndex = dataSet.getCollectionIndex(collectionId || "")
const caseGroupId: Record<string, string> = {}

const processCase = (c: IGroupedCase) => {
const parentId = c[symParent] || NO_PARENT_KEY
caseGroupId[c.__id__] = caseGroupId[parentId] || parentId
}

const calculateChildCollectionGroups = () => {
if (collectionIndex !== -1) {
for (let i = collectionIndex + 1; i < dataSet.collections.length; i++) {
const collectionGroup = dataSet.collectionGroups[i]
collectionGroup.groups.forEach((group: CaseGroup) => processCase(group.pseudoCase))
}
}
// Note that the child cases are never in any collection and they require separate processing.
dataSet.childCases().forEach(childCase => processCase(childCase))
}

const calculateSameLevelGroups = () => {
if (collectionIndex !== -1) {
dataSet.collectionGroups[collectionIndex].groups.forEach((group: CaseGroup) =>
processCase(group.pseudoCase)
)
}
}

// Note that order of execution of these functions is critical. First, we need to calculate child collection groups,
// as child collection cases are grouped using the pseudo cases from the collection where the formula attribute is.
// Next, we can calculate grouping for the formula attribute collection (same-level grouping). These will be parents
// of the formula attribute collection cases. If we reversed the order, the child collection cases would be
// grouped incorrectly (using a collection too high in the collections hierarchy).
calculateChildCollectionGroups()
calculateSameLevelGroups()

let casesToRecalculate: ICase[] = []
if (casesToRecalculateDesc === "ALL_CASES") {
// When casesToRecalculate is not provided, recalculate all cases.
Expand All @@ -93,17 +116,12 @@ export class FormulaManager {
localDataSet: dataSet,
dataSets: this.dataSets,
globalValueManager: this.globalValueManager,
sameLevelCaseIds,
formulaAttributeCollectionId: collectionId
formulaAttributeCollectionId: collectionId,
caseGroupId
})

const casesToUpdate = casesToRecalculate.map((c) => {
formulaScope.setCaseId(c.__id__)
if (collectionGroup) {
// We're dealing with hierarchical data, so we need to provide child case ids to the formula scope for each
// case.
formulaScope.setChildCaseIds(caseIdToCaseGroup[c.__id__].childCaseIds)
}
const formulaValue = compiledFormula.evaluate(formulaScope)
return {
__id__: c.__id__,
Expand Down
57 changes: 32 additions & 25 deletions v3/src/models/data/formula-mathjs-scope.ts
Original file line number Diff line number Diff line change
@@ -1,23 +1,23 @@
import { AGGREGATE_SYMBOL_SUFFIX, GLOBAL_VALUE, LOCAL_ATTR } from "./formula-types"
import { AGGREGATE_SYMBOL_SUFFIX, GLOBAL_VALUE, LOCAL_ATTR, NO_PARENT_KEY } from "./formula-types"
import type { IGlobalValueManager } from "../global/global-value-manager"
import type { IDataSet } from "./data-set"
import type { IValueType } from "./attribute"

const CACHE_ENABLED = false

export interface IFormulaMathjsScopeContext {
localDataSet: IDataSet
dataSets: Map<string, IDataSet>
globalValueManager?: IGlobalValueManager
sameLevelCaseIds: string[]
formulaAttributeCollectionId?: string
caseGroupId: Record<string, string>
}

// Official MathJS docs don't describe custom scopes in great detail, but there's a good example in their repo:
// https://github.com/josdejong/mathjs/blob/develop/examples/advanced/custom_scope_objects.js
export class FormulaMathJsScope {
context: IFormulaMathjsScopeContext
caseId = ""
childCaseIds: string[] = []
dataStorage: Record<string, any> = {}
cache = new Map<string, any>()

Expand All @@ -34,27 +34,38 @@ export class FormulaMathJsScope {
}
})

const cachedSameLevelCases = context.sameLevelCaseIds.map(caseId =>
context.localDataSet.getValue(caseId, attr.id)
)
const allCasesForAttr = context.localDataSet.getCasesForAttributes([attr.id])
const cachedGroup: Record<string, IValueType[]> = {}
let cacheInitialized = false
Object.defineProperty(this.dataStorage, `${LOCAL_ATTR}${attr.id}${AGGREGATE_SYMBOL_SUFFIX}`, {
get: () => {
// Note that we have to return all the same level cases in two cases:
// - the table is flat and aggregate function is referencing another attribute
// - the table is hierarchical and aggregate function is referencing an attribute from the same collection
// In both cases it's enough to compare collection ids. When the table is flat, they might be equal to
// undefined but equality check works anyway.
// There are two separate kinds of aggregate cases grouping:
// - Same-level grouping, which is used when the table is flat or when the aggregate function is referencing
// another attribute from the same collection. In this case the group ID is the currently processed case
// parent ID.
// - Parent-child grouping, which is used when the table is hierarchical and the aggregate function is
// referencing an attribute from a different collection. In this case the group ID is the currently
// processed case ID.
const attrCollectionId = context.localDataSet.getCollectionForAttribute(attr.id)?.id
const useSameLevelCases = context.formulaAttributeCollectionId === attrCollectionId
// Note that mapping childCaseIds might look like potential performance issue / O(n^2), but it's not.
// When we're dealing with hierarchical data, each parent has distinct set of child cases, so we're not
// processing any child case more than once. In other words, child cases sets never overlap and they always
// sum to the total number of cases in the dataset.
// However, when dealing with flat tables and returning all the cases over and over, we could easily
// reach O(n^2) complexity just in the cases retrieval. That's why they need to be cached.
return useSameLevelCases
? cachedSameLevelCases
: this.childCaseIds.map(caseId => context.localDataSet.getValue(caseId, attr.id))
// When the table is flat, collection IDs might be equal to undefined but equality check works anyway.
const useSameLevelGrouping = context.formulaAttributeCollectionId === attrCollectionId

if (!cacheInitialized) {
// Cache is calculated lazily to avoid calculating it for all the attributes that are not referenced by
// the formula. Note that each case is processed only once, so this mapping is only O(n) complexity.
allCasesForAttr.forEach(c => {
const groupId = context.caseGroupId[c.__id__]
if (!cachedGroup[groupId]) {
cachedGroup[groupId] = []
}
cachedGroup[groupId].push(context.localDataSet.getValue(c.__id__, attr.id))
})
cacheInitialized = true
}

// Same-level grouping uses parent ID as a group ID, parent-child grouping uses case ID as a group ID.
const groupParentId = useSameLevelGrouping ? context.caseGroupId[this.caseId] : this.caseId
return cachedGroup[groupParentId] || cachedGroup[NO_PARENT_KEY]
}
})
})
Expand Down Expand Up @@ -108,10 +119,6 @@ export class FormulaMathJsScope {
this.caseId = caseId
}

setChildCaseIds(childCaseIds: string[]) {
this.childCaseIds = childCaseIds
}

getCaseId() {
return this.caseId
}
Expand Down
2 changes: 2 additions & 0 deletions v3/src/models/data/formula-types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ export const GLOBAL_VALUE = "GLOBAL_VALUE_"
export const LOCAL_ATTR = "LOCAL_ATTR_"
export const AGGREGATE_SYMBOL_SUFFIX = "_ALL"

export const NO_PARENT_KEY = "__NO_PARENT__"

export const isConstantStringNode = (node: MathNode): node is ConstantNode<string> =>
isConstantNode(node) && typeof node.value === "string"

Expand Down

0 comments on commit 5d34434

Please sign in to comment.