/**
 * Order records used as the drill-down fixture.
 * Only `Province`, `City`, `OrderDate` and `Price` are read by the test below.
 */
const data = [
  {
    City: 'huangshishi',
    Province: 'shanxi',
    ClientGender: 'male',
    OrderDate: '2022/2/19',
    Price: 13722.76,
    UnitCost: 292.72,
  },
  {
    City: 'huangshishi',
    Province: 'shanxi',
    ClientGender: 'male',
    OrderDate: '2022/2/21',
    Price: 24020.88,
    UnitCost: 5447.57,
  },
  {
    City: 'huangshishi',
    Province: 'shanxi',
    ClientGender: 'male',
    OrderDate: '2022/2/16',
    Price: 40145.8,
    UnitCost: 3696.42,
  },
  {
    City: 'huanggangshi',
    Province: 'qinghai',
    ClientGender: 'male',
    OrderDate: '2022/2/18',
    Price: 99980.16,
    UnitCost: 34393.38,
  },
  {
    City: 'huanggangshi',
    Province: 'qinghai',
    ClientGender: 'male',
    OrderDate: '2022/2/17',
    Price: 12656.11,
    UnitCost: 6012.96,
  },
  {
    City: 'huanggangshi',
    Province: 'qinghai',
    ClientGender: 'male',
    OrderDate: '2022/2/19',
    Price: 42464.05,
    UnitCost: 3113.68,
  },
];

describe('MultiDim Test', () => {
  test('check the disassembled result', () => {
    const props: DrillDownProps = {
      sourceData: data,
      dimensions: ['Province', 'City'],
      targetMeasure: 'Price',
      timeSeriesDim: 'OrderDate',
      baseInterval: {
        startPoint: '2022/2/17',
        endPoint: '2022/2/18',
      },
      currInterval: {
        startPoint: '2022/2/19',
        endPoint: '2022/2/20',
      },
    };
    const { resultInTree, globalDiff } = dimensionDrillDownAttribution(props);

    /** huangshishi only has its 2022/2/19 order inside currInterval and nothing inside baseInterval */
    expect(resultInTree?.City.huangshishi.info.currValue).toBe(13722.76);
    expect(resultInTree?.City.huangshishi.info.baseValue).toBe(0);

    /**
     * huanggangshi: the 2022/2/17 order is the base side, the 2022/2/19 order the current side;
     * the 2022/2/18 order is excluded because the end point of a string interval is exclusive
     */
    expect(resultInTree?.City.huanggangshi.info.baseValue).toBe(12656.11);
    expect(resultInTree?.City.huanggangshi.info.currValue).toBe(42464.05);

    /** the same record is reachable through the Province -> City drill-down path */
    expect(resultInTree?.Province.qinghai.drillDown.City.huanggangshi.info.diff).toBeCloseTo(29807.94, 2);

    /** global totals are the sums over every record located in each interval */
    expect(globalDiff.baseValue).toBe(12656.11);
    expect(globalDiff.currValue).toBeCloseTo(56186.81, 2);
    expect(globalDiff.diff).toBeCloseTo(43530.7, 2);
  });
});
/**
 * Main function for dimension drill down attribution.
 *
 * Sums `targetMeasure` over the records located in `baseInterval` and in `currInterval`
 * (located by `timeSeriesDim`), both globally and per combination of dimension values.
 *
 * @param props - source data, the dimensions to drill into, the measure to sum,
 *   the column used to locate a record, and the two intervals to compare
 * @returns the global totals (`globalDiff`), the per-combination breakdown as a tree
 *   (`resultInTree`) and as a flat list (`resultInList`)
 */
export const dimensionDrillDownAttribution = ({
  sourceData,
  dimensions,
  targetMeasure: measure,
  timeSeriesDim: fluctuationDim,
  baseInterval,
  currInterval,
}: DrillDownProps): DimensionDrillDownResult => {
  /** remove invalid data: a record holding any null/undefined field is dropped */
  const data = sourceData.filter((item) => !Object.values(item).some((v) => v === null || v === undefined));

  const globalDiff: InfoType = {
    baseValue: 0,
    currValue: 0,
    diff: 0,
  };

  const resultTree: TreeDim = {};
  /** element type derived from the result type so that no extra import is needed */
  const dictFlatten: Record<string, NonNullable<DimensionDrillDownResult['resultInList']>[number]> = {};

  /** traverse the input data and build the result data structure; */
  data.forEach((item) => {
    const value = item[measure];
    /** a non-numeric measure would turn the sums into NaN or concatenated strings, so skip the record */
    if (typeof value !== 'number' || !Number.isFinite(value)) {
      return;
    }

    /**
     * Collect every interval the record belongs to. When the two intervals overlap a record
     * can be on both sides; it is then counted on both sides of the breakdown as well,
     * so that the tree and the list always add up to the global totals.
     */
    const locations: DataLocation[] = [];
    if (locatedInInterval(item[fluctuationDim], baseInterval.startPoint, baseInterval.endPoint)) {
      locations.push('left');
      globalDiff.baseValue += value;
    }
    if (locatedInInterval(item[fluctuationDim], currInterval.startPoint, currInterval.endPoint)) {
      locations.push('right');
      globalDiff.currValue += value;
    }

    locations.forEach((location) => {
      enumerateAllDimensionCombinationsByDFS(
        item,
        0,
        dimensions,
        resultTree,
        dictFlatten,
        [],
        measure,
        fluctuationDim,
        location
      );
    });
  });
  globalDiff.diff = globalDiff.currValue - globalDiff.baseValue;

  return { resultInTree: resultTree, globalDiff, resultInList: Object.values(dictFlatten) };
};
/*
 * NOTE(review): the type arguments of `Record` / `Partial` were missing in the reviewed text.
 * They are reconstructed below from the doc comments and from how the values are used in
 * util.ts and dimensionDrillDown.ts — confirm against the author's intent.
 */

/** FluctInfo is the necessary input for fluctuation analysis. */
export type FluctInfo = {
  /** Typically, fluctDim is a time dimension that the measure value varies on */
  fluctDim: string;
  /** baseInterval is the time interval that is assigned as the baseline */
  baseInterval: CompareInterval;
  /** currInterval is the time interval that user is focusing on */
  currInterval: CompareInterval;
};

/** Time Interval in Fluctuation Analysis */
export type CompareInterval = {
  /** start time of this interval */
  startPoint: string | number;
  /** end time of this interval */
  endPoint: string | number;
};

/**
 * A flag that indicates which interval a single line of data belongs to, used for aggregation:
 * 'left' is the base interval, 'right' is the current interval, 'none' is neither.
 */
export type DataLocation = 'left' | 'right' | 'none';

/** Record the calculation result */
export type InfoType = {
  /** Calculation value corresponding to baseInterval */
  baseValue: number;
  /** Calculation value corresponding to currInterval */
  currValue: number;
  /** diff = currValue - baseValue */
  diff: number;
};

/** Dimension drill down Result type */
export type DimensionDrillDownResult = {
  /** total difference */
  globalDiff: InfoType;
  /** Tree like returned data */
  resultInTree?: TreeDim;
  /** Flatten returned data */
  resultInList?: FlattenResult[];
};

/** Measure decomposition Result type */
export type MeasureDecomposeResult = {
  /** total difference */
  globalDiff: InfoType;
  /** per-key attribution result, see FunctionBasedResult */
  resultByMeasure?: FunctionBasedResult;
};

/** The first string is dimension name, the second string is dimension value */
export type DimWithValue = Record<string, string>;

/**
 * Dimension drill down attribution Result type that has been formalized into Datum[]:
 * one entry carries the dimension values of a combination plus its base/curr/diff figures.
 */
export type FlattenResult = Partial<DimWithValue & InfoType>;

/**
 * Function based attribution Result type.
 * NOTE(review): presumably keyed by measure name — nothing in the reviewed code populates it, confirm.
 */
export type FunctionBasedResult = Record<string, InfoType>;

/** Example: Tree Data Structure for dimension drill down analysis
 *
 * {
 *   dimNameA: {
 *     dimValueA1: {
 *       info: {baseValue: 222, currValue: 555, diff: 333},
 *       drillDown: {
 *         dimNameB: {
 *           dimValueB1: {
 *             info: {baseValue: 0, currValue: 111, diff: 111},
 *             drillDown: {}
 *           },
 *           dimValueB2: {
 *             info: {baseValue: 222, currValue: 444, diff: 222},
 *             drillDown: {}
 *           }
 *         }
 *       }
 *     },
 *   },
 *   dimNameB: {
 *     dimValueB1: {
 *       info: {baseValue: 0, currValue: 111, diff: 111},
 *       drillDown: {}
 *     },
 *     dimValueB2: {
 *       info: {baseValue: 222, currValue: 444, diff: 222},
 *       drillDown: {}
 *     }
 *   }
 * }
 *
 */

/** The dimension name level of the tree or the first level in drillDown */
export interface TreeDim {
  [dimName: string]: TreeDimVal;
}

/** The dimension value level of the tree which is inside the dimension name level */
export type TreeDimVal = Record<string, TreeDrillDown>;

/** The information level inside the dimension value */
export interface TreeDrillDown {
  /** aggregated figures of this dimension value */
  info: InfoType;
  /** breakdown of this dimension value by the remaining dimensions */
  drillDown: TreeDim;
}

/** Props shared by the causal inference entry points */
interface CausalInferenceProps {
  sourceData: Datum[];
  dimensions: string[];
  /** name of the column used to locate a record in baseInterval / currInterval */
  timeSeriesDim: string;
  baseInterval: CompareInterval;
  currInterval: CompareInterval;
}

/** Dimension drill down based attribution function props */
export interface DrillDownProps extends CausalInferenceProps {
  /** name of the measure column that is summed */
  targetMeasure: string;
}

/** Measure decomposition based attribution function props */
export interface MeasureDecomposeProps extends CausalInferenceProps {
  allMeasures: string[];
  expression: string;
}
/**
 * A helper function that figures out whether a point belongs to an interval.
 *
 * Rule: `startPoint <= comparedPoint < endPoint` — the end point is exclusive for numbers
 * and for time strings alike, so two adjacent intervals never both claim the same point.
 *
 * @param comparedPoint - the point to locate
 * @param startPoint - inclusive lower bound
 * @param endPoint - exclusive upper bound
 * @returns `true` when the point is inside the interval; `false` when it is outside, when a
 *   time string cannot be parsed, or when the three arguments are not all numbers or all strings
 */
export const locatedInInterval = (
  comparedPoint: string | number,
  startPoint: string | number,
  endPoint: string | number
): boolean => {
  if (typeof comparedPoint === 'number' && typeof startPoint === 'number' && typeof endPoint === 'number') {
    return comparedPoint >= startPoint && comparedPoint < endPoint;
  }
  if (typeof comparedPoint === 'string' && typeof startPoint === 'string' && typeof endPoint === 'string') {
    const compareMoment = moment(comparedPoint);
    const startMoment = moment(startPoint);
    const endMoment = moment(endPoint);
    if (!compareMoment.isValid() || !startMoment.isValid() || !endMoment.isValid()) {
      /* eslint-disable */
      console.error('Invalid time input.');
      /* eslint-enable */
      return false;
    }
    return compareMoment.isSameOrAfter(startMoment) && compareMoment.isBefore(endMoment);
  }
  return false;
};

/** Return the tree node of `dimVal` under `dimName` at the given tree level, creating it when absent */
const getOrCreateTreeNode = (level: TreeDim, dimName: string, dimVal: string): TreeDrillDown => {
  const treeLevel = level;
  if (!Object.prototype.hasOwnProperty.call(treeLevel, dimName)) {
    treeLevel[dimName] = {};
  }
  const dimValues = treeLevel[dimName];
  if (!Object.prototype.hasOwnProperty.call(dimValues, dimVal)) {
    dimValues[dimVal] = {
      info: {
        baseValue: 0,
        currValue: 0,
        diff: 0,
      },
      drillDown: {},
    };
  }
  return dimValues[dimVal];
};

/**
 * Enumerate every non-empty, order-preserving subset of `dimensions` by DFS and add the
 * measure value of `item` to the matching node of `resultTree`. The subset that contains
 * all dimensions is additionally mirrored into `dictFlatten`.
 *
 * `resultTree` and `dictFlatten` are accumulators and are mutated in place; `deque` is the
 * DFS working stack and is left as it was received when the call returns.
 *
 * @param item - the record being aggregated
 * @param index - position in `dimensions` that is decided by this call (start with 0)
 * @param dimensions - all dimension names to drill into
 * @param resultTree - tree accumulator
 * @param dictFlatten - flat accumulator, one entry per distinct combination of dimension values
 * @param deque - dimension names chosen so far (start with an empty array)
 * @param measure - name of the measure column that is summed
 * @param fluctuationDim - name of the fluctuation column; only forwarded to the recursive calls
 * @param location - 'left' adds to baseValue, 'right' adds to currValue
 */
export const enumerateAllDimensionCombinationsByDFS = (
  item: Datum,
  index: number,
  dimensions: string[],
  resultTree: TreeDim,
  dictFlatten: Record<string, FlattenResult>,
  deque: string[],
  measure: string,
  fluctuationDim: string,
  location: DataLocation
) => {
  const flattenDict = dictFlatten;

  if (index < dimensions.length) {
    /** Case1: dimensions[index] is included */
    deque.push(dimensions[index]);
    enumerateAllDimensionCombinationsByDFS(
      item,
      index + 1,
      dimensions,
      resultTree,
      flattenDict,
      deque,
      measure,
      fluctuationDim,
      location
    );
    deque.pop();

    /** Case2: dimensions[index] is not included */
    enumerateAllDimensionCombinationsByDFS(
      item,
      index + 1,
      dimensions,
      resultTree,
      flattenDict,
      deque,
      measure,
      fluctuationDim,
      location
    );
    return;
  }

  /** the empty combination carries no dimension, there is nothing to record */
  if (deque.length === 0) {
    return;
  }

  /** walk down the tree along the chosen dimensions; the last node is the one to update */
  let node = getOrCreateTreeNode(resultTree, deque[0], String(item[deque[0]]));
  for (let i = 1; i < deque.length; i += 1) {
    node = getOrCreateTreeNode(node.drillDown, deque[i], String(item[deque[i]]));
  }

  const amount = item[measure] as number;
  if (location === 'left') {
    node.info.baseValue += amount;
  } else if (location === 'right') {
    node.info.currValue += amount;
  }
  /** always recomputed, so diff cannot go stale when both sides return to 0 */
  node.info.diff = node.info.currValue - node.info.baseValue;

  if (deque.length === dimensions.length) {
    /**
     * The key is built from the dimension VALUES so that every distinct combination gets its
     * own entry (the dimension names are identical for every record). JSON encoding keeps
     * values that contain a separator character from colliding.
     */
    const flattenKey = JSON.stringify(deque.map((dimName) => item[dimName]));
    if (!Object.prototype.hasOwnProperty.call(flattenDict, flattenKey)) {
      flattenDict[flattenKey] = {};
    }
    const flattenRow = flattenDict[flattenKey];
    deque.forEach((dimName) => {
      flattenRow[dimName] = item[dimName] as string;
    });
    flattenRow.baseValue = node.info.baseValue;
    flattenRow.currValue = node.info.currValue;
    flattenRow.diff = node.info.diff;
  }
};