-
Notifications
You must be signed in to change notification settings - Fork 0
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Bandit selects and assigns action from weighted actions #95
Changes from all commits
5df0a40
e735ce3
42e82ac
f9f35db
1e3519b
6143fb6
5e7c7a9
9e6f1ae
a067bf6
3ba8349
755b581
eeece6f
28986f7
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -29,6 +29,11 @@ describe('BanditEvaluator', () => { | |
gamma: number, | ||
actionProbabilityFloor: number, | ||
) => Record<string, number>; | ||
selectAction: ( | ||
flagKey: string, | ||
subjectKey: string, | ||
actionWeights: Record<string, number>, | ||
) => string; | ||
}; | ||
|
||
describe('scoreNumericAttributes', () => { | ||
|
@@ -297,4 +302,121 @@ describe('BanditEvaluator', () => { | |
).toBe(true); | ||
}); | ||
}); | ||
|
||
describe('selectAction', () => { | ||
const flagKey = 'flag'; | ||
const actionWeights = { action1: 0.2, action2: 0.5, action3: 0.3 }; | ||
|
||
it('selects actions', () => { | ||
expect(exposedEvaluator.selectAction(flagKey, 'subjectA', actionWeights)).toBe('action1'); | ||
expect(exposedEvaluator.selectAction(flagKey, 'subjectB', actionWeights)).toBe('action2'); | ||
expect(exposedEvaluator.selectAction(flagKey, 'subjectE', actionWeights)).toBe('action3'); | ||
}); | ||
}); | ||
|
||
describe('evaluateBandit', () => { | ||
it('evaluates the bandit with action contexts', () => { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Based on this Python SDK test, but instead of mocking out the sharder, we choose two subjects that hit each of the two actions. |
||
const flagKey = 'test_flag'; | ||
const subjectAttributes = { age: 25, location: 'US' }; | ||
const actions: Record<string, Attributes> = { | ||
action1: { price: 10, category: 'A' }, | ||
action2: { price: 20, category: 'B' }, | ||
}; | ||
const banditModel: BanditModelData = { | ||
gamma: 0.1, | ||
defaultActionScore: 0.0, | ||
actionProbabilityFloor: 0.1, | ||
coefficients: { | ||
action1: { | ||
actionKey: 'action1', | ||
intercept: 0.5, | ||
subjectNumericCoefficients: [ | ||
{ attributeKey: 'age', coefficient: 0.1, missingValueCoefficient: 0.0 }, | ||
], | ||
subjectCategoricalCoefficients: [ | ||
{ | ||
attributeKey: 'location', | ||
missingValueCoefficient: 0.0, | ||
valueCoefficients: { US: 0.2 }, | ||
}, | ||
], | ||
actionNumericCoefficients: [ | ||
{ attributeKey: 'price', coefficient: 0.05, missingValueCoefficient: 0.0 }, | ||
], | ||
actionCategoricalCoefficients: [ | ||
{ | ||
attributeKey: 'category', | ||
missingValueCoefficient: 0.0, | ||
valueCoefficients: { A: 0.3 }, | ||
}, | ||
], | ||
}, | ||
action2: { | ||
actionKey: 'action2', | ||
intercept: 0.3, | ||
subjectNumericCoefficients: [ | ||
{ attributeKey: 'age', coefficient: 0.1, missingValueCoefficient: 0.0 }, | ||
], | ||
subjectCategoricalCoefficients: [ | ||
{ | ||
attributeKey: 'location', | ||
missingValueCoefficient: 0.0, | ||
valueCoefficients: { US: 0.2 }, | ||
}, | ||
], | ||
actionNumericCoefficients: [ | ||
{ attributeKey: 'price', coefficient: 0.05, missingValueCoefficient: 0.0 }, | ||
], | ||
actionCategoricalCoefficients: [ | ||
{ | ||
attributeKey: 'category', | ||
missingValueCoefficient: 0.0, | ||
valueCoefficients: { B: 0.3 }, | ||
}, | ||
], | ||
}, | ||
}, | ||
}; | ||
|
||
// Subject A gets assigned action 2 | ||
const subjectKeyA = 'subjectA'; | ||
const resultA = banditEvaluator.evaluateBandit( | ||
flagKey, | ||
subjectKeyA, | ||
subjectAttributes, | ||
actions, | ||
banditModel, | ||
); | ||
|
||
expect(resultA.flagKey).toBe(flagKey); | ||
expect(resultA.subjectKey).toBe(subjectKeyA); | ||
expect(resultA.subjectAttributes).toStrictEqual(subjectAttributes); | ||
expect(resultA.actionKey).toBe('action2'); | ||
expect(resultA.actionAttributes).toStrictEqual(actions.action2); | ||
expect(resultA.actionScore).toBe(4.3); | ||
expect(resultA.actionWeight).toBeCloseTo(0.5074); | ||
expect(resultA.gamma).toBe(banditModel.gamma); | ||
expect(resultA.optimalityGap).toBe(0); | ||
|
||
// Subject B gets assigned action 1 | ||
const subjectKeyB = 'subjectB'; | ||
const resultB = banditEvaluator.evaluateBandit( | ||
flagKey, | ||
subjectKeyB, | ||
subjectAttributes, | ||
actions, | ||
banditModel, | ||
); | ||
|
||
expect(resultB.flagKey).toBe(flagKey); | ||
expect(resultB.subjectKey).toBe(subjectKeyB); | ||
expect(resultB.subjectAttributes).toStrictEqual(subjectAttributes); | ||
expect(resultB.actionKey).toBe('action1'); | ||
expect(resultB.actionAttributes).toStrictEqual(actions.action1); | ||
expect(resultB.actionScore).toBe(4); | ||
expect(resultB.actionWeight).toBeCloseTo(0.4926); | ||
expect(resultB.gamma).toBe(banditModel.gamma); | ||
expect(resultB.optimalityGap).toBeCloseTo(0.3); | ||
}); | ||
}); | ||
}); |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,8 +1,10 @@ | ||
import { BANDIT_ASSIGNMENT_SHARDS } from './constants'; | ||
import { | ||
BanditCategoricalAttributeCoefficients, | ||
BanditModelData, | ||
BanditNumericAttributeCoefficients, | ||
} from './interfaces'; | ||
import { MD5Sharder, Sharder } from './sharders'; | ||
import { Attributes } from './types'; | ||
|
||
export interface BanditEvaluation { | ||
|
@@ -18,6 +20,9 @@ export interface BanditEvaluation { | |
} | ||
|
||
export class BanditEvaluator { | ||
private assignmentShards = BANDIT_ASSIGNMENT_SHARDS; // We just hard code this for now | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We've decided for now to just have the bandit assignment shards hard-coded in the SDKs as opposed to passing it through as we do with flags or even just using the flag's number of shards. 🗨️ Relevant slack conversation: "...should this shard count be tied to the Flag.TotalShards field for the given flag..." |
||
private sharder: Sharder = new MD5Sharder(); | ||
|
||
public evaluateBandit( | ||
flagKey: string, | ||
subjectKey: string, | ||
|
@@ -36,7 +41,13 @@ export class BanditEvaluator { | |
banditModel.actionProbabilityFloor, | ||
); | ||
const selectedActionKey: string = this.selectAction(flagKey, subjectKey, actionWeights); | ||
const optimalityGap = 0; // TODO: compute difference between selected and max | ||
|
||
// Compute optimality gap in terms of score | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For now we want optimality gap in terms of score (as opposed to percentage). 🗨️ Relevant slack conversation: "...should optimality gap be in terms of percentage (e.g., weighted) than absolute score?..." |
||
const topScore = Object.values(actionScores).reduce( | ||
(maxScore, score) => (score > maxScore ? score : maxScore), | ||
-Infinity, | ||
); | ||
const optimalityGap = topScore - actionScores[selectedActionKey]; | ||
|
||
return { | ||
flagKey, | ||
|
@@ -173,6 +184,43 @@ export class BanditEvaluator { | |
subjectKey: string, | ||
actionWeights: Record<string, number>, | ||
): string { | ||
return Object.keys(actionWeights)[0]; // TODO: math and stuff | ||
// Deterministically "shuffle" the actions | ||
// This way as action weights shift, a bunch of users who were on the edge of one action won't all be shifted to the | ||
// same new action at the same time. | ||
const shuffledActions = Object.entries(actionWeights).sort((a, b) => { | ||
const actionAShard = this.sharder.getShard( | ||
`${flagKey}-${subjectKey}-${a[0]}`, | ||
this.assignmentShards, | ||
); | ||
const actionBShard = this.sharder.getShard( | ||
`${flagKey}-${subjectKey}-${b[0]}`, | ||
this.assignmentShards, | ||
); | ||
let result = actionAShard - actionBShard; | ||
if (result === 0) { | ||
// In the unlikely case of a tie in randomized assigned shards, break the tie with the action names | ||
result = a[0] < b[0] ? -1 : 1; | ||
} | ||
return result; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: could simplify with just returning with a ternary operator return actionAShard - actionBShard === 0 ? a[0] < b[0] ? -1 : 1 : result; There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. True, but I like how the if statement let's me locate a comment about the tie-breaking in a sensible location |
||
}); | ||
|
||
// Select action from the shuffled actions, based on weight | ||
const assignedShard = this.sharder.getShard(`${flagKey}-${subjectKey}`, this.assignmentShards); | ||
const assignmentWeightThreshold = assignedShard / this.assignmentShards; | ||
let cumulativeWeight = 0; | ||
let assignedAction: string | null = null; | ||
for (const actionWeight of shuffledActions) { | ||
cumulativeWeight += actionWeight[1]; | ||
if (cumulativeWeight > assignmentWeightThreshold) { | ||
assignedAction = actionWeight[0]; | ||
break; | ||
} | ||
} | ||
if (assignedAction === null) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. i noticed we tend to do There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In this particular case, there is a (small) possibility that, for whatever reason, a customer uses an action that is the empty string, so I figured I'd be explicit. |
||
throw new Error( | ||
`No bandit action selected for flag "${flagKey}" and subject "${subjectKey}"`, | ||
); | ||
} | ||
return assignedAction; | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
import { Attributes } from './types'; | ||
|
||
export interface IBanditEvent { | ||
timestamp: string; | ||
featureFlag: string; | ||
bandit: string; | ||
subject: string; | ||
action: string | null; | ||
actionProbability: number | null; | ||
optimalityGap: number | null; | ||
modelVersion: string; | ||
subjectNumericAttributes: Attributes; | ||
subjectCategoricalAttributes: Attributes; | ||
actionNumericAttributes: Attributes; | ||
actionCategoricalAttributes: Attributes; | ||
metaData?: Record<string, unknown>; | ||
} | ||
|
||
export interface IBanditLogger { | ||
logBanditAction(assignment: IBanditEvent): void; | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Continuing the pattern to expose
private
methods for the sake of enabling more modular testing without creating a confusing public interface.