Skip to content

Commit 0516ea3

Browse files
chore: adds accuracyRunStatus to snapshot entries
The new field `accuracyRunStatus` is meant to guard against cases where jest fails partway through a run, for example due to LLM rate-limit errors or some other failure, leaving a partially saved accuracy run. With this field we can safely look for the latest runs whose `accuracyRunStatus` is `done` and be sure they have a complete set of accuracy snapshot entries.
1 parent 7f931dc commit 0516ea3

File tree

6 files changed

+56
-6
lines changed

6 files changed

+56
-6
lines changed

package-lock.json

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@
6464
"prettier": "^3.5.3",
6565
"simple-git": "^3.28.0",
6666
"ts-jest": "^29.3.1",
67-
"tsx": "^4.19.3",
67+
"tsx": "^4.20.3",
6868
"typescript": "^5.8.2",
6969
"typescript-eslint": "^8.29.1",
7070
"uuid": "^11.1.0",

scripts/mark-accuracy-run-finished.ts

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
import { getAccuracySnapshotStorage } from "../tests/accuracy/sdk/accuracy-snapshot-storage/get-snapshot-storage.js";
2+
3+
console.time(`Marked accuracy run id - ${process.env.MDB_ACCURACY_RUN_ID} as finished in`);
4+
const storage = await getAccuracySnapshotStorage();
5+
await storage.accuracyRunFinished();
6+
await storage.close();
7+
console.timeEnd(`Marked accuracy run id - ${process.env.MDB_ACCURACY_RUN_ID} as finished in`);

scripts/run-accuracy-tests.sh

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,27 @@
22
# Variables necessary for the accuracy test runs
33
export MDB_ACCURACY_RUN_ID=$(npx uuid v4)
44

5+
# For providing access tokens for different LLM providers
6+
# export MDB_OPEN_AI_API_KEY=""
7+
# export MDB_GEMINI_API_KEY=""
8+
# export MDB_AZURE_OPEN_AI_API_KEY=""
9+
# export MDB_AZURE_OPEN_AI_API_URL=""
10+
11+
# For providing a mongodb based storage to store accuracy snapshots
12+
# export MDB_ACCURACY_MDB_URL=""
13+
# export MDB_ACCURACY_MDB_DB=""
14+
# export MDB_ACCURACY_MDB_COLLECTION=""
15+
16+
# By default we run all the tests under tests/accuracy folder unless a path is
17+
# specified in the command line. Such as:
18+
# npm run test:accuracy -- tests/accuracy/some-test.test.ts
519
TEST_PATH_PATTERN="${1:-tests/accuracy}"
620
shift || true
7-
node --experimental-vm-modules node_modules/jest/bin/jest.js --testPathPattern "$TEST_PATH_PATTERN" "$@"
21+
node --experimental-vm-modules node_modules/jest/bin/jest.js --testPathPattern "$TEST_PATH_PATTERN" "$@"
22+
23+
# Each test run submits an accuracy snapshot entry for each prompt with the
24+
# accuracyRunStatus: "in-progress". When all the tests are done and jest exits
25+
# with an exit code of 0, we can safely mark the accuracy run as finished.
26+
if [ $? -eq 0 ]; then
27+
npx tsx scripts/mark-accuracy-run-finished.ts
28+
fi

tests/accuracy/sdk/accuracy-snapshot-storage/mdb-snapshot-storage.ts

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
11
import { Collection, MongoClient } from "mongodb";
2-
import { AccuracySnapshotEntry, AccuracySnapshotEntrySchema, AccuracySnapshotStorage } from "./snapshot-storage.js";
2+
import {
3+
AccuracyRunStatus,
4+
AccuracySnapshotEntry,
5+
AccuracySnapshotEntrySchema,
6+
AccuracySnapshotStorage,
7+
} from "./snapshot-storage.js";
38

49
export class MongoDBSnapshotStorage implements AccuracySnapshotStorage {
510
private readonly client: MongoClient;
@@ -46,6 +51,7 @@ export class MongoDBSnapshotStorage implements AccuracySnapshotStorage {
4651
...snapshotEntry,
4752
commitSHA: this.commitSHA,
4853
accuracyRunId: this.accuracyRunId,
54+
accuracyRunStatus: AccuracyRunStatus.InProgress,
4955
createdOn: Date.now(),
5056
};
5157
await this.snapshotCollection.insertOne(snapshotWithMeta);
@@ -70,6 +76,13 @@ export class MongoDBSnapshotStorage implements AccuracySnapshotStorage {
7076
return AccuracySnapshotEntrySchema.array().parse(snapshotEntries);
7177
}
7278

79+
async accuracyRunFinished(): Promise<void> {
80+
await this.snapshotCollection.updateMany(
81+
{ accuracyRunId: this.accuracyRunId },
82+
{ $set: { accuracyRunStatus: AccuracyRunStatus.Done } }
83+
);
84+
}
85+
7386
static getStorage(commitSHA: string, accuracyRunId: string): MongoDBSnapshotStorage {
7487
const mongodbUrl = process.env.MDB_ACCURACY_MDB_URL;
7588
const database = process.env.MDB_ACCURACY_MDB_DB;

tests/accuracy/sdk/accuracy-snapshot-storage/snapshot-storage.ts

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,22 @@ const ExpectedToolCallSchema = z.object({
44
toolName: z.string(),
55
parameters: z.record(z.string(), z.unknown()),
66
});
7+
export type ExpectedToolCall = z.infer<typeof ExpectedToolCallSchema>;
78

89
const ActualToolCallSchema = ExpectedToolCallSchema.extend({ toolCallId: z.string() });
9-
10-
export type ExpectedToolCall = z.infer<typeof ExpectedToolCallSchema>;
1110
export type ActualToolCall = z.infer<typeof ActualToolCallSchema>;
1211

12+
export const AccuracyRunStatus = {
13+
Done: "done",
14+
InProgress: "in-progress",
15+
} as const;
16+
1317
export const AccuracySnapshotEntrySchema = z.object({
1418
// Git and meta information for snapshot entries
1519
accuracyRunId: z.string(),
20+
accuracyRunStatus: z
21+
.enum([AccuracyRunStatus.Done, AccuracyRunStatus.InProgress])
22+
.default(AccuracyRunStatus.InProgress),
1623
createdOn: z.number(),
1724
commitSHA: z.string(),
1825
// Accuracy info
@@ -60,5 +67,7 @@ export interface AccuracySnapshotStorage {
6067

6168
getLatestSnapshotsForCommit(commit: string): Promise<AccuracySnapshotEntry[]>;
6269

70+
accuracyRunFinished(): Promise<void>;
71+
6372
close(): Promise<void>;
6473
}

0 commit comments

Comments
 (0)