Skip to content

Commit

Permalink
WIP attempt
Browse files (browse the repository at this point in the history)
  • Loading branch information
mattpocock committed Dec 11, 2024
1 parent 7b72ecf, commit f24149b
Show file tree
Hide file tree
Showing 8 changed files with 213 additions and 170 deletions.
47 changes: 27 additions & 20 deletions packages/evalite-core/src/db.ts
Original file line number Diff line number Diff line change
Expand Up @@ -138,9 +138,15 @@ export const saveRun = (
result?: {
state: TaskState;
};
meta: {
evalite?: Evalite.TaskMeta;
};
tasks: {
name: string;
result?: {
state: TaskState;
};
meta: {
evalite?: Evalite.TaskMeta;
};
}[];
}[];
}[];
}
Expand All @@ -155,7 +161,7 @@ export const saveRun = (
.run({ runType }).lastInsertRowid;

for (const file of files) {
for (const task of file.tasks) {
for (const suite of file.tasks) {
const evalId = db
.prepare(
`
Expand All @@ -165,23 +171,24 @@ export const saveRun = (
)
.run({
runId,
name: task.name,
name: suite.name,
filepath: file.filepath,
duration: task.meta.evalite?.duration ?? 0,
status: task.result?.state === "fail" ? "fail" : "success",
duration: 0, // TODO - go with max duration
status: suite.result?.state === "fail" ? "fail" : "success",
}).lastInsertRowid;

if (task.meta.evalite) {
let order = 0;
for (const { input, output, scores, duration, expected, traces } of task
.meta.evalite.results) {
let order = 0;
for (const task of suite.tasks) {
if (task.meta.evalite?.result) {
order += 1;
const { duration, input, output, expected, scores, traces } =
task.meta.evalite.result;
const resultId = db
.prepare(
`
INSERT INTO results (eval_id, duration, input, output, expected, col_order)
VALUES (@evalId, @duration, @input, @output, @expected, @col_order)
`
INSERT INTO results (eval_id, duration, input, output, expected, col_order)
VALUES (@evalId, @duration, @input, @output, @expected, @col_order)
`
)
.run({
evalId,
Expand All @@ -195,9 +202,9 @@ export const saveRun = (
for (const score of scores) {
db.prepare(
`
INSERT INTO scores (result_id, name, score, description, metadata)
VALUES (@resultId, @name, @score, @description, @metadata)
`
INSERT INTO scores (result_id, name, score, description, metadata)
VALUES (@resultId, @name, @score, @description, @metadata)
`
).run({
resultId,
name: score.name,
Expand All @@ -212,9 +219,9 @@ export const saveRun = (
traceOrder += 1;
db.prepare(
`
INSERT INTO traces (result_id, input, output, start_time, end_time, prompt_tokens, completion_tokens, col_order)
VALUES (@resultId, @input, @output, @start_time, @end_time, @prompt_tokens, @completion_tokens, @col_order)
`
INSERT INTO traces (result_id, input, output, start_time, end_time, prompt_tokens, completion_tokens, col_order)
VALUES (@resultId, @input, @output, @start_time, @end_time, @prompt_tokens, @completion_tokens, @col_order)
`
).run({
resultId,
input: JSON.stringify(trace.input),
Expand Down
2 changes: 1 addition & 1 deletion packages/evalite-core/src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ export declare namespace Evalite {
};

export type TaskMeta = {
results: Result[];
result: Result;
duration: number | undefined;
};

Expand Down
2 changes: 1 addition & 1 deletion packages/evalite-tests/tests/basics.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ it("Should report the basics correctly", async () => {
expect(captured.getOutput()).toContain("100% basics.eval.ts (1 eval)");
});

it("Should create a evalite-report.jsonl", async () => {
it("Should save the basic information in a db", async () => {
using fixture = loadFixture("basics");

const captured = captureStdout();
Expand Down
47 changes: 25 additions & 22 deletions packages/evalite/src/index.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import type { Evalite } from "@evalite/core";
import { inject, it } from "vitest";
import { afterEach, beforeEach, describe, inject, it } from "vitest";
import { reportTraceLocalStorage } from "./traces.js";

declare module "vitest" {
Expand Down Expand Up @@ -78,34 +78,37 @@ export const evalite = <TInput, TExpected = TInput>(
testName: string,
opts: Evalite.RunnerOpts<TInput, TExpected>
) => {
return it(testName, async ({ task }) => {
const data = await opts.data();
const start = performance.now();
const results = await Promise.all(
data.map(async ({ input, expected }): Promise<Evalite.Result> => {
return describe(testName, async () => {
const dataset = await opts.data();

let index = 0;
for (const data of dataset) {
index++;
it(`${testName} ${index}`, { concurrent: true }, async ({ task }) => {
const start = performance.now();

const traces: Evalite.Trace[] = [];
reportTraceLocalStorage.enterWith((trace) => traces.push(trace));

const { output, scores, duration } = await runTask({
expected,
input,
expected: data.expected,
input: data.input,
scores: opts.scorers,
task: opts.task,
});

return {
input,
output,
scores,
duration,
expected,
traces,
task.meta.evalite = {
result: {
duration,
expected: data.expected,
input: data.input,
output,
scores,
traces,
},
duration: Math.round(performance.now() - start),
};
})
);
task.meta.evalite = {
results,
duration: Math.round(performance.now() - start),
};
});
}
});
};

Expand Down
Loading

0 comments on commit f24149b

Please sign in to comment.