Skip to content

Commit

Permalink
Completed refactor
Browse files Browse the repository at this point in the history
  • Loading branch information
mattpocock committed Dec 11, 2024
1 parent f24149b commit e304201
Show file tree
Hide file tree
Showing 8 changed files with 163 additions and 142 deletions.
15 changes: 10 additions & 5 deletions packages/evalite-core/src/db.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import type * as BetterSqlite3 from "better-sqlite3";
import Database from "better-sqlite3";
import type { Evalite } from "./index.js";
import type { TaskState } from "vitest";
import { max } from "./utils.js";

export type SQLiteDatabase = BetterSqlite3.Database;

Expand Down Expand Up @@ -138,7 +139,7 @@ export const saveRun = (
result?: {
state: TaskState;
};
tasks: {
tasks?: {
name: string;
result?: {
state: TaskState;
Expand All @@ -162,6 +163,12 @@ export const saveRun = (

for (const file of files) {
for (const suite of file.tasks) {
if (!suite.tasks) {
throw new Error(
"An unknown error occurred - did you nest evalite inside a describe block?"
);
}

const evalId = db
.prepare(
`
Expand All @@ -173,15 +180,13 @@ export const saveRun = (
runId,
name: suite.name,
filepath: file.filepath,
duration: 0, // TODO - go with max duration
duration: max(suite.tasks, (t) => t.meta.evalite?.duration ?? 0),
status: suite.result?.state === "fail" ? "fail" : "success",
}).lastInsertRowid;

let order = 0;
for (const task of suite.tasks) {
if (task.meta.evalite?.result) {
order += 1;
const { duration, input, output, expected, scores, traces } =
const { duration, input, output, expected, scores, traces, order } =
task.meta.evalite.result;
const resultId = db
.prepare(
Expand Down
46 changes: 25 additions & 21 deletions packages/evalite-core/src/db/tests/db.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,30 +20,34 @@ describe("getEvalsAverageScores", () => {
tasks: [
{
name: "task",
meta: {
evalite: {
duration: 100,
results: [
{
input: "input",
tasks: [
{
name: "task 1",
meta: {
evalite: {
duration: 100,
output: "result",
expected: "expected",
scores: [
{
name: "score",
score: 1,
},
{
name: "Other Score",
score: 0,
},
],
traces: [],
result: {
order: 1,
input: "input",
duration: 100,
output: "result",
expected: "expected",
scores: [
{
name: "score",
score: 1,
},
{
name: "Other Score",
score: 0,
},
],
traces: [],
},
},
],
},
},
},
],
},
],
},
Expand Down
1 change: 1 addition & 0 deletions packages/evalite-core/src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ export declare namespace Evalite {
export type MaybePromise<T> = T | Promise<T>;

export type Result = {
order: number;
input: unknown;
output: unknown;
expected: unknown;
Expand Down
3 changes: 3 additions & 0 deletions packages/evalite-tests/tests/failing.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ it("Should report a failing test", async () => {

expect(captured.getOutput()).toContain("failing-test.eval.ts");
expect(captured.getOutput()).toContain("Score ✖ (1 failed)");

// Should not display a table
expect(captured.getOutput()).not.toContain("Input");
});

it("Should save the test as failed in the database", async () => {
Expand Down
3 changes: 2 additions & 1 deletion packages/evalite/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@
"table": "^6.8.2",
"commander": "^12.1.0",
"tinyrainbow": "^1.2.0",
"@evalite/core": "workspace:*"
"@evalite/core": "workspace:*",
"@vitest/runner": "^2.1.8"
},
"devDependencies": {
"@types/ws": "^8.5.13",
Expand Down
1 change: 1 addition & 0 deletions packages/evalite/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ export const evalite = <TInput, TExpected = TInput>(
});
task.meta.evalite = {
result: {
order: index,
duration,
expected: data.expected,
input: data.input,
Expand Down
234 changes: 120 additions & 114 deletions packages/evalite/src/reporter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import { inspect } from "util";
import type { RunnerTask, RunnerTestFile, TaskResultPack, Test } from "vitest";
import { BasicReporter } from "vitest/reporters";
import { average, sum } from "./utils.js";
import { getSuites, getTasks, getTests } from "@vitest/runner/utils";

export interface EvaliteReporterOptions {
isWatching: boolean;
Expand Down Expand Up @@ -121,119 +122,121 @@ export default class EvaliteReporter extends BasicReporter {
super.onFinished(files, errors);
};

// protected override printTask(task: RunnerTask): void {
// // Tasks can be files or individual tests, and
// // this ensures we only print files
// if (
// !("filepath" in task) ||
// !task.result?.state ||
// task.result?.state === "run"
// ) {
// return;
// }

// const hasNoEvalite = task.tasks.every((t) => !t.meta.evalite);

// if (hasNoEvalite) {
// return super.printTask(task);
// }

// const scores: number[] = [];

// const failed = task.tasks.some((t) => t.result?.state === "fail");

// for (const { meta } of task.tasks) {
// if (meta.evalite) {
// scores.push(
// ...meta.evalite!.results.flatMap((r) =>
// r.scores.map((s) => s.score ?? 0)
// )
// );
// }
// }

// const totalScore = scores.reduce((a, b) => a + b, 0);
// const averageScore = totalScore / scores.length;

// const title = failed ? c.red("✖") : displayScore(averageScore);

// const toLog = [
// ` ${title} `,
// `${task.name} `,
// c.dim(
// `(${task.tasks.length} ${task.tasks.length > 1 ? "evals" : "eval"})`
// ),
// ];

// // if (task.result.duration) {
// // toLog.push(" " + c.dim(`${Math.round(task.result.duration ?? 0)}ms`));
// // }

// this.ctx.logger.log(toLog.join(""));
// }

// override reportTestSummary(files: RunnerTestFile[], errors: unknown[]): void {
// // this.printErrorsSummary(errors); // TODO

// const evals = files.flatMap((file) =>
// file.tasks.filter((task) => task.meta.evalite)
// );

// const scores = evals.flatMap((task) =>
// task.meta.evalite!.results.flatMap((r) => r.scores.map((s) => s.score))
// );

// const totalScore = sum(scores, (score) => score ?? 0);
// const averageScore = totalScore / scores.length;

// const collectTime = files.reduce((a, b) => a + (b.collectDuration || 0), 0);
// const testsTime = files.reduce((a, b) => a + (b.result?.duration || 0), 0);
// const setupTime = files.reduce((a, b) => a + (b.setupDuration || 0), 0);

// const totalDuration = collectTime + testsTime + setupTime;

// const failedTasks = files.filter((file) => {
// return file.tasks.some((task) => task.result?.state === "fail");
// });

// const scoreDisplay =
// failedTasks.length > 0
// ? c.red("✖ ") + c.dim(`(${failedTasks.length} failed)`)
// : displayScore(averageScore);

// this.ctx.logger.log(
// [" ", c.dim("Score"), " ", scoreDisplay].join("")
// );

// this.ctx.logger.log(
// [" ", c.dim("Eval Files"), " ", files.length].join("")
// );

// this.ctx.logger.log(
// [
// " ",
// c.dim("Evals"),
// " ",
// files.reduce((a, b) => a + b.tasks.length, 0),
// ].join("")
// );

// this.ctx.logger.log(
// [" ", c.dim("Duration"), " ", `${Math.round(totalDuration)}ms`].join(
// ""
// )
// );

// if (evals.length === 1 && evals[0]) {
// this.renderTable(
// evals[0].meta.evalite!.results.map((result) => ({
// input: result.input,
// output: result.output,
// score: average(result.scores, (s) => s.score ?? 0),
// }))
// );
// }
// }
/**
 * Prints a one-line summary for a finished test file.
 *
 * The line shows either a red cross (when any test in the file failed) or
 * the average of every score recorded by the file's evalite tests, followed
 * by the file name and the number of top-level tasks in the file.
 *
 * Tasks that are not files, or files that have no final state yet, are
 * ignored. Files where no test carries evalite metadata are delegated to
 * the base reporter.
 *
 * @param file - The task being reported; only tasks with a `filepath` are printed.
 */
protected override printTask(file: RunnerTask): void {
  // Only file-level tasks carry a `filepath`; anything else is skipped.
  if (!("filepath" in file)) {
    return;
  }

  // Nothing to print until the file has a state other than "run".
  const state = file.result?.state;
  if (!state || state === "run") {
    return;
  }

  const tests = getTests(file);

  // No test in this file has evalite metadata: use the default output.
  const anyEvalite = tests.some((t) => t.meta.evalite);
  if (!anyEvalite) {
    return super.printTask(file);
  }

  const hasFailure = tests.some((t) => t.result?.state === "fail");

  // Collect every score from every evalite test; a missing score counts as 0.
  const scores = tests.flatMap((t) =>
    t.meta.evalite ? t.meta.evalite.result.scores.map((s) => s.score ?? 0) : []
  );

  let total = 0;
  for (const value of scores) {
    total += value;
  }
  // NaN when no scores were recorded; passed to displayScore unchanged.
  const mean = total / scores.length;

  const marker = hasFailure ? c.red("✖") : displayScore(mean);
  const evalCount = file.tasks.length;
  const noun = evalCount > 1 ? "evals" : "eval";

  // The file's duration is not part of the printed line (that output is disabled).
  this.ctx.logger.log(
    ` ${marker} ` + `${file.name} ` + c.dim(`(${evalCount} ${noun})`)
  );
}

/**
 * Prints the end-of-run summary: overall score, number of eval files,
 * number of evals, and total duration. When the run covered exactly one
 * file and nothing failed, a table of input / output / score per result is
 * rendered as well.
 *
 * @param files - The test files that were part of this run.
 * @param errors - Unhandled errors from the run (not used by this summary).
 */
override reportTestSummary(files: RunnerTestFile[], errors: unknown[]): void {
  /**
   * These tasks are the actual tests that were run
   */
  const tests = getTests(files);

  // Tests without evalite metadata contribute no scores. Returning `[]`
  // (rather than `undefined`) keeps them out of the array, so they do not
  // inflate `scores.length` and drag the average down.
  const scores = tests.flatMap(
    (test) => test.meta.evalite?.result.scores.map((s) => s.score ?? 0) ?? []
  );

  const totalScore = sum(scores, (score) => score);
  // NaN when no scores were recorded; passed to displayScore unchanged.
  const averageScore = totalScore / scores.length;

  const collectTime = files.reduce((a, b) => a + (b.collectDuration || 0), 0);
  const testsTime = files.reduce((a, b) => a + (b.result?.duration || 0), 0);
  const setupTime = files.reduce((a, b) => a + (b.setupDuration || 0), 0);

  const totalDuration = collectTime + testsTime + setupTime;

  // Files that contain at least one failed top-level task.
  const failedTasks = files.filter((file) => {
    return file.tasks.some((task) => task.result?.state === "fail");
  });

  const scoreDisplay =
    failedTasks.length > 0
      ? c.red("✖ ") + c.dim(`(${failedTasks.length} failed)`)
      : displayScore(averageScore);

  this.ctx.logger.log(
    [" ", c.dim("Score"), " ", scoreDisplay].join("")
  );

  this.ctx.logger.log(
    [" ", c.dim("Eval Files"), " ", files.length].join("")
  );

  this.ctx.logger.log(
    [
      " ",
      c.dim("Evals"),
      " ",
      files.reduce((a, b) => a + b.tasks.length, 0),
    ].join("")
  );

  this.ctx.logger.log(
    [" ", c.dim("Duration"), " ", `${Math.round(totalDuration)}ms`].join(
      ""
    )
  );

  // De-duplicate by path in case the same file is reported more than once.
  const totalFiles = new Set(files.map((f) => f.filepath)).size;

  // The detailed table is only shown for a single, fully passing file.
  if (totalFiles === 1 && failedTasks.length === 0) {
    this.renderTable(
      tests
        // Narrow to tests with evalite metadata without a non-null assertion.
        .flatMap((t) => (t.meta.evalite ? [t.meta.evalite.result] : []))
        .map((result) => ({
          input: result.input,
          output: result.output,
          score: average(result.scores, (s) => s.score ?? 0),
        }))
    );
  }
}

private renderTable(
props: {
Expand Down Expand Up @@ -330,10 +333,13 @@ export default class EvaliteReporter extends BasicReporter {

startingTestFiles.forEach((file) => this.onTestFilePrepare(file));
startingTests.forEach((test) => this.onTestStart(test));

super.onTaskUpdate(packs);
}
}

const displayScore = (score: number) => {
const displayScore = (_score: number) => {
const score = Number.isNaN(_score) ? 0 : _score;
const percentageScore = Math.round(score * 100);
if (percentageScore >= 80) {
return c.bold(c.green(percentageScore + "%"));
Expand Down
Loading

0 comments on commit e304201

Please sign in to comment.