Skip to content

Commit

Permalink
Added durations
Browse files Browse the repository at this point in the history
  • Loading branch information
Matt Pocock committed Nov 12, 2024
1 parent 7494973 commit d4ab048
Show file tree
Hide file tree
Showing 8 changed files with 770 additions and 28 deletions.
5 changes: 5 additions & 0 deletions packages/evalite-vitest/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ export declare namespace Evalite {
input: unknown;
result: unknown;
scores: Score[];
duration: number;
};

export type TaskReport = {
Expand Down Expand Up @@ -54,7 +55,9 @@ export const evalite = <T>(testName: string, opts: Evalite.RunnerOpts<T>) => {
}
task.meta.evalite = { results: [] };
for (const { input, expected } of await opts.data()) {
const start = performance.now();
const result = await opts.task(input);
const duration = Math.round(performance.now() - start);

const scores: {
score: number;
Expand All @@ -69,12 +72,14 @@ export const evalite = <T>(testName: string, opts: Evalite.RunnerOpts<T>) => {
input,
result,
scores,
duration,
});

task.file.meta.evalite.results.push({
input,
result,
scores,
duration,
});
}
});
Expand Down
80 changes: 76 additions & 4 deletions packages/evalite-vitest/src/reporter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,15 @@ import { BasicReporter } from "vitest/reporters";
import type { Evalite } from "./index.js";

import c from "tinyrainbow";
import { writeFile } from "fs/promises";

/**
 * Adds up the number that `fn` extracts from every item of `arr`.
 *
 * Any falsy value returned by `fn` (`undefined`, `0`, `NaN`) contributes `0`,
 * so a single missing or non-numeric value cannot turn the total into `NaN`.
 *
 * @param arr - Items to total; an empty array yields `0`.
 * @param fn - Extracts the number to add for one item.
 * @returns The total of the extracted values.
 */
export const sum = <T>(
  arr: readonly T[],
  fn: (item: T) => number | undefined
): number => {
  // `||` (not `??`) is deliberate: it also absorbs NaN, not just undefined.
  return arr.reduce((total, item) => total + (fn(item) || 0), 0);
};

/**
 * Arithmetic mean of the numbers that `fn` extracts from the items of `arr`.
 *
 * Items for which `fn` returns a falsy value still count towards the divisor
 * (they contribute `0` to the total, see `sum`).
 *
 * @param arr - Items to average.
 * @param fn - Extracts the number to average for one item.
 * @returns The mean, or `0` when `arr` is empty.
 */
export const average = <T>(
  arr: readonly T[],
  fn: (item: T) => number | undefined
): number => {
  // Without this guard an empty array evaluates 0 / 0 = NaN, which
  // JSON.stringify serializes as `null` in the written report.
  if (arr.length === 0) {
    return 0;
  }

  return sum(arr, fn) / arr.length;
};

export default class EvaliteReporter extends BasicReporter {
override onInit(ctx: any): void {
Expand All @@ -17,23 +26,78 @@ export default class EvaliteReporter extends BasicReporter {
) => {
const data: Evalite.TaskReport[] = [];

/** One task's entry in the JSON report that is written to `./report.json`. */
type ReadableTask = {
  /** Name of the task (`task.name`). */
  task: string;
  /** Mean over the task's results of each result's mean scorer value. */
  score: number;
  /** Sum of the `duration` values of every result in `results`. */
  duration: number;
  /** One entry per result recorded for the task, each with its own duration. */
  results: Array<{
    input: unknown;
    result: unknown;
    scores: Evalite.Score[];
    duration: number;
  }>;
};

/** One test file's entry in the JSON report that is written to `./report.json`. */
type ReadableFile = {
  /** Name of the file (`file.name`). */
  file: string;
  /** Mean of the per-task scores across every task in the file. */
  score: number;
  /** Report entries for the tasks of the file, in iteration order. */
  tasks: Array<ReadableTask>;
};

const readableReports: ReadableFile[] = [];

for (const file of files) {
const report: ReadableFile = {
file: file.name,
score: average(file.tasks, (task) => {
return average(task.meta.evalite?.results || [], (t) => {
return average(t.scores, (s) => s.score);
});
}),
tasks: [],
};
for (const task of file.tasks) {
const readableTask: ReadableTask = {
task: task.name,
score: average(task.meta.evalite?.results || [], (t) => {
return average(t.scores, (s) => s.score);
}),
duration: sum(task.meta.evalite?.results || [], (t) => t.duration),
results: [],
};

if (task.meta.evalite) {
for (const { input, result, scores } of task.meta.evalite.results) {
for (const { input, result, scores, duration } of task.meta.evalite
.results) {
data.push({
file: file.name,
task: task.name,
input,
result,
scores,
});

readableTask.results.push({
input,
result,
scores,
duration,
});
}
}

report.tasks.push(readableTask);
}

readableReports.push(report);
}

// this.ctx.logger.log("TODO: Report Run");
await writeFile(
"./report.json",
JSON.stringify(readableReports, null, 2),
"utf-8"
);

super.onFinished(files, errors);
};

Expand All @@ -50,7 +114,13 @@ export default class EvaliteReporter extends BasicReporter {

const scores: number[] = [];

for (const { meta } of task.tasks) {
let failed = false;

for (const { meta, result } of task.tasks) {
if ((result?.errors?.length || 0) > 0) {
failed = true;
break;
}
if (meta.evalite) {
scores.push(
...meta.evalite!.results.flatMap((r) => r.scores.map((s) => s.score))
Expand All @@ -64,8 +134,10 @@ export default class EvaliteReporter extends BasicReporter {
const color =
averageScore >= 80 ? c.green : averageScore >= 50 ? c.yellow : c.red;

const title = failed ? c.red("✖") : c.bold(color(averageScore + "%"));

const toLog = [
` ${c.bold(color(averageScore + "%"))} `,
` ${title} `,
`${task.name} `,
c.dim(
`(${task.tasks.length} ${task.tasks.length > 1 ? "evals" : "eval"})`
Expand Down
5 changes: 4 additions & 1 deletion packages/example/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@
"./reporter": "./dist/reporter.js"
},
"dependencies": {
"evalite-vitest": "workspace:*"
"evalite-vitest": "workspace:*",
"ai": "^3.4.33",
"@ai-sdk/openai": "0.0.72",
"dotenv": "^16.4.5"
}
}
70 changes: 70 additions & 0 deletions packages/example/report.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
[
{
"file": "src/index.test.ts",
"score": 1,
"tasks": [
{
"task": "Add 'world' to end of pharse",
"score": 1,
"duration": 1997,
"results": [
{
"input": "Hello",
"result": "Hello World",
"scores": [
{
"name": "Levenshtein",
"score": 1
}
],
"duration": 545
},
{
"input": "Hello Mr",
"result": "Hello Mr World",
"scores": [
{
"name": "Levenshtein",
"score": 1
}
],
"duration": 298
},
{
"input": "World",
"result": "World World",
"scores": [
{
"name": "Levenshtein",
"score": 1
}
],
"duration": 451
},
{
"input": "World World World World",
"result": "World World World World World",
"scores": [
{
"name": "Levenshtein",
"score": 1
}
],
"duration": 443
},
{
"input": "",
"result": "World",
"scores": [
{
"name": "Levenshtein",
"score": 1
}
],
"duration": 260
}
]
}
]
}
]
45 changes: 42 additions & 3 deletions packages/example/src/index.test.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import { openai } from "@ai-sdk/openai";
import { generateText } from "ai";
import { evalite, Levenshtein } from "evalite-vitest";

evalite("Testing my LLM awesomeness", {
evalite("Add 'world' to end of pharse", {
// Replace with your dataset
data: async () => [
{
Expand All @@ -9,12 +11,49 @@ evalite("Testing my LLM awesomeness", {
},
{
input: "Hello Mr",
expected: "Hello Mr World!!",
expected: "Hello Mr World",
},
{
input: "World",
expected: "World World",
},
{
input: "World World World World",
expected: "World World World World World",
},
{
input: "",
expected: "World",
},
],
// Replace with your LLM call
task: async (input) => {
return input + " World";
const result = await generateText({
model: openai("gpt-3.5-turbo"),
system: `
<instructions>Add "World" to the end of the input.</instructions>
<instructions>When an empty prompt is encountered, return "World".</instructions>
<example>
<input>Interesting</input>
<output>Interesting World</output>
</example>
<example>
<input>World</input>
<output>World World</output>
</example>
<example>
<input></input>
<output>World</output>
</example>
<example>
<input>This is the best place in the</input>
<output>This is the best place in the World</output>
</example>
`,
prompt: input,
});

return result.text;
},
scores: [Levenshtein],
});
18 changes: 0 additions & 18 deletions packages/example/src/longer-test-name.test.ts

This file was deleted.

1 change: 1 addition & 0 deletions packages/example/vite.config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,6 @@ import { defineConfig } from "vitest/config";
export default defineConfig({
test: {
reporters: ["evalite-vitest/reporter"],
setupFiles: ["dotenv/config"],
},
});
Loading

0 comments on commit d4ab048

Please sign in to comment.