Skip to content

Commit

Permalink
Merge pull request #1 from mattpocock/matt/only
Browse files Browse the repository at this point in the history
Matt/only
  • Loading branch information
mattpocock authored Dec 3, 2024
2 parents ab6850f + 72d5e7f commit 90c98a0
Show file tree
Hide file tree
Showing 11 changed files with 312 additions and 39 deletions.
2 changes: 2 additions & 0 deletions .husky/pre-commit
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Keep the repository-root readme in sync with the package readme:
# copy packages/evalite/readme.md over readme.md, then stage the copy
# so it is part of the commit this hook runs for.
cp packages/evalite/readme.md readme.md
git add readme.md
6 changes: 4 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@
"ci": "turbo build test lint",
"build": "turbo build",
"release": "pnpm run ci && changeset publish",
"test-example": "cd packages/example && evalite"
"test-example": "cd packages/example && evalite",
"prepare": "husky"
},
"keywords": [],
"author": "Matt Pocock",
Expand All @@ -26,7 +27,8 @@
"tsx": "^4.19.0",
"turbo": "2.3.3",
"typescript": "5.6.2",
"vitest": "^2.0.5"
"vitest": "^2.0.5",
"husky": "^9.1.7"
},
"resolutions": {
"typescript": "5.6.2"
Expand Down
8 changes: 8 additions & 0 deletions packages/evalite-core/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,14 @@ export declare namespace Evalite {
duration: number;
};

/**
 * Callable shape of the eval registration function.
 *
 * Called with a test name and the runner options; returns nothing.
 */
export interface Runner {
// Call signature: registers an eval under `testName` with the given options.
<TInput, TExpected>(
testName: string,
runnerOpts: RunnerOpts<TInput, TExpected>
): void;
// Property exposing a function with the same call signature.
// NOTE(review): typed recursively as Runner, so `runner.only.only`
// type-checks — confirm the implementation actually provides it.
only: Runner;
}

export type Score = {
/**
* A number between 0 and 1.
Expand Down
4 changes: 2 additions & 2 deletions packages/evalite-core/src/json-db.ts
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ export const appendToJsonDb = async (opts: {
const jsonDbTask: JsonDBEval = {
name: task.name,
score: average(task.meta.evalite?.results || [], (t) => {
return average(t.scores, (s) => s.score);
return average(t.scores, (s) => s.score ?? 0);
}),
duration: task.meta.evalite?.duration ?? 0,
results: [],
Expand All @@ -58,7 +58,7 @@ export const appendToJsonDb = async (opts: {
expected,
scores,
duration,
score: average(scores, (s) => s.score),
score: average(scores, (s) => s.score ?? 0),
traces: task.meta.evalite.traces,
});
}
Expand Down
2 changes: 2 additions & 0 deletions packages/evalite/readme.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
<!-- packages/evalite/readme.md is the source of truth for this file -->

# Evalite

The TypeScript-native, open-source tool for testing LLM-powered apps.
Expand Down
88 changes: 53 additions & 35 deletions packages/evalite/src/index.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import type { Evalite } from "@evalite/core";
import { inject, it } from "vitest";
import { inject, it, type Test } from "vitest";
import { reportTraceLocalStorage } from "./traces.js";

declare module "vitest" {
Expand Down Expand Up @@ -32,46 +32,64 @@ const runTask = async <TInput, TExpected>(opts: {
};
};

export const evalite = <TInput, TExpected>(
testName: string,
const runEval = async <TInput, TExpected>(
task: Readonly<Test>,
opts: Evalite.RunnerOpts<TInput, TExpected>
) => {
return it(testName, async ({ task }) => {
if (opts.scorers.length === 0) {
throw new Error("You must provide at least one scorer.");
}
if (opts.scorers.length === 0) {
throw new Error("You must provide at least one scorer.");
}

const traces: Evalite.StoredTrace[] = [];
const traces: Evalite.StoredTrace[] = [];

reportTraceLocalStorage.enterWith((trace) => traces.push(trace));
reportTraceLocalStorage.enterWith((trace) => traces.push(trace));

const sourceCodeHash = inject("evaliteInputHash");
const sourceCodeHash = inject("evaliteInputHash");

const data = await opts.data();
const start = performance.now();
const results = await Promise.all(
data.map(async ({ input, expected }): Promise<Evalite.Result> => {
const { result, scores, duration } = await runTask({
expected,
input,
scores: opts.scorers,
task: opts.task,
});
const data = await opts.data();
const start = performance.now();
const results = await Promise.all(
data.map(async ({ input, expected }): Promise<Evalite.Result> => {
const { result, scores, duration } = await runTask({
expected,
input,
scores: opts.scorers,
task: opts.task,
});

return {
input,
result,
scores,
duration,
expected,
};
})
);
task.meta.evalite = {
results,
duration: Math.round(performance.now() - start),
sourceCodeHash,
traces,
};
return {
input,
result,
scores,
duration,
expected,
};
})
);
task.meta.evalite = {
results,
duration: Math.round(performance.now() - start),
sourceCodeHash,
traces,
};
};

/**
 * Registers an eval as a vitest test named `testName`.
 *
 * The test body passes the vitest task and `opts` to `runEval` and waits
 * for it to finish.
 */
function evaliteBase<TInput, TExpected>(
  testName: string,
  opts: Evalite.RunnerOpts<TInput, TExpected>
) {
  return it(testName, async (context) => {
    await runEval(context.task, opts);
  });
}

/**
 * Same registration as `evaliteBase`, but through vitest's `it.only`.
 *
 * The test body passes the vitest task and `opts` to `runEval` and waits
 * for it to finish.
 */
evaliteBase.only = function evaliteOnly<TInput, TExpected>(
  testName: string,
  opts: Evalite.RunnerOpts<TInput, TExpected>
) {
  return it.only(testName, async (context) => {
    await runEval(context.task, opts);
  });
};

/**
 * Public `evalite` entry point: `evaliteBase` (with the `only` property
 * assigned to it above) exposed under the `Evalite.Runner` type.
 */
export const evalite = evaliteBase as Evalite.Runner;
18 changes: 18 additions & 0 deletions packages/evalite/src/tests/fixtures/only/only-2.eval.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import { evalite } from "../../../index.js";
import { reportTrace } from "../../../traces.js";
import { Levenshtein } from "autoevals";

// Fixture eval registered without `.only`, in a separate file from the
// `.only` eval. only.test.ts expects no "Also Not Run" entry to be recorded.
evalite("Also Not Run", {
  data: () => [{ input: "abc", expected: "abcdef" }],
  task: async (input) => input + "def",
  scorers: [Levenshtein],
});
32 changes: 32 additions & 0 deletions packages/evalite/src/tests/fixtures/only/only.eval.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import { Levenshtein } from "autoevals";
import { evalite } from "../../../index.js";

// Fixture eval registered through `evalite.only`. only.test.ts expects
// the "Only" entry to be recorded.
evalite.only("Only", {
  data: () => [{ input: "abc", expected: "abcdef" }],
  task: async (input) => input + "def",
  scorers: [Levenshtein],
});

// Fixture eval registered without `.only`, in the same file as the `.only`
// eval. only.test.ts expects no "Not Run" entry to be recorded.
evalite("Not Run", {
  data: () => [{ input: "abc", expected: "abcdef" }],
  task: async (input) => input + "def",
  scorers: [Levenshtein],
});
26 changes: 26 additions & 0 deletions packages/evalite/src/tests/only.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import { getJsonDbEvals } from "@evalite/core";
import { assert, expect, it } from "vitest";
import { runVitest } from "../command.js";
import { captureStdout, loadFixture } from "./test-utils.js";

// Runs the "only" fixture through vitest and checks that only the eval
// registered with `evalite.only` is written to the JSON db.
//
// Registered with plain `it`: a committed `it.only` makes the run fail when
// vitest's `allowOnly` is false (its default when the CI env var is set).
it("Should only run the targeted eval", async () => {
  // `using` disposes the fixture when the test scope exits.
  using fixture = loadFixture("only");

  // Capture vitest's output in a writable so it is not sent to the real stdout.
  const captured = captureStdout();

  await runVitest({
    cwd: fixture.dir,
    path: undefined,
    testOutputWritable: captured.writable,
  });

  const evals = await getJsonDbEvals({
    dbLocation: fixture.jsonDbLocation,
  });

  // Only the eval registered via `evalite.only` should have been recorded.
  expect(evals["Only"]).toBeDefined();
  expect(evals["Not Run"]).toBeUndefined();
  expect(evals["Also Not Run"]).toBeUndefined();
});
10 changes: 10 additions & 0 deletions pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 90c98a0

Please sign in to comment.