Skip to content

Commit

Permalink
Reapply "Added only, but failing tests"
Browse files Browse the repository at this point in the history
This reverts commit 5c68369.
  • Loading branch information
mattpocock committed Dec 3, 2024
1 parent 5c68369 commit e2cdac1
Show file tree
Hide file tree
Showing 5 changed files with 137 additions and 35 deletions.
8 changes: 8 additions & 0 deletions packages/evalite-core/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,14 @@ export declare namespace Evalite {
duration: number;
};

/**
 * Callable used to register an eval, plus an `only` variant of the same call.
 */
export interface Runner {
  /**
   * Registers an eval.
   *
   * @param testName Name the eval is registered under.
   * @param runnerOpts Options describing the eval, typed by its input and
   * expected values.
   */
  <TInput, TExpected>(
    testName: string,
    runnerOpts: RunnerOpts<TInput, TExpected>
  ): void;
  /**
   * Same call signature as the runner itself, but without a nested `only`.
   *
   * Typing this member as `Runner` would let `runner.only.only(...)`
   * type-check even when the function assigned to `only` is a plain function
   * with no `only` property of its own.
   */
  only: <TInput, TExpected>(
    testName: string,
    runnerOpts: RunnerOpts<TInput, TExpected>
  ) => void;
}

export type Score = {
/**
* A number between 0 and 1.
Expand Down
88 changes: 53 additions & 35 deletions packages/evalite/src/index.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import type { Evalite } from "@evalite/core";
import { inject, it } from "vitest";
import { inject, it, type Test } from "vitest";
import { reportTraceLocalStorage } from "./traces.js";

declare module "vitest" {
Expand Down Expand Up @@ -32,46 +32,64 @@ const runTask = async <TInput, TExpected>(opts: {
};
};

/**
 * Runs a single eval and stores its outcome on the given Vitest task.
 *
 * Every datum returned by `opts.data()` is passed through `runTask`, with all
 * runs started together via `Promise.all`. The per-datum results, the total
 * elapsed time in whole milliseconds, the injected `evaliteInputHash` and the
 * traces collected while running are written to `task.meta.evalite`.
 *
 * @param task Vitest task whose `meta.evalite` receives the outcome.
 * @param opts Data loader, task function and scorers for the eval.
 * @throws Error when `opts.scorers` is empty.
 */
const runEval = async <TInput, TExpected>(
  task: Readonly<Test>,
  opts: Evalite.RunnerOpts<TInput, TExpected>
) => {
  if (opts.scorers.length === 0) {
    throw new Error("You must provide at least one scorer.");
  }

  const traces: Evalite.StoredTrace[] = [];

  // Register the collector before any task code runs so every reported trace
  // lands in `traces`.
  reportTraceLocalStorage.enterWith((trace) => traces.push(trace));

  const sourceCodeHash = inject("evaliteInputHash");

  const data = await opts.data();
  const start = performance.now();
  const results = await Promise.all(
    data.map(async ({ input, expected }): Promise<Evalite.Result> => {
      const { result, scores, duration } = await runTask({
        expected,
        input,
        scores: opts.scorers,
        task: opts.task,
      });

      return {
        input,
        result,
        scores,
        duration,
        expected,
      };
    })
  );
  task.meta.evalite = {
    results,
    duration: Math.round(performance.now() - start),
    sourceCodeHash,
    traces,
  };
};

/**
 * Registers an eval as a Vitest test named `testName`.
 *
 * The test body hands the Vitest task and `opts` to `runEval`.
 *
 * @param testName Name of the Vitest test to register.
 * @param opts Options forwarded unchanged to `runEval`.
 * @returns Whatever `it` returns for the registered test.
 */
function evaliteBase<TInput, TExpected>(
  testName: string,
  opts: Evalite.RunnerOpts<TInput, TExpected>
) {
  // The test resolves once `runEval` settles.
  return it(testName, ({ task }) => runEval(task, opts));
}

/**
 * `only` variant of `evaliteBase`: registers the eval through `it.only`
 * instead of `it`, with the same test body.
 *
 * @param testName Name of the Vitest test to register.
 * @param opts Options forwarded unchanged to `runEval`.
 * @returns Whatever `it.only` returns for the registered test.
 */
evaliteBase.only = function evaliteOnly<TInput, TExpected>(
  testName: string,
  opts: Evalite.RunnerOpts<TInput, TExpected>
) {
  // The test resolves once `runEval` settles.
  return it.only(testName, ({ task }) => runEval(task, opts));
};

// Public entry point: `evaliteBase` (with its `only` member assigned above),
// exposed under the `Evalite.Runner` type via a type assertion.
export const evalite = evaliteBase as Evalite.Runner;
18 changes: 18 additions & 0 deletions packages/evalite/src/tests/fixtures/only/only-2.eval.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import { evalite } from "../../../index.js";
import { reportTrace } from "../../../traces.js";
import { Levenshtein } from "autoevals";

// Fixture eval registered without `.only`: one datum whose task appends "def"
// to the input, scored with Levenshtein.
evalite("Also Not Run", {
  data: () => [{ input: "abc", expected: "abcdef" }],
  task: async (input) => input + "def",
  scorers: [Levenshtein],
});
32 changes: 32 additions & 0 deletions packages/evalite/src/tests/fixtures/only/only.eval.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import { Levenshtein } from "autoevals";
import { evalite } from "../../../index.js";

// Fixture eval registered through `evalite.only`: one datum whose task
// appends "def" to the input, scored with Levenshtein.
evalite.only("Only", {
  data: () => [{ input: "abc", expected: "abcdef" }],
  task: async (input) => input + "def",
  scorers: [Levenshtein],
});

// Fixture eval registered without `.only`, in the same file as the
// `evalite.only` call: one datum whose task appends "def" to the input,
// scored with Levenshtein.
evalite("Not Run", {
  data: () => [{ input: "abc", expected: "abcdef" }],
  task: async (input) => input + "def",
  scorers: [Levenshtein],
});
26 changes: 26 additions & 0 deletions packages/evalite/src/tests/only.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import { getJsonDbEvals } from "@evalite/core";
import { assert, expect, it } from "vitest";
import { runVitest } from "../command.js";
import { captureStdout, loadFixture } from "./test-utils.js";

// Runs Vitest against the `only` fixture directory and checks that the stored
// evals contain "Only" but neither "Not Run" nor "Also Not Run".
//
// Registered with plain `it`: this is the only test in the file, so focusing
// it adds nothing, and a committed `it.only` makes the run fail when Vitest's
// `allowOnly` option is disabled.
it("Should only run the targeted eval", async () => {
  using fixture = loadFixture("only");

  // Vitest's output is routed into this capture via `testOutputWritable`.
  const captured = captureStdout();

  await runVitest({
    cwd: fixture.dir,
    path: undefined,
    testOutputWritable: captured.writable,
  });

  const evals = await getJsonDbEvals({
    dbLocation: fixture.jsonDbLocation,
  });

  expect(evals["Only"]).toBeDefined();
  expect(evals["Not Run"]).toBeUndefined();
  expect(evals["Also Not Run"]).toBeUndefined();
});

0 comments on commit e2cdac1

Please sign in to comment.