From e2cdac17294323d2afc48c46b1798f80139940c1 Mon Sep 17 00:00:00 2001 From: Matt Pocock Date: Tue, 3 Dec 2024 11:18:38 +0000 Subject: [PATCH] Reapply "Added only, but failing tests" This reverts commit 5c683698444364926790bfb3860cb2b14acff8dc. --- packages/evalite-core/src/index.ts | 8 ++ packages/evalite/src/index.ts | 88 +++++++++++-------- .../src/tests/fixtures/only/only-2.eval.ts | 18 ++++ .../src/tests/fixtures/only/only.eval.ts | 32 +++++++ packages/evalite/src/tests/only.test.ts | 26 ++++++ 5 files changed, 137 insertions(+), 35 deletions(-) create mode 100644 packages/evalite/src/tests/fixtures/only/only-2.eval.ts create mode 100644 packages/evalite/src/tests/fixtures/only/only.eval.ts create mode 100644 packages/evalite/src/tests/only.test.ts diff --git a/packages/evalite-core/src/index.ts b/packages/evalite-core/src/index.ts index 3b8df4b..d91a0cd 100644 --- a/packages/evalite-core/src/index.ts +++ b/packages/evalite-core/src/index.ts @@ -19,6 +19,14 @@ export declare namespace Evalite { duration: number; }; + export interface Runner { + ( + testName: string, + runnerOpts: RunnerOpts + ): void; + only: Runner; + } + export type Score = { /** * A number between 0 and 1. diff --git a/packages/evalite/src/index.ts b/packages/evalite/src/index.ts index 329d5d5..984fd25 100644 --- a/packages/evalite/src/index.ts +++ b/packages/evalite/src/index.ts @@ -1,5 +1,5 @@ import type { Evalite } from "@evalite/core"; -import { inject, it } from "vitest"; +import { inject, it, type Test } from "vitest"; import { reportTraceLocalStorage } from "./traces.js"; declare module "vitest" { @@ -32,46 +32,64 @@ const runTask = async (opts: { }; }; -export const evalite = ( - testName: string, +const runEval = async ( + task: Readonly, opts: Evalite.RunnerOpts ) => { - return it(testName, async ({ task }) => { - if (opts.scorers.length === 0) { - throw new Error("You must provide at least one scorer."); - } + if (opts.scorers.length === 0) { + throw new Error("You must provide at least one scorer."); + } - const traces: Evalite.StoredTrace[] = []; + const traces: Evalite.StoredTrace[] = []; - reportTraceLocalStorage.enterWith((trace) => traces.push(trace)); + reportTraceLocalStorage.enterWith((trace) => traces.push(trace)); - const sourceCodeHash = inject("evaliteInputHash"); + const sourceCodeHash = inject("evaliteInputHash"); - const data = await opts.data(); - const start = performance.now(); - const results = await Promise.all( - data.map(async ({ input, expected }): Promise => { - const { result, scores, duration } = await runTask({ - expected, - input, - scores: opts.scorers, - task: opts.task, - }); + const data = await opts.data(); + const start = performance.now(); + const results = await Promise.all( + data.map(async ({ input, expected }): Promise => { + const { result, scores, duration } = await runTask({ + expected, + input, + scores: opts.scorers, + task: opts.task, + }); - return { - input, - result, - scores, - duration, - expected, - }; - }) - ); - task.meta.evalite = { - results, - duration: Math.round(performance.now() - start), - sourceCodeHash, - traces, - }; + return { + input, + result, + scores, + duration, + expected, + }; + }) + ); + task.meta.evalite = { + results, + duration: Math.round(performance.now() - start), + sourceCodeHash, + traces, + }; +}; + +function evaliteBase( + testName: string, + opts: Evalite.RunnerOpts +) { + return it(testName, async ({ task }) => { + await runEval(task, opts); + }); +} + +evaliteBase.only = function evaliteOnly( + testName: string, + opts: Evalite.RunnerOpts +) { + return it.only(testName, async ({ task }) => { + await runEval(task, opts); }); }; + +export const evalite = evaliteBase as Evalite.Runner; diff --git a/packages/evalite/src/tests/fixtures/only/only-2.eval.ts b/packages/evalite/src/tests/fixtures/only/only-2.eval.ts new file mode 100644 index 0000000..38e2c91 --- /dev/null +++ b/packages/evalite/src/tests/fixtures/only/only-2.eval.ts @@ -0,0 +1,18 @@ +import { evalite } from "../../../index.js"; +import { reportTrace } from "../../../traces.js"; +import { Levenshtein } from "autoevals"; + +evalite("Also Not Run", { + data: () => { + return [ + { + input: "abc", + expected: "abcdef", + }, + ]; + }, + task: async (input) => { + return input + "def"; + }, + scorers: [Levenshtein], +}); diff --git a/packages/evalite/src/tests/fixtures/only/only.eval.ts b/packages/evalite/src/tests/fixtures/only/only.eval.ts new file mode 100644 index 0000000..07a1eab --- /dev/null +++ b/packages/evalite/src/tests/fixtures/only/only.eval.ts @@ -0,0 +1,32 @@ +import { Levenshtein } from "autoevals"; +import { evalite } from "../../../index.js"; + +evalite.only("Only", { + data: () => { + return [ + { + input: "abc", + expected: "abcdef", + }, + ]; + }, + task: async (input) => { + return input + "def"; + }, + scorers: [Levenshtein], +}); + +evalite("Not Run", { + data: () => { + return [ + { + input: "abc", + expected: "abcdef", + }, + ]; + }, + task: async (input) => { + return input + "def"; + }, + scorers: [Levenshtein], +}); diff --git a/packages/evalite/src/tests/only.test.ts b/packages/evalite/src/tests/only.test.ts new file mode 100644 index 0000000..91a99cc --- /dev/null +++ b/packages/evalite/src/tests/only.test.ts @@ -0,0 +1,26 @@ +import { getJsonDbEvals } from "@evalite/core"; +import { assert, expect, it } from "vitest"; +import { runVitest } from "../command.js"; +import { captureStdout, loadFixture } from "./test-utils.js"; + +it.only("Should only run the targeted eval", async () => { + using fixture = loadFixture("only"); + + const captured = captureStdout(); + + await runVitest({ + cwd: fixture.dir, + path: undefined, + testOutputWritable: captured.writable, + }); + + console.log(captured.getOutput()); + + const evals = await getJsonDbEvals({ + dbLocation: fixture.jsonDbLocation, + }); + + expect(evals["Only"]).toBeDefined(); + expect(evals["Not Run"]).toBeUndefined(); + expect(evals["Also Not Run"]).toBeUndefined(); +});