Skip to content

Commit

Permalink
Experiments with tuple task
Browse files Browse the repository at this point in the history
  • Loading branch information
mattpocock committed Dec 2, 2024
1 parent a5263dc commit 4664a9f
Show file tree
Hide file tree
Showing 12 changed files with 57 additions and 43 deletions.
21 changes: 19 additions & 2 deletions packages/evalite-core/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,28 @@ export declare namespace Evalite {
opts: ScoreInput<TExpected>
) => MaybePromise<Score>;

export type RunnerOpts<TInput, TExpected> = {
export type RunnerOpts<
TInput,
TExpected,
TImport extends Record<string, any>,
TKey extends keyof TImport,
> = {
data: () => MaybePromise<{ input: TInput; expected?: TExpected }[]>;
task: (input: TInput) => MaybePromise<TExpected>;
task: TImport[TKey] extends (input: TInput) => MaybePromise<TExpected>
? readonly [Promise<TImport>, TKey]
: ErrorMessageForRunnerOpts<TInput, TExpected, TImport[TKey]>;
scorers: Scorer<TExpected>[];
};

/**
 * Resolves to a string literal type describing why `TFunc` cannot be used as
 * the task for a dataset of `TInput` values with `TExpected` results.
 *
 * Resolution order:
 * 1. `TFunc` accepts `TInput` -> the return type is reported as the mismatch.
 * 2. `TFunc` returns `TExpected` (synchronously or via a promise-like) for
 *    some other input -> the input type is reported as the mismatch.
 * 3. Otherwise -> a generic "does not match" message.
 *
 * @typeParam TInput - Input type produced by the dataset.
 * @typeParam TExpected - Result type the task is expected to produce.
 * @typeParam TFunc - The candidate task function being diagnosed.
 */
export type ErrorMessageForRunnerOpts<
  TInput,
  TExpected,
  TFunc extends (input: any) => any,
> = TFunc extends (input: TInput) => any
  ? "Return type of function does not match expected value of task"
  : // A task is declared as returning `MaybePromise<TExpected>`, so a plain
    // synchronous `TExpected` return must count as a matching return type
    // here too, not only a promise-like one.
    TFunc extends (input: any) => TExpected | PromiseLike<TExpected>
    ? "Input type of task does not match the dataset passed."
    : "The task passed does not match the type definition for a task.";
}

export * from "./json-db.js";
14 changes: 11 additions & 3 deletions packages/evalite-vitest/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,15 @@ const runTask = async <TInput, TExpected>(opts: {
};
};

export const evalite = <TInput, TExpected>(
export const evalite = <
TInput,
TExpected,
TImport extends Record<string, any>,
TKey extends keyof TImport,
TExample = TImport[TKey],
>(
testName: string,
opts: Evalite.RunnerOpts<TInput, TExpected>
opts: Evalite.RunnerOpts<TInput, TExpected, TImport, TKey>
) => {
return it(testName, async ({ task }) => {
if (opts.scorers.length === 0) {
Expand All @@ -43,6 +49,8 @@ export const evalite = <TInput, TExpected>(

const sourceCodeHash = inject("evaliteInputHash");

const resolvedTask = ((await opts.task[0]) as any)[opts.task[1]];

const data = await opts.data();
const start = performance.now();
const results = await Promise.all(
Expand All @@ -51,7 +59,7 @@ export const evalite = <TInput, TExpected>(
expected,
input,
scores: opts.scorers,
task: opts.task,
task: resolvedTask,
});

return {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,6 @@ evalite("Basics", {
},
];
},
task: async (input) => {
// To test whether duration is calculated properly
await setTimeout(10);
return input + "def";
},
task: [import("./basics.js"), "basics"],
scorers: [Levenshtein],
});
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,6 @@ evalite("Failing", {
},
];
},
task: (input) => {
throw new Error("This is a failing test");
},
task: [import("./failing.js"), "failing"],
scorers: [Levenshtein],
});
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
/**
 * Task that unconditionally throws.
 *
 * @throws Error Always, with the message "This is a failing test".
 */
export const failing = (): never => {
  const failure = new Error("This is a failing test");
  throw failure;
};
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,6 @@ evalite("Much Data", {
},
];
},
task: async (input) => {
// To test whether duration is calculated properly
await setTimeout(10);
return input + "def";
},
task: [import("./muchData.js"), "basics"],
scorers: [Levenshtein],
});
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
import { setTimeout } from "timers/promises";

/**
 * Appends "def" to the given input after a short pause.
 *
 * @param input - Text to extend.
 * @returns A promise resolving to `input` followed by "def".
 */
export async function basics(input: string): Promise<string> {
  // The 10ms pause is there to test whether duration is calculated properly.
  const pauseMs = 10;
  await setTimeout(pauseMs);

  return `${input}def`;
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,6 @@ evalite("Multiple 1", {
},
];
},
task: async (input) => {
// To test whether duration is calculated properly
await setTimeout(10);
return input + "def";
},
task: [import("./multi.js"), "multi"],
scorers: [Levenshtein],
});
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,6 @@ evalite("Multiple 2", {
},
];
},
task: async (input) => {
// To test whether duration is calculated properly
await setTimeout(10);
return input + "def";
},
task: [import("./multi.js"), "multi"],
scorers: [Levenshtein],
});
12 changes: 2 additions & 10 deletions packages/evalite-vitest/src/tests/fixtures/multi/multi-3.eval.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,7 @@ evalite("Multiple 3", {
},
];
},
task: async (input) => {
// To test whether duration is calculated properly
await setTimeout(10);
return input + "def";
},
task: [import("./multi.js"), "multi"],
scorers: [Levenshtein],
});

Expand All @@ -27,10 +23,6 @@ evalite("Multiple 4", {
},
];
},
task: async (input) => {
// To test whether duration is calculated properly
await setTimeout(10);
return input + "def";
},
task: [import("./multi.js"), "multi"],
scorers: [Levenshtein],
});
7 changes: 7 additions & 0 deletions packages/evalite-vitest/src/tests/fixtures/multi/multi.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
import { setTimeout } from "timers/promises";

/**
 * Waits briefly, then resolves to the input with "def" appended.
 *
 * @param input - Text to extend.
 * @returns A promise resolving to `input` followed by "def".
 */
export async function multi(input: string): Promise<string> {
  // Delay of 10ms exists to test whether duration is calculated properly.
  await setTimeout(10);

  const suffix = "def";
  return input + suffix;
}
8 changes: 3 additions & 5 deletions packages/evalite-vitest/src/tests/much-data.test.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
import { readFileSync } from "fs";
import path from "path";
import { assert, expect, it } from "vitest";
import { getJsonDbEvals } from "@evalite/core";
import { expect, it } from "vitest";
import { runVitest } from "../command.js";
import { captureStdout, loadFixture } from "./test-utils.js";
import { getJsonDbEvals, getRows } from "@evalite/core";

it("Should report long datasets consistently in the same order", async () => {
using fixture = loadFixture("much-data");
Expand All @@ -20,7 +18,7 @@ it("Should report long datasets consistently in the same order", async () => {
dbLocation: fixture.jsonDbLocation,
});

expect(jsonDbEvals["Much Data"][0]!.results).toMatchObject([
expect(jsonDbEvals["Much Data"]![0]!.results).toMatchObject([
{
input: "first",
},
Expand Down

0 comments on commit 4664a9f

Please sign in to comment.