Skip to content

Commit

Permalink
Various fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
mattpocock committed Dec 4, 2024
1 parent 66ca1d9 commit b8d4db4
Show file tree
Hide file tree
Showing 9 changed files with 86 additions and 21 deletions.
4 changes: 2 additions & 2 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,5 @@
"^@radix-ui",
"^cmdk$"
],
"typescript.preferences.includePackageJsonAutoImports": "on",
}
"typescript.preferences.includePackageJsonAutoImports": "on"
}
16 changes: 16 additions & 0 deletions apps/evalite-ui/app/components/score.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -46,3 +46,19 @@ export const Score = (props: {
</span>
);
};

/**
 * Classifies a score relative to the score it is being compared against.
 *
 * @param score - The current score.
 * @param prevScore - The score to compare with; `null` or `undefined` when
 *   there is nothing to compare against.
 * @returns `"first"` when `prevScore` is `null`/`undefined`, `"up"` when
 *   `score` is greater, `"down"` when it is smaller, and `"same"` otherwise.
 */
export const getScoreState = (
  score: number,
  prevScore: number | null | undefined
): ScoreState => {
  // `== null` matches both `null` and `undefined`.
  if (prevScore == null) {
    return "first";
  }

  if (score > prevScore) {
    return "up";
  }

  if (score < prevScore) {
    return "down";
  }

  // Neither comparison held (equal values, or a NaN on either side).
  return "same";
};
10 changes: 2 additions & 8 deletions apps/evalite-ui/app/root.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ import {
TestServerStateContext,
useSubscribeToTestServer,
} from "./use-subscribe-to-socket";
import { Score, type ScoreState } from "./components/score";
import { getScoreState, Score, type ScoreState } from "./components/score";

export const links: LinksFunction = () => [
{ rel: "preconnect", href: "https://fonts.googleapis.com" },
Expand Down Expand Up @@ -79,13 +79,7 @@ export const clientLoader = async () => {

const score = mostRecentEval.score;

const state: ScoreState = !secondMostRecentEval
? "first"
: score > secondMostRecentEval.score
? "up"
: score < secondMostRecentEval.score
? "down"
: "same";
const state = getScoreState(score, secondMostRecentEval?.score);
return {
name: key,
state,
Expand Down
17 changes: 12 additions & 5 deletions apps/evalite-ui/app/routes/eval.$name.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import type { MetaFunction } from "@remix-run/node";
import { useLoaderData, type ClientLoaderFunctionArgs } from "@remix-run/react";
import { useContext } from "react";
import { InnerPageLayout } from "~/components/page-header";
import { Score } from "~/components/score";
import { getScoreState, Score } from "~/components/score";
import {
Table,
TableBody,
Expand Down Expand Up @@ -33,9 +33,10 @@ export const clientLoader = async (args: ClientLoaderFunctionArgs) => {
};

export default function Page() {
const { name, evaluation } = useLoaderData<typeof clientLoader>();
const { name, evaluation, prevEvaluation } =
useLoaderData<typeof clientLoader>();

const firstResult = evaluation.results[0]!;
const firstResult = evaluation.results[0];

const serverState = useContext(TestServerStateContext);

Expand All @@ -47,7 +48,7 @@ export default function Page() {
<TableHead>Input</TableHead>
<TableHead>Output</TableHead>
<TableHead>Expected</TableHead>
{firstResult.scores.map((scorer) => (
{firstResult?.scores.map((scorer) => (
<TableHead key={scorer.name}>{scorer.name}</TableHead>
))}
</TableRow>
Expand All @@ -60,6 +61,9 @@ export default function Page() {
<TableCell>{result.result as any}</TableCell>
<TableCell>{result.expected as any}</TableCell>
{result.scores.map((scorer) => {
const scoreInPreviousEvaluation = prevEvaluation?.results
.find((r) => r.input === result.input)
?.scores.find((s) => s.name === scorer.name);
return (
<TableCell key={scorer.name}>
<Score
Expand All @@ -68,7 +72,10 @@ export default function Page() {
serverState.state.type === "running" &&
serverState.state.filepaths.has(evaluation.filepath)
}
state="up" // TODO
state={getScoreState(
scorer.score ?? 0,
scoreInPreviousEvaluation?.score
)}
/>
</TableCell>
);
Expand Down
4 changes: 0 additions & 4 deletions packages/evalite/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,6 @@ export const evalite = <TInput, TExpected>(
opts: Evalite.RunnerOpts<TInput, TExpected>
) => {
return it(testName, async ({ task }) => {
if (opts.scorers.length === 0) {
throw new Error("You must provide at least one scorer.");
}

const traces: Evalite.StoredTrace[] = [];

reportTraceLocalStorage.enterWith((trace) => traces.push(trace));
Expand Down
2 changes: 1 addition & 1 deletion packages/evalite/src/reporter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ export default class EvaliteReporter extends BasicReporter {
// super.onTaskUpdate(packs);
// }

override onWatcherStart(files: RunnerTestFile[], errors?: unknown[]): void {
override onWatcherStart(files?: RunnerTestFile[], errors?: unknown[]): void {
super.onWatcherStart(files, errors);
}

Expand Down
2 changes: 1 addition & 1 deletion packages/evalite/src/run-vitest.ts
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ export const runVitest = async (opts: {
},
}),
],
slowTestThreshold: 30_000,
testTimeout: 30_000,
},
{},
{
Expand Down
48 changes: 48 additions & 0 deletions packages/example/src/content-generation.eval.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import { generateText } from "ai";
import { createScorer, evalite } from "evalite";
import { cacheModel } from "./cache-model";
import { openai } from "@ai-sdk/openai";
import { createStorage } from "unstorage";
import fsDriver from "unstorage/drivers/fs";
import { Humor } from "autoevals";

// Filesystem driver rooted at ./llm-cache.local.
// The `as any` cast on the imported driver factory is kept exactly as the
// original wrote it.
const llmCacheDriver = (fsDriver as any)({ base: "./llm-cache.local" });

// Storage instance handed to `cacheModel` in the eval below.
const storage = createStorage({ driver: llmCacheDriver });

// Registers the "Content generation" eval with three parts: the input data,
// the task that produces an output for each input, and the scorers.
evalite("Content generation", {
// Supplies the inputs asynchronously; these cases define only `input`
// (no `expected` value).
data: async () => {
return [
{
input: "Write a TypeScript tweet",
},
{
input: "Write a tweet about TypeScript template literals types.",
},
];
},
// Generates text for one input: the input is sent as the prompt, together
// with a fixed system prompt, and the generated text is returned.
task: async (input) => {
const result = await generateText({
// NOTE(review): `cacheModel` presumably caches model responses in `storage`
// — confirm against ./cache-model.
model: cacheModel(openai("gpt-4o-mini"), storage),
prompt: input,
system: `
You are a helpful social media assistant.
You will be asked to write a tweet on a given topic.
Return only the tweet.
Do not use emojis.
Do not use hashtags.
Use code examples where required.
`,
});

return result.text;
},
scorers: [
// Scorer imported from the `autoevals` package.
Humor,
// Custom scorer: 0 when the output contains a "#" character anywhere,
// 1 otherwise.
createScorer("No Hashtags", ({ output }) => {
return output.includes("#") ? 0 : 1;
}),
],
});
4 changes: 4 additions & 0 deletions packages/example/src/example.eval.ts
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,10 @@ evalite("Test Capitals", {
input: `Name all the capitals of each part of the UK.`,
expected: `London, Edinburgh, Cardiff, Belfast`,
},
{
input: `What's the capital of Antarctica?`,
expected: `Antarctica has no capital.`,
},
],
task: async (input) => {
const result = await generateText({
Expand Down

0 comments on commit b8d4db4

Please sign in to comment.