Skip to content

Commit

Permalink
Showing 8 changed files with 610 additions and 31 deletions.
11 changes: 11 additions & 0 deletions apps/evalite-ui/app/components/display-input.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
import ReactMarkdown from "react-markdown";

/**
 * Renders a single arbitrary value (e.g. an eval input, output or expected
 * value) as React content.
 *
 * - Strings are rendered as markdown through `ReactMarkdown`, using the
 *   `prose prose-sm` classes.
 * - Every other value is serialized with `JSON.stringify` (2-space indent)
 *   and placed in a `<pre>` so the indentation and line breaks survive
 *   HTML whitespace collapsing.
 * - Values `JSON.stringify` cannot represent (`undefined`, functions,
 *   symbols) render nothing.
 *
 * @param props.input The value to display; narrowed at runtime.
 * @returns A markdown element, a `<pre>` element, or `null`.
 */
export const DisplayInput = (props: { input: unknown }) => {
  if (typeof props.input === "string") {
    return (
      <ReactMarkdown className="prose prose-sm">{props.input}</ReactMarkdown>
    );
  }

  // The lib typings declare `string`, but at runtime JSON.stringify returns
  // `undefined` for undefined / function / symbol inputs.
  const serialized: string | undefined = JSON.stringify(props.input, null, 2);

  if (serialized === undefined) {
    return null;
  }

  // `whitespace-pre-wrap` keeps the pretty-printed layout while still letting
  // long lines wrap inside a table cell instead of overflowing it.
  return <pre className="whitespace-pre-wrap break-words">{serialized}</pre>;
};
40 changes: 20 additions & 20 deletions apps/evalite-ui/app/components/ui/table.tsx
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import * as React from "react"
import * as React from "react";

import { cn } from "~/lib/utils"
import { cn } from "~/lib/utils";

const Table = React.forwardRef<
HTMLTableElement,
@@ -13,16 +13,16 @@ const Table = React.forwardRef<
{...props}
/>
</div>
))
Table.displayName = "Table"
));
Table.displayName = "Table";

/**
 * Table header section: a ref-forwarding wrapper around `<thead>`.
 *
 * The forwarded `ref` is attached to the `<thead>` element, the caller's
 * `className` is combined with the default `[&_tr]:border-b` class through
 * `cn` (presumably a class-name merge helper — confirm in `~/lib/utils`),
 * and all remaining props are spread onto the element.
 *
 * NOTE(review): this span is a rendered diff. The closing lines appear twice
 * (without and with trailing semicolons) because they are the before/after
 * of a formatting-only change.
 */
const TableHeader = React.forwardRef<
HTMLTableSectionElement,
React.HTMLAttributes<HTMLTableSectionElement>
>(({ className, ...props }, ref) => (
<thead ref={ref} className={cn("[&_tr]:border-b", className)} {...props} />
))
TableHeader.displayName = "TableHeader"
));
TableHeader.displayName = "TableHeader";

const TableBody = React.forwardRef<
HTMLTableSectionElement,
@@ -33,8 +33,8 @@ const TableBody = React.forwardRef<
className={cn("[&_tr:last-child]:border-0", className)}
{...props}
/>
))
TableBody.displayName = "TableBody"
));
TableBody.displayName = "TableBody";

const TableFooter = React.forwardRef<
HTMLTableSectionElement,
@@ -48,8 +48,8 @@ const TableFooter = React.forwardRef<
)}
{...props}
/>
))
TableFooter.displayName = "TableFooter"
));
TableFooter.displayName = "TableFooter";

const TableRow = React.forwardRef<
HTMLTableRowElement,
@@ -63,8 +63,8 @@ const TableRow = React.forwardRef<
)}
{...props}
/>
))
TableRow.displayName = "TableRow"
));
TableRow.displayName = "TableRow";

const TableHead = React.forwardRef<
HTMLTableCellElement,
@@ -78,20 +78,20 @@ const TableHead = React.forwardRef<
)}
{...props}
/>
))
TableHead.displayName = "TableHead"
));
TableHead.displayName = "TableHead";

/**
 * Table body cell: a ref-forwarding wrapper around `<td>`.
 *
 * The forwarded `ref` is attached to the `<td>` element, the caller's
 * `className` is combined with the default classes through `cn`
 * (presumably a class-name merge helper — confirm in `~/lib/utils`), and
 * all remaining props are spread onto the element.
 *
 * NOTE(review): this span is a rendered diff. The two consecutive
 * `className` lines are the before/after of the change: the default
 * vertical alignment class moves from `align-middle` to `align-top`. The
 * duplicated closing lines are the before/after of a semicolon-only
 * formatting change.
 */
const TableCell = React.forwardRef<
HTMLTableCellElement,
React.TdHTMLAttributes<HTMLTableCellElement>
>(({ className, ...props }, ref) => (
<td
ref={ref}
className={cn("p-4 align-middle [&:has([role=checkbox])]:pr-0", className)}
className={cn("p-4 align-top [&:has([role=checkbox])]:pr-0", className)}
{...props}
/>
))
TableCell.displayName = "TableCell"
));
TableCell.displayName = "TableCell";

const TableCaption = React.forwardRef<
HTMLTableCaptionElement,
@@ -102,8 +102,8 @@ const TableCaption = React.forwardRef<
className={cn("mt-4 text-sm text-muted-foreground", className)}
{...props}
/>
))
TableCaption.displayName = "TableCaption"
));
TableCaption.displayName = "TableCaption";

export {
Table,
@@ -114,4 +114,4 @@ export {
TableRow,
TableCell,
TableCaption,
}
};
38 changes: 30 additions & 8 deletions apps/evalite-ui/app/routes/eval.$name.tsx
Original file line number Diff line number Diff line change
@@ -2,6 +2,7 @@ import { getEvalRunsByName } from "@evalite/core/sdk";
import type { MetaFunction } from "@remix-run/node";
import { useLoaderData, type ClientLoaderFunctionArgs } from "@remix-run/react";
import { useContext } from "react";
import { DisplayInput } from "~/components/display-input";
import { InnerPageLayout } from "~/components/page-header";
import { getScoreState, Score } from "~/components/score";
import {
@@ -13,6 +14,7 @@ import {
TableHeader,
TableRow,
} from "~/components/ui/table";
import { cn } from "~/lib/utils";
import { TestServerStateContext } from "~/use-subscribe-to-socket";

export const meta: MetaFunction<typeof clientLoader> = (args) => {
@@ -40,32 +42,52 @@ export default function Page() {

const serverState = useContext(TestServerStateContext);

const showExpectedColumn = evaluation.results.every(
(result) => result.expected !== undefined
);

return (
<InnerPageLayout title={name}>
<Table>
<TableHeader>
<TableRow>
<TableHead>Input</TableHead>
<TableHead>Output</TableHead>
<TableHead>Expected</TableHead>
{firstResult?.scores.map((scorer) => (
<TableHead key={scorer.name}>{scorer.name}</TableHead>
{showExpectedColumn && <TableHead>Expected</TableHead>}
{firstResult?.scores.map((scorer, index) => (
<TableHead
key={scorer.name}
className={cn(index === 0 && "border-l")}
>
{scorer.name}
</TableHead>
))}
</TableRow>
</TableHeader>
<TableBody>
{evaluation.results.map((result) => {
return (
<TableRow key={result.input as any}>
<TableCell>{result.input as any}</TableCell>
<TableCell>{result.result as any}</TableCell>
<TableCell>{result.expected as any}</TableCell>
{result.scores.map((scorer) => {
<TableCell>
<DisplayInput input={result.input} />
</TableCell>
<TableCell>
<DisplayInput input={result.result} />
</TableCell>
{showExpectedColumn && (
<TableCell>
<DisplayInput input={result.expected} />
</TableCell>
)}
{result.scores.map((scorer, index) => {
const scoreInPreviousEvaluation = prevEvaluation?.results
.find((r) => r.input === result.input)
?.scores.find((s) => s.name === scorer.name);
return (
<TableCell key={scorer.name}>
<TableCell
key={scorer.name}
className={cn(index === 0 && "border-l")}
>
<Score
score={scorer.score ?? 0}
isRunning={
2 changes: 2 additions & 0 deletions apps/evalite-ui/package.json
Original file line number Diff line number Diff line change
@@ -31,11 +31,13 @@
"react": "^18.2.0",
"react-day-picker": "8.10.1",
"react-dom": "^18.2.0",
"react-markdown": "^9.0.1",
"tailwind-merge": "^2.5.5",
"tailwindcss-animate": "^1.0.7"
},
"devDependencies": {
"@remix-run/dev": "^2.14.0",
"@tailwindcss/typography": "^0.5.15",
"@types/react": "^18.2.20",
"@types/react-dom": "^18.2.7",
"@typescript-eslint/eslint-plugin": "^6.7.4",
2 changes: 1 addition & 1 deletion apps/evalite-ui/tailwind.config.ts
Original file line number Diff line number Diff line change
@@ -76,5 +76,5 @@ export default {
},
},
},
plugins: [require("tailwindcss-animate")],
plugins: [require("tailwindcss-animate"), require("@tailwindcss/typography")],
} satisfies Config;
2 changes: 1 addition & 1 deletion packages/evalite/src/index.ts
Original file line number Diff line number Diff line change
@@ -76,7 +76,7 @@ export const evalite = <TInput, TExpected>(
});
};

export const createScorer = <TInput, TExpected>(
export const createScorer = <TInput, TExpected = TInput>(
name: string,
scorer: (
input: Evalite.ScoreInput<TInput, TExpected>
8 changes: 7 additions & 1 deletion packages/example/src/content-generation.eval.ts
Original file line number Diff line number Diff line change
@@ -21,6 +21,13 @@ evalite("Content generation", {
{
input: "Write a tweet about TypeScript template literal types.",
},
{
input: 'Write a tweet about "TypeScript is a superset of JavaScript."',
},
{
input:
"Write a short article about whether TypeScript is a linter or not. Use markdown formatting.",
},
];
},
task: async (input) => {
@@ -40,7 +47,6 @@ evalite("Content generation", {
return result.text;
},
scorers: [
Humor,
createScorer("No Hashtags", ({ output }) => {
return output.includes("#") ? 0 : 1;
}),
538 changes: 538 additions & 0 deletions pnpm-lock.yaml

Large diffs are not rendered by default.

0 comments on commit a602536

Please sign in to comment.