From 79d3bf75c0d502a56464a370a47168ba183cdbc5 Mon Sep 17 00:00:00 2001 From: Matt Pocock Date: Tue, 3 Dec 2024 09:42:10 +0000 Subject: [PATCH] Readme --- packages/evalite/readme.md | 75 +++++++++++++++++++++++++++++++- packages/evalite/src/reporter.ts | 4 +- 2 files changed, 76 insertions(+), 3 deletions(-) diff --git a/packages/evalite/readme.md b/packages/evalite/readme.md index cc4264a..4311c3b 100644 --- a/packages/evalite/readme.md +++ b/packages/evalite/readme.md @@ -1,6 +1,79 @@ # Evalite -It feels crazy to me that there's no TypeScript-native, local-first tool for testing LLM-powered apps. +The TypeScript-native, open-source tool for testing LLM-powered apps. + +- Fully open source: **No API Key required** +- Based on Vitest +- Supports + +## Quickstart + +### 1. Install `evalite` and `autoevals`: + +Install `evalite`, and a scoring library like `autoevals`: + +```bash +pnpm add -D evalite autoevals +``` + +### 2. Add an `eval` script: + +Add an `eval` script to your package.json: + +```json +{ + "scripts": { + "eval": "evalite" + } +} +``` + +### 3. Create your first eval: + +Create `my-eval.eval.ts`: + +```ts +// my-eval.eval.ts + +import { evalite } from "evalite"; +import { Levenshtein } from "autoevals"; + +evalite("My Eval", { + // A function that returns an array of test data + // - TODO: Replace with your test data + data: async () => { + return [{ input: "Hello", output: "Hello World!" }]; + }, + // The task to perform + // - TODO: Replace with your LLM call + task: async (input) => { + return input + " World!"; + }, + // The scoring methods for the eval + scorers: [Levenshtein], +}); +``` + +> [!NOTE] +> +> `.eval.ts` is the extension Evalite looks for when scanning for evals. + +### 4. Run Your Eval + +Run `pnpm run eval`. 
+ +This runs `evalite`, which runs the evals: + +- Runs the `data` function to get the test data +- Runs the `task` function on each piece of test data +- Scores the output of the `task` function using the `scorers` + +It then produces: + +- A report of the eval results +- If you only ran one eval, it also shows a table summarizing the eval in the terminal + +## I want a simple test runner that can: diff --git a/packages/evalite/src/reporter.ts b/packages/evalite/src/reporter.ts index 91eed35..0461a10 100644 --- a/packages/evalite/src/reporter.ts +++ b/packages/evalite/src/reporter.ts @@ -1,10 +1,10 @@ import type { RunnerTask, RunnerTestFile, TaskResultPack } from "vitest"; import { BasicReporter } from "vitest/reporters"; -import { appendToJsonDb, DEFAULT_SERVER_PORT } from "@evalite/core"; +import { appendToJsonDb } from "@evalite/core"; +import { table } from "table"; import c from "tinyrainbow"; import { average, sum } from "./utils.js"; -import { table } from "table"; export interface EvaliteReporterOptions { jsonDbLocation: string;