Skip to content

Commit

Permalink
Documented tracing
Browse files Browse the repository at this point in the history
  • Loading branch information
mattpocock committed Dec 3, 2024
1 parent 79d3bf7 commit 8eae50f
Show file tree
Hide file tree
Showing 8 changed files with 111 additions and 30 deletions.
17 changes: 11 additions & 6 deletions packages/evalite-core/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ export declare namespace Evalite {
results: Result[];
duration: number | undefined;
sourceCodeHash: string;
traces: Trace[];
traces: StoredTrace[];
};

export type Scorer<TExpected> = (
Expand All @@ -46,15 +46,20 @@ export declare namespace Evalite {
scorers: Scorer<TExpected>[];
};

export interface Trace {
export interface UserProvidedTrace {
prompt: TracePrompt[];
usage: {
promptTokens: number;
completionTokens: number;
};
usage:
| {
promptTokens: number;
completionTokens: number;
}
| undefined;
output: string;
start: number;
end: number;
}

/**
 * A trace in the form evalite keeps it: every field of `UserProvidedTrace`
 * plus the elapsed `duration`.
 */
export interface StoredTrace extends UserProvidedTrace {
/** Elapsed time of the trace; `reportTrace` fills this in as `end - start`. */
duration: number;
}

Expand Down
2 changes: 1 addition & 1 deletion packages/evalite-core/src/json-db.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ export type JsonDbRun = {
scores: Evalite.Score[];
duration: number;
score: number;
traces: Evalite.Trace[];
traces: Evalite.UserProvidedTrace[];
};

export const appendToJsonDb = async (opts: {
Expand Down
4 changes: 3 additions & 1 deletion packages/evalite/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@
},
"exports": {
".": "./dist/index.js",
"./reporter": "./dist/reporter.js"
"./reporter": "./dist/reporter.js",
"./traces": "./dist/traces.js",
"./ai-sdk": "./dist/ai-sdk.js"
},
"dependencies": {
"table": "^6.8.2",
Expand Down
89 changes: 77 additions & 12 deletions packages/evalite/readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ The TypeScript-native, open-source tool for testing LLM-powered apps.

- Fully open source: **No API Key required**
- Based on Vitest
- Supports
- Supports tracing and custom scorers

## Quickstart

Expand Down Expand Up @@ -67,22 +67,87 @@ This runs `evalite`, which runs the evals:
- Runs the `data` function to get the test data
- Runs the `task` function on each test data
- Scores the output of the `task` function using the `scorers`
- Appends the result of the eval to a `evalite-report.jsonl` file

It then produces:
It then:

- A report of the
- If you only ran one eval, it also shows table summarizing the eval in the terminal
- Shows a UI for viewing the traces, scores, inputs and outputs at http://localhost:3006.
- If you only ran one eval, it also shows a table summarizing the eval in the terminal.

##
### 5. View Your Eval

I want a simple test runner that can:
Open http://localhost:3006 in your browser to view the results of the eval.

-Run my evals on a watch script
-Show me a UI for viewing traces, scores, inputs and outputs
-Not need me to sign up for an API key
## Guides

So, I'm building one.
### Traces

It's based on Vitest, and it's called Evalite.
Traces are used to track the behaviour of each individual call to an LLM inside your task.

Here's an [early preview](https://www.aihero.dev/evalite-an-early-preview).
You can report a trace by calling `reportTrace` inside an `evalite` eval:

```ts
import { evalite, type Evalite } from "evalite";
import { reportTrace } from "evalite/traces";

evalite("My Eval", {
data: async () => {
return [{ input: "Hello", output: "Hello World!" }];
},
task: async (input) => {
// Track the start time
const start = performance.now();

// Call our LLM
const result = await myLLMCall();

// Report the trace once it's finished
reportTrace({
start,
end: performance.now(),
output: result.output,
prompt: [
{
role: "user",
content: input,
},
],
usage: {
completionTokens: result.completionTokens,
promptTokens: result.promptTokens,
},
});

// Return the output
return result.output;
},
scorers: [Levenshtein],
});
```

> [!NOTE]
>
> `reportTrace` is a no-op unless `NODE_ENV` is `"test"` (i.e. outside of an eval run), so you can leave it in your production code without worrying about performance.
#### Reporting Traces Automatically

If you're using the [Vercel AI SDK](https://sdk.vercel.ai/docs/introduction), you can automatically report traces by wrapping your model in the `traceAISDKModel` function:

```ts
import { traceAISDKModel } from "evalite/ai-sdk";
import { generateText } from "ai";
import { openai } from "@ai-sdk/openai";

// All calls to this model will be recorded in evalite!
const tracedModel = traceAISDKModel(openai("gpt-3.5-turbo"));

const result = await generateText({
model: tracedModel,
system: `Answer the question concisely.`,
prompt: `What is the capital of France?`,
});
```

> [!NOTE]
>
> `traceAISDKModel`, like `reportTrace`, is a no-op in production.
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import { experimental_wrapLanguageModel, type LanguageModelV1 } from "ai";
import { reportTrace } from "./traces.js";
import { reportTrace, shouldReportTrace } from "./traces.js";

export const traceAISDKModel = (model: LanguageModelV1) => {
export const traceAISDKModel = (model: LanguageModelV1): LanguageModelV1 => {
if (!shouldReportTrace()) return model;
return experimental_wrapLanguageModel({
model,
middleware: {
Expand All @@ -23,7 +24,7 @@ export const traceAISDKModel = (model: LanguageModelV1) => {
const content = prompt.content.map((content) => {
if (content.type !== "text") {
throw new Error(
`Unsupported content type: ${content.type}. Only text is currently supported.`
`Unsupported content type: ${content.type}. Only text is currently supported by traceAISDKModel.`
);
}

Expand All @@ -39,7 +40,6 @@ export const traceAISDKModel = (model: LanguageModelV1) => {
};
}),
usage: generated.usage,
duration: end - start,
start,
end,
});
Expand Down
2 changes: 1 addition & 1 deletion packages/evalite/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ export const evalite = <TInput, TExpected>(
throw new Error("You must provide at least one scorer.");
}

const traces: Evalite.Trace[] = [];
const traces: Evalite.StoredTrace[] = [];

reportTraceLocalStorage.enterWith((trace) => traces.push(trace));

Expand Down
2 changes: 0 additions & 2 deletions packages/evalite/src/tests/fixtures/traces/traces.eval.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import { evalite, Levenshtein, reportTrace } from "../../../index.js";
import { setTimeout } from "node:timers/promises";

evalite("Traces", {
data: () => {
Expand All @@ -12,7 +11,6 @@ evalite("Traces", {
},
task: async (input) => {
reportTrace({
duration: 100,
start: 0,
end: 100,
output: "abcdef",
Expand Down
17 changes: 14 additions & 3 deletions packages/evalite/src/traces.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,18 @@ import type { Evalite } from "@evalite/core";
import { AsyncLocalStorage } from "async_hooks";

export const reportTraceLocalStorage = new AsyncLocalStorage<
(trace: Evalite.Trace) => void
(trace: Evalite.StoredTrace) => void
>();

export const reportTrace = (trace: Evalite.Trace) => {
/**
 * Tells callers whether traces should be reported right now.
 *
 * @returns `true` only when `process.env.NODE_ENV` is exactly `"test"`;
 * `false` for any other value, including when the variable is unset.
 */
export const shouldReportTrace = (): boolean => {
  const { NODE_ENV: nodeEnv } = process.env;
  return nodeEnv === "test";
};

export const reportTrace = (trace: Evalite.UserProvidedTrace): void => {
if (!shouldReportTrace()) {
return;
}

const _reportTrace = reportTraceLocalStorage.getStore();

if (!_reportTrace) {
Expand All @@ -14,5 +22,8 @@ export const reportTrace = (trace: Evalite.Trace) => {
);
}

_reportTrace(trace);
_reportTrace({
...trace,
duration: trace.end - trace.start,
});
};

0 comments on commit 8eae50f

Please sign in to comment.