diff --git a/apps/evalite-docs/astro.config.mjs b/apps/evalite-docs/astro.config.mjs index 7818e15..dd3a58c 100644 --- a/apps/evalite-docs/astro.config.mjs +++ b/apps/evalite-docs/astro.config.mjs @@ -8,18 +8,42 @@ export default defineConfig({ title: "Evalite", social: { github: "https://github.com/mattpocock/evalite", + discord: "https://mattpocock.com/ai-discord", }, sidebar: [ + { + label: "Getting Started", + items: [ + { + label: "Quickstart", + slug: "quickstart", + }, + ], + }, { label: "Guides", items: [ - // Each item here is one entry in the navigation menu. - { label: "Example Guide", slug: "guides/example" }, + { + label: "Environment Variables", + slug: "guides/environment-variables", + }, + { + label: "Scorers", + slug: "guides/scorers", + }, + { + label: "Traces", + slug: "guides/traces", + }, + { + label: "Streams", + slug: "guides/streams", + }, ], }, { - label: "Reference", - autogenerate: { directory: "reference" }, + label: "Examples", + items: [{ label: "AI SDK", slug: "examples/ai-sdk" }], }, ], }), diff --git a/apps/evalite-docs/src/content/docs/examples/ai-sdk.md b/apps/evalite-docs/src/content/docs/examples/ai-sdk.md new file mode 100644 index 0000000..c8adb17 --- /dev/null +++ b/apps/evalite-docs/src/content/docs/examples/ai-sdk.md @@ -0,0 +1,47 @@ +--- +title: AI SDK +--- + +Vercel's [AI SDK](https://sdk.vercel.ai/docs/introduction) is a great way to get started with AI in your apps. + +It abstracts away the differences between different AI providers, so you can **switch between them easily**. + +Here's how it might look with Evalite: + +```ts +// my-eval.eval.ts + +import { openai } from "@ai-sdk/openai"; +import { streamText } from "ai"; +import { Factuality, Levenshtein } from "autoevals"; +import { evalite } from "evalite"; +import { traceAISDKModel } from "evalite/ai-sdk"; + +evalite("Test Capitals", { + data: async () => [ + { + input: `What's the capital of France?`, + expected: `Paris`, + }, + { + input: `What's the capital of Germany?`, + expected: `Berlin`, + }, + ], + task: async (input) => { + const result = await streamText({ + model: traceAISDKModel(openai("gpt-4o-mini")), + system: ` + Answer the question concisely. Answer in as few words as possible. + Remove full stops from the end of the output. + If the country has no capital, return ' has no capital'. + If the country does not exist, return 'Unknown'. + `, + prompt: input, + }); + + return result.textStream; + }, + scorers: [Factuality, Levenshtein], +}); +``` diff --git a/apps/evalite-docs/src/content/docs/guides/environment-variables.mdx b/apps/evalite-docs/src/content/docs/guides/environment-variables.mdx new file mode 100644 index 0000000..946f38b --- /dev/null +++ b/apps/evalite-docs/src/content/docs/guides/environment-variables.mdx @@ -0,0 +1,51 @@ +--- +title: Environment Variables +--- + +import { Steps } from "@astrojs/starlight/components"; + +To call your LLM from a third-party service, you'll likely need some environment variables to keep your API keys safe. + +Since **Evalite is based on Vitest**, it should already pick them up from your `vite.config.ts`. + +## Setting Up Env Variables + +If you don't have Vitest set up, here's how to do it: + + + +1. Create a `.env` file in the root of your project: + + ``` + OPENAI_API_KEY=your-api-key + ``` + +2. Add `.env` to your `.gitignore`, if it's not already there + + ``` + .env + ``` + +3. Install `dotenv`: + + ```bash + pnpm add -D dotenv + ``` + +4. 
Add a `vite.config.ts` file:
+
+   ```ts
+   // vite.config.ts
+
+   import { defineConfig } from "vitest/config";
+
+   export default defineConfig({
+     test: {
+       setupFiles: ["dotenv/config"],
+     },
+   });
+   ```
+
+
+
+Now, your environment variables will be available in your evals.
diff --git a/apps/evalite-docs/src/content/docs/guides/example.md b/apps/evalite-docs/src/content/docs/guides/example.md
deleted file mode 100644
index ebd0f3b..0000000
--- a/apps/evalite-docs/src/content/docs/guides/example.md
+++ /dev/null
@@ -1,11 +0,0 @@
----
-title: Example Guide
-description: A guide in my new Starlight docs site.
----
-
-Guides lead a user through a specific task they want to accomplish, often with a sequence of steps.
-Writing a good guide requires thinking about what your users are trying to do.
-
-## Further reading
-
-- Read [about how-to guides](https://diataxis.fr/how-to-guides/) in the Diátaxis framework
diff --git a/apps/evalite-docs/src/content/docs/guides/scorers.mdx b/apps/evalite-docs/src/content/docs/guides/scorers.mdx
new file mode 100644
index 0000000..e14d343
--- /dev/null
+++ b/apps/evalite-docs/src/content/docs/guides/scorers.mdx
@@ -0,0 +1,64 @@
+---
+title: Scorers
+---
+
+import { Aside } from "@astrojs/starlight/components";
+
+Scorers are used to score the output of your LLM call.
+
+[Autoevals](https://github.com/braintrustdata/autoevals) is a great library of scorers to get you started.
+
+You can create your own using `createScorer`:
+
+```ts
+import { createScorer, evalite } from "evalite";
+
+const containsParis = createScorer({
+  name: "Contains Paris",
+  description: "Checks if the output contains the word 'Paris'.",
+  score: (output) => {
+    return output.includes("Paris") ? 1 : 0;
+  },
+});
+
+evalite("My Eval", {
+  data: async () => {
+    return [{ input: "Hello", expected: "Hello World!" }];
+  },
+  task: async (input) => {
+    return input + " World!";
+  },
+  scorers: [containsParis],
+});
+```
+
+The `name` and `description` of the scorer will be displayed in the Evalite UI.
+
+## Scorer Metadata
+
+You can provide metadata along with your custom scorer:
+
+```ts
+import { createScorer } from "evalite";
+
+const containsParis = createScorer({
+  name: "Contains Paris",
+  description: "Checks if the output contains the word 'Paris'.",
+  score: (output) => {
+    return {
+      score: output.includes("Paris") ? 1 : 0,
+      metadata: {
+        // Can be anything!
+      },
+    };
+  },
+});
+```
+
+This will be visible along with the score in the Evalite UI.
+
+
diff --git a/apps/evalite-docs/src/content/docs/guides/streams.md b/apps/evalite-docs/src/content/docs/guides/streams.md
new file mode 100644
index 0000000..cc6f32b
--- /dev/null
+++ b/apps/evalite-docs/src/content/docs/guides/streams.md
@@ -0,0 +1,28 @@
+---
+title: Streams
+---
+
+You can handle streams in Evalite by returning any async iterable (including a `ReadableStream`) from your task.
+This means you can easily test functions like the AI SDK's `streamText`:
+
+```ts
+import { evalite } from "evalite";
+import { streamText } from "ai";
+import { openai } from "@ai-sdk/openai";
+import { Factuality } from "autoevals";
+
+evalite("My Eval", {
+  data: async () => {
+    return [{ input: "What is the capital of France?", expected: "Paris" }];
+  },
+  task: async (input) => {
+    const result = await streamText({
+      model: openai("your-model"),
+      system: `Answer the question concisely.`,
+      prompt: input,
+    });
+
+    return result.textStream;
+  },
+  scorers: [Factuality],
+});
+```
diff --git a/apps/evalite-docs/src/content/docs/guides/traces.mdx b/apps/evalite-docs/src/content/docs/guides/traces.mdx
new file mode 100644
index 0000000..3309373
--- /dev/null
+++ b/apps/evalite-docs/src/content/docs/guides/traces.mdx
@@ -0,0 +1,81 @@
+---
+title: Traces
+---
+
+import { Aside } from "@astrojs/starlight/components";
+
+Traces are used to track the behaviour of each individual call to an LLM inside your task.
+
+## `reportTrace`
+
+You can report a trace by calling `reportTrace` inside an `evalite` eval:
+
+```ts
+import { evalite } from "evalite";
+import { reportTrace } from "evalite/evals";
+import { Levenshtein } from "autoevals";
+
+evalite("My Eval", {
+  data: async () => {
+    return [{ input: "Hello", expected: "Hello World!" }];
+  },
+  task: async (input) => {
+    // Track the start time
+    const start = performance.now();
+
+    // Call our LLM
+    const result = await myLLMCall();
+
+    // Report the trace once it's finished
+    reportTrace({
+      start,
+      end: performance.now(),
+      output: result.output,
+      input: [
+        {
+          role: "user",
+          content: input,
+        },
+      ],
+      usage: {
+        completionTokens: result.completionTokens,
+        promptTokens: result.promptTokens,
+      },
+    });
+
+    // Return the output
+    return result.output;
+  },
+  scorers: [Levenshtein],
+});
+```
+
+
+## `traceAISDKModel`
+
+If you're using the [Vercel AI SDK](https://sdk.vercel.ai/docs/introduction), you can automatically report traces by wrapping your model in the `traceAISDKModel` function:
+
+```ts
+import { traceAISDKModel } from "evalite/ai-sdk";
+import { generateText } from "ai";
+import { openai } from "@ai-sdk/openai";
+
+// All calls to this model will be recorded in evalite!
+const tracedModel = traceAISDKModel(openai("gpt-4o-mini"));
+
+const result = await generateText({
+  model: tracedModel,
+  system: `Answer the question concisely.`,
+  prompt: `What is the capital of France?`,
+});
+```
+
+
diff --git a/apps/evalite-docs/src/content/docs/quickstart.mdx b/apps/evalite-docs/src/content/docs/quickstart.mdx
new file mode 100644
index 0000000..63db40a
--- /dev/null
+++ b/apps/evalite-docs/src/content/docs/quickstart.mdx
@@ -0,0 +1,92 @@
+---
+title: Quickstart
+description: Set up Evalite in an existing project.
+---
+
+import { Aside, Steps } from "@astrojs/starlight/components";
+
+We're going to walk through setting up Evalite in an existing project.
+
+
+
+1. Install `evalite`, `vitest`, and a scoring library like `autoevals`:
+
+   ```bash
+   pnpm add -D evalite vitest autoevals
+   ```
+
+2. Add an `eval:dev` script:
+
+   Add an `eval:dev` script to your `package.json`:
+
+   ```json
+   {
+     "scripts": {
+       "eval:dev": "evalite watch"
+     }
+   }
+   ```
+
+3.
Create your first eval: + + Create `my-eval.eval.ts`: + + ```ts + // my-eval.eval.ts + + import { evalite } from "evalite"; + import { Levenshtein } from "autoevals"; + + evalite("My Eval", { + // A function that returns an array of test data + // - TODO: Replace with your test data + data: async () => { + return [{ input: "Hello", expected: "Hello World!" }]; + }, + // The task to perform + // - TODO: Replace with your LLM call + task: async (input) => { + return input + " World!"; + }, + // The scoring methods for the eval + scorers: [Levenshtein], + }); + ``` + + + +4. Run Your Eval + + Run `pnpm run eval:dev`. + + ```bash + pnpm run eval:dev + ``` + + This runs `evalite`, which runs the evals: + + - Runs the `data` function to get the test data + - Runs the `task` function on each test data + - Scores the output of the `task` function using the `scorers` + - Saves the results to a sqlite database in `node_modules/.evalite` + + It then: + + - Shows a UI for viewing the traces, scores, inputs and outputs at http://localhost:3006. + - If you only ran one eval, it also shows a table summarizing the eval in the terminal. + +5. View Your Eval + + Open http://localhost:3006 in your browser to view the results of the eval. + + + +### What Next? + +Head to the [AI SDK example](/examples/ai-sdk) to see a fully-fleshed out example of Evalite in action. diff --git a/apps/evalite-docs/src/content/docs/reference/example.md b/apps/evalite-docs/src/content/docs/reference/example.md deleted file mode 100644 index 0224f09..0000000 --- a/apps/evalite-docs/src/content/docs/reference/example.md +++ /dev/null @@ -1,11 +0,0 @@ ---- -title: Example Reference -description: A reference page in my new Starlight docs site. ---- - -Reference pages are ideal for outlining how things work in terse and clear terms. -Less concerned with telling a story or addressing a specific use case, they should give a comprehensive outline of what you're documenting. - -## Further reading - -- Read [about reference](https://diataxis.fr/reference/) in the Diátaxis framework diff --git a/package.json b/package.json index 71de5b6..392cac1 100644 --- a/package.json +++ b/package.json @@ -10,6 +10,7 @@ "private": true, "scripts": { "dev": "turbo watch dev", + "docs:dev": "turbo watch dev --filter=evalite-docs", "wsl:dev": "pnpm run -r --parallel dev", "ci": "turbo build test lint after-build", "build": "turbo build after-build", diff --git a/packages/evalite/readme.md b/packages/evalite/readme.md index 6e5f1b8..925722b 100644 --- a/packages/evalite/readme.md +++ b/packages/evalite/readme.md @@ -121,201 +121,3 @@ This also works for `watch` mode: ```bash evalite watch my-eval.eval.ts ``` - -### Environment Variables - -To call your LLM from a third-party service, you'll likely need some environment variables to keep your API keys safe. - -Since Evalite is based on Vitest, it should already pick them up from your `vite.config.ts`. - -If you don't have Vitest set up, here's how to do it: - -1. Create a `.env` file in the root of your project: - -``` -OPENAI_API_KEY=your-api-key -``` - -2. Add `.env` to your `.gitignore`, if it's not already there - -``` -.env -``` - -3. Install `dotenv`: - -```bash -pnpm add -D dotenv -``` - -4. Add a `vite.config.ts` file: - -```ts -// vite.config.ts - -import { defineConfig } from "vite/config"; - -export default defineConfig({ - test: { - setupFiles: ["dotenv/config"], - }, -}); -``` - -Now, your environment variables will be available in your evals. 
- -### Scorers - -Scorers are used to score the output of your LLM call. - -[Autoevals](https://github.com/braintrustdata/autoevals) is a great library of scorers to get you started. - -You can create your own using `createScorer`: - -```ts -import { createScorer } from "evalite"; - -const containsParis = createScorer({ - name: "Contains Paris", - description: "Checks if the output contains the word 'Paris'.", - score: (output) => { - return output.includes("Paris") ? 1 : 0; - }, -}); - -evalite("My Eval", { - data: async () => { - return [{ input: "Hello", output: "Hello World!" }]; - }, - task: async (input) => { - return input + " World!"; - }, - scorers: [containsParis], -}); -``` - -#### Metadata - -You can provide metadata along with your custom scorer: - -```ts -import { createScorer } from "evalite"; - -const containsParis = createScorer({ - name: "Contains Paris", - description: "Checks if the output contains the word 'Paris'.", - score: (output) => { - return { - score: output.includes("Paris") ? 1 : 0, - metadata: { - // Can be anything! - }, - }; - }, -}); -``` - -This will be visible along with the score in the Evalite UI. - -> [!TIP] -> -> This is especially useful for debugging LLM-as-a-judge evals. In autoevals `Factuality` scorer, the metadata will include a rationale for why the scorer gave the score it did. - -### Traces - -Traces are used to track the behaviour of each individual call to an LLM inside your task. - -You can report a trace by calling `reportTrace` inside an `evalite` eval: - -```ts -import { evalite, type Evalite } from "evalite"; -import { reportTrace } from "evalite/evals"; - -evalite("My Eval", { - data: async () => { - return [{ input: "Hello", expected: "Hello World!" }]; - }, - task: async (input) => { - // Track the start time - const start = performance.now(); - - // Call our LLM - const result = await myLLMCall(); - - // Report the trace once it's finished - reportTrace({ - start, - end: performance.now(), - output: result.output, - input: [ - { - role: "user", - content: input, - }, - ], - usage: { - completionTokens: result.completionTokens, - promptTokens: result.promptTokens, - }, - }); - - // Return the output - return result.output; - }, - scorers: [Levenshtein], -}); -``` - -> [!NOTE] -> -> `reportTrace` is a no-op in production, so you can leave it in your code without worrying about performance. - -#### Reporting Traces Automatically - -If you're using the [Vercel AI SDK](https://sdk.vercel.ai/docs/introduction), you can automatically report traces by wrapping your model in `traceAISDKModel` function: - -```ts -import { traceAISDKModel } from "evalite/ai-sdk"; -import { generateText } from "ai"; -import { openai } from "@ai-sdk/openai"; - -// All calls to this model will be recorded in evalite! -const tracedModel = traceAISDKModel(openai("gpt-4o-mini")); - -const result = await generateText({ - model: tracedModel, - system: `Answer the question concisely.`, - prompt: `What is the capital of France?`, -}); -``` - -> [!NOTE] -> -> `traceAISDKModel`, like `reportTrace`, is a no-op in production. - -### Streams - -You can handle streams in Evalite by returning any async iterable (including a `ReadableStream`) from your task. 
This means you can test functions like the AI SDK `streamText` function easily: - -```ts -import { evalite } from "evalite"; -import { streamText } from "ai"; -import { openai } from "@ai-sdk/openai"; -import { Factuality } from "autoevals"; - -evalite("My Eval", { - data: async () => { - return [{ input: "What is the capital of France?", expected: "Paris" }]; - }, - task: async (input) => { - const result = await streamText({ - model: openai("your-model"), - system: `Answer the question concisely.`, - prompt: input, - }); - - return result.textStream; - }, - scorers: [Factuality], -}); -```