From a7596e225bd56d26a4e7e6753785504161908fc7 Mon Sep 17 00:00:00 2001 From: Filip Michalsky Date: Mon, 23 Sep 2024 21:35:20 -0400 Subject: [PATCH] use braintrust gh action --- .github/workflows/ci.yml | 15 +++++++++++---- evals/index.eval.ts | 28 +++++++--------------------- 2 files changed, 18 insertions(+), 25 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d689f372..db91038b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -28,11 +28,18 @@ jobs: - name: Install Playwright browsers run: pnpm exec playwright install --with-deps - - name: Run Evals + - name: Compile scripts + run: pnpm build-dom-scripts + + - name: Run Braintrust Evals + uses: braintrustdata/eval-action@v1 + with: + api_key: ${{ secrets.BRAINTRUST_API_KEY }} + runtime: node + root: '.' + paths: 'evals/index.eval.ts' env: OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} - BRAINTRUST_API_KEY: ${{ secrets.BRAINTRUST_API_KEY }} - HEADLESS: true - run: pnpm evals + HEADLESS: 'true' timeout-minutes: 12 diff --git a/evals/index.eval.ts b/evals/index.eval.ts index 4903b255..bce4597e 100644 --- a/evals/index.eval.ts +++ b/evals/index.eval.ts @@ -219,27 +219,13 @@ const exactMatch = (args: { input; output; expected? }) => { Eval("stagehand", { data: () => { return [ - { - input: { - name: "vanta", - }, - }, - { - input: { - name: "vanta_h", - }, - }, - { - input: { - name: "peeler_simple", - }, - }, - { - input: { name: "wikipedia" }, - }, - { input: { name: "peeler_complex" } }, - { input: { name: "costar" } }, - { input: { name: "google_jobs" } }, + { input: { name: "vanta" } }, + { input: { name: "vanta_h" } }, + { input: { name: "peeler_simple" } }, + // { input: { name: "wikipedia" } }, + // { input: { name: "peeler_complex" } }, + // { input: { name: "costar" } }, + // { input: { name: "google_jobs" } } ]; }, task: async (input) => {