Skip to content

Commit

Permalink
use braintrust gh action
Browse files Browse the repository at this point in the history
  • Loading branch information
Filip Michalsky committed Sep 24, 2024
1 parent 09ab563 commit a7596e2
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 25 deletions.
15 changes: 11 additions & 4 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,18 @@ jobs:
- name: Install Playwright browsers
run: pnpm exec playwright install --with-deps

- name: Run Evals
- name: Compile scripts
run: pnpm build-dom-scripts

- name: Run Braintrust Evals
uses: braintrustdata/eval-action@v1
with:
api_key: ${{ secrets.BRAINTRUST_API_KEY }}
runtime: node
root: '.'
paths: 'evals/index.eval.ts'
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
BRAINTRUST_API_KEY: ${{ secrets.BRAINTRUST_API_KEY }}
HEADLESS: true
run: pnpm evals
HEADLESS: 'true'
timeout-minutes: 12
28 changes: 7 additions & 21 deletions evals/index.eval.ts
Original file line number Diff line number Diff line change
Expand Up @@ -219,27 +219,13 @@ const exactMatch = (args: { input; output; expected? }) => {
Eval("stagehand", {
data: () => {
return [
{
input: {
name: "vanta",
},
},
{
input: {
name: "vanta_h",
},
},
{
input: {
name: "peeler_simple",
},
},
{
input: { name: "wikipedia" },
},
{ input: { name: "peeler_complex" } },
{ input: { name: "costar" } },
{ input: { name: "google_jobs" } },
{ input: { name: "vanta" } },
{ input: { name: "vanta_h" } },
{ input: { name: "peeler_simple" } },
// { input: { name: "wikipedia" } },
// { input: { name: "peeler_complex" } },
// { input: { name: "costar" } },
// { input: { name: "google_jobs" } }
];
},
task: async (input) => {
Expand Down

0 comments on commit a7596e2

Please sign in to comment.