Skip to content

Update eval_config.json #15

Update eval_config.json

Update eval_config.json #15

Workflow file for this run

name: Evaluate RAG answer flow
on:
issue_comment:
types: [created]
# Set up permissions for deploying with secretless Azure federated credentials
# https://learn.microsoft.com/azure/developer/github/connect-from-azure?tabs=azure-portal%2Clinux#set-up-azure-login-with-openid-connect-authentication
permissions:
id-token: write
contents: read
issues: write
pull-requests: write
jobs:
evaluate:
if: |
github.event.issue.pull_request &&
github.event.comment.body == '/evaluate'
runs-on: ubuntu-latest
env:
UV_SYSTEM_PYTHON: 1
AZURE_CLIENT_ID: ${{ vars.AZURE_CLIENT_ID }}
AZURE_TENANT_ID: ${{ vars.AZURE_TENANT_ID }}
AZURE_SUBSCRIPTION_ID: ${{ vars.AZURE_SUBSCRIPTION_ID }}
AZURE_CREDENTIALS: ${{ secrets.AZURE_CREDENTIALS }}
AZURE_RESOURCE_GROUP: ${{ vars.AZURE_RESOURCE_GROUP }}
POSTGRES_HOST: localhost
POSTGRES_USERNAME: postgres
POSTGRES_PASSWORD: root
POSTGRES_DATABASE: postgres
POSTGRES_SSL: disable
OPENAI_CHAT_HOST: ${{ vars.OPENAI_CHAT_HOST }}
OPENAI_EMBED_HOST: ${{ vars.OPENAI_EMBED_HOST }}
AZURE_OPENAI_ENDPOINT: ${{ vars.AZURE_OPENAI_ENDPOINT }}
AZURE_OPENAI_VERSION: ${{ vars.AZURE_OPENAI_VERSION }}
AZURE_OPENAI_CHAT_DEPLOYMENT: ${{ vars.AZURE_OPENAI_CHAT_DEPLOYMENT }}
AZURE_OPENAI_CHAT_MODEL: ${{ vars.AZURE_OPENAI_CHAT_MODEL }}
AZURE_OPENAI_EMBED_DEPLOYMENT: ${{ vars.AZURE_OPENAI_EMBED_DEPLOYMENT }}
AZURE_OPENAI_EMBED_MODEL: ${{ vars.AZURE_OPENAI_EMBED_MODEL }}
AZURE_OPENAI_EMBED_DIMENSIONS: ${{ vars.AZURE_OPENAI_EMBED_DIMENSIONS }}
AZURE_OPENAI_EMBEDDING_COLUMN: ${{ vars.AZURE_OPENAI_EMBEDDING_COLUMN }}
AZURE_OPENAI_EVAL_DEPLOYMENT: ${{ vars.AZURE_OPENAI_EVAL_DEPLOYMENT }}
AZURE_OPENAI_EVAL_MODEL: ${{ vars.AZURE_OPENAI_EVAL_MODEL }}
steps:
- name: Comment on pull request
uses: actions/github-script@v7
with:
script: |
github.rest.issues.createComment({
issue_number: context.issue.number,
owner: context.repo.owner,
repo: context.repo.repo,
body: "Starting evaluation! Check the Actions tab for progress, or wait for a comment with the results."
})
- name: Checkout pull request
uses: actions/checkout@v4
with:
ref: refs/pull/${{ github.event.issue.number }}/head
- name: Install pgvector
run: |
sudo /usr/share/postgresql-common/pgdg/apt.postgresql.org.sh -y
sudo apt-get install postgresql-14-pgvector
- name: Start postgres
run: sudo systemctl start postgresql
- name: Set password for postgres user
run: sudo -u postgres psql -c "ALTER USER postgres PASSWORD 'root'"
- name: Create pgvector extension
run: sudo -u postgres psql -c 'CREATE EXTENSION vector'
- name: Install python
uses: actions/setup-python@v5
with:
python-version: '3.12'
- name: Install uv
uses: astral-sh/setup-uv@v3
with:
enable-cache: true
version: "0.4.20"
cache-dependency-glob: "requirements**.txt"
- name: Install azd
uses: Azure/[email protected]
- name: Login to Azure
uses: azure/login@v2
with:
client-id: ${{ env.AZURE_CLIENT_ID }}
tenant-id: ${{ env.AZURE_TENANT_ID }}
subscription-id: ${{ env.AZURE_SUBSCRIPTION_ID }}
- name: Set az account
uses: azure/CLI@v2
with:
inlineScript: |
az account set --subscription ${{env.AZURE_SUBSCRIPTION_ID}}
- name: Log in with Azure (Federated Credentials)
if: ${{ env.AZURE_CLIENT_ID != '' }}
run: |
azd auth login `
--client-id "$Env:AZURE_CLIENT_ID" `
--federated-credential-provider "github" `
--tenant-id "$Env:AZURE_TENANT_ID"
shell: pwsh
- name: Install dependencies
run: |
uv pip install -r requirements-dev.txt
- name: Install app as editable app
run: |
uv pip install -e src/backend
- name: Setup local database with seed data
run: |
python ./src/backend/fastapi_app/setup_postgres_database.py
python ./src/backend/fastapi_app/setup_postgres_seeddata.py
- name: Setup node
uses: actions/setup-node@v4
with:
node-version: 18
- name: Build frontend
run: |
cd ./src/frontend
npm install
npm run build
- name: Run local server in background
run: |
RUNNER_TRACKING_ID="" && (nohup python3 -m uvicorn fastapi_app:create_app --factory > serverlogs.out 2> serverlogs.err &)
- name: Install evaluate dependencies
run: |
uv pip install -r evals/requirements.txt
- name: Evaluate local RAG flow
run: |
python evals/evaluate.py --targeturl=http://127.0.0.1:8000/chat --resultsdir=evals/results/pr${{ github.event.issue.number }}
- name: Upload server logs as build artifact
uses: actions/upload-artifact@v4
with:
name: server_logs
path: ./serverlogs.out
- name: Upload server error logs as build artifact
uses: actions/upload-artifact@v4
with:
name: server_error_logs
path: ./serverlogs.err
- name: Upload eval results as build artifact
uses: actions/upload-artifact@v4
with:
name: eval_result
path: ./evals/results/pr${{ github.event.issue.number }}
- name: Summarize results
if: ${{ success() }}
run: |
echo "## Evaluation results" >> eval-summary.md
python -m evaltools summary evals/results --output=markdown >> eval-summary.md
echo "## Answer differences across runs" >> run-diff.md
python -m evaltools diff evals/results/baseline evals/results/pr${{ github.event.issue.number }} --output=markdown >> run-diff.md
cat eval-summary.md >> $GITHUB_STEP_SUMMARY
cat run-diff.md >> $GITHUB_STEP_SUMMARY
- name: Comment on pull request
uses: actions/github-script@v7
with:
script: |
const fs = require('fs');
const summaryPath = "eval-summary.md";
const summary = fs.readFileSync(summaryPath, 'utf8');
const runId = process.env.GITHUB_RUN_ID;
const repo = process.env.GITHUB_REPOSITORY;
const actionsUrl = `https://github.com/${repo}/actions/runs/${runId}`;
github.rest.issues.createComment({
issue_number: context.issue.number,
owner: context.repo.owner,
repo: context.repo.repo,
body: `${summary}\n\n[Check the workflow run for more details](${actionsUrl}).`
})