# Update eval_config.json #2
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
name: Evaluate RAG answer flow

# Triggered whenever a new comment is created on an issue or pull request;
# the job-level `if` decides whether the comment is an evaluation request.
on:
  issue_comment:
    types: [created]
# Set up permissions for deploying with secretless Azure federated credentials
# https://learn.microsoft.com/azure/developer/github/connect-from-azure?tabs=azure-portal%2Clinux#set-up-azure-login-with-openid-connect-authentication
permissions:
  # Lets the workflow request an OIDC token for the federated Azure login.
  id-token: write
  contents: read
  # The workflow posts comments through the issues API.
  issues: write
  pull-requests: write
jobs:
  # Evaluates the RAG answer flow of a pull request: sets up a local
  # PostgreSQL + pgvector database, starts the app locally, runs the
  # evaluation scripts against it, and posts a summary comment on the PR.
  evaluate:
    # Run only when the comment is exactly "/evaluate", was made on a pull
    # request (issue_comment also fires for plain issues), and the commenter
    # has one of the listed associations. fromJSON() turns the list into a
    # real array so contains() checks exact membership instead of searching
    # for a substring inside a string literal.
    # NOTE(review): this job checks out and executes pull request code with
    # `id-token: write` and Azure settings in scope - confirm that CONTRIBUTOR
    # should be allowed to trigger it.
    if: |
      contains(fromJSON('["OWNER", "CONTRIBUTOR", "COLLABORATOR", "MEMBER"]'), github.event.comment.author_association) &&
      github.event.issue.pull_request &&
      github.event.comment.body == '/evaluate'
    runs-on: ubuntu-latest
    env:
      UV_SYSTEM_PYTHON: "1"
      AZURE_CLIENT_ID: ${{ vars.AZURE_CLIENT_ID }}
      AZURE_TENANT_ID: ${{ vars.AZURE_TENANT_ID }}
      AZURE_SUBSCRIPTION_ID: ${{ vars.AZURE_SUBSCRIPTION_ID }}
      AZURE_CREDENTIALS: ${{ secrets.AZURE_CREDENTIALS }}
      AZURE_RESOURCE_GROUP: ${{ vars.AZURE_RESOURCE_GROUP }}
      # Connection settings for the PostgreSQL server started on the runner;
      # the password matches the one set in "Set password for postgres user".
      POSTGRES_HOST: localhost
      POSTGRES_USERNAME: postgres
      POSTGRES_PASSWORD: root
      POSTGRES_DATABASE: postgres
      POSTGRES_SSL: disable
      OPENAI_CHAT_HOST: ${{ vars.OPENAI_CHAT_HOST }}
      OPENAI_EMBED_HOST: ${{ vars.OPENAI_EMBED_HOST }}
      AZURE_OPENAI_ENDPOINT: ${{ vars.AZURE_OPENAI_ENDPOINT }}
      AZURE_OPENAI_VERSION: ${{ vars.AZURE_OPENAI_VERSION }}
      AZURE_OPENAI_CHAT_DEPLOYMENT: ${{ vars.AZURE_OPENAI_CHAT_DEPLOYMENT }}
      AZURE_OPENAI_CHAT_MODEL: ${{ vars.AZURE_OPENAI_CHAT_MODEL }}
      AZURE_OPENAI_EMBED_DEPLOYMENT: ${{ vars.AZURE_OPENAI_EMBED_DEPLOYMENT }}
      AZURE_OPENAI_EMBED_MODEL: ${{ vars.AZURE_OPENAI_EMBED_MODEL }}
      AZURE_OPENAI_EMBED_DIMENSIONS: ${{ vars.AZURE_OPENAI_EMBED_DIMENSIONS }}
      AZURE_OPENAI_EMBEDDING_COLUMN: ${{ vars.AZURE_OPENAI_EMBEDDING_COLUMN }}
      AZURE_OPENAI_EVAL_DEPLOYMENT: ${{ vars.AZURE_OPENAI_EVAL_DEPLOYMENT }}
      AZURE_OPENAI_EVAL_MODEL: ${{ vars.AZURE_OPENAI_EVAL_MODEL }}
    steps:
      # Acknowledge the request right away so the commenter knows it started.
      - name: Comment on pull request
        uses: actions/github-script@v7
        with:
          # Await the API call so a failed request fails the step instead of
          # surfacing as an unhandled promise rejection.
          script: |
            await github.rest.issues.createComment({
              issue_number: context.issue.number,
              owner: context.repo.owner,
              repo: context.repo.repo,
              body: "Starting evaluation! Check the Actions tab for progress, or wait for a comment with the results."
            })

      # issue_comment runs in the context of the default branch, so the pull
      # request head has to be checked out explicitly.
      - name: Checkout pull request
        uses: actions/checkout@v4
        with:
          ref: refs/pull/${{ github.event.issue.number }}/head
          # Later steps execute code from the pull request; do not leave the
          # workflow token in the local git configuration for it to read.
          persist-credentials: false

      - name: Install pgvector
        run: |
          sudo /usr/share/postgresql-common/pgdg/apt.postgresql.org.sh -y
          sudo apt-get install -y postgresql-14-pgvector

      - name: Start postgres
        run: sudo systemctl start postgresql

      - name: Set password for postgres user
        run: sudo -u postgres psql -c "ALTER USER postgres PASSWORD 'root'"

      - name: Create pgvector extension
        run: sudo -u postgres psql -c 'CREATE EXTENSION vector'

      - name: Install python
        uses: actions/setup-python@v5
        with:
          python-version: '3.12'

      - name: Install uv
        uses: astral-sh/setup-uv@v3
        with:
          enable-cache: true
          version: "0.4.20"
          cache-dependency-glob: "requirements**.txt"

      # NOTE(review): the action reference was garbled in the source
      # ("Azure/[email protected]"); restored as Azure/setup-azd@v1.0.0 -
      # confirm the intended version tag.
      - name: Install azd
        uses: Azure/setup-azd@v1.0.0

      - name: Login to Azure
        uses: azure/login@v2
        with:
          client-id: ${{ env.AZURE_CLIENT_ID }}
          tenant-id: ${{ env.AZURE_TENANT_ID }}
          subscription-id: ${{ env.AZURE_SUBSCRIPTION_ID }}

      - name: Set az account
        uses: azure/CLI@v2
        with:
          inlineScript: |
            az account set --subscription ${{ env.AZURE_SUBSCRIPTION_ID }}

      # Runs under PowerShell, hence the backtick line continuations and the
      # $Env: variable syntax.
      - name: Log in with Azure (Federated Credentials)
        if: ${{ env.AZURE_CLIENT_ID != '' }}
        run: |
          azd auth login `
            --client-id "$Env:AZURE_CLIENT_ID" `
            --federated-credential-provider "github" `
            --tenant-id "$Env:AZURE_TENANT_ID"
        shell: pwsh

      - name: Install dependencies
        run: |
          uv pip install -r requirements-dev.txt

      - name: Install app as editable app
        run: |
          uv pip install -e src/backend

      - name: Setup local database with seed data
        run: |
          python ./src/backend/fastapi_app/setup_postgres_database.py
          python ./src/backend/fastapi_app/setup_postgres_seeddata.py

      - name: Setup node
        uses: actions/setup-node@v4
        with:
          node-version: 18

      - name: Build frontend
        run: |
          cd ./src/frontend
          npm install
          npm run build

      # Starts the app detached, with stdout/stderr captured to files that are
      # uploaded as artifacts below. RUNNER_TRACKING_ID is cleared first,
      # presumably so the runner does not terminate the background process
      # when this step ends - verify before changing.
      - name: Run local server in background
        run: |
          RUNNER_TRACKING_ID="" && (nohup python3 -m uvicorn fastapi_app:create_app --factory > serverlogs.out 2> serverlogs.err &)

      - name: Install evaluate dependencies
        run: |
          uv pip install -r evals/requirements.txt

      # Results are written to a per-PR directory so they can be diffed
      # against evals/results/baseline in "Summarize results".
      - name: Evaluate local RAG flow
        run: |
          python evals/evaluate.py --targeturl=http://127.0.0.1:8000/chat --resultsdir=evals/results/pr${{ github.event.issue.number }}

      # Server logs matter most when an earlier step failed, so upload them
      # unless the run was cancelled.
      - name: Upload server logs as build artifact
        if: ${{ !cancelled() }}
        uses: actions/upload-artifact@v4
        with:
          name: server_logs
          path: ./serverlogs.out

      - name: Upload server error logs as build artifact
        if: ${{ !cancelled() }}
        uses: actions/upload-artifact@v4
        with:
          name: server_error_logs
          path: ./serverlogs.err

      - name: Upload eval results as build artifact
        uses: actions/upload-artifact@v4
        with:
          name: eval_result
          path: ./evals/results/pr${{ github.event.issue.number }}

      # Builds two markdown files (overall summary and diff against the
      # baseline run) and appends both to the job summary page.
      - name: Summarize results
        if: ${{ success() }}
        run: |
          echo "## Evaluation results" >> eval-summary.md
          python -m evaltools summary evals/results --output=markdown >> eval-summary.md
          echo "## Answer differences across runs" >> run-diff.md
          python -m evaltools diff evals/results/baseline evals/results/pr${{ github.event.issue.number }} --output=markdown >> run-diff.md
          cat eval-summary.md >> "$GITHUB_STEP_SUMMARY"
          cat run-diff.md >> "$GITHUB_STEP_SUMMARY"

      # Posts eval-summary.md (written by the previous step) back to the pull
      # request together with a link to this workflow run.
      - name: Comment on pull request
        uses: actions/github-script@v7
        with:
          script: |
            const fs = require('fs');
            const summaryPath = "eval-summary.md";
            const summary = fs.readFileSync(summaryPath, 'utf8');
            const runId = process.env.GITHUB_RUN_ID;
            const repo = process.env.GITHUB_REPOSITORY;
            const actionsUrl = `https://github.com/${repo}/actions/runs/${runId}`;
            await github.rest.issues.createComment({
              issue_number: context.issue.number,
              owner: context.repo.owner,
              repo: context.repo.repo,
              body: `${summary}\n\n[Check the workflow run for more details](${actionsUrl}).`
            })