allenai · shatu · Jul 16, 2025 · Jul 16, 2025 · Jul 16, 2025 · Jul 16, 2025
diff --git a/.dockerignore b/.dockerignore
@@ -0,0 +1,89 @@
+# NOTE: .dockerignore patterns are matched relative to the build-context root;
+# unlike .gitignore, a bare name does not match in subdirectories. Use a
+# leading "**/" for anything that can appear at any depth.
+
+# Git directories (can be large)
+**/.git/
+.gitignore
+.github/
+
+# Python cache and compiled files
+**/__pycache__/
+**/*.py[cod]
+**/*$py.class
+*.so
+.Python
+*.egg-info/
+dist/
+build/
+*.egg
+local_dataset_cache/
+
+# Virtual environments
+.venv/
+venv/
+ENV/
+env/
+
+# IDE and editor files
+.vscode/
+.idea/
+**/*.swp
+**/*.swo
+**/*~
+**/.DS_Store
+
+# Test and documentation
+tests/
+test/
+docs/
+*.md
+!README.md
+.pytest_cache/
+.coverage
+htmlcov/
+.tox/
+
+# Logs and databases
+*.log
+*.sql
+*.sqlite
+*.db
+
+# Package manager files
+node_modules/
+npm-debug.log*
+yarn-debug.log*
+yarn-error.log*
+
+# Jupyter notebooks checkpoints
+**/.ipynb_checkpoints/
+
+# Model and data files (if stored locally)
+*.ckpt
+*.pth
+*.h5
+*.safetensors
+data/
+models/
+checkpoints/
+
+# Temporary files
+tmp/
+temp/
+*.tmp
+
+# Build artifacts
+*.o
+*.a
+*.dylib
+
+# Cache directories
+.cache/
+.mypy_cache/
+.ruff_cache/
+
+# Docker files (avoid recursion)
+Dockerfile*
+docker-compose*
+.dockerignore
diff --git a/.github/actions/push/action.yaml b/.github/actions/push/action.yaml
@@ -20,10 +20,19 @@ runs:
     - shell: bash
       if: inputs.beaker != '' # previously startsWith(github.ref, 'refs/tags/') && ...
       run: |
+        # Look up the account that owns the Beaker token so the rename below is not tied to one user.
+        beaker_user=$(beaker account whoami --format json | jq -r '.[0].name')
         # Push release to Beaker.
         SHORT_SHA=$(git rev-parse --short HEAD)
-        beaker image create --name "${{ inputs.beaker }}-${SHORT_SHA}-${{ github.run_id }}" ${{ inputs.image }}
+        DESCRIPTION="Created from commit: ${SHORT_SHA}"
+        beaker image create \
+               --name "${{ inputs.beaker }}-${SHORT_SHA}-${{ github.run_id }}" ${{ inputs.image }} \
+               --description "$DESCRIPTION"
         # We can't delete the old image because it might be used by a running job. Instead, we rename it to an empty 
         # string, so it will not be resolved by the Beaker client.
-        beaker image rename nathanl/${{ inputs.beaker }} "" || true
-        beaker image create --name ${{ inputs.beaker }} ${{ inputs.image }}
+        echo "Renaming old image $beaker_user/${{ inputs.beaker }} so it no longer resolves."
+        beaker image rename "$beaker_user/${{ inputs.beaker }}" "" || true
+        echo "Creating image $beaker_user/${{ inputs.beaker }}."
+        beaker image create \
+               --name ${{ inputs.beaker }} ${{ inputs.image }} \
+               --description "$DESCRIPTION"
diff --git a/.github/workflows/beaker-experiment.yml b/.github/workflows/beaker-experiment.yml
@@ -0,0 +1,191 @@
+name: Beaker Experiment Launch
+
+on:
+  merge_group:
+
+  # Active triggers: merge queue (above) and manual dispatch (below); the pull_request trigger is commented out.
+  workflow_dispatch:  # This allows us to manually trigger a build through the GitHub UI.
+  # pull_request:
+  #   branches: [main]
+  #   paths:
+  #     - 'open_instruct/**'
+  #     - '!open_instruct/README.md'
+  #     - 'requirements.txt'
+  #     - 'Dockerfile'
+  #     - '.github/workflows/beaker-experiment.yml'
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true  # a newer run in the same workflow+ref group cancels the older one
+
+env:
+  DOCKER_BUILDKIT: "1"  # workflow-level env: visible to every step of every job
+jobs:
+  launch-experiment:
+    name: Launch Beaker Experiment
+    runs-on: 8-Core-XL-Runner-Ubuntu-Latest
+    timeout-minutes: 35  # job-level cap; the wait step further down polls for at most 20 minutes on its own
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0  # Full history. NOTE(review): `git log -1` in the trigger-info step only needs the tip commit; confirm whether anything else requires full history
+
+      - name: Checkout oe-eval-internal  # cloned into ./oe-eval-internal using the deploy-key secret
+        uses: actions/checkout@v4
+        with:
+          repository: allenai/oe-eval-internal
+          path: './oe-eval-internal'
+          ssh-key: ${{ secrets.OE_EVAL_GIT_CLONE_ACCESS_PRIVATE_SSH_DEPLOY_KEY }}
+          fetch-depth: 1  # shallow clone: only the latest commit
+          filter: 'blob:none'  # partial clone: file contents are fetched on demand
+
+      - name: Get trigger information  # produces `trigger_info`, shown on the **Trigger:** line of the Summary step
+        id: get-trigger-info
+        run: |
+          if [ "${{ github.event_name }}" = "push" ]; then
+            # Get the commit author for push events
+            AUTHOR_NAME=$(git log -1 --pretty=format:'%an')
+            echo "trigger_info=Push by ${AUTHOR_NAME}" >> "$GITHUB_OUTPUT"
+          elif [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
+            # Get the user who triggered the manual dispatch
+            echo "trigger_info=Manual dispatch by ${{ github.actor }}" >> "$GITHUB_OUTPUT"
+          else
+            # Any other event (this workflow's `on:` lists merge_group; it has no schedule trigger): report its name
+            echo "trigger_info=${{ github.event_name }} event" >> "$GITHUB_OUTPUT"
+          fi
+
+      - name: Setup Python environment
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.10'  # quoted so YAML keeps it a string rather than the float 3.1
+
+      - name: Install uv  # provides the `uv` command used by "Install dependencies" below
+        uses: astral-sh/setup-uv@v3
+
+      - name: Setup Beaker  # NOTE(review): presumably installs and authenticates the `beaker` CLI used by later steps; confirm against allenai/setup-beaker
+        uses: allenai/setup-beaker@v2
+        with:
+          token: ${{ secrets.BEAKER_TOKEN }}
+          workspace: ai2/tulu-thinker
+
+      - name: Install dependencies
+        run: |
+          # Install development dependencies needed for mason.py
+          uv sync --frozen
+
+      - name: Build image and launch experiment
+        id: launch
+        env:
+          BEAKER_TOKEN: ${{ secrets.BEAKER_TOKEN }}
+          GITHUB_RUN_ID: ${{ github.run_id }}
+          GITHUB_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+        run: |
+          set -euo pipefail
+
+          # Make scripts executable, including the test script that is actually launched below
+          chmod +x scripts/train/build_image_and_launch.sh \
+            scripts/train/debug/single_gpu_on_beaker.sh scripts/train/debug/single_gpu_integration_test.sh
+
+          echo "Building Docker image and launching experiment..."
+          echo "Git commit: $(git rev-parse --short HEAD)"
+
+          # Build image and launch experiment; tee streams the output and also saves it for parsing
+          ./scripts/train/build_image_and_launch.sh scripts/train/debug/single_gpu_integration_test.sh 2>&1 | tee /tmp/beaker_output.log || {
+            EXIT_CODE=$?
+            echo "ERROR: build_image_and_launch.sh failed with exit code $EXIT_CODE"
+            exit $EXIT_CODE
+          }
+
+          # Extract experiment ID from the saved output; `|| true` stops `set -e` + pipefail from aborting on no match, so the diagnostics below can run
+          EXPERIMENT_ID=$(grep -oP 'https://beaker.org/ex/\K[a-zA-Z0-9]+' /tmp/beaker_output.log | tail -1 || true)
+          if [ -z "$EXPERIMENT_ID" ]; then
+            echo "ERROR: Failed to extract experiment ID from output"
+            echo "DEBUG: Full output log:"
+            cat /tmp/beaker_output.log
+            echo "---"
+            echo "Please check that the experiment was created successfully."
+            exit 1
+          fi
+
+          echo "experiment_id=$EXPERIMENT_ID" >> "$GITHUB_OUTPUT"
+          echo "Experiment ID: $EXPERIMENT_ID"
+          echo "Experiment URL: https://beaker.org/ex/$EXPERIMENT_ID"
+
+      - name: Wait for Beaker experiment completion
+        env:
+          BEAKER_TOKEN: ${{ secrets.BEAKER_TOKEN }}
+        run: |
+          EXPERIMENT_ID="${{ steps.launch.outputs.experiment_id }}"
+          echo "Waiting for experiment $EXPERIMENT_ID to complete..."
+
+          # Maximum wait time: 20 minutes (1200 seconds)
+          MAX_WAIT_TIME=1200
+          CHECK_INTERVAL=30
+          ELAPSED_TIME=0
+
+          while [ $ELAPSED_TIME -lt $MAX_WAIT_TIME ]; do
+            # Get job status directly
+            JOB_STATUS=$(beaker experiment get "$EXPERIMENT_ID" --format json | jq -r '.[0].jobs[0].status' 2>/dev/null || echo "null")
+
+            # No exitCode yet, or a status query that returned nothing usable, both count as "still pending"
+            if [ -z "$JOB_STATUS" ] || [ "$JOB_STATUS" = "null" ]; then
+              EXIT_CODE="pending"
+            else
+              EXIT_CODE=$(echo "$JOB_STATUS" | jq -r '.exitCode // "pending"' 2>/dev/null || echo "pending")
+            fi
+
+            if [ "$EXIT_CODE" = "pending" ]; then
+              echo "=== Experiment still running (elapsed: ${ELAPSED_TIME}s) ==="
+            else
+              echo "=== Experiment finished with exit code: $EXIT_CODE (elapsed: ${ELAPSED_TIME}s) ==="
+            fi
+
+            # Show the most recent log lines (the last 50 are re-printed on every poll)
+            echo "--- Recent logs ---"
+            beaker experiment logs "$EXPERIMENT_ID" 2>/dev/null | tail -n 50 || echo "No logs available yet"
+            echo "--- End of logs ---"
+
+            # Check if experiment has completed
+            if [ "$EXIT_CODE" != "pending" ]; then
+              if [ "$EXIT_CODE" = "0" ]; then
+                echo "✅ Experiment completed successfully!"
+                # Show final logs
+                echo "=== Final logs ==="
+                beaker experiment logs "$EXPERIMENT_ID" | tail -n 100
+                exit 0
+              else
+                echo "❌ Experiment failed with exit code $EXIT_CODE"
+                # Show error logs
+                echo "=== Error logs ==="
+                beaker experiment logs "$EXPERIMENT_ID" | tail -n 200
+                exit 1
+              fi
+            fi
+
+            # Wait before next check
+            sleep $CHECK_INTERVAL
+            ELAPSED_TIME=$((ELAPSED_TIME + CHECK_INTERVAL))
+          done
+
+          echo "⏱️ Timeout: Experiment did not complete within $((MAX_WAIT_TIME / 60)) minutes"
+          exit 1
+
+      - name: Summary
+        if: always()
+        run: |
+          # Append a Markdown report to the run's summary page; `always()` makes this run even after a failed step.
+          summary_file="$GITHUB_STEP_SUMMARY"
+          printf '%s\n\n' "## Beaker Experiment Summary" >> "$summary_file"
+          echo "**Trigger:** ${{ steps.get-trigger-info.outputs.trigger_info }}" >> "$summary_file"
+          echo "**Docker Image:** Built locally by build_image_and_launch.sh" >> "$summary_file"
+          experiment_id="${{ steps.launch.outputs.experiment_id }}"
+          [ -z "$experiment_id" ] || echo "**Beaker Experiment:** [View on Beaker](https://beaker.org/ex/$experiment_id)" >> "$summary_file"
+          echo "" >> "$summary_file"
+          # Final verdict, taken from the job status at the time this step runs.
+          case "${{ job.status }}" in
+            success) echo "✅ **Status:** Experiment completed successfully!" >> "$summary_file" ;;
+            *) echo "❌ **Status:** Experiment failed or timed out" >> "$summary_file" ;;
+          esac
diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
@@ -5,6 +5,7 @@ on:
       - master 
       - main
       - benchmark
+  merge_group:  # also run for merge-queue events. NOTE(review): the workflow has `contents: write`; confirm its jobs should publish from merge-queue refs
 permissions:
   contents: write
 jobs:

diff --git a/.github/workflows/push-image.yml b/.github/workflows/push-image.yml
@@ -12,17 +12,6 @@ concurrency:
   cancel-in-progress: true
 
 on:
-  push:
-    # Run this workflow anytime a push updates one of the files in the image's directory
-    # (other than the README), and anytime there's a new release tag for this image.
-    paths:
-      - 'open_instruct/**'
-      - '!open_instruct/README.md'
-      - 'requirements.txt'
-      - 'Dockerfile'
-      - '.github/workflows/push-image.yml'
-      # Note, add .olmo dockerfile + requirements if adding auto build to those
-    branches: [main]
   # pull_request: # note, comment this out for running on every push
   #   # Also run on PRs that update the files in the image's directory (other than README).
   #   branches: [main]
@@ -32,6 +21,7 @@ on:
   #     - 'requirements.txt'
   #     - 'Dockerfile'
   #     - '.github/workflows/push-image.yml'
+  merge_group:  # run for merge-queue events; added in the same change that removes the push-to-main trigger
   workflow_dispatch:  # This allows us to manually trigger a build through the GitHub UI.
 
 env:
@@ -42,7 +32,6 @@ jobs:
     name: open_instruct
     runs-on: ubuntu-latest
     timeout-minutes: 60
-    if: (github.event_name != 'workflow_run') || (github.event.workflow_run.conclusion == 'success')
     steps:
       - uses: actions/checkout@v3
 
@@ -60,22 +49,22 @@ jobs:
           # ghcr_user: ${{ secrets.GHCR_USER }}
 
       # big images fail, trying this
+      # Reference for large preinstalled directories on hosted runners: https://github.com/actions/runner-images/issues/10386
       - name: Delete huge unnecessary tools folder
-        run: rm -rf /opt/hostedtoolcache /usr/share/dotnet "$AGENT_TOOLSDIRECTORY"
-
+        run: rm -rf /opt/hostedtoolcache /usr/share/dotnet "$AGENT_TOOLSDIRECTORY" /usr/local/lib/android/sdk/ndk
+
+      - name: Check remaining disk space
+        run: df -h  # prints free space per filesystem after the cleanup step above
+
       - name: Build image
         run: |
-          docker build \
-              --build-arg BUILDKIT_INLINE_CACHE=1 \
-              --build-arg CUDA=12.1.0 --build-arg \
-              TARGET=cudnn8-devel --build-arg DIST=ubuntu20.04 \
-              --build-arg REQUIRE=requirements.txt . \
-              -t open_instruct
-
+          # GITHUB_REF_NAME is the short ref name for branches and tags alike (no refs/heads/ or refs/tags/ prefix).
+          docker build --platform=linux/amd64 \
+            --build-arg GIT_COMMIT="$(git rev-parse --short HEAD)" \
+            --build-arg GIT_BRANCH="${GITHUB_REF_NAME}" -t open_instruct .
 
       - name: Check image
-        run: |
-          docker run --rm open_instruct
+        run: docker run --rm open_instruct
 
       - name: Push image
         # if: github.event_name != 'pull_request'