From 485ff788582c2a7c589d0321d7177f161525cc75 Mon Sep 17 00:00:00 2001
From: Conner Swann <2635475+yourbuddyconner@users.noreply.github.com>
Date: Thu, 19 Sep 2024 15:27:39 -0700
Subject: [PATCH] refactor to properly use matrix

---
Review notes (text between the `---` line and the first diff is ignored by `git am`):
- Line structure and indentation were reconstructed from a whitespace-collapsed
  copy of this patch. The adhoc.yml deletion hunk assumes 2-space indentation
  and may need whitespace-tolerant application if the file on disk differs.
- The `index` blob ids predate the review changes made to the two new files.

 .github/workflows/adhoc-matrix.yml  | 193 +++++++++++++++++++++++++++++
 .github/workflows/adhoc.yml         | 169 -------------------------
 .github/workflows/run-on-runner.yml | 114 +++++++++++++++++
 3 files changed, 307 insertions(+), 169 deletions(-)
 create mode 100644 .github/workflows/adhoc-matrix.yml
 delete mode 100644 .github/workflows/adhoc.yml
 create mode 100644 .github/workflows/run-on-runner.yml

diff --git a/.github/workflows/adhoc-matrix.yml b/.github/workflows/adhoc-matrix.yml
new file mode 100644
index 0000000..47a599e
--- /dev/null
+++ b/.github/workflows/adhoc-matrix.yml
@@ -0,0 +1,193 @@
+name: Execute ZKVM-Perf (Matrix)
+
+on:
+  workflow_dispatch:
+    inputs:
+      provers:
+        description: 'Provers to use (comma-separated)'
+        required: false
+        type: string
+        default: 'sp1'
+      programs:
+        description: 'Programs to benchmark (comma-separated)'
+        required: false
+        type: string
+        default: 'loop,fibonacci,tendermint,reth1,reth2'
+      filename:
+        description: 'Filename for the benchmark'
+        required: false
+        type: string
+        default: 'benchmark'
+      trials:
+        description: 'Number of trials to run'
+        required: false
+        type: string
+        default: '1'
+      sp1_ref:
+        description: 'SP1 reference (commit hash or branch name)'
+        required: false
+        type: string
+        default: '2e8b0a8'
+      additional_params:
+        description: 'Additional parameters as JSON'
+        required: false
+        type: string
+        default: '{"hashfns":"poseidon","shard_sizes":"22"}'
+
+jobs:
+  # One job per instance type: start an EC2 self-hosted runner, dispatch the
+  # benchmark workflow onto it, wait for that run, then always stop the instance.
+  run-benchmarks:
+    strategy:
+      # Let the other instance type finish even if one of them fails.
+      fail-fast: false
+      matrix:
+        include:
+          - instance_type: g6.16xlarge
+            enable_gpu: true
+            ami_id: ami-079a6a210557ef0e4
+          - instance_type: r7i.16xlarge
+            enable_gpu: false
+            ami_id: ami-079a6a210557ef0e4
+
+    name: Run on ${{ matrix.instance_type }}
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Configure AWS credentials
+        uses: aws-actions/configure-aws-credentials@v4
+        with:
+          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
+          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+          aws-region: ${{ secrets.AWS_REGION }}
+
+      - name: Start EC2 runner
+        id: start-ec2-runner
+        # NOTE(review): tracks a moving branch; consider pinning to a commit SHA.
+        uses: xJonathanLEI/ec2-github-runner@main
+        with:
+          mode: start
+          # Must use personal access token here as `GITHUB_TOKEN` does not have access to runners.
+          # Use a fine-grained token with these permissions to at least this repository:
+          # - Administration: Read and write
+          # - Contents: Read and write
+          # - Metadata: Read-only
+          # - Workflows: Read and write
+          # - Actions: Read and write
+          github-token: ${{ secrets.GH_PAT }}
+          ec2-image-id: ${{ matrix.ami_id }}
+          ec2-instance-type: ${{ matrix.instance_type }}
+          subnet-id: ${{ secrets.AWS_SUBNET_ID }}
+          security-group-id: ${{ secrets.AWS_SG_ID }}
+          storage-size: 1024
+
+      - name: Run benchmarks
+        uses: actions/github-script@v7
+        # Inputs reach the script through the environment instead of being
+        # interpolated into the JavaScript source, so a quote inside a value
+        # cannot break or inject into the script.
+        env:
+          RUNNER_NAME: ${{ steps.start-ec2-runner.outputs.label }}
+          INSTANCE_TYPE: ${{ matrix.instance_type }}
+          ENABLE_GPU: ${{ matrix.enable_gpu }}
+          PROVERS: ${{ inputs.provers }}
+          PROGRAMS: ${{ inputs.programs }}
+          FILENAME: ${{ inputs.filename }}_${{ matrix.instance_type }}
+          TRIALS: ${{ inputs.trials }}
+          SP1_REF: ${{ inputs.sp1_ref }}
+          ADDITIONAL_PARAMS: ${{ inputs.additional_params }}
+        with:
+          github-token: ${{ secrets.GH_PAT }}
+          script: |
+            const maxAttempts = 5;
+            const initialDelay = 30000; // 30 seconds
+
+            for (let attempt = 1; attempt <= maxAttempts; attempt++) {
+              console.log(`Attempt ${attempt} to trigger benchmark workflow`);
+
+              // Linear back-off: wait 30s, 60s, ... before each attempt.
+              await new Promise(resolve => setTimeout(resolve, initialDelay * attempt));
+
+              try {
+                await github.rest.actions.createWorkflowDispatch({
+                  owner: context.repo.owner,
+                  repo: context.repo.repo,
+                  workflow_id: 'run-on-runner.yml',
+                  ref: context.ref,
+                  inputs: {
+                    runner_name: process.env.RUNNER_NAME,
+                    instance_type: process.env.INSTANCE_TYPE,
+                    enable_gpu: process.env.ENABLE_GPU,
+                    provers: process.env.PROVERS,
+                    programs: process.env.PROGRAMS,
+                    filename: process.env.FILENAME,
+                    trials: process.env.TRIALS,
+                    sp1_ref: process.env.SP1_REF,
+                    additional_params: process.env.ADDITIONAL_PARAMS
+                  }
+                });
+                console.log('Benchmark workflow triggered successfully');
+                break;
+              } catch (error) {
+                console.log(`Failed to trigger workflow: ${error.message}`);
+                if (attempt === maxAttempts) {
+                  core.setFailed('Failed to trigger benchmark workflow after multiple attempts');
+                }
+              }
+            }
+
+      - name: Wait for benchmark completion
+        uses: actions/github-script@v7
+        env:
+          # Must equal `run-name` in run-on-runner.yml. The label comes from the
+          # runner started above (assumed unique per start), so the title singles
+          # out the run this job dispatched.
+          EXPECTED_TITLE: Benchmark ${{ matrix.instance_type }} on ${{ steps.start-ec2-runner.outputs.label }}
+        with:
+          github-token: ${{ secrets.GH_PAT }}
+          script: |
+            const maxWaitTime = 3600000; // 1 hour in milliseconds
+            const checkInterval = 60000; // 1 minute in milliseconds
+            const startTime = Date.now();
+
+            while (true) {
+              const runs = await github.rest.actions.listWorkflowRuns({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                workflow_id: 'run-on-runner.yml',
+                event: 'workflow_dispatch',
+                per_page: 50
+              });
+
+              // Follow this job's own run until it is completed; a run that is
+              // still queued (or not listed yet) must not count as finished.
+              const run = runs.data.workflow_runs.find(
+                candidate => candidate.display_title === process.env.EXPECTED_TITLE
+              );
+
+              if (run && run.status === 'completed') {
+                if (run.conclusion === 'success') {
+                  console.log('Benchmark workflow completed');
+                } else {
+                  core.setFailed(`Benchmark workflow finished with conclusion: ${run.conclusion} (${run.html_url})`);
+                }
+                break;
+              }
+
+              if (Date.now() - startTime > maxWaitTime) {
+                core.setFailed('Benchmark workflow did not complete within the maximum wait time');
+                break;
+              }
+
+              console.log('Waiting for benchmark to complete...');
+              await new Promise(resolve => setTimeout(resolve, checkInterval));
+            }
+
+      - name: Stop EC2 runner
+        if: always()
+        uses: xJonathanLEI/ec2-github-runner@main
+        with:
+          mode: stop
+          github-token: ${{ secrets.GH_PAT }}
+          label: ${{ steps.start-ec2-runner.outputs.label }}
+          ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }}
diff --git a/.github/workflows/adhoc.yml b/.github/workflows/adhoc.yml
deleted file mode 100644
index 0123563..0000000
--- a/.github/workflows/adhoc.yml
+++ /dev/null
@@ -1,169 +0,0 @@
-name: Execute ZKVM-Perf
-
-on:
-  workflow_dispatch:
-    inputs:
-      ami_id:
-        description: 'AMI ID'
-        required: true
-        type: string
-        default: 'ami-079a6a210557ef0e4'
-      provers:
-        description: 'Provers to use (comma-separated)'
-        required: true
-        type: string
-        default: 'sp1,risc0'
-      programs:
-        description: 'Programs to benchmark (comma-separated, leave empty for all)'
-        required: false
-        type: string
-      filename:
-        description: 'Filename for the benchmark'
-        required: true
-        default: 'benchmark'
-        type: string
-      trials:
-        description: 'Number of trials to run'
-        required: true
-        default: '1'
-        type: number
-      hashfns:
-        description: 'Hash functions to use (comma-separated)'
-        required: true
-        type: string
-        default: 'poseidon'
-      shard_sizes:
-        description: 'Shard sizes to use (comma-separated)'
-        required: true
-        default: '22'
-        type: string
-      sp1_ref:
-        description: 'SP1 reference (commit hash or branch name)'
-        required: false
-        type: string
-        default: '2e8b0a8'
-
-  pull_request:
-    branches: [main]
-
-jobs:
-  matrix_prep:
-    runs-on: ubuntu-latest
-    outputs:
-      matrix: ${{ steps.set-matrix.outputs.matrix }}
-    steps:
-      - id: set-matrix
-        run: |
-          echo "matrix=$(jq -c . << EOF
-          {
-            "instance_config": [
-              {"type": "g6.16xlarge", "gpu": true},
-              {"type": "r7i.16xlarge", "gpu": false}
-            ]
-          }
-          EOF
-          )" >> $GITHUB_OUTPUT
-
-  start-runner:
-    needs: matrix_prep
-    strategy:
-      matrix: ${{fromJson(needs.matrix_prep.outputs.matrix)}}
-    name: Start Self-Hosted EC2 Runner
-    runs-on: ubuntu-latest
-    outputs:
-      label: ${{ steps.start-ec2-runner.outputs.label }}
-      ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }}
-
-    steps:
-      - name: Configure AWS credentials
-        uses: aws-actions/configure-aws-credentials@v1
-        with:
-          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
-          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
-          aws-region: ${{ secrets.AWS_REGION }}
-
-      - name: Start EC2 runner
-        id: "start-ec2-runner"
-        uses: "xJonathanLEI/ec2-github-runner@main"
-        with:
-          mode: start
-          github-token: ${{ secrets.GH_PAT }}
-          ec2-image-id: ${{ inputs.ami_id }}
-          ec2-instance-type: ${{ matrix.instance_config.type }}
-          subnet-id: ${{ secrets.AWS_SUBNET_ID }}
-          security-group-id: ${{ secrets.AWS_SG_ID }}
-
-  perf:
-    needs: start-runner
-    name: Run ZKVM-Perf on ${{ matrix.instance_config.type }}
-    runs-on: ${{ needs.start-runner.outputs.label }}
-    env:
-      CARGO_NET_GIT_FETCH_WITH_CLI: "true"
-    steps:
-      - name: Checkout sources
-        uses: actions/checkout@v4
-
-      - name: rust-cache
-        uses: actions/cache@v3
-        with:
-          path: |
-            ~/.cargo/bin/
-            ~/.cargo/registry/index/
-            ~/.cargo/registry/cache/
-            ~/.cargo/git/db/
-            target/
-            ~/.rustup/
-          key: rust-1.79.0-${{ hashFiles('**/Cargo.toml') }}
-          restore-keys: rust-1.79.0-
-
-      - name: Update SP1 reference
-        run: |
-          chmod +x update_sp1_and_build.sh
-          SP1_REF=${{ inputs.sp1_ref }} RUN_BUILD=false ./update_sp1_and_build.sh
-
-      - name: Run docker build
-        run: |
-          docker build -t zkvm-perf --platform linux/amd64 -f Dockerfile.gpu --build-arg SP1_REF=${{ inputs.sp1_ref }} .
-
-      - name: Run Tests (docker)
-        run: |
-          docker run ${{ matrix.instance_config.gpu && '--gpus all' || '' }} --platform linux/amd64 \
-            -v /var/run/docker.sock:/var/run/docker.sock \
-            -v ./benchmarks:/usr/src/app/benchmarks \
-            -e RUST_BACKTRACE=full \
-            --network host \
-            zkvm-perf \
-            "python3 sweep.py --filename ${{ inputs.filename }} \
-              --trials ${{ inputs.trials }} \
-              ${{ inputs.programs && format('--programs {0}', inputs.programs) || '' }} \
-              --provers ${{ inputs.provers }} \
-              --hashfns ${{ inputs.hashfns }} \
-              --shard-sizes ${{ inputs.shard_sizes }}"
-
-      - name: Print Results
-        run: |
-          cat benchmarks/*.csv
-
-  stop-runner:
-    name: Stop Self-Hosted EC2 Runner
-    needs:
-      - start-runner
-      - perf
-    runs-on: ubuntu-latest
-    if: always()
-
-    steps:
-      - name: Configure AWS credentials
-        uses: aws-actions/configure-aws-credentials@v1
-        with:
-          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
-          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
-          aws-region: ${{ secrets.AWS_REGION }}
-
-      - name: Stop EC2 runner
-        uses: "xJonathanLEI/ec2-github-runner@main"
-        with:
-          mode: stop
-          github-token: ${{ secrets.GH_PAT }}
-          label: ${{ needs.start-runner.outputs.label }}
-          ec2-instance-id: ${{ needs.start-runner.outputs.ec2-instance-id }}
\ No newline at end of file
diff --git a/.github/workflows/run-on-runner.yml b/.github/workflows/run-on-runner.yml
new file mode 100644
index 0000000..9ae9b1d
--- /dev/null
+++ b/.github/workflows/run-on-runner.yml
@@ -0,0 +1,114 @@
+name: Run Benchmarks on EC2 Runner
+
+# adhoc-matrix.yml finds the run it dispatched by this title, so keep the
+# format in sync with EXPECTED_TITLE in that workflow.
+run-name: Benchmark ${{ inputs.instance_type }} on ${{ inputs.runner_name }}
+
+on:
+  workflow_dispatch:
+    inputs:
+      runner_name:
+        required: true
+        type: string
+      instance_type:
+        required: true
+        type: string
+      enable_gpu:
+        required: true
+        type: string
+      provers:
+        required: false
+        type: string
+        default: 'sp1'
+      programs:
+        required: false
+        type: string
+        default: 'loop,fibonacci,tendermint,reth1,reth2'
+      filename:
+        required: false
+        type: string
+        default: 'benchmark'
+      trials:
+        required: false
+        type: string
+        default: '1'
+      sp1_ref:
+        required: false
+        type: string
+        default: '2e8b0a8'
+      additional_params:
+        required: false
+        type: string
+        default: '{"hashfns":"poseidon","shard_sizes":"22"}'
+
+jobs:
+  run-benchmark:
+    runs-on: ${{ inputs.runner_name }}
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Set up Docker
+        uses: docker/setup-buildx-action@v3
+
+      - name: Parse additional parameters
+        id: parse-params
+        env:
+          ADDITIONAL_PARAMS_JSON: ${{ inputs.additional_params }}
+        run: |
+          # Multi-line GITHUB_ENV syntax: NAME<<DELIMITER, the value, DELIMITER.
+          {
+            echo 'ADDITIONAL_PARAMS<<EOF'
+            echo "$ADDITIONAL_PARAMS_JSON"
+            echo 'EOF'
+          } >> "$GITHUB_ENV"
+
+      - name: Update SP1 and build
+        env:
+          SP1_REF: ${{ inputs.sp1_ref }}
+          RUN_BUILD: "true"
+        run: |
+          chmod +x update_sp1_and_build.sh
+          ./update_sp1_and_build.sh
+
+      - name: Build Docker image
+        run: |
+          docker build -t zkvm-perf --platform linux/amd64 -f Dockerfile.gpu .
+
+      - name: Run benchmark
+        # Values are handed to the shell as environment variables rather than
+        # pasted into the script text, so spaces or shell metacharacters in an
+        # input cannot alter the command.
+        env:
+          GPU_FLAGS: ${{ inputs.enable_gpu == 'true' && '--gpus all' || '' }}
+          FILENAME: ${{ inputs.filename }}
+          TRIALS: ${{ inputs.trials }}
+          PROGRAMS: ${{ inputs.programs }}
+          PROVERS: ${{ inputs.provers }}
+          HASHFNS: ${{ fromJson(env.ADDITIONAL_PARAMS).hashfns }}
+          SHARD_SIZES: ${{ fromJson(env.ADDITIONAL_PARAMS).shard_sizes }}
+        run: |
+          # GPU_FLAGS is intentionally unquoted: it expands to nothing or to two words.
+          docker run $GPU_FLAGS --platform linux/amd64 \
+            -v /var/run/docker.sock:/var/run/docker.sock \
+            -v "$GITHUB_WORKSPACE/benchmarks:/usr/src/app/benchmarks" \
+            -e RUST_BACKTRACE=full \
+            --network host \
+            zkvm-perf \
+            python3 sweep.py \
+            --filename "$FILENAME" \
+            --trials "$TRIALS" \
+            --programs "$PROGRAMS" \
+            --provers "$PROVERS" \
+            --hashfns "$HASHFNS" \
+            --shard-sizes "$SHARD_SIZES"
+
+      - name: Upload benchmark results
+        uses: actions/upload-artifact@v4
+        with:
+          name: benchmark-results-${{ inputs.instance_type }}
+          path: ${{ github.workspace }}/benchmarks/*.csv
+
+      - name: Print Results
+        run: |
+          cat "$GITHUB_WORKSPACE"/benchmarks/*.csv