diff --git a/.github/actions/push/action.yaml b/.github/actions/push/action.yaml
new file mode 100644
index 00000000..1811ef4c
--- /dev/null
+++ b/.github/actions/push/action.yaml
@@ -0,0 +1,30 @@
+# copied from internal repo https://github.com/allenai/docker-images
+name: Push image to Beaker
+description: Push an image to Beaker.
+inputs:
+  image:
+    description: The tag of the built image
+    required: true
+  beaker:
+    description: Upload release images to Beaker under this name
+    required: false
+    default: ""
+  latest:
+    description: Whether the image should also be pushed as latest
+    required: false
+    default: false
+runs:
+  using: composite
+  # note: the name used here is associated with the Beaker key in the repo secrets
+  steps:
+    - shell: bash
+      if: inputs.beaker != '' # previously startsWith(github.ref, 'refs/tags/') && ...
+      run: |
+        # Push release to Beaker.
+        SHORT_SHA=$(git rev-parse --short HEAD)
+        beaker image create --name "${{ inputs.beaker }}-${SHORT_SHA}-${{ github.run_id }}" ${{ inputs.image }}
+        # We delete the previous version. This doesn't actually delete the backing Docker image, so
+        # we'll still benefit from layer caching when we push a new version. The image might not exist
+        # yet, so it's ok if this fails.
+        beaker image delete nathanl/${{ inputs.beaker }} || true
+        beaker image create --name ${{ inputs.beaker }} ${{ inputs.image }}
\ No newline at end of file
diff --git a/.github/actions/setup/action.yaml b/.github/actions/setup/action.yaml
new file mode 100644
index 00000000..957b5bb6
--- /dev/null
+++ b/.github/actions/setup/action.yaml
@@ -0,0 +1,25 @@
+# copied from internal repo https://github.com/allenai/docker-images
+name: Build setup
+description: Set up the environment for a build
+inputs:
+  beaker_token:
+    description: The Beaker user token
+    required: true
+  # ghcr_token:
+  #   description: GHCR token
+  #   required: true
+  # ghcr_user:
+  #   description: GHCR user name
+  #   required: true
+runs:
+  using: composite
+  steps:
+    - uses: allenai/setup-beaker@v2
+      with:
+        token: ${{ inputs.beaker_token }}
+        workspace: ai2/oe-adapt
+
+    # - shell: bash
+    #   run: |
+    #     # Login to the GitHub container registry.
+    #     echo ${{ inputs.ghcr_token }} | docker login ghcr.io -u ${{ inputs.ghcr_user }} --password-stdin
\ No newline at end of file
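For local testing, the push action's run step boils down to a few Beaker CLI calls. Below is a minimal sketch, assuming the `beaker` CLI is installed and authenticated; `IMAGE`, `BEAKER_NAME`, and `RUN_ID` are placeholder values standing in for the action inputs and `${{ github.run_id }}`, not names used by the action itself:

```bash
# Rough local approximation of the push action's run step (placeholders, not the exact CI invocation).
IMAGE=rewardbench             # local Docker tag, i.e. the `image` input
BEAKER_NAME=rewardbench_auto  # Beaker image name, i.e. the `beaker` input
RUN_ID=manual                 # stands in for ${{ github.run_id }} outside of Actions

SHORT_SHA=$(git rev-parse --short HEAD)

# Versioned copy, unique per commit and run, kept for provenance.
beaker image create --name "${BEAKER_NAME}-${SHORT_SHA}-${RUN_ID}" "${IMAGE}"

# Refresh the stable name: deleting only drops the Beaker reference, so Docker
# layer caching still applies, and it's fine if the image doesn't exist yet.
beaker image delete "nathanl/${BEAKER_NAME}" || true
beaker image create --name "${BEAKER_NAME}" "${IMAGE}"
```

The versioned name keeps a per-commit record on Beaker, while the unversioned name always points at the most recent build.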
diff --git a/.github/workflows/push-image.yml b/.github/workflows/push-image.yml
new file mode 100644
index 00000000..c12cd4d8
--- /dev/null
+++ b/.github/workflows/push-image.yml
@@ -0,0 +1,75 @@
+# This is an example workflow file.
+#
+# When you add a new image, copy this file and then replace all mentions of "hello-world" with
+# the name of your new image.
+#
+# Read through the rest of the comments in this file to figure out how it works, and what else
+# you need to change.
+name: build_beaker_image
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+on:
+  push:
+    # Run this workflow anytime a push updates one of the files in the image's directory
+    # (other than the README), and anytime there's a new release tag for this image.
+    paths:
+      - 'rewardbench/**'
+      - 'scripts/**'
+      - 'Dockerfile'
+      - 'setup.py'
+    branches: [main]
+#  pull_request:  # note: comment this out for running on every push
+#    # Also run on PRs that update the files in the image's directory (other than README).
+#    branches: [main]
+#    paths:
+#      - 'rewardbench/**'
+#      - 'scripts/**'
+#      - 'Dockerfile'
+#      - 'setup.py'
+#  workflow_dispatch:  # This allows us to manually trigger a build through the GitHub UI.
+
+env:
+  DOCKER_BUILDKIT: "1"
+
+jobs:
+  build:
+    name: rewardbench
+    runs-on: ubuntu-latest
+    timeout-minutes: 60
+    if: (github.event_name != 'workflow_run') || (github.event.workflow_run.conclusion == 'success')
+    steps:
+      - uses: actions/checkout@v3
+
+      - name: Setup environment
+        uses: ./.github/actions/setup
+        with:
+          beaker_token: ${{ secrets.BEAKER_TOKEN }}
+
+      - name: Verify Beaker install
+        run: |
+          beaker account whoami
+
+      # big images fail, trying this
+      - name: Delete huge unnecessary tools folder
+        run: rm -rf /opt/hostedtoolcache /usr/share/dotnet "$AGENT_TOOLSDIRECTORY"
+
+      - name: Build image
+        run: |
+          docker build \
+            -t rewardbench . --platform linux/amd64
+
+
+      - name: Check image
+        run: |
+          docker run --rm rewardbench
+
+      - name: Push image
+        # if: github.event_name != 'pull_request'
+        uses: ./.github/actions/push
+        with:
+          image: rewardbench  # this is the tag of the image we just built in the previous step
+          beaker: rewardbench_auto  # this is the name of the image on Beaker
+          latest: true  # this flag says we should also push this as the 'latest' version to GHCR
\ No newline at end of file
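Before relying on CI, the workflow's verify, build, and check steps can be reproduced locally with the same commands; a short sketch, run from the repository root:

```bash
# Same checks the workflow performs, in order.
beaker account whoami                                  # "Verify Beaker install"
docker build -t rewardbench . --platform linux/amd64   # "Build image"
docker run --rm rewardbench                            # "Check image": container should start and exit cleanly
```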
diff --git a/Dockerfile b/Dockerfile
index d5e9b721..cdf1742e 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,11 +1,48 @@
-# This dockerfile is forked from ai2/cuda11.8-cudnn8-dev-ubuntu20.04
-# To get the latest id, run `beaker image pull ai2/cuda11.8-cudnn8-dev-ubuntu20.04`
-# and then `docker image list`, to verify docker image is pulled
-# e.g. `Image is up to date for gcr.io/ai2-beaker-core/public/cncl3kcetc4q9nvqumrg:latest`
-FROM gcr.io/ai2-beaker-core/public/cq29hmn3sck728v1o7d0:latest
+# Use public Nvidia images (rather than Beaker), for reproducibility
+FROM --platform=linux/amd64 nvidia/cuda:11.8.0-cudnn8-devel-ubuntu20.04
 
 RUN apt update && apt install -y openjdk-8-jre-headless
 
+ARG DEBIAN_FRONTEND="noninteractive"
+ENV TZ="America/Los_Angeles"
+
+# Install base tools.
+RUN apt-get update && apt-get install -y \
+    build-essential \
+    curl \
+    git \
+    jq \
+    language-pack-en \
+    make \
+    sudo \
+    unzip \
+    vim \
+    wget \
+    parallel \
+    iputils-ping \
+    tmux
+
+# This ensures the dynamic linker (or NVIDIA's container runtime, I'm not sure)
+# puts the right NVIDIA things in the right place (that THOR requires).
+ENV NVIDIA_DRIVER_CAPABILITIES=graphics,utility,compute
+
+# Install conda. We give anyone in the users group the ability to run
+# conda commands and install packages in the base (default) environment.
+# Things installed into the default environment won't persist, but we prefer
+# convenience in this case and try to make sure the user is aware of this
+# with a message that's printed when the session starts.
+RUN wget https://repo.anaconda.com/miniconda/Miniconda3-py310_23.1.0-1-Linux-x86_64.sh \
+    && echo "32d73e1bc33fda089d7cd9ef4c1be542616bd8e437d1f77afeeaf7afdb019787 Miniconda3-py310_23.1.0-1-Linux-x86_64.sh" \
+        | sha256sum --check \
+    && bash Miniconda3-py310_23.1.0-1-Linux-x86_64.sh -b -p /opt/miniconda3 \
+    && rm Miniconda3-py310_23.1.0-1-Linux-x86_64.sh
+
+ENV PATH=/opt/miniconda3/bin:/opt/miniconda3/condabin:$PATH
+ENV LD_LIBRARY_PATH=/usr/local/cuda/lib:/usr/local/cuda/lib64:$LD_LIBRARY_PATH
+
+# Ensure users can modify their container environment.
+RUN echo '%users ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers
+
 RUN curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | bash
 RUN apt-get -y install git-lfs
 
@@ -23,10 +60,10 @@ RUN pip install -e .
 RUN chmod +x scripts/*
 
 # this is just very slow
-RUN pip install flash-attn==2.5.0 --no-build-isolation
+# RUN pip install flash-attn==2.5.0 --no-build-isolation
 
 # for olmo-instruct v1, weird install requirements
-RUN pip install ai2-olmo
+# RUN pip install ai2-olmo
 
 # for better-pairRM
 RUN pip install jinja2
diff --git a/README.md b/README.md
index 174ee70c..93497c76 100644
--- a/README.md
+++ b/README.md
@@ -234,20 +234,10 @@ Notes: Do not use the character - in image names for beaker,
 
 When updating the `Dockerfile`, make sure to see the instructions at the top to update the base cuda version.
 
-In development, we have the following docker images (most recent first as it's likely what you need).
-TODO: Update it so one image has VLLM (for generative RM only) and one without. Without will load much faster.
+We recently switched to automatic Beaker image-building workflows.
+You can use the automatically built image below, or the most recent image built with the previous Dockerfile:
+- `nathanl/rewardbench_auto`: automatically built image, details [here](https://beaker.org/im/01J60RQ6Y1KGNAD0NEPK01K03T/details).
 - `nathanl/rb_v23`, Jul. 2024: Include support for bfloat16 models from command line
-- `nathanl/rb_v22`, Jul. 2024: Include new [Generalizable Reward Model](https://arxiv.org/abs/2406.10216)
-- `nathanl/rb_v20`: Fixes to DPO handling (minor) + llama 3 not quantized for dpo
-- `nathanl/rb_v18`: Improvements to RewardBench CLI
-- `nathanl/rb_v17` (with VLLM): add support for vllm + llm as a judge, `rb_v16` is similar without prometheus and some OpenAI models
-- `nathanl/rb_v12`: add support for llama3
-- `nathanl/rewardbench_v10`: add support for `mightbe/Better-PairRM` via jinja2
-- `nathanl/rewardbench_v8`: add support for `openbmb/Eurus-RM-7b` and starcoder2
-- `nathanl/rewardbench_v5`: improve saving with DPO script
-- `nathanl/rewardbench_v4`: fix EOS token bug on FastChat models (GH #90)
-- `nathanl/rewardbench_v2`: fix beaver cost model
-- `nathanl/rewardbench_v1`: release version
 
 ## Citation
 Please cite our work with the following:
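As a usage note for the README entry above: pulling the automatically built image follows the same pattern as the old base-image instructions removed from the Dockerfile. A sketch, assuming the `beaker` CLI is configured and has access to the `nathanl` workspace:

```bash
# Pull the auto-built image from Beaker, then confirm it shows up locally.
beaker image pull nathanl/rewardbench_auto
docker image list
```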