allenai · natolambert · Jun 9, 2024 · Jun 8, 2024 · Jun 8, 2024 · Jun 8, 2024
diff --git a/Dockerfile b/Dockerfile
@@ -3,7 +3,7 @@
 # To get the latest id, run `beaker image pull ai2/cuda11.8-cudnn8-dev-ubuntu20.04` 
 # and then `docker image list`, to verify docker image is pulled
 # e.g. `Image is up to date for gcr.io/ai2-beaker-core/public/cncl3kcetc4q9nvqumrg:latest`
-FROM gcr.io/ai2-beaker-core/public/cp3ript9a0gcrm4lmha0:latest
+FROM gcr.io/ai2-beaker-core/public/cph14t4n343pipine0i0:latest
 
 RUN apt update && apt install -y openjdk-8-jre-headless
 

diff --git a/README.md b/README.md
@@ -9,6 +9,14 @@
 </p>
   <img width="1280" alt="Github RewardBench Logo" src="https://github.com/allenai/reward-bench/assets/10695622/39b213ba-9971-4338-b5f9-8e042d22d8fc" style="margin-left:'auto' margin-right:'auto' display:'block' "/>
 </div>
+<p align="center">
+  <a href="https://github.com/allenai/reward-bench/blob/main/LICENSE">
+    <img alt="GitHub License" src="https://img.shields.io/github/license/allenai/reward-bench">
+  </a>
+  <a href="https://pypi.org/project/rewardbench/">
+    <img alt="PyPI" src="https://img.shields.io/pypi/v/rewardbench">
+  </a>
+</p>
 
 
 ---
@@ -228,6 +236,7 @@ When updating the `Dockerfile`, make sure to see the instructions at the top to
 
 In development, we have the following docker images (most recent first as it's likely what you need).
 TODO: Update it so one image has VLLM (for generative RM only) and one without. Without will load much faster.
+- `nathanl/rb_v18`: Improvements to RewardBench CLI
 - `nathanl/rb_v17` (with VLLM): add support for vllm + llm as a judge, `rb_v16` is similar without prometheus and some OpenAI models
 - `nathanl/rb_v12`: add support for llama3
 - `nathanl/rewardbench_v10`: add support for `mightbe/Better-PairRM` via jinja2

diff --git a/rewardbench/__init__.py b/rewardbench/__init__.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-__version__ = "0.1.2"
+__version__ = "0.1.3"
 from .chattemplates import *  # noqa
 from .dpo import DPOInference
 from .models import DPO_MODEL_CONFIG, REWARD_MODEL_CONFIG

diff --git a/rewardbench/models/__init__.py b/rewardbench/models/__init__.py
@@ -125,6 +125,20 @@
         "custom_dialogue": False,
         "model_type": "Seq. Classifier",
     },
+    "PKU-Alignment/beaver-7b-v2.0-reward": {
+        "model_builder": LlamaForScore.from_pretrained,
+        "pipeline_builder": BeaverPipeline,
+        "quantized": True,
+        "custom_dialogue": False,
+        "model_type": "Seq. Classifier",
+    },
+    "PKU-Alignment/beaver-7b-v2.0-cost": {
+        "model_builder": LlamaForScore.from_pretrained,
+        "pipeline_builder": BeaverCostPipeline,
+        "quantized": True,
+        "custom_dialogue": False,
+        "model_type": "Seq. Classifier",
+    },
     "RLHFlow/pair-preference-model-LLaMA3-8B": {
         "model_builder": AutoModelForCausalLM.from_pretrained,
         "pipeline_builder": SlicPairPMPipeline,

diff --git a/scripts/submit_eval_jobs.py b/scripts/submit_eval_jobs.py
@@ -28,7 +28,7 @@
     "--eval_on_pref_sets", action="store_true", default=False, help="Evaluate on preference sets rather than core set"
 )
 argparser.add_argument("--eval_on_bon", action="store_true", default=False, help="Evaluate on BON preference sets")
-argparser.add_argument("--image", type=str, default="nathanl/rb_v16", help="Beaker image to use")
+argparser.add_argument("--image", type=str, default="nathanl/rb_v18", help="Beaker image to use")
 argparser.add_argument("--cluster", type=str, default="ai2/allennlp-cirrascale", help="Beaker cluster to use")
 argparser.add_argument("--priority", type=str, default="normal", help="Priority of the job")
 argparser.add_argument("--upload_to_hub", action="store_false", default=True, help="Upload to results to HF hub")

diff --git a/setup.py b/setup.py
@@ -19,7 +19,7 @@
 # this has not yet been pushed to pypyi-test
 setup(
     name="rewardbench",
-    version="0.1.2",
+    version="0.1.3",  # hard-coded; keep in sync with __version__ in rewardbench/__init__.py (importing it from there causes a build error)
     author="Nathan Lambert",
     author_email="[email protected]",
     description="Tools for evaluating reward models",