From ce3f1465fa6fb87e7641fcc4a8590488bf60d1ee Mon Sep 17 00:00:00 2001
From: ljvmiranda921 <ljvmiranda@gmail.com>
Date: Tue, 7 Jan 2025 15:02:10 -0800
Subject: [PATCH] Add Dockerfile and convert script for llama31

---
 evals/convert.Dockerfile             |  3 ++-
 evals/convert_to_hf.py               |  7 ++++-
 evals/templates/template-llama31.yml | 39 ++++++++++++++++++++++++++++
 3 files changed, 47 insertions(+), 2 deletions(-)
 create mode 100644 evals/templates/template-llama31.yml

diff --git a/evals/convert.Dockerfile b/evals/convert.Dockerfile
index 4b7dfd8..04e2751 100644
--- a/evals/convert.Dockerfile
+++ b/evals/convert.Dockerfile
@@ -54,7 +54,8 @@ ENV PATH="/usr/local/google-cloud-sdk/bin:${PATH}"
 
 # Clone EasyLM repository
 RUN git clone https://github.com/hamishivi/EasyLM.git  . && \
-    git checkout bc241782b67bbe926e148ec9d2046d76b7ba58c8
+    # Pin bumped for the llama31 conversion; previous pin: bc241782b67bbe926e148ec9d2046d76b7ba58c8
+    git checkout dbf2212c1775b2762f7108d62c8c8b01b52ea4aa
 
 COPY ai2-allennlp-79f5e3a8e95a.json /root/.config/gcloud/application_default_credentials.json
 # Set environment variable for Google Cloud
diff --git a/evals/convert_to_hf.py b/evals/convert_to_hf.py
index a3a5c54..a892eca 100644
--- a/evals/convert_to_hf.py
+++ b/evals/convert_to_hf.py
@@ -125,7 +125,12 @@ def main():
         )
         pytorch_dir = Path(args.pytorch_dir)
         for params_path in params_paths:
-            experiment_name = params_path.parent.stem.split("--")[0]
+            if "llama" in str(params_path):
+                experiment_name = (  # .name: last path component only, dots kept until replace
+                    params_path.parent.name.split("--")[0].replace(".", "-")
+                )
+            else:
+                experiment_name = params_path.parent.stem.split("--")[0]
             if args.prefix:
                 experiment_name = f"{args.prefix}-{experiment_name}"
             output_dir = pytorch_dir / experiment_name
diff --git a/evals/templates/template-llama31.yml b/evals/templates/template-llama31.yml
new file mode 100644
index 0000000..530a2e4
--- /dev/null
+++ b/evals/templates/template-llama31.yml
@@ -0,0 +1,39 @@
+version: v2
+budget: ai2/oe-adapt
+description: "Convert model to pytorch and launch a rewardbench eval job"
+tasks:
+  - name: template  # placeholder name; presumably overridden per experiment (TODO confirm)
+    image:
+      beaker: ljm/easylm-convert-llama  # presumably built from evals/convert.Dockerfile (TODO confirm)
+    command: ["python", "convert_to_hf.py"]  # entry point; the arguments list below holds its CLI flags
+    arguments:
+      - --gcs_bucket
+      - ljm-dev
+      - --batch_size
+      - 1
+      - --tokenizer_path
+      - meta-llama/Llama-3.1-8B
+      - --model_size
+      - 8b31  # presumably the Llama 3.1 8B size key understood by EasyLM (TODO confirm)
+    result:
+      path: /output
+    resources:
+      gpuCount: 1
+    context:
+      priority: normal
+      preemptible: true
+    constraints:
+      cluster:
+        - ai2/allennlp-cirrascale
+        - ai2/jupiter-cirrascale-2
+    envVars:
+      - name: OPENAI_API_KEY  # secret-backed: the value comes from the named secret, not this file
+        secret: OPENAI_API_KEY
+      - name: GOOGLE_SERVICE_ACCOUNT
+        secret: GOOGLE_SERVICE_ACCOUNT
+      - name: BEAKER_TOKEN
+        secret: BEAKER_TOKEN
+      - name: TOKENIZERS_PARALLELISM
+        value: "false"  # quoted so YAML keeps it a string rather than a boolean
+      - name: HF_TOKEN  # NOTE(review): likely needed to fetch the meta-llama tokenizer; confirm
+        secret: HF_TOKEN