Commit
Merge branch 'main' into bump_cuda
natolambert committed Sep 26, 2024
2 parents 11b0af8 + 011dfbd commit b96085a
Showing 4 changed files with 79 additions and 2 deletions.
Dockerfile (2 additions, 1 deletion)
@@ -48,12 +48,13 @@ RUN curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.d
 RUN apt-get -y install git-lfs

 WORKDIR /stage/
+ENV HF_HUB_ENABLE_HF_TRANSFER=1

 RUN pip install --upgrade pip setuptools wheel
 # designed for cuda 12.1
 RUN pip3 install torch torchvision torchaudio
 # If you need to use cuda 11.8, use this and the below vllm code for installing with cuda 11.8
-# RUN pip install torch==2.3.1 torchvision==0.18.1 torchaudio==2.3.1 --index-url https://download.pytorch.org/whl/cu118
+# RUN pip install torch==2.4.0 torchvision==0.19.0 torchaudio==2.4.0 --index-url https://download.pytorch.org/whl/cu118
 # Install vLLM with CUDA 11.8.
 # RUN export VLLM_VERSION=0.6.1.post1
 # RUN export PYTHON_VERSION=310
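The new ENV line pairs with the hf_transfer dependency added to setup.py at the bottom of this diff: HF_HUB_ENABLE_HF_TRANSFER=1 only takes effect when the hf_transfer package is installed, in which case huggingface_hub downloads through its Rust-based accelerated backend. A minimal sketch of the same setup outside Docker (the target repo is illustrative):

import os

# Must be set before huggingface_hub is imported; the Dockerfile's ENV line
# does this for every process in the container.
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"

from huggingface_hub import snapshot_download

# With hf_transfer installed, large files download via the accelerated backend.
snapshot_download(repo_id="allenai/reward-bench", repo_type="dataset")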
rewardbench/rewardbench.py (62 additions, 0 deletions)
@@ -27,6 +27,7 @@
 import wandb
 from accelerate import Accelerator
 from accelerate.logging import get_logger
+from huggingface_hub import EvalResult, ModelCard, ModelCardData
 from tqdm import tqdm
 from transformers import AutoTokenizer, HfArgumentParser

@@ -61,6 +62,8 @@ class Args:
     # wandb args
     wandb_run: Optional[str] = None
     """The wandb run to extract model and revision from."""
+    upload_metadata_to_hf: bool = False
+    """Upload metadata to Hugging Face Hub."""

     # inference args
     batch_size: int = 8
@@ -424,6 +427,65 @@ def actual_main(args: Args):
         for chosen, rejected in zip(scores_chosen, scores_rejected):
             f.write(json.dumps({"chosen": chosen, "rejected": rejected}) + "\n")

+    ############################
+    # Upload metadata to Hugging Face Hub
+    ############################
+    if args.upload_metadata_to_hf:
+        logger.info("*** Uploading metadata to Hugging Face Hub ***")
+        try:
+            # Initialize ModelCardData with basic metadata
+            card_data = ModelCardData(
+                language="en",
+                model_name=args.model,
+                eval_results=[
+                    EvalResult(
+                        task_type="preference_evaluation",
+                        dataset_type=args.dataset,
+                        dataset_name=args.dataset.split("/")[-1],  # Assuming dataset ID is like 'owner/dataset'
+                        metric_type="accuracy",
+                        metric_value=accuracy,
+                    )
+                ],
+            )
+
+            # If there are extra results (per subset), add them as separate EvalResults
+            if args.dataset == "allenai/reward-bench" and results_grouped:
+                for section, section_accuracy in results_section.items():
+                    print(f"Adding section {section} with accuracy {section_accuracy}")
+                    section_eval = EvalResult(
+                        task_type="preference_evaluation",
+                        dataset_type=section.replace(" ", "_"),
+                        dataset_name=section,
+                        metric_type="accuracy",
+                        metric_value=section_accuracy,
+                    )
+                    card_data.eval_results.append(section_eval)
+
+                for subset, subset_accuracy in results_grouped.items():
+                    print(f"Adding subset {subset} with accuracy {subset_accuracy}")
+                    subset_eval = EvalResult(
+                        task_type="preference_evaluation",
+                        dataset_type=subset,
+                        dataset_name=subset,
+                        metric_type="accuracy",
+                        metric_value=subset_accuracy,
+                    )
+                    card_data.eval_results.append(subset_eval)
+
+            # Create a ModelCard
+            card = ModelCard.from_template(
+                card_data,
+                model_id=args.model,
+            )
+
+            # Push the updated ModelCard to the Hugging Face Hub
+            card.push_to_hub(
+                args.model, revision=args.revision, commit_message="Update evaluation results via RewardBench"
+            )
+            logger.info(f"Successfully pushed updated ModelCard to Hugging Face Hub for {args.model}")
+        except Exception as e:
+            logger.error(f"Failed to upload metadata to Hugging Face Hub: {e}")
+

 if __name__ == "__main__":
     main()
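For context on the block above: huggingface_hub serializes each EvalResult into the model-index section of the model card's YAML front matter, which drives the evaluation-results widget on the Hub. A minimal standalone sketch of that flow (model id and accuracy are illustrative, not results from this commit):

from huggingface_hub import EvalResult, ModelCard, ModelCardData

card_data = ModelCardData(
    model_name="my-org/my-reward-model",  # hypothetical model id
    eval_results=[
        EvalResult(
            task_type="preference_evaluation",
            dataset_type="allenai/reward-bench",
            dataset_name="reward-bench",
            metric_type="accuracy",
            metric_value=0.87,  # illustrative value
        )
    ],
)
card = ModelCard.from_template(card_data, model_id="my-org/my-reward-model")
print(card.data.to_dict())  # the model-index structure written into the card

Note the try/except in the committed code makes the upload best-effort: a failed push is logged rather than aborting an otherwise successful evaluation run.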
scripts/run_generative.py (14 additions, 1 deletion)
@@ -70,6 +70,8 @@ def get_args():
         "--trust_remote_code", action="store_true", default=False, help="directly load model instead of pipeline"
     )
     parser.add_argument("--num_gpus", type=int, default=1, help="number of gpus to use, for multi-node vllm")
+    parser.add_argument("--vllm_gpu_util", type=float, default=0.9, help="gpu utilization for vllm")
+    parser.add_argument("--vllm_max_seq_length", type=int, default=None, help="max sequence length for vllm")
     parser.add_argument("--do_not_save", action="store_true", help="do not save results to hub (for debugging)")
     parser.add_argument(
         "--pref_sets", action="store_true", help="run on common preference sets instead of our custom eval set"
@@ -124,8 +126,19 @@ def main():

     # if model isn't API, load via vllm
     if not is_api_models:
+        # if multi gpu, set multiproc method to spawn
+        if args.num_gpus > 1:
+            # Set the environment variable
+            os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
+
         # load model
-        model = LLM(args.model, trust_remote_code=args.trust_remote_code, tensor_parallel_size=args.num_gpus)
+        model = LLM(
+            args.model,
+            trust_remote_code=args.trust_remote_code,
+            tensor_parallel_size=args.num_gpus,
+            gpu_memory_utilization=args.vllm_gpu_util,
+            max_model_len=args.vllm_max_seq_length,
+        )
         tokenizer = AutoTokenizer.from_pretrained(args.model)
         if ("Llama-3" in args.model or "llama3-8b" in args.model) and "3.1" not in args.model:
             stop_token_ids = [128009]
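Taken together, these changes let multi-GPU vLLM runs tune memory headroom and context length from the CLI. A hypothetical invocation (model id and values are illustrative):

python scripts/run_generative.py \
    --model my-org/my-generative-judge \
    --num_gpus 2 \
    --vllm_gpu_util 0.85 \
    --vllm_max_seq_length 4096

With --num_gpus 2, the script now exports VLLM_WORKER_MULTIPROC_METHOD=spawn before constructing the LLM, sidestepping CUDA initialization problems in forked vLLM workers.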
setup.py (1 addition, 0 deletions)
@@ -49,6 +49,7 @@
     "flake8>=6.0",
     "fschat",
     "huggingface_hub",
+    "hf_transfer",
     "isort>=5.12.0",
     "pandas",
     "peft",
