diff --git a/README.md b/README.md
index 81a9c976..88ade192 100644
--- a/README.md
+++ b/README.md
@@ -47,6 +47,7 @@ python scripts/run_rm.py --model=berkeley-nest/Starling-RM-7B-alpha --tokenizer=
 python scripts/run_rm.py --model=stanfordnlp/SteamSHP-flan-t5-xl --direct_load --batch_size=32
 python scripts/run_rm.py --model=PKU-Alignment/beaver-7b-v1.0-reward --chat_template=pku-align --direct_load --batch_size=16
 python scripts/run_rm.py --model=PKU-Alignment/beaver-7b-v1.0-cost --chat_template=pku-align --direct_load --batch_size=16
+python scripts/run_rm.py --model=IDEA-CCNL/Ziya-LLaMA-7B-Reward --batch_size=32 --direct_load --trust_remote_code --chat_template=Ziya # custom code causing cuda issues
 ```

 And for DPO:
diff --git a/herm/chattemplates.py b/herm/chattemplates.py
new file mode 100644
index 00000000..12b71b2d
--- /dev/null
+++ b/herm/chattemplates.py
@@ -0,0 +1,15 @@
+# Copyright 2023 AllenAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# Added chat templates for models (when they have examples)
+# TODO add as needed
diff --git a/herm/models/beaver.py b/herm/models/beaver.py
index 0b15daa5..bc224740 100644
--- a/herm/models/beaver.py
+++ b/herm/models/beaver.py
@@ -491,4 +491,4 @@ def __call__(self, samples, **kwargs):
         ).to("cuda")
         with torch.no_grad():
             outputs = self.model(**inputs)
-        return outputs.scores
+        return outputs.end_scores
diff --git a/herm/models/ziya.py b/herm/models/ziya.py
new file mode 100644
index 00000000..8226ede0
--- /dev/null
+++ b/herm/models/ziya.py
@@ -0,0 +1,55 @@
+# Copyright 2023 AllenAI. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch
+from fastchat.conversation import Conversation, SeparatorStyle, register_conv_template
+
+# e.g. https://huggingface.co/IDEA-CCNL/Ziya-LLaMA-7B-Reward#usage
+# prefix_user = "Human:"
+# prefix_bot = "\n\nAssistant:"
+# query = "列举一种空气污染。"
+# response = "一种常见的空气污染源是化石燃料的燃烧产生的尾气排放,包括来自汽车、卡车、飞机、
+# 火车和工业厂房的废气排放。这会导致大气中的二氧化硫、氮氧化物、一氧化碳、臭氧和颗粒物(例如灰尘和烟雾)等污染物含量增加,对人类健康和环境造成不利影响。"
+register_conv_template(
+    Conversation(
+        name="Ziya",
+        roles=("Human", "Assistant"),
+        sep_style=SeparatorStyle.ADD_COLON_SPACE_SINGLE,
+        sep="\n\n",
+    )
+)
+
+
+# pipeline because custom model returns reward directly compared to other models
+class ZiyaPipeline:
+    def __init__(self, task, model, tokenizer):
+        self.task = task
+        self.model = model.eval().half().cuda()
+        self.tokenizer = tokenizer
+
+    def __call__(self, query, **kwargs):
+        _ = kwargs.get("batch_size", 1)
+        truncation = kwargs.get("truncation", True)
+        padding = kwargs.get("padding", True)
+        max_length = kwargs.get("max_length", 2048)
+        inputs = self.tokenizer(
+            query,
+            truncation=truncation,
+            max_length=max_length,
+            padding=padding,
+            return_tensors="pt",
+        ).to("cuda")
+        with torch.no_grad():
+            reward = self.model(**inputs)
+        return reward
diff --git a/scripts/run_rm.py b/scripts/run_rm.py
index 187ccdc9..665e0109 100644
--- a/scripts/run_rm.py
+++ b/scripts/run_rm.py
@@ -54,7 +54,12 @@ def get_args():
         "--tokenizer", type=str, default=None, help="path to non-matching tokenizer, requires --direct_load"
     )
     parser.add_argument("--chat_template", type=str, default="tulu", help="path to chat template")
-    parser.add_argument("--direct_load", action="store_true", help="directly load model instead of pipeline")
+    parser.add_argument(
+        "--direct_load", action="store_true", default=False, help="directly load model instead of pipeline"
+    )
+    parser.add_argument(
+        "--trust_remote_code", action="store_true", default=False, help="trust remote code when loading the model"
+    )
     parser.add_argument("--do_not_save", action="store_true", help="do not save results to hub (for debugging)")
     parser.add_argument("--batch_size", type=int, default=64, help="batch size for inference")
     parser.add_argument(
@@ -102,10 +107,18 @@ def main():
         model_builder = LlamaForScore.from_pretrained
         pipeline_builder = BeaverPipeline

+    elif "Ziya" in args.model or "Ziya" in args.chat_template:
+        from herm.models.ziya import ZiyaPipeline
+
+        model_builder = AutoModelForSequenceClassification.from_pretrained
+        pipeline_builder = ZiyaPipeline
+        quantized = False  # handled by .half() in the custom pipeline, as in model card
     else:
         model_builder = AutoModelForSequenceClassification.from_pretrained
         pipeline_builder = pipeline

+    trust_remote_code = args.trust_remote_code
+
     ###############
     # Setup logging
     ###############
@@ -168,8 +181,8 @@ def main():
     model_kwargs = {"device_map": {"": current_device}}
     # TODO remove direct load logic
     # if pipeline_builder is pipeline, use built in pipeline, else custom
-    if args.direct_load:
-        model = model_builder(args.model, **model_kwargs)
+    if args.direct_load or not pipeline_builder == pipeline:
+        model = model_builder(args.model, **model_kwargs, trust_remote_code=trust_remote_code)
         tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)
         reward_pipe = pipeline_builder(
             "text-classification",
@@ -183,6 +196,7 @@
             tokenizer=tokenizer,
             revision="main",
             model_kwargs=model_kwargs,
+            trust_remote_code=trust_remote_code,
         )

     ############################
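A rough usage sketch of the `ZiyaPipeline` added above (illustrative only, not part of the diff), following the model card linked in `herm/models/ziya.py`; the tokenizer call and the example prompt are assumptions, and the README note above flags that the model's custom remote code can hit CUDA issues:

```python
from transformers import AutoModelForSequenceClassification, AutoTokenizer

from herm.models.ziya import ZiyaPipeline

model_name = "IDEA-CCNL/Ziya-LLaMA-7B-Reward"
# trust_remote_code=True is needed because the reward head lives in the model repo's custom code
model = AutoModelForSequenceClassification.from_pretrained(model_name, trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# the pipeline itself moves the model to eval/half/cuda in __init__
reward_pipe = ZiyaPipeline("text-classification", model, tokenizer)

# prompt formatted as in the model card: "Human: <query>\n\nAssistant: <response>"
text = "Human: 列举一种空气污染。\n\nAssistant: 一种常见的空气污染源是化石燃料的燃烧产生的尾气排放。"
reward = reward_pipe(text)  # returns the raw reward output from the custom model
print(reward)
```

This mirrors what `run_rm.py` builds when `--trust_remote_code --chat_template=Ziya` is passed, with the registered Ziya conversation template and batching handled on top.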