visualizing multiple rewards
Khyathi Chandu committed Feb 15, 2024
1 parent 84c0a9b commit 9b41f4e
Showing 1 changed file with 200 additions and 154 deletions.
354 changes: 200 additions & 154 deletions analysis/per_token_reward.py
@@ -16,6 +16,9 @@

import argparse
import logging
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import sys

import torch
@@ -37,7 +40,7 @@ def get_args():
Parse argument strings for model and chat_template
"""
parser = argparse.ArgumentParser()
parser.add_argument("--model", type=str, default="natolambert/gpt2-dummy-rm", help="path to model")
parser.add_argument("--model", type=str, nargs='*', default="natolambert/gpt2-dummy-rm", help="path to model")
parser.add_argument(
"--tokenizer", type=str, default=None, help="path to non-matching tokenizer, requires --direct_load"
)
@@ -53,46 +56,54 @@ def get_args():
raise ValueError("PairRM and SHP require pairwise inputs, not supported")
return args
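
# A minimal, hedged sketch of how the updated multi-model flag parses; the model paths
# below are placeholders, and a hypothetical invocation would look like:
#   python analysis/per_token_reward.py --model org/rm-a org/rm-b
demo_parser = argparse.ArgumentParser()
demo_parser.add_argument("--model", type=str, nargs='*', default=["natolambert/gpt2-dummy-rm"])
assert demo_parser.parse_args(["--model", "org/rm-a", "org/rm-b"]).model == ["org/rm-a", "org/rm-b"]
assert demo_parser.parse_args([]).model == ["natolambert/gpt2-dummy-rm"]
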

def visualize_rewards(models, tokens_list_all, rewards_list_all):
# Plot the per-substring rewards for every model as stacked heatmaps on a shared color scale
all_scores = np.concatenate(rewards_list_all)
vmin = np.min(all_scores)
vmax = np.max(all_scores)
# Create subplots with shared y-axis
num_tokens_lists = len(tokens_list_all)
fig, axs = plt.subplots(nrows=num_tokens_lists, figsize=(10, 2 * num_tokens_lists), sharey=True)
axs = np.atleast_1d(axs)  # plt.subplots returns a bare Axes when only one model is plotted

# Create a single color bar shared by all subplots
cbar_ax = fig.add_axes([0.92, 0.15, 0.02, 0.7])  # [left, bottom, width, height]

for i in range(num_tokens_lists):
sns.heatmap(np.array([rewards_list_all[i]]), cmap='viridis', annot=True, fmt='g',
xticklabels=tokens_list_all[i], yticklabels=False, ax=axs[i], vmin=vmin, vmax=vmax, cbar_ax=cbar_ax)

# Set labels and title
axs[i].set_xlabel(models[i])
axs[i].set_ylabel('Scores')
fig.subplots_adjust(hspace=0.4)
title_string = 'Visualizing sub-string level rewards (note: cumulative score until that token)'
fig.suptitle(title_string, fontsize=10)
plt.savefig("per-token_reward_vis.png")
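
# A hedged usage sketch of the helper above with made-up data; the model names, tokens,
# and scores are placeholders, not outputs of any real reward model:
dummy_models = ["rm-a", "rm-b"]
dummy_tokens = [["Hello", " world"], ["Hello", " world"]]
dummy_rewards = [[0.10, 0.35], [-0.20, 0.05]]
visualize_rewards(dummy_models, dummy_tokens, dummy_rewards)  # writes per-token_reward_vis.png
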


def tokenify_string(string, tokenizer):
# Tokenize the entire text
tokens = tokenizer.tokenize(string)

cumulative_texts = []
non_cumulative_texts = []
# Iterate over each token
for i, _ in enumerate(tokens):
# Record both the cumulative prefix and the single-token string at position i
cumulative_texts.append(tokenizer.convert_tokens_to_string(tokens[: i + 1]))
non_cumulative_texts.append(tokenizer.convert_tokens_to_string([tokens[i]]))

return non_cumulative_texts
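
# A hedged example of the return value; exact token boundaries depend on the tokenizer,
# and the strings below assume a GPT-2-style BPE tokenizer loaded via AutoTokenizer:
#   tokenify_string("Hello world", AutoTokenizer.from_pretrained("gpt2"))
#   -> ['Hello', ' world']   (one decoded string per token, not cumulative prefixes)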

def main():
args = get_args()
quantized = True # only Starling isn't quantized for now
custom_dialogue = False
# some models need custom code to be run
if "oasst" in args.model or "oasst" in args.chat_template:
from herm.models import openassistant # noqa

model_builder = AutoModelForSequenceClassification.from_pretrained
pipeline_builder = pipeline
elif "Starling" in args.model or "Starling" in args.chat_template:
from herm.models.starling import StarlingPipeline, build_starling_rm

model_builder = build_starling_rm
pipeline_builder = StarlingPipeline
quantized = False
elif "openbmb" in args.model or "openbmb" in args.chat_template:
from herm.models.openbmb import LlamaRewardModel, OpenBMBPipeline

model_builder = LlamaRewardModel.from_pretrained
pipeline_builder = OpenBMBPipeline
elif "PairRM" in args.model or "PairRM" in args.chat_template:
from herm.models.pairrm import DebertaV2PairRM, PairRMPipeline

custom_dialogue = True
model_builder = DebertaV2PairRM.from_pretrained
pipeline_builder = PairRMPipeline
elif "SHP" in args.model or "SHP" in args.chat_template:
from herm.models.shp import SHPPipeline

custom_dialogue = True
model_builder = T5ForConditionalGeneration.from_pretrained
pipeline_builder = SHPPipeline
else:
model_builder = AutoModelForSequenceClassification.from_pretrained
pipeline_builder = pipeline

if custom_dialogue:
raise ValueError("Custom dialogue formatting not yet supported in this script")

models = args.model
# hard-coded model list used for this analysis (overrides --model for now)
models = ["OpenAssistant/reward-model-deberta-v3-large-v2", "PKU-Alignment/beaver-7b-v1.0-cost", "IDEA-CCNL/Ziya-LLaMA-7B-Reward"]

###############
# Setup logging
@@ -112,125 +123,160 @@ def main():
transformers.utils.logging.enable_default_handler()
transformers.utils.logging.enable_explicit_format()

logger.info(f"Running reward model on {args.model} with chat template {args.chat_template}")

############################
# Load reward model pipeline
############################
tokenizer_path = args.tokenizer if args.tokenizer else args.model
tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)

BATCH_SIZE = args.batch_size
logger.info("*** Load reward model ***")
reward_pipeline_kwargs = {
"batch_size": BATCH_SIZE, # eval_args.inference_batch_size,
"truncation": True,
"padding": True,
"max_length": 2048,
"function_to_apply": "none", # Compute raw logits
"return_token_type_ids": False,
}
if quantized:
model_kwargs = {
"load_in_8bit": True,
"device_map": {"": current_device},
"torch_dtype": torch.float16 if torch.cuda.is_available() else None,
}
else:
model_kwargs = {"device_map": {"": current_device}}
# TODO remove direct load logic
# if pipeline_builder is pipeline, use built in pipeline, else custom
if not pipeline == pipeline_builder:
model = model_builder(args.model, **model_kwargs)
tokens_list_all, rewards_list_all = list(), list()

for model in models:
# some models need custom code to be run
if "oasst" in model or "oasst" in args.chat_template:
from herm.models import openassistant # noqa

model_builder = AutoModelForSequenceClassification.from_pretrained
pipeline_builder = pipeline
elif "Starling" in model or "Starling" in args.chat_template:
from herm.models.starling import StarlingPipeline, build_starling_rm

model_builder = build_starling_rm
pipeline_builder = StarlingPipeline
quantized = False
elif "openbmb" in model or "openbmb" in args.chat_template:
from herm.models.openbmb import LlamaRewardModel, OpenBMBPipeline

model_builder = LlamaRewardModel.from_pretrained
pipeline_builder = OpenBMBPipeline
elif "PairRM" in model or "PairRM" in args.chat_template:
from herm.models.pairrm import DebertaV2PairRM, PairRMPipeline

custom_dialogue = True
model_builder = DebertaV2PairRM.from_pretrained
pipeline_builder = PairRMPipeline
elif "SHP" in model or "SHP" in args.chat_template:
from herm.models.shp import SHPPipeline

custom_dialogue = True
model_builder = T5ForConditionalGeneration.from_pretrained
pipeline_builder = SHPPipeline
else:
model_builder = AutoModelForSequenceClassification.from_pretrained
pipeline_builder = pipeline

if custom_dialogue:
raise ValueError("Custom dialogue formatting not yet supported in this script")

logger.info(f"Running reward model on {model} with chat template {args.chat_template}")

############################
# Load reward model pipeline
############################
tokenizer_path = args.tokenizer if args.tokenizer else model
tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)
reward_pipe = pipeline_builder(
"text-classification",
model=model,
tokenizer=tokenizer,
)
else:
reward_pipe = pipeline(
"text-classification",
model=args.model,
tokenizer=tokenizer,
revision="main",
model_kwargs=model_kwargs,
)

############################
# Tokenization settings & dataset preparation
############################
# set pad token to eos token if not set
if reward_pipe.tokenizer.pad_token_id is None:
reward_pipe.model.config.pad_token_id = reward_pipe.tokenizer.eos_token_id
reward_pipe.tokenizer.pad_token_id = reward_pipe.tokenizer.eos_token_id

def tokenify_string(string, tokenizer):
# Tokenize the entire text
tokens = tokenizer.tokenize(string)

cumulative_texts = []
# Iterate over each token
for i, _ in enumerate(tokens):
# Append the current cumulative text to the list
cumulative_texts.append(tokenizer.convert_tokens_to_string(tokens[: i + 1]))

return cumulative_texts

substrings = tokenify_string(args.text, tokenizer)
# create dataset from list of strings substrings with huggingface
dataset = [{"text": substring} for substring in substrings]
dataset = Dataset.from_list(dataset)

############################
# Run inference [1/2]" built in transformers
############################
# if using HF pipeline, can pass entire dataset and get results
# first, handle custom pipelines that we must batch normally
if not pipeline_builder == pipeline:
logger.info("*** Running forward pass via built in pipeline abstraction ***")
# this setup can be optimized slightly with one pipeline call
# prepare for inference
reward_pipe = accelerator.prepare(reward_pipe)

rewards = reward_pipe(dataset["text"], **reward_pipeline_kwargs)

############################
# Run inference [2/2] custom pipelines
############################
else:
logger.info("*** Running dataloader to collect results ***")

dataloader = torch.utils.data.DataLoader(
dataset,
batch_size=BATCH_SIZE,
collate_fn=None,
shuffle=False,
drop_last=False,
)

dataloader, model = accelerator.prepare(dataloader, reward_pipe.model)
reward_pipe.model = model

results = []
for step, batch in enumerate(tqdm(dataloader, desc="RM batch steps")):
logger.info(f"RM inference step {step}/{len(dataloader)}")
rewards = reward_pipe(batch["text"], **reward_pipeline_kwargs)

# for each item in batch, record 1 if chosen > rejected
# extra score from dict within batched results (e.g. logits)
# [{'label': 'LABEL_1', 'score': 0.6826171875},... ]
if isinstance(rewards[0], dict):
scores = [result["score"] for result in rewards]
# for classes that directly output scores (custom code)
else:
scores = rewards.cpu().numpy().tolist()

results.extend(scores)

# print the results
for i, substring in enumerate(substrings):
print(f"Reward: {round(results[i], 3)} | Substring: {substring}")

BATCH_SIZE = args.batch_size
logger.info("*** Load reward model ***")
reward_pipeline_kwargs = {
"batch_size": BATCH_SIZE, # eval_args.inference_batch_size,
"truncation": True,
"padding": True,
"max_length": 2048,
"function_to_apply": "none", # Compute raw logits
"return_token_type_ids": False,
}
if quantized:
model_kwargs = {
"load_in_8bit": True,
"device_map": {"": current_device},
"torch_dtype": torch.float16 if torch.cuda.is_available() else None,
}
else:
model_kwargs = {"device_map": {"": current_device}}
# TODO remove direct load logic
# if pipeline_builder is pipeline, use built in pipeline, else custom
if pipeline_builder is not pipeline:
model = model_builder(model, **model_kwargs)
tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)
reward_pipe = pipeline_builder(
"text-classification",
model=model,
tokenizer=tokenizer,
)
else:
reward_pipe = pipeline(
"text-classification",
model=model,
tokenizer=tokenizer,
revision="main",
model_kwargs=model_kwargs,
)

############################
# Tokenization settings & dataset preparation
############################
# set pad token to eos token if not set
if reward_pipe.tokenizer.pad_token_id is None:
reward_pipe.model.config.pad_token_id = reward_pipe.tokenizer.eos_token_id
reward_pipe.tokenizer.pad_token_id = reward_pipe.tokenizer.eos_token_id


substrings = tokenify_string(args.text, tokenizer)
# create dataset from list of strings substrings with huggingface
dataset = [{"text": substring} for substring in substrings]
dataset = Dataset.from_list(dataset)
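# illustrative example (hypothetical two-token prompt): the dataset then holds one row
# per substring, e.g. {"text": "Hello"} and {"text": " world"}, each scored below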

############################
# Run inference [1/2]: built-in transformers pipeline
############################
# if using HF pipeline, can pass entire dataset and get results
# first, handle custom pipelines that we must batch normally
if pipeline_builder is not pipeline:
logger.info("*** Running forward pass via built in pipeline abstraction ***")
# this setup can be optimized slightly with one pipeline call
# prepare for inference
reward_pipe = accelerator.prepare(reward_pipe)

rewards = reward_pipe(dataset["text"], **reward_pipeline_kwargs)

############################
# Run inference [2/2] custom pipelines
############################
else:
logger.info("*** Running dataloader to collect results ***")

dataloader = torch.utils.data.DataLoader(
dataset,
batch_size=BATCH_SIZE,
collate_fn=None,
shuffle=False,
drop_last=False,
)

dataloader, model = accelerator.prepare(dataloader, reward_pipe.model)
reward_pipe.model = model

results = list()
for step, batch in enumerate(tqdm(dataloader, desc="RM batch steps")):
logger.info(f"RM inference step {step}/{len(dataloader)}")
rewards = reward_pipe(batch["text"], **reward_pipeline_kwargs)

# extract the score from each dict in the batched pipeline results (raw logits),
# e.g. [{'label': 'LABEL_1', 'score': 0.6826171875}, ...]
if isinstance(rewards[0], dict):
scores = [result["score"] for result in rewards]
# for classes that directly output scores (custom code)
else:
scores = rewards.cpu().numpy().tolist()

results.extend(scores)

# print the per-substring rewards and collect them for this model
tokens_per_model, rewards_per_model = list(), list()
for i, substring in enumerate(substrings):
reward_so_far = round(results[i], 3)
print(f"Reward: {reward_so_far} | Substring: {substring}")
tokens_per_model.append(substring)
rewards_per_model.append(reward_so_far)
tokens_list_all.append(tokens_per_model)
rewards_list_all.append(rewards_per_model)
visualize_rewards(models, tokens_list_all, rewards_list_all)


if __name__ == "__main__":
