diff --git a/README.md b/README.md
index 93497c7..6e43e25 100644
--- a/README.md
+++ b/README.md
@@ -30,7 +30,6 @@ The repository includes the following:
 The two primary scripts to generate results (more in `scripts/`):
 1. `scripts/run_rm.py`: Run evaluations for reward models.
 2. `scripts/run_dpo.py`: Run evaluations for direct preference optimization (DPO) models (and other models using implicit rewards, such as KTO).
-3. `scripts/train_rm.py`: A basic RM training script built on [TRL](https://github.com/huggingface/trl).
 
 ## Quick Usage
 RewardBench let's you quickly evaluate any reward model on any preference set. 
@@ -81,6 +80,10 @@ Add the following to your `.bashrc`:
 export HF_TOKEN="{your_token}"
 ```
 
+## Training
+
+Training scripts are no longer included in this repository. To train reward models, we recommend using [`open-instruct`](https://github.com/allenai/open-instruct).
+
 ## Contribute Your Model
 
 For now, in order to contribute your model to the leaderboard, open an issue with the model name on HuggingFace (you can still evaluate local models with RewardBench, see below).
@@ -208,7 +211,7 @@ print(scores_per_section)
 ├── rewardbench/                <- Core utils and modeling files
 |   ├── models/                     ├── Standalone files for running existing reward models
 |   └── *.py                        └── RewardBench tools and utilities
-├── scripts/                    <- Scripts and configs to train and evaluate reward models
+├── scripts/                    <- Scripts and configs to evaluate reward models
 ├── tests                       <- Unit tests
 ├── Dockerfile                  <- Build file for reproducible and scaleable research at AI2
 ├── LICENSE
diff --git a/rewardbench.pdf b/rewardbench.pdf
deleted file mode 100644
index 0d6d241..0000000
Binary files a/rewardbench.pdf and /dev/null differ
diff --git a/scripts/configs/README.md b/scripts/configs/README.md
index 67780ea..6e987de 100644
--- a/scripts/configs/README.md
+++ b/scripts/configs/README.md
@@ -3,4 +3,3 @@
 The following configs are supported:
 1. `beaker_eval.yaml`: Config for internal AI tooling to correctly setup compute environment.
 2. `eval_configs.yaml`: Configs for models to reproduce results on `run_rm.py`/`run_dpo.py`.
-3. [in progress] `training_configs.yaml`: Configs for training reward models.
\ No newline at end of file
diff --git a/scripts/configs/beaker_train.yaml b/scripts/configs/beaker_train.yaml
deleted file mode 100644
index d2bdb42..0000000
--- a/scripts/configs/beaker_train.yaml
+++ /dev/null
@@ -1,35 +0,0 @@
-version: v2
-description: herm-train
-budget: ai2/allennlp
-tasks:
-  - name: herm-train
-    image:
-      beaker: <placeholder>
-    command: [
-      '/bin/sh', '-c'
-    ]
-    arguments: ['SCRIPT_HERE']
-    envVars:
-      - name: CUDA_DEVICE_ORDER
-        value: PCI_BUS_ID
-      - name: TRANSFORMERS_CACHE
-        value: ./cache/
-      - name: WANDB_PROJECT
-        value: open-instruct
-      - name: WANDB_WATCH
-        value: false
-      - name: WANDB_LOG_MODEL
-        value: false
-      - name: WANDB_DISABLED
-        value: true
-    datasets:
-      - mountPath: /net/nfs.cirrascale
-        source:
-          hostPath: /net/nfs.cirrascale
-    result:
-      path: /output
-    resources:
-      gpuCount: 4
-    context:
-      cluster: ai2/allennlp-cirrascale
-      priority: high
\ No newline at end of file
diff --git a/scripts/configs/stage3_no_offloading.conf b/scripts/configs/stage3_no_offloading.conf
deleted file mode 100644
index 532669b..0000000
--- a/scripts/configs/stage3_no_offloading.conf
+++ /dev/null
@@ -1,41 +0,0 @@
-{
-    "bf16": {
-        "enabled": "auto"
-    },
-    "optimizer": {
-        "type": "AdamW",
-        "params": {
-            "lr": "auto",
-            "betas": "auto",
-            "eps": "auto",
-            "weight_decay": "auto"
-        }
-    },
-    "scheduler": {
-       "type": "WarmupDecayLR",
-       "params": {
-         "total_num_steps": "auto",
-         "warmup_min_lr": "auto",
-         "warmup_max_lr": "auto",
-         "warmup_num_steps": "auto"
-        }
-    },
-    "zero_optimization": {
-        "stage": 3,
-        "overlap_comm": true,
-        "contiguous_gradients": true,
-        "sub_group_size": 1e9,
-        "reduce_bucket_size": "auto",
-        "stage3_prefetch_bucket_size": "auto",
-        "stage3_param_persistence_threshold": "auto",
-        "stage3_max_live_parameters": 1e9,
-        "stage3_max_reuse_distance": 1e9,
-        "stage3_gather_16bit_weights_on_model_save": true
-    },
-    "gradient_accumulation_steps": "auto",
-    "gradient_clipping": "auto",
-    "steps_per_print": 1e5,
-    "train_batch_size": "auto",
-    "train_micro_batch_size_per_gpu": "auto",
-    "wall_clock_breakdown": false
-}
\ No newline at end of file
diff --git a/scripts/configs/train_configs.yaml b/scripts/configs/train_configs.yaml
deleted file mode 100644
index 997e521..0000000
--- a/scripts/configs/train_configs.yaml
+++ /dev/null
@@ -1,31 +0,0 @@
-# This file contains default training parameters assuming access to A100-80GBs
-allenai/tulu-2-7b:
-  model: 'allenai/tulu-2-7b'
-  tokenizer: 'allenai/tulu-2-7b'
-  chat_template: 'tulu'
-  num_gpus: 4
-  total_batch_size: 128
-  batch_size_per_gpu: 2
-  max_seq_len: 1024
-  use_flash_attn: True
-  bf16: True
-meta-llama/Llama-2-7b-chat-hf:
-  model: 'meta-llama/Llama-2-7b-chat-hf'
-  tokenizer: 'meta-llama/Llama-2-7b-chat-hf'
-  chat_template: 'llama-2'
-  num_gpus: 4
-  total_batch_size: 128
-  batch_size_per_gpu: 2
-  max_seq_len: 1024
-  use_flash_attn: True
-  bf16: True
-TinyLlama/TinyLlama-1.1B-Chat-v1.0:
-  model: 'TinyLlama/TinyLlama-1.1B-Chat-v1.0'
-  tokenizer: 'TinyLlama/TinyLlama-1.1B-Chat-v1.0'
-  chat_template: 'llama-2'
-  num_gpus: 2
-  total_batch_size: 128
-  batch_size_per_gpu: 16
-  max_seq_len: 1024
-  use_flash_attn: True
-  bf16: True
\ No newline at end of file
diff --git a/scripts/submit_train_jobs.py b/scripts/submit_train_jobs.py
deleted file mode 100644
index c5dcc3f..0000000
--- a/scripts/submit_train_jobs.py
+++ /dev/null
@@ -1,100 +0,0 @@
-# Copyright 2023 AllenAI. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import argparse
-import subprocess
-from datetime import date
-
-import yaml
-
-argparser = argparse.ArgumentParser()
-argparser.add_argument("--image", type=str, default="jacobm/rb_train", help="Beaker image to use")
-argparser.add_argument("--cluster", type=str, default="ai2/allennlp-cirrascale", help="Beaker cluster to use")
-argparser.add_argument("--model", type=str, default=None, help="Specific model to train on top of")
-argparser.add_argument("--dataset", type=str, default=None, help="Specific dataset file path for training")
-argparser.add_argument("--lr", type=str, default="1e-5", help="Learning rate for training")
-argparser.add_argument("--num_epochs", type=str, default="1", help="Number of training epochs")
-argparser.add_argument("--seed", type=int, default=123409876, help="Seed for training")
-args = argparser.parse_args()
-
-
-today = date.today().strftime("%m%d%Y")
-
-with open("scripts/configs/beaker_train.yaml", "r") as f:
-    default_yaml = f.read()
-d = yaml.load(default_yaml, Loader=yaml.FullLoader)
-
-with open("scripts/configs/train_configs.yaml", "r") as f:
-    configs = yaml.load(f.read(), Loader=yaml.FullLoader)
-model_config = configs[args.model]
-
-# name and description
-model_stem = args.model.replace("/", "-")
-if ".jsonl" in args.dataset:
-    dataset_stem = args.dataset.split("/")[-1].replace(".jsonl", "")
-else:
-    dataset_stem = args.dataset
-exp_name = f"herm_train-rm_{model_stem}_{dataset_stem}"
-
-d["description"] = exp_name
-d["tasks"][0]["context"]["cluster"] = args.cluster
-d["tasks"][0]["context"]["priority"] = "high"
-d["tasks"][0]["name"] = exp_name
-d["tasks"][0]["image"]["beaker"] = args.image
-d["tasks"][0]["resources"]["gpuCount"] = model_config["num_gpus"]
-
-GRADIENT_ACC_STEPS = int(
-    model_config["total_batch_size"] / model_config["num_gpus"] / model_config["batch_size_per_gpu"]
-)
-
-optional_configs = ""
-if model_config["bf16"]:
-    optional_configs += " --bf16"
-if model_config["use_flash_attn"]:
-    optional_configs += " --use_flash_attn"
-
-d["tasks"][0]["arguments"][0] = (
-    f"deepspeed --include localhost:{','.join(str(n) for n in range(model_config['num_gpus']))} "
-    " scripts/train_rm_trainer.py"
-    " --deepspeed scripts/configs/stage3_no_offloading.conf"
-    f" --model_name_or_path {args.model}"
-    f" --tokenizer {model_config['tokenizer']}"
-    f" --dataset_name {args.dataset}"
-    f" --max_seq_length {model_config['max_seq_len']}"
-    " --preprocessing_num_workers 64"
-    f" --do_train {optional_configs}"
-    f" --per_device_train_batch_size {model_config['batch_size_per_gpu']}"
-    f" --gradient_accumulation_steps {GRADIENT_ACC_STEPS}"
-    f" --learning_rate {args.lr}"
-    " --lr_scheduler_type linear"
-    " --warmup_ratio 0.03"
-    " --weight_decay 0."
-    " --evaluation_strategy no"
-    " --logging_steps 1"
-    " --save_strategy epoch"
-    f" --seed {args.seed}"
-    f" --num_train_epochs {args.num_epochs}"
-    f" --output_dir /output"
-    " --use_slow_tokenizer"
-    " --overwrite_output_dir"
-    " --output_dir /output"
-)
-
-fn = "beaker_configs/auto_created/{}.yaml".format(exp_name)
-file = open(fn, "w")
-yaml.dump(d, file, default_flow_style=True)
-file.close()
-
-cmd = "beaker experiment create {} --workspace ai2/herm".format(fn)
-subprocess.Popen(cmd, shell=True)
diff --git a/scripts/train_rm.py b/scripts/train_rm.py
deleted file mode 100644
index 787192b..0000000
--- a/scripts/train_rm.py
+++ /dev/null
@@ -1,438 +0,0 @@
-# Copyright 2023 AllenAI. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# !/usr/bin/env python
-# coding=utf-8
-"""
-This file is modified from the huggingface example for finetuning language models
-[run_clm.py](https://github.com/huggingface/transformers/blob/main/examples/pytorch/language-modeling/run_clm.py)
-"""
-
-import logging
-import os
-import sys
-import warnings
-from dataclasses import dataclass, field
-from typing import Any, Dict, List, Optional
-
-import datasets
-import torch
-import transformers
-from datasets import load_dataset
-from fastchat.conversation import Conversation, get_conv_template
-from transformers import (
-    AutoConfig,
-    AutoModelForSequenceClassification,
-    AutoTokenizer,
-    HfArgumentParser,
-    LlamaTokenizer,
-    LlamaTokenizerFast,
-    TrainingArguments,
-    set_seed,
-)
-from transformers.trainer_utils import get_last_checkpoint
-from trl import RewardTrainer
-
-logger = logging.getLogger(__name__)
-
-
-@dataclass
-class ModelArguments:
-    """
-    Arguments pertaining to which model/config/tokenizer we are going to fine-tune, or train from scratch.
-    """
-
-    model_name_or_path: Optional[str] = field(
-        default=None,
-        metadata={
-            "help": (
-                "The model checkpoint for weights initialization. Don't set if you want to train a model from scratch."
-            )
-        },
-    )
-    config_name: Optional[str] = field(
-        default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"}
-    )
-    tokenizer_name: Optional[str] = field(
-        default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"}
-    )
-    use_flash_attn: bool = field(
-        default=False,
-        metadata={"help": "Whether to use flash attention in the model training"},
-    )
-    cache_dir: Optional[str] = field(
-        default=None,
-        metadata={"help": "Where do you want to store the pretrained models downloaded from huggingface.co"},
-    )
-    model_revision: str = field(
-        default="main",
-        metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
-    )
-    token: str = field(
-        default=None,
-        metadata={
-            "help": (
-                "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
-                "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
-            )
-        },
-    )
-    use_auth_token: bool = field(
-        default=None,
-        metadata={
-            "help": "The `use_auth_token` argument is deprecated and will be removed in Transformers v4.34."
-            " Please use `token`."
-        },
-    )
-    trust_remote_code: bool = field(
-        default=False,
-        metadata={
-            "help": (
-                "Whether or not to allow for custom models defined on the Hub in their own modeling files."
-                " This option should only be set to `True` for repositories you trust and in which you have"
-                " read the code, as it will execute code present on the Hub on your local machine."
-            )
-        },
-    )
-    torch_dtype: Optional[str] = field(
-        default="auto",
-        metadata={
-            "help": (
-                "Override the default `torch.dtype` and load the model under this dtype. If `auto` is passed, the "
-                "dtype will be automatically derived from the model's weights."
-            ),
-            "choices": ["auto", "bfloat16", "float16", "float32"],
-        },
-    )
-    low_cpu_mem_usage: bool = field(
-        default=False,
-        metadata={
-            "help": (
-                "It is an option to create the model as an empty shell, then only materialize its"
-                + " parameters when the pretrained weights are loaded. set True will benefit LLM"
-                + " loading time and RAM consumption."
-            )
-        },
-    )
-    use_slow_tokenizer: bool = field(
-        default=False,
-        metadata={"help": ("use slow tokenizer or not.")},
-    )
-
-
-@dataclass
-class DataTrainingArguments:
-    """
-    Arguments pertaining to what data we are going to input our model for training and eval.
-    """
-
-    dataset_name: Optional[str] = field(
-        default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."}
-    )
-    dataset_config_name: Optional[str] = field(
-        default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."}
-    )
-    train_file: Optional[str] = field(
-        default=None, metadata={"help": "The input training data file (a json/jsonl file)."}
-    )
-    max_train_samples: Optional[int] = field(
-        default=None,
-        metadata={
-            "help": (
-                "For debugging purposes or quicker training, truncate the number of training examples to this "
-                "value if set."
-            )
-        },
-    )
-    streaming: bool = field(default=False, metadata={"help": "Enable streaming mode"})
-    overwrite_cache: bool = field(
-        default=False, metadata={"help": "Overwrite the cached training and evaluation sets"}
-    )
-    preprocessing_num_workers: Optional[int] = field(
-        default=None,
-        metadata={"help": "The number of processes to use for the preprocessing."},
-    )
-    max_seq_length: Optional[int] = field(
-        default=None,
-        metadata={
-            "help": (
-                "The maximum total input sequence length after tokenization."
-                + " Sequences longer than this will be truncated"
-            )
-        },
-    )
-    chat_template: Optional[str] = field(
-        default="tulu", metadata={"help": ("The chat template to apply to chosen/rejected pairs. Default is Tulu.")}
-    )
-
-    def __post_init__(self):
-        if self.dataset_name is None and self.train_file is None:
-            raise ValueError("Need either a dataset name or a training file.")
-        else:
-            if self.train_file is not None:
-                extension = self.train_file.split(".")[-1]
-                assert extension in ["json", "jsonl"], "`train_file` should be a json or a jsonl file."
-
-
-def main():
-    parser = HfArgumentParser((ModelArguments, DataTrainingArguments, TrainingArguments))
-    if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
-        model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1]))
-    else:
-        model_args, data_args, training_args = parser.parse_args_into_dataclasses()
-
-    if model_args.use_auth_token is not None:
-        warnings.warn(
-            "The `use_auth_token` argument is deprecated and will be removed in Transformers v4.34.", FutureWarning
-        )
-        if model_args.token is not None:
-            raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
-        model_args.token = model_args.use_auth_token
-
-    # Setup logging
-    logging.basicConfig(
-        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
-        datefmt="%m/%d/%Y %H:%M:%S",
-        handlers=[logging.StreamHandler(sys.stdout)],
-    )
-
-    if training_args.should_log:
-        # The default of training_args.log_level is passive, so we set log level at info here to have that default.
-        transformers.utils.logging.set_verbosity_info()
-
-    log_level = training_args.get_process_log_level()
-    logger.setLevel(log_level)
-    datasets.utils.logging.set_verbosity(log_level)
-    transformers.utils.logging.set_verbosity(log_level)
-    transformers.utils.logging.enable_default_handler()
-    transformers.utils.logging.enable_explicit_format()
-
-    # Log on each process the small summary:
-    logger.warning(
-        f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}"
-        + f"distributed training: {training_args.parallel_mode.value == 'distributed'},"
-        + f" 16-bits training: {training_args.fp16}"
-    )
-    logger.info(f"Training parameters {training_args}")
-
-    # Detecting last checkpoint.
-    last_checkpoint = None
-    if os.path.isdir(training_args.output_dir) and training_args.do_train and not training_args.overwrite_output_dir:
-        last_checkpoint = get_last_checkpoint(training_args.output_dir)
-        if last_checkpoint is None and len(os.listdir(training_args.output_dir)) > 0:
-            raise ValueError(
-                f"Output directory ({training_args.output_dir}) already exists and is not empty. "
-                "Use --overwrite_output_dir to overcome."
-            )
-        elif last_checkpoint is not None and training_args.resume_from_checkpoint is None:
-            logger.info(
-                f"Checkpoint detected, resuming training at {last_checkpoint}. To avoid this behavior, change "
-                "the `--output_dir` or add `--overwrite_output_dir` to train from scratch."
-            )
-
-    # Set seed before initializing model.
-    if training_args.seed is None:
-        training_args.seed = 123409876
-    set_seed(training_args.seed)
-
-    if data_args.dataset_name is None:
-        raise ValueError("Must provide a valid dataset name")
-    elif data_args.dataset_name[-6:] == ".jsonl":
-        # load dataset file
-        train_dataset = load_dataset("json", data_files=data_args.dataset_name)["train"]
-    else:
-        train_dataset = load_dataset(data_args.dataset_name)["train"]
-
-    config_kwargs = {
-        "cache_dir": model_args.cache_dir,
-        "revision": model_args.model_revision,
-        "token": model_args.token,
-        "trust_remote_code": model_args.trust_remote_code,
-    }
-    if model_args.config_name:
-        config = AutoConfig.from_pretrained(model_args.config_name, **config_kwargs)
-    elif model_args.model_name_or_path:
-        config = AutoConfig.from_pretrained(model_args.model_name_or_path, **config_kwargs)
-    else:
-        raise ValueError("You are instantiating a new config instance from scratch. This is not supported.")
-
-    config.num_labels = 1
-
-    tokenizer_kwargs = {
-        "cache_dir": model_args.cache_dir,
-        "revision": model_args.model_revision,
-        "token": model_args.token,
-        "trust_remote_code": model_args.trust_remote_code,
-        "use_fast": not model_args.use_slow_tokenizer,
-    }
-    if model_args.tokenizer_name:
-        tokenizer = AutoTokenizer.from_pretrained(model_args.tokenizer_name, **tokenizer_kwargs)
-    elif model_args.model_name_or_path:
-        tokenizer = AutoTokenizer.from_pretrained(model_args.model_name_or_path, **tokenizer_kwargs)
-    else:
-        raise ValueError(
-            "You are instantiating a new tokenizer from scratch. This is not supported by this finetuning script."
-        )
-
-    if model_args.model_name_or_path:
-        torch_dtype = (
-            model_args.torch_dtype
-            if model_args.torch_dtype in ["auto", None]
-            else getattr(torch, model_args.torch_dtype)
-        )
-        model = AutoModelForSequenceClassification.from_pretrained(
-            model_args.model_name_or_path,
-            from_tf=bool(".ckpt" in model_args.model_name_or_path),
-            config=config,
-            cache_dir=model_args.cache_dir,
-            revision=model_args.model_revision,
-            token=model_args.token,
-            trust_remote_code=model_args.trust_remote_code,
-            torch_dtype=torch_dtype,
-            low_cpu_mem_usage=model_args.low_cpu_mem_usage,
-            use_flash_attention_2=True if model_args.use_flash_attn else False,
-        )
-    else:
-        raise ValueError(
-            "You are instantiating a new model from scratch. This is not supported by this finetuning script."
-        )
-
-    if "gpt2" in model_args.model_name_or_path:
-        print("Adding padding token for GPT2 models")
-        tokenizer.add_special_tokens({"pad_token": tokenizer.eos_token})
-        config.pad_token_id = config.eos_token_id
-
-    # no default pad token for llama!
-    # here we add all special tokens again, because the default ones are not in the special_tokens_map
-    if (
-        isinstance(tokenizer, LlamaTokenizer)
-        or isinstance(tokenizer, LlamaTokenizerFast)
-        or "llama" in model_args.model_name_or_path.lower()
-        or "tulu" in model_args.model_name_or_path.lower()
-    ):
-        print("Adding pad token for Llama/Tulu models")
-        num_added_tokens = tokenizer.add_special_tokens(
-            {
-                "bos_token": "<s>",
-                "eos_token": "</s>",
-                "unk_token": "<unk>",
-                "pad_token": "<pad>",
-            }
-        )
-        config.pad_token_id = 32000
-        model.config.pad_token_id = 32000
-        assert num_added_tokens in [
-            0,
-            1,
-        ], "LlamaTokenizer should only add one special token - the pad_token, or no tokens if pad token present."
-
-    print(f"model config: {config}")
-
-    # resize embeddings if needed (e.g. for LlamaTokenizer)
-    embedding_size = model.get_input_embeddings().weight.shape[0]
-    if len(tokenizer) > embedding_size:
-        model.resize_token_embeddings(len(tokenizer))
-
-    original_columns = train_dataset.column_names
-
-    def preprocess_preference_pairs(example):
-        chosen = example["chosen"]
-        rejected = example["rejected"]
-        tokenized_chosen = tokenizer(
-            chosen,
-            max_length=data_args.max_seq_length,
-            truncation=True,
-        )
-        tokenized_rejected = tokenizer(
-            rejected,
-            max_length=data_args.max_seq_length,
-            truncation=True,
-        )
-        return {
-            "input_ids_chosen": tokenized_chosen["input_ids"],
-            "attention_mask_chosen": tokenized_chosen["attention_mask"],
-            "input_ids_rejected": tokenized_rejected["input_ids"],
-            "attention_mask_rejected": tokenized_rejected["attention_mask"],
-        }
-
-    def prepare_examples(
-        example: Dict[str, List[Any]],
-        dialogue_template: Conversation,
-    ):
-        processed = {}
-        for key in ["chosen", "rejected"]:
-            dialogue_template.messages = []
-            for elem in example[key]:
-                content = elem["content"]
-                role = elem["role"]
-                dialogue_template.messages.append([role, content])
-            processed[key] = dialogue_template.get_prompt()
-
-        return processed
-
-    train_dataset = train_dataset.map(
-        prepare_examples,
-        fn_kwargs={"dialogue_template": get_conv_template(data_args.chat_template)},
-        num_proc=data_args.preprocessing_num_workers,
-        load_from_cache_file=False,
-    )
-
-    train_dataset = train_dataset.filter(
-        lambda x: x["chosen"] != x["rejected"],
-        num_proc=data_args.preprocessing_num_workers,
-    )
-    train_dataset = train_dataset.map(
-        preprocess_preference_pairs,
-        num_proc=data_args.preprocessing_num_workers,
-        remove_columns=original_columns,
-    )
-    train_dataset = train_dataset.filter(
-        lambda x: len(x["input_ids_chosen"]) <= data_args.max_seq_length
-        and len(x["input_ids_rejected"]) <= data_args.max_seq_length,
-        num_proc=data_args.preprocessing_num_workers,
-    )
-
-    # initialize a trainer
-    trainer = RewardTrainer(
-        model=model,
-        args=training_args,
-        train_dataset=train_dataset if training_args.do_train else None,
-        tokenizer=tokenizer,
-    )
-
-    # Training
-    if training_args.do_train:
-        checkpoint = None
-        if training_args.resume_from_checkpoint is not None:
-            checkpoint = training_args.resume_from_checkpoint
-        elif last_checkpoint is not None:
-            checkpoint = last_checkpoint
-        print(f"resume from checkpoint: {checkpoint}")
-        train_result = trainer.train(resume_from_checkpoint=checkpoint)
-        trainer.save_model()  # Saves the tokenizer too for easy upload
-
-        metrics = train_result.metrics
-
-        max_train_samples = (
-            data_args.max_train_samples if data_args.max_train_samples is not None else len(train_dataset)
-        )
-        metrics["train_samples"] = min(max_train_samples, len(train_dataset))
-
-        trainer.log_metrics("train", metrics)
-        trainer.save_metrics("train", metrics)
-        trainer.save_state()
-
-
-if __name__ == "__main__":
-    main()
diff --git a/setup.py b/setup.py
index 0201c29..a6097b1 100644
--- a/setup.py
+++ b/setup.py
@@ -60,7 +60,7 @@
         "torch",
         "tiktoken==0.6.0",  # added for llama 3
         "transformers==4.43.4",  # pinned at llama 3
-        "trl>=0.8.2",  # fixed transformers import error
+        "trl>=0.8.2",  # fixed transformers import error, for DPO
     ],
     extras_require={
         "generative": [
@@ -69,8 +69,5 @@
             "anthropic",
             "together",
         ],
-        "training": [
-            "wandb",
-        ],
     },
 )