use adafactor with accelerate
AlexPiche committed Nov 5, 2024
1 parent 5d87a2a commit 4e79c5f
Showing 5 changed files with 19 additions and 30 deletions.
8 changes: 4 additions & 4 deletions conf/finetune/rl_llama31_8b.yaml
@@ -17,14 +17,14 @@ wandb_resume: always
 # Whether to use only the basename or the full path as the run name
 wandb_use_basename: false
 config_name: meta-llama/Meta-Llama-3.1-8B-Instruct
-learning_rate: 0.000005
-train_batch_size: 1
-gradient_accumulation_passes: 1024
+learning_rate: 0.0000025
+train_batch_size: 2
+gradient_accumulation_passes: 512
 seq_length: 4096
 load_as_bf16: True
 max_train_steps: 100000
 save_checkpoint_steps: ???
-optim: adamw_torch
+optim: adafactor
 objective: rl
 log_each_n_steps: 1
 resume_dataloader: false
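The optimizer switches from AdamW to Adafactor while the per-step budget stays the same (train_batch_size × gradient_accumulation_passes is 1024 sequences both before and after); Adafactor keeps factored second-moment statistics, so its optimizer state is far smaller than AdamW's, which is the usual reason for the swap on 8B-scale models. A minimal sketch of how an `optim: adafactor` setting with a fixed learning rate is typically wired up under Accelerate — the model name comes from the config above, but the rest (loop, scheduler) is assumed and may differ from this repo's finetuning code:

# Hedged sketch: fixed-LR Adafactor prepared with Accelerate (bf16, gradient
# accumulation as in the config). Not this repo's actual training loop.
from accelerate import Accelerator
from transformers import AutoModelForCausalLM
from transformers.optimization import Adafactor

accelerator = Accelerator(gradient_accumulation_steps=512, mixed_precision="bf16")
model = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3.1-8B-Instruct")

# With an explicit lr, Adafactor's relative_step/warmup_init schedules must be off;
# scale_parameter=False keeps the update magnitude tied to the configured 2.5e-6.
optimizer = Adafactor(
    model.parameters(),
    lr=2.5e-6,
    scale_parameter=False,
    relative_step=False,
    warmup_init=False,
)
model, optimizer = accelerator.prepare(model, optimizer)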
2 changes: 2 additions & 0 deletions conf/rl_debug.yaml
@@ -1,5 +1,7 @@
defaults:
  - rl_gsm8k
  - _self_

max_agent_forks: 16
attempts: 1
test_every_n_iterations: -1
5 changes: 2 additions & 3 deletions conf/rl_gsm8k.yaml
@@ -11,7 +11,7 @@ attempts: 64
 force_restart: false
 max_iterations: 100
 discount: 0.99
-implicit_kl: false
+implicit_kl: 0.0
 max_steps: 100
 llm:
   parameters:
@@ -39,8 +39,7 @@ vllm_config:
   --enable-chunked-prefill: ""

 output_dir: outputs/rl_gsm8k
-accelerate_cfg_path: conf/deepspeed/accelerate_local.yaml
-deepspeed_cfg_path: conf/deepspeed/deepspeed_stage3_bf16.json
+accelerate_cfg_path: conf/deepspeed/accelerate_base.yaml

 hydra:
   run:
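Note that implicit_kl changes type from a boolean to a float, i.e. it now reads as a coefficient (here 0.0, disabling the penalty) rather than an on/off flag. As a rough illustration of what an implicit KL coefficient usually does in RL finetuning — this is an assumption about the intent, and the repo's actual objective may be computed differently — the per-token KL estimate against the reference model is subtracted from the reward, scaled by the coefficient:

import torch

def shape_rewards(rewards, policy_logprobs, ref_logprobs, implicit_kl=0.0):
    # Illustrative only: penalize divergence from the reference policy.
    # With implicit_kl=0.0 (the new default) the rewards pass through unchanged.
    kl_estimate = policy_logprobs - ref_logprobs  # per-token log-ratio
    return rewards - implicit_kl * kl_estimate

# toy usage
rewards = torch.zeros(4)
shaped = shape_rewards(rewards, torch.randn(4), torch.randn(4), implicit_kl=0.0)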
8 changes: 5 additions & 3 deletions examples/rl_gsm8k/orchestrate_rl.py
@@ -318,7 +318,9 @@ def main(cfg: DictConfig):

     datasets = [("train", train_agent, train_tapes)]
     if state["iteration"] % cfg.test_every_n_iterations == 0 and cfg.test_every_n_iterations > 0:
-        datasets.append(("test", test_agent, test_tapes))
+        #datasets.append(("test", test_agent, test_tapes))
+        #TODO: for debugging purposes, remove before merging
+        datasets.append(("test", test_agent, train_tapes[:1000]))
     all_results = {}
     with VLLMServiceManager(
         model_name_or_path=assistant_model_path,
@@ -415,7 +417,7 @@ def main(cfg: DictConfig):
         {
             "execution_time/populating_ref_logprobs": time_populating_ref_logprobs,
             "execution_time/starting_assistantmodel_vllm": assistant_vllm_stats["starting_time"],
-            "execution_time/starting_refmodel_vllm": assistant_vllm_stats["starting_time"],
+            "execution_time/starting_refmodel_vllm": refmodel_vllm_stats["starting_time"],
         },
         step=state["iteration"],
     )
@@ -439,7 +441,7 @@ def main(cfg: DictConfig):
     OmegaConf.save(finetune_cfg, config_path)

     start_finetune = time.time()
-    launch_training(str(conf_dir), str(state["iteration"]), cfg.accelerate_cfg_path, cfg.deepspeed_cfg_path)
+    launch_training(str(conf_dir), str(state["iteration"]), cfg.accelerate_cfg_path)
     time_finetune = time.time() - start_finetune
     time_iteration = time.time() - start_iteration
     wandb.log(
26 changes: 6 additions & 20 deletions examples/rl_gsm8k/utils.py
@@ -272,7 +272,7 @@ def calculate_stats(stats):
     }


-def launch_training(config_dir: str, config_name: str, accelerate_cfg_path: str, deepspeed_cfg_path: str):
+def launch_training(config_dir: str, config_name: str, accelerate_cfg_path: str):
     """
     Launch training process with proper GPU configuration and error handling.
@@ -307,27 +307,13 @@ def launch_training(config_dir: str, config_name: str, accelerate_cfg_path: str, deepspeed_cfg_path: str):
     ]

     if num_gpus > 1:
-        #TODO: better handling of multi-gpu training: accelerate or deepspeed
-        if False:
-            base_cmd[2:2] = [
-                "--use_deepspeed",
-                "--num_processes",
-                str(num_gpus),
-                "--deepspeed_config_file",
-                deepspeed_cfg_path,
-            ]
-        else:
-            base_cmd[2:2] = [
-                #"--use_deepspeed",
-                "--multi_gpu",
-                "--num_processes",
-                str(num_gpus),
-                #"--deepspeed_config_file",
-                #deepspeed_cfg_path,
-            ]
+        base_cmd[2:2] = [
+            "--multi_gpu",
+            "--num_processes",
+            str(num_gpus),
+        ]

-    logger.info(f"Launching training with command: {' '.join(base_cmd)}")
+    print(f"Launching training with command: {' '.join(base_cmd)}")
     try:
         result = subprocess.run(
             base_cmd,
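The slice assignment base_cmd[2:2] splices the multi-GPU flags immediately after the "accelerate launch" prefix, ahead of whatever config and script arguments follow. A small self-contained sketch of the effect — the tail of base_cmd here (config flag, script name) is assumed for illustration and not taken from the repo:

# Hypothetical base_cmd; the real one is assembled earlier in launch_training.
accelerate_cfg_path = "conf/deepspeed/accelerate_base.yaml"
num_gpus = 4
base_cmd = ["accelerate", "launch", "--config_file", accelerate_cfg_path, "run_finetune.py"]
if num_gpus > 1:
    # Insert at index 2, i.e. right after ["accelerate", "launch"].
    base_cmd[2:2] = ["--multi_gpu", "--num_processes", str(num_gpus)]
print(" ".join(base_cmd))
# accelerate launch --multi_gpu --num_processes 4 --config_file conf/deepspeed/accelerate_base.yaml run_finetune.py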
