diff --git a/docs/examples/config.rst b/docs/examples/config.rst
index 07e1dce..d7d8fa4 100644
--- a/docs/examples/config.rst
+++ b/docs/examples/config.rst
@@ -307,7 +307,7 @@ Trainer
   total_epochs: 30
   project_name: verl_examples
   experiment_name: gsm8k
-  logger: ['console', 'tracking']
+  logger: ['console', 'wandb']
   nnodes: 1
   n_gpus_per_node: 8
   save_freq: -1
@@ -319,8 +319,7 @@ Trainer
 - ``trainer.total_epochs``: Number of epochs in training.
 - ``trainer.project_name``: For wandb
 - ``trainer.experiment_name``: For wandb
-- ``trainer.logger``: Support console and tracking. For tracking, we
-  will initialize a wandb
+- ``trainer.logger``: Support console and wandb
 - ``trainer.nnodes``: Number of nodes used in the training.
 - ``trainer.n_gpus_per_node``: Number of GPUs per node.
 - ``trainer.save_freq``: The frequency (by iteration) to save checkpoint
diff --git a/docs/examples/gsm8k_example.rst b/docs/examples/gsm8k_example.rst
index 90b61e0..0d3c1f8 100644
--- a/docs/examples/gsm8k_example.rst
+++ b/docs/examples/gsm8k_example.rst
@@ -91,7 +91,7 @@ We also provide various training scripts for SFT on GSM8K dataset in `gsm8k sft
     trainer.project_name=gsm8k-sft \
     trainer.experiment_name=gsm8k-sft-deepseek-coder-6.7b-instruct \
     trainer.total_epochs=4 \
-    trainer.logger=['console','tracking']
+    trainer.logger=['console','wandb']
 
 Step 4: Perform PPO training with your model on GSM8K Dataset
 -------------------------------------------------------------
@@ -156,7 +156,7 @@ The script of run_deepseek7b_llm.sh
     critic.model.fsdp_config.optimizer_offload=False \
     algorithm.kl_ctrl.kl_coef=0.001 \
     trainer.critic_warmup=0 \
-    trainer.logger=['console','tracking'] \
+    trainer.logger=['console','wandb'] \
     trainer.project_name='verl_example_gsm8k' \
     trainer.experiment_name='deepseek_llm_7b_function_rm' \
     trainer.n_gpus_per_node=8 \
diff --git a/docs/start/quickstart.rst b/docs/start/quickstart.rst
index 2ac6845..69888f3 100644
--- a/docs/start/quickstart.rst
+++ b/docs/start/quickstart.rst
@@ -97,7 +97,7 @@ We also provide various training scripts for SFT on GSM8K dataset in `gsm8k sft
     trainer.project_name=gsm8k-sft \
     trainer.experiment_name=gsm8k-sft-deepseek-coder-6.7b-instruct \
     trainer.total_epochs=4 \
-    trainer.logger=['console','tracking']
+    trainer.logger=['console','wandb']
 
 Step 4: Perform PPO training with your model on GSM8K Dataset
 -------------------------------------------------------------
@@ -163,7 +163,7 @@ The script of `run_deepseek7b_llm.sh`
     critic.model.fsdp_config.optimizer_offload=False \
     algorithm.kl_ctrl.kl_coef=0.001 \
     trainer.critic_warmup=0 \
-    trainer.logger=['console','tracking'] \
+    trainer.logger=['console','wandb'] \
     trainer.project_name='verl_example_gsm8k' \
     trainer.experiment_name='deepseek_llm_7b_function_rm' \
     trainer.n_gpus_per_node=8 \
diff --git a/examples/ppo_trainer/run_deepseek7b_llm.sh b/examples/ppo_trainer/run_deepseek7b_llm.sh
index 1df0da0..108fba1 100644
--- a/examples/ppo_trainer/run_deepseek7b_llm.sh
+++ b/examples/ppo_trainer/run_deepseek7b_llm.sh
@@ -29,7 +29,7 @@ python3 -m verl.trainer.main_ppo \
     critic.model.fsdp_config.optimizer_offload=False \
     algorithm.kl_ctrl.kl_coef=0.001 \
     trainer.critic_warmup=0 \
-    trainer.logger=['console','tracking'] \
+    trainer.logger=['console','wandb'] \
     trainer.project_name='verl_example_gsm8k' \
     trainer.experiment_name='deepseek_llm_7b_function_rm' \
     trainer.n_gpus_per_node=8 \
diff --git a/examples/ppo_trainer/run_deepseek_full_hh_rlhf.sh b/examples/ppo_trainer/run_deepseek_full_hh_rlhf.sh
index f4e2587..bd2c0bc 100644
--- a/examples/ppo_trainer/run_deepseek_full_hh_rlhf.sh
+++ b/examples/ppo_trainer/run_deepseek_full_hh_rlhf.sh
@@ -31,7 +31,7 @@ python3 -m verl.trainer.main_ppo --config-path=./config --config-name='ppo_megat
     reward_model.param_offload=False \
     algorithm.kl_ctrl.kl_coef=0.001 \
     trainer.critic_warmup=0 \
-    trainer.logger=['console','tracking'] \
+    trainer.logger=['console','wandb'] \
     trainer.project_name='verl_megatron_full_hh_rlhf_examples' \
     trainer.experiment_name='deepseek_llm_7b_model_rm' \
     trainer.n_gpus_per_node=8 \
diff --git a/examples/ppo_trainer/run_deepseek_math_gsm8k_megatron.sh b/examples/ppo_trainer/run_deepseek_math_gsm8k_megatron.sh
index ed113b2..c342d52 100644
--- a/examples/ppo_trainer/run_deepseek_math_gsm8k_megatron.sh
+++ b/examples/ppo_trainer/run_deepseek_math_gsm8k_megatron.sh
@@ -30,7 +30,7 @@ python3 -m verl.trainer.main_ppo --config-path=./config --config-name='ppo_megat
     critic.ppo_micro_batch_size=32 \
     algorithm.kl_ctrl.kl_coef=0.001 \
     trainer.critic_warmup=0 \
-    trainer.logger=['console','tracking'] \
+    trainer.logger=['console','wandb'] \
     trainer.project_name='verl_megatron_math_gsm8k_examples' \
     trainer.experiment_name='deepseek_llm_7b_function_rm' \
     trainer.n_gpus_per_node=8 \
diff --git a/examples/ppo_trainer/run_deepseek_megatron.sh b/examples/ppo_trainer/run_deepseek_megatron.sh
index 2d1cab2..c63285a 100644
--- a/examples/ppo_trainer/run_deepseek_megatron.sh
+++ b/examples/ppo_trainer/run_deepseek_megatron.sh
@@ -22,7 +22,7 @@ python3 -m verl.trainer.main_ppo --config-path=./config --config-name='ppo_megat
     critic.ppo_micro_batch_size=64 \
     algorithm.kl_ctrl.kl_coef=0.001 \
     trainer.critic_warmup=0 \
-    trainer.logger=['console','tracking'] \
+    trainer.logger=['console','wandb'] \
     trainer.project_name='verl_megatron_gsm8k_examples' \
     trainer.experiment_name='deepseek_llm_7b_function_rm' \
     trainer.n_gpus_per_node=8 \
diff --git a/examples/ppo_trainer/run_gemma.sh b/examples/ppo_trainer/run_gemma.sh
index bcd5452..200ebdb 100644
--- a/examples/ppo_trainer/run_gemma.sh
+++ b/examples/ppo_trainer/run_gemma.sh
@@ -29,7 +29,7 @@ python3 -m verl.trainer.main_ppo \
     critic.model.fsdp_config.optimizer_offload=False \
     algorithm.kl_ctrl.kl_coef=0.001 \
     trainer.critic_warmup=0 \
-    trainer.logger=['console','tracking'] \
+    trainer.logger=['console','wandb'] \
     trainer.project_name='verl_example' \
     trainer.experiment_name='gemma2b_function_rm' \
     trainer.n_gpus_per_node=2 \
diff --git a/examples/ppo_trainer/run_qwen2-7b.sh b/examples/ppo_trainer/run_qwen2-7b.sh
index 396eb63..c6ffc1b 100644
--- a/examples/ppo_trainer/run_qwen2-7b.sh
+++ b/examples/ppo_trainer/run_qwen2-7b.sh
@@ -37,7 +37,7 @@ python3 -m verl.trainer.main_ppo \
     critic.model.fsdp_config.optimizer_offload=False \
     algorithm.kl_ctrl.kl_coef=0.001 \
     trainer.critic_warmup=0 \
-    trainer.logger=['console','tracking'] \
+    trainer.logger=['console','wandb'] \
     trainer.project_name='verl_example' \
     trainer.experiment_name='Qwen2-7B-Instruct_function_rm' \
     trainer.n_gpus_per_node=8 \
diff --git a/examples/ppo_trainer/run_qwen2-7b_rm.sh b/examples/ppo_trainer/run_qwen2-7b_rm.sh
index 2f77e87..3755b38 100644
--- a/examples/ppo_trainer/run_qwen2-7b_rm.sh
+++ b/examples/ppo_trainer/run_qwen2-7b_rm.sh
@@ -44,7 +44,7 @@ python3 -m verl.trainer.main_ppo \
     reward_model.micro_batch_size=16 \
     algorithm.kl_ctrl.kl_coef=0.001 \
     trainer.critic_warmup=0 \
-    trainer.logger=['console','tracking'] \
+    trainer.logger=['console','wandb'] \
     trainer.project_name='verl_example' \
     trainer.experiment_name='Qwen2-7B-Instruct_hybrid_rm' \
     trainer.n_gpus_per_node=8 \
diff --git a/examples/ppo_trainer/run_qwen2.5-32b.sh b/examples/ppo_trainer/run_qwen2.5-32b.sh
index e7f93cc..1192f1e 100644
--- a/examples/ppo_trainer/run_qwen2.5-32b.sh
+++ b/examples/ppo_trainer/run_qwen2.5-32b.sh
@@ -38,7 +38,7 @@ python3 -m verl.trainer.main_ppo \
     critic.model.fsdp_config.optimizer_offload=False \
     algorithm.kl_ctrl.kl_coef=0.0001 \
     trainer.critic_warmup=0 \
-    trainer.logger=['console','tracking'] \
+    trainer.logger=['console','wandb'] \
     trainer.project_name='verl_example' \
     trainer.experiment_name='Qwen2.5-32B-Instruct_function_rm' \
     trainer.n_gpus_per_node=8 \
diff --git a/examples/sft/gsm8k/run_deepseek_6b7.sh b/examples/sft/gsm8k/run_deepseek_6b7.sh
index f944a14..8e4d54c 100644
--- a/examples/sft/gsm8k/run_deepseek_6b7.sh
+++ b/examples/sft/gsm8k/run_deepseek_6b7.sh
@@ -16,4 +16,4 @@ torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \
     trainer.project_name=gsm8k-sft \
     trainer.experiment_name=gsm8k-sft-deepseek-coder-6.7b-instruct \
     trainer.total_epochs=4 \
-    trainer.logger=['console','tracking']
\ No newline at end of file
+    trainer.logger=['console','wandb']
\ No newline at end of file
diff --git a/examples/sft/gsm8k/run_gemma_2b.sh b/examples/sft/gsm8k/run_gemma_2b.sh
index fb5773c..4eca025 100644
--- a/examples/sft/gsm8k/run_gemma_2b.sh
+++ b/examples/sft/gsm8k/run_gemma_2b.sh
@@ -2,9 +2,16 @@
 
 set -x
 
-hdfs_path=hdfs://user/verl/experiments/gsm8k/gemma-2b-it/ # replace to your own hdfs/local path
+if [ "$#" -lt 2 ]; then
+    echo "Usage: run_gemma_2b.sh <nproc_per_node> <hdfs_path> [other_configs...]"
+    exit 1
+fi
 
 nproc_per_node=$1
+hdfs_path=$2
+
+# Shift the arguments so $@ refers to the rest
+shift 2
 
 torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \
     -m verl.trainer.fsdp_sft_trainer \
@@ -18,4 +25,4 @@ torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \
     trainer.project_name=gsm8k-sft \
     trainer.experiment_name=gsm8k-sft-gemma-2b-it \
     trainer.total_epochs=3 \
-    trainer.logger=['console','tracking']
\ No newline at end of file
+    trainer.logger=['console','wandb'] $@
\ No newline at end of file
diff --git a/examples/sft/gsm8k/run_gemma_7b.sh b/examples/sft/gsm8k/run_gemma_7b.sh
index 8239136..9c35792 100644
--- a/examples/sft/gsm8k/run_gemma_7b.sh
+++ b/examples/sft/gsm8k/run_gemma_7b.sh
@@ -16,4 +16,4 @@ torchrun --standalone --nnodes=1 --nproc_per_node=$nproc_per_node \
     trainer.project_name=gsm8k-sft \
     trainer.experiment_name=gsm8k-sft-gemma-1.1-7b-it \
     trainer.total_epochs=4 \
-    trainer.logger=['console','tracking']
\ No newline at end of file
+    trainer.logger=['console','wandb']
\ No newline at end of file
diff --git a/examples/split_placement/config/ppo_trainer_split.yaml b/examples/split_placement/config/ppo_trainer_split.yaml
index bd6bcf2..22835cc 100644
--- a/examples/split_placement/config/ppo_trainer_split.yaml
+++ b/examples/split_placement/config/ppo_trainer_split.yaml
@@ -121,7 +121,7 @@ trainer:
   total_epochs: 30
   project_name: verl_examples
   experiment_name: gsm8k
-  logger: ['console', 'tracking']
+  logger: ['console', 'wandb']
   nnodes: 1
   n_gpus_per_node: 8
   save_freq: -1
diff --git a/examples/split_placement/run_deepseek7b_llm.sh b/examples/split_placement/run_deepseek7b_llm.sh
index 6afd399..a2db960 100644
--- a/examples/split_placement/run_deepseek7b_llm.sh
+++ b/examples/split_placement/run_deepseek7b_llm.sh
@@ -29,7 +29,7 @@ python3 main_ppo_split.py \
     critic.model.fsdp_config.optimizer_offload=False \
     algorithm.kl_ctrl.kl_coef=0.001 \
     trainer.critic_warmup=0 \
-    trainer.logger=['console','tracking'] \
+    trainer.logger=['console','wandb'] \
     trainer.project_name='verl_example_gsm8k' \
     trainer.experiment_name='deepseek_llm_7b_function_rm' \
     trainer.n_gpus_per_node=8 \
diff --git a/verl/trainer/config/ppo_megatron_trainer.yaml b/verl/trainer/config/ppo_megatron_trainer.yaml
index 2048490..364452a 100644
--- a/verl/trainer/config/ppo_megatron_trainer.yaml
+++ b/verl/trainer/config/ppo_megatron_trainer.yaml
@@ -135,7 +135,7 @@ trainer:
   total_epochs: 30
   project_name: verl_examples
   experiment_name: gsm8k
-  logger: ['console', 'tracking']
+  logger: ['console', 'wandb']
   nnodes: 1
   n_gpus_per_node: 8
   save_freq: -1
diff --git a/verl/trainer/config/ppo_trainer.yaml b/verl/trainer/config/ppo_trainer.yaml
index bd6bcf2..22835cc 100644
--- a/verl/trainer/config/ppo_trainer.yaml
+++ b/verl/trainer/config/ppo_trainer.yaml
@@ -121,7 +121,7 @@ trainer:
   total_epochs: 30
   project_name: verl_examples
   experiment_name: gsm8k
-  logger: ['console', 'tracking']
+  logger: ['console', 'wandb']
   nnodes: 1
   n_gpus_per_node: 8
   save_freq: -1
diff --git a/verl/utils/tracking.py b/verl/utils/tracking.py
index 19aab11..5a65f95 100644
--- a/verl/utils/tracking.py
+++ b/verl/utils/tracking.py
@@ -19,20 +19,24 @@
 
 
 class Tracking(object):
-    supported_backend = ['tracking', 'console']
+    supported_backend = ['wandb', 'console']
 
     def __init__(self, project_name, experiment_name, default_backend: Union[str, List[str]] = 'console', config=None):
         if isinstance(default_backend, str):
             default_backend = [default_backend]
         for backend in default_backend:
-            assert backend in self.supported_backend, f'{backend} is not supported'
+            if backend == 'tracking':
+                import warnings
+                warnings.warn("`tracking` logger is deprecated. use `wandb` instead.", DeprecationWarning)
+            else:
+                assert backend in self.supported_backend, f'{backend} is not supported'
 
         self.logger = {}
 
-        if 'tracking' in default_backend:
+        if 'tracking' in default_backend or 'wandb' in default_backend:
             import wandb
             wandb.init(project=project_name, name=experiment_name, config=config)
-            self.logger['tracking'] = wandb
+            self.logger['wandb'] = wandb
 
         if 'console' in default_backend:
             from verl.utils.logger.aggregate_logger import LocalLogger
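
Reviewer note (not part of the patch): a minimal sketch of how the renamed backend behaves after this change, based only on the verl/utils/tracking.py hunk above. The constructor signature mirrors that hunk; the project/experiment names, the config dict, and the assumption that wandb is installed and logged in are illustrative, not taken from this diff.

import warnings

from verl.utils.tracking import Tracking

# New spelling: 'wandb' is now listed in Tracking.supported_backend, and the
# wandb module is stored under self.logger['wandb'].
tracker = Tracking(project_name='verl_examples',
                   experiment_name='gsm8k',
                   default_backend=['console', 'wandb'],
                   config={'trainer': {'total_epochs': 30}})

# Old spelling is still accepted for backward compatibility: it bypasses the
# supported_backend assert, emits a DeprecationWarning, and still initializes
# wandb (so in a real run you would create only one Tracking instance).
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter('always')
    Tracking(project_name='verl_examples',
             experiment_name='gsm8k-legacy',
             default_backend='tracking')
assert any(issubclass(w.category, DeprecationWarning) for w in caught)

On the command line, the Hydra override used throughout the scripts above becomes trainer.logger=['console','wandb'] in place of trainer.logger=['console','tracking'].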