From cff17eeebfe1e083bfc6bb7978c22020bf396585 Mon Sep 17 00:00:00 2001
From: Sven Mika
Date: Thu, 19 Dec 2024 15:08:09 +0100
Subject: [PATCH] [RLlib] Fix schedule validation on new API stack (for config
 settings like `lr` or `entropy_coeff`). (#49363)

---
 rllib/algorithms/algorithm_config.py     | 10 ++++
 rllib/algorithms/dqn/dqn.py              | 61 +++++++++---------------
 rllib/algorithms/impala/impala.py        |  6 +++
 rllib/algorithms/ppo/ppo.py              | 19 +++-----
 rllib/tuned_examples/sac/pendulum_sac.py |  1 +
 rllib/utils/schedules/scheduler.py       | 18 +++++--
 6 files changed, 59 insertions(+), 56 deletions(-)

diff --git a/rllib/algorithms/algorithm_config.py b/rllib/algorithms/algorithm_config.py
index f81624d58627..c7be1c837e56 100644
--- a/rllib/algorithms/algorithm_config.py
+++ b/rllib/algorithms/algorithm_config.py
@@ -4483,6 +4483,16 @@ def _validate_new_api_stack_settings(self):
             # `enable_rl_module_and_learner=True`.
             return
 
+        # Warn about new API stack on by default.
+        logger.warning(
+            f"You are running {self.algo_class.__name__} on the new API stack! "
+            "This is the new default behavior for this algorithm. If you don't "
+            "want to use the new API stack, set `config.api_stack("
+            "enable_rl_module_and_learner=False,"
+            "enable_env_runner_and_connector_v2=False)`. For a detailed migration "
+            "guide, see here: https://docs.ray.io/en/master/rllib/new-api-stack-migration-guide.html"  # noqa
+        )
+
         # Disabled hybrid API stack. Now, both `enable_rl_module_and_learner` and
         # `enable_env_runner_and_connector_v2` must be True or both False.
         if not self.enable_env_runner_and_connector_v2:
diff --git a/rllib/algorithms/dqn/dqn.py b/rllib/algorithms/dqn/dqn.py
index 2c5e95602bdb..78b719756344 100644
--- a/rllib/algorithms/dqn/dqn.py
+++ b/rllib/algorithms/dqn/dqn.py
@@ -413,30 +413,31 @@ def validate(self) -> None:
         # Call super's validation method.
         super().validate()
 
-        # Warn about new API stack on by default.
         if self.enable_rl_module_and_learner:
-            logger.warning(
-                f"You are running {self.algo_class.__name__} on the new API stack! "
-                "This is the new default behavior for this algorithm. If you don't "
-                "want to use the new API stack, set `config.api_stack("
-                "enable_rl_module_and_learner=False,"
-                "enable_env_runner_and_connector_v2=False)`. For a detailed migration "
-                "guide, see here: https://docs.ray.io/en/master/rllib/new-api-stack-migration-guide.html"  # noqa
-            )
-
-        if (
-            not self.enable_rl_module_and_learner
-            and self.exploration_config["type"] == "ParameterNoise"
-        ):
-            if self.batch_mode != "complete_episodes":
+            # `lr_schedule` checking.
+            if self.lr_schedule is not None:
                 raise ValueError(
-                    "ParameterNoise Exploration requires `batch_mode` to be "
-                    "'complete_episodes'. Try setting `config.env_runners("
-                    "batch_mode='complete_episodes')`."
+                    "`lr_schedule` is deprecated and must be None! Use the "
+                    "`lr` setting to set up a schedule."
                 )
-
-        if not self.enable_env_runner_and_connector_v2 and not self.in_evaluation:
-            validate_buffer_config(self)
+        else:
+            if not self.in_evaluation:
+                validate_buffer_config(self)
+
+            # TODO (simon): Find a clean solution to deal with configuration configs
+            # when using the new API stack.
+            if self.exploration_config["type"] == "ParameterNoise":
+                if self.batch_mode != "complete_episodes":
+                    raise ValueError(
+                        "ParameterNoise Exploration requires `batch_mode` to be "
+                        "'complete_episodes'. Try setting `config.env_runners("
+                        "batch_mode='complete_episodes')`."
+                    )
+                if self.noisy:
+                    raise ValueError(
+                        "ParameterNoise Exploration and `noisy` network cannot be"
+                        " used at the same time!"
+                    )
 
         if self.td_error_loss_fn not in ["huber", "mse"]:
             raise ValueError("`td_error_loss_fn` must be 'huber' or 'mse'!")
@@ -454,24 +455,6 @@ def validate(self) -> None:
                 f"{self.n_step})."
             )
 
-        # TODO (simon): Find a clean solution to deal with
-        # configuration configs when using the new API stack.
-        if (
-            not self.enable_rl_module_and_learner
-            and self.exploration_config["type"] == "ParameterNoise"
-        ):
-            if self.batch_mode != "complete_episodes":
-                raise ValueError(
-                    "ParameterNoise Exploration requires `batch_mode` to be "
-                    "'complete_episodes'. Try setting `config.env_runners("
-                    "batch_mode='complete_episodes')`."
-                )
-            if self.noisy:
-                raise ValueError(
-                    "ParameterNoise Exploration and `noisy` network cannot be"
-                    " used at the same time!"
-                )
-
         # Validate that we use the corresponding `EpisodeReplayBuffer` when using
         # episodes.
         # TODO (sven, simon): Implement the multi-agent case for replay buffers.
diff --git a/rllib/algorithms/impala/impala.py b/rllib/algorithms/impala/impala.py
index ac9229b0a71e..f1c55169e6f4 100644
--- a/rllib/algorithms/impala/impala.py
+++ b/rllib/algorithms/impala/impala.py
@@ -381,6 +381,12 @@ def validate(self) -> None:
                     "does NOT support a mixin replay buffer yet for "
                     f"{self} (set `config.replay_proportion` to 0.0)!"
                 )
+            # `lr_schedule` checking.
+            if self.lr_schedule is not None:
+                raise ValueError(
+                    "`lr_schedule` is deprecated and must be None! Use the "
+                    "`lr` setting to set up a schedule."
+                )
             # Entropy coeff schedule checking.
             if self.entropy_coeff_schedule is not None:
                 raise ValueError(
diff --git a/rllib/algorithms/ppo/ppo.py b/rllib/algorithms/ppo/ppo.py
index ab39972849d7..dedd086c34bc 100644
--- a/rllib/algorithms/ppo/ppo.py
+++ b/rllib/algorithms/ppo/ppo.py
@@ -290,17 +290,6 @@ def validate(self) -> None:
         # Call super's validation method.
         super().validate()
 
-        # Warn about new API stack on by default.
-        if self.enable_rl_module_and_learner:
-            logger.warning(
-                f"You are running {self.algo_class.__name__} on the new API stack! "
-                "This is the new default behavior for this algorithm. If you don't "
-                "want to use the new API stack, set `config.api_stack("
-                "enable_rl_module_and_learner=False,"
-                "enable_env_runner_and_connector_v2=False)`. For a detailed migration "
-                "guide, see here: https://docs.ray.io/en/master/rllib/new-api-stack-migration-guide.html"  # noqa
-            )
-
         # Synchronous sampling, on-policy/PPO algos -> Check mismatches between
         # `rollout_fragment_length` and `train_batch_size_per_learner` to avoid user
         # confusion.
@@ -350,8 +339,14 @@ def validate(self) -> None:
                 "batch_mode=complete_episodes."
             )
 
-        # Entropy coeff schedule checking.
+        # New API stack checks.
         if self.enable_rl_module_and_learner:
+            # `lr_schedule` checking.
+            if self.lr_schedule is not None:
+                raise ValueError(
+                    "`lr_schedule` is deprecated and must be None! Use the "
+                    "`lr` setting to set up a schedule."
+                )
             if self.entropy_coeff_schedule is not None:
                 raise ValueError(
                     "`entropy_coeff_schedule` is deprecated and must be None! Use the "
diff --git a/rllib/tuned_examples/sac/pendulum_sac.py b/rllib/tuned_examples/sac/pendulum_sac.py
index 21d9af89b94c..2a050378e14a 100644
--- a/rllib/tuned_examples/sac/pendulum_sac.py
+++ b/rllib/tuned_examples/sac/pendulum_sac.py
@@ -22,6 +22,7 @@
         actor_lr=2e-4 * (args.num_learners or 1) ** 0.5,
         critic_lr=8e-4 * (args.num_learners or 1) ** 0.5,
         alpha_lr=9e-4 * (args.num_learners or 1) ** 0.5,
+        # TODO (sven): Maybe go back to making this a dict of the sub-learning rates?
         lr=None,
         target_entropy="auto",
         n_step=(2, 5),
diff --git a/rllib/utils/schedules/scheduler.py b/rllib/utils/schedules/scheduler.py
index 231b25c71ea5..ea7eca808457 100644
--- a/rllib/utils/schedules/scheduler.py
+++ b/rllib/utils/schedules/scheduler.py
@@ -86,6 +86,7 @@ def validate(
         Raises:
             ValueError: In case, errors are found in the schedule's format.
         """
+        # Fixed (single) value.
        if (
             isinstance(fixed_value_or_schedule, (int, float))
             or fixed_value_or_schedule is None
@@ -97,17 +98,24 @@ def validate(
         ):
             raise ValueError(
                 f"Invalid `{setting_name}` ({fixed_value_or_schedule}) specified! "
-                f"Must be a list of at least 2 tuples, each of the form "
-                f"(`timestep`, `{description} to reach`), e.g. "
+                f"Must be a list of 2 or more tuples, each of the form "
+                f"(`timestep`, `{description} to reach`), for example "
                 "`[(0, 0.001), (1e6, 0.0001), (2e6, 0.00005)]`."
             )
         elif fixed_value_or_schedule[0][0] != 0:
             raise ValueError(
-                f"When providing a `{setting_name}`, the first timestep must be 0 "
-                f"and the corresponding lr value is the initial {description}! You "
-                f"provided ts={fixed_value_or_schedule[0][0]} {description}="
+                f"When providing a `{setting_name}` schedule, the first timestep must "
+                f"be 0 and the corresponding value is the initial {description}! "
+                f"You provided ts={fixed_value_or_schedule[0][0]} {description}="
                 f"{fixed_value_or_schedule[0][1]}."
             )
+        elif any(len(pair) != 2 for pair in fixed_value_or_schedule):
+            raise ValueError(
+                f"When providing a `{setting_name}` schedule, each tuple in the "
+                f"schedule list must have exactly 2 items of the form "
+                f"(`timestep`, `{description} to reach`), for example "
+                "`[(0, 0.001), (1e6, 0.0001), (2e6, 0.00005)]`."
+            )
 
     def get_current_value(self) -> TensorType:
         """Returns the current value (as a tensor variable).
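
For reference, a minimal sketch of the schedule format this patch validates (illustrative values, not part of the patch): on the new API stack, a schedule is passed directly through a setting such as `lr` or `entropy_coeff` as a list of 2 or more (timestep, value) pairs whose first timestep must be 0, replacing the deprecated `lr_schedule` / `entropy_coeff_schedule` settings.

    from ray.rllib.algorithms.ppo import PPOConfig

    config = (
        PPOConfig()
        .environment("CartPole-v1")
        .training(
            # Piecewise schedule: 2 or more (timestep, value) pairs; the first
            # timestep must be 0 and supplies the initial value.
            lr=[[0, 0.001], [1_000_000, 0.0001], [2_000_000, 0.00005]],
            # A fixed (single) value remains valid as well.
            entropy_coeff=0.01,
        )
    )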
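
Likewise, a sketch of the validation behavior itself, assuming `Scheduler.validate` is callable with the keyword arguments that appear in the hunks above (`fixed_value_or_schedule`, `setting_name`, `description`); the values are illustrative:

    from ray.rllib.utils.schedules.scheduler import Scheduler

    # Passes: first timestep is 0 and every pair has exactly 2 items.
    Scheduler.validate(
        fixed_value_or_schedule=[(0, 0.001), (1_000_000, 0.0001)],
        setting_name="lr",
        description="learning rate",
    )

    # Raises ValueError via the new `elif any(len(pair) != 2 ...)` branch:
    # the second pair has 3 items instead of 2.
    Scheduler.validate(
        fixed_value_or_schedule=[(0, 0.001), (1_000_000, 0.0001, 0.5)],
        setting_name="lr",
        description="learning rate",
    )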