From cff17eeebfe1e083bfc6bb7978c22020bf396585 Mon Sep 17 00:00:00 2001
From: Sven Mika
Date: Thu, 19 Dec 2024 15:08:09 +0100
Subject: [PATCH] [RLlib] Fix schedule validation on new API stack (for config
 settings like `lr` or `entropy_coeff`). (#49363)

---
 rllib/algorithms/algorithm_config.py     | 10 ++++
 rllib/algorithms/dqn/dqn.py              | 61 +++++++++---------------
 rllib/algorithms/impala/impala.py        |  6 +++
 rllib/algorithms/ppo/ppo.py              | 19 +++-----
 rllib/tuned_examples/sac/pendulum_sac.py |  1 +
 rllib/utils/schedules/scheduler.py       | 18 +++++--
 6 files changed, 59 insertions(+), 56 deletions(-)

diff --git a/rllib/algorithms/algorithm_config.py b/rllib/algorithms/algorithm_config.py
index f81624d58627..c7be1c837e56 100644
--- a/rllib/algorithms/algorithm_config.py
+++ b/rllib/algorithms/algorithm_config.py
@@ -4483,6 +4483,16 @@ def _validate_new_api_stack_settings(self):
             # `enable_rl_module_and_learner=True`.
             return
 
+        # Warn about new API stack on by default.
+        logger.warning(
+            f"You are running {self.algo_class.__name__} on the new API stack! "
+            "This is the new default behavior for this algorithm. If you don't "
+            "want to use the new API stack, set `config.api_stack("
+            "enable_rl_module_and_learner=False,"
+            "enable_env_runner_and_connector_v2=False)`. For a detailed migration "
+            "guide, see here: https://docs.ray.io/en/master/rllib/new-api-stack-migration-guide.html"  # noqa
+        )
+
         # Disabled hybrid API stack. Now, both `enable_rl_module_and_learner` and
         # `enable_env_runner_and_connector_v2` must be True or both False.
         if not self.enable_env_runner_and_connector_v2:
diff --git a/rllib/algorithms/dqn/dqn.py b/rllib/algorithms/dqn/dqn.py
index 2c5e95602bdb..78b719756344 100644
--- a/rllib/algorithms/dqn/dqn.py
+++ b/rllib/algorithms/dqn/dqn.py
@@ -413,30 +413,31 @@ def validate(self) -> None:
         # Call super's validation method.
         super().validate()
 
-        # Warn about new API stack on by default.
         if self.enable_rl_module_and_learner:
-            logger.warning(
-                f"You are running {self.algo_class.__name__} on the new API stack! "
-                "This is the new default behavior for this algorithm. If you don't "
-                "want to use the new API stack, set `config.api_stack("
-                "enable_rl_module_and_learner=False,"
-                "enable_env_runner_and_connector_v2=False)`. For a detailed migration "
-                "guide, see here: https://docs.ray.io/en/master/rllib/new-api-stack-migration-guide.html"  # noqa
-            )
-
-        if (
-            not self.enable_rl_module_and_learner
-            and self.exploration_config["type"] == "ParameterNoise"
-        ):
-            if self.batch_mode != "complete_episodes":
+            # `lr_schedule` checking.
+            if self.lr_schedule is not None:
                 raise ValueError(
-                    "ParameterNoise Exploration requires `batch_mode` to be "
-                    "'complete_episodes'. Try setting `config.env_runners("
-                    "batch_mode='complete_episodes')`."
+                    "`lr_schedule` is deprecated and must be None! Use the "
+                    "`lr` setting to set up a schedule."
                 )
-
-        if not self.enable_env_runner_and_connector_v2 and not self.in_evaluation:
-            validate_buffer_config(self)
+        else:
+            if not self.in_evaluation:
+                validate_buffer_config(self)
+
+            # TODO (simon): Find a clean solution to deal with configuration configs
+            # when using the new API stack.
+            if self.exploration_config["type"] == "ParameterNoise":
+                if self.batch_mode != "complete_episodes":
+                    raise ValueError(
+                        "ParameterNoise Exploration requires `batch_mode` to be "
+                        "'complete_episodes'. Try setting `config.env_runners("
+                        "batch_mode='complete_episodes')`."
+                    )
+                if self.noisy:
+                    raise ValueError(
+                        "ParameterNoise Exploration and `noisy` network cannot be"
+                        " used at the same time!"
+                    )
 
         if self.td_error_loss_fn not in ["huber", "mse"]:
             raise ValueError("`td_error_loss_fn` must be 'huber' or 'mse'!")
@@ -454,24 +455,6 @@ def validate(self) -> None:
                 f"{self.n_step})."
             )
 
-        # TODO (simon): Find a clean solution to deal with
-        # configuration configs when using the new API stack.
-        if (
-            not self.enable_rl_module_and_learner
-            and self.exploration_config["type"] == "ParameterNoise"
-        ):
-            if self.batch_mode != "complete_episodes":
-                raise ValueError(
-                    "ParameterNoise Exploration requires `batch_mode` to be "
-                    "'complete_episodes'. Try setting `config.env_runners("
-                    "batch_mode='complete_episodes')`."
-                )
-            if self.noisy:
-                raise ValueError(
-                    "ParameterNoise Exploration and `noisy` network cannot be"
-                    " used at the same time!"
-                )
-
         # Validate that we use the corresponding `EpisodeReplayBuffer` when using
         # episodes.
         # TODO (sven, simon): Implement the multi-agent case for replay buffers.
diff --git a/rllib/algorithms/impala/impala.py b/rllib/algorithms/impala/impala.py
index ac9229b0a71e..f1c55169e6f4 100644
--- a/rllib/algorithms/impala/impala.py
+++ b/rllib/algorithms/impala/impala.py
@@ -381,6 +381,12 @@ def validate(self) -> None:
                     "does NOT support a mixin replay buffer yet for "
                     f"{self} (set `config.replay_proportion` to 0.0)!"
                 )
+            # `lr_schedule` checking.
+            if self.lr_schedule is not None:
+                raise ValueError(
+                    "`lr_schedule` is deprecated and must be None! Use the "
+                    "`lr` setting to set up a schedule."
+                )
             # Entropy coeff schedule checking.
             if self.entropy_coeff_schedule is not None:
                 raise ValueError(
diff --git a/rllib/algorithms/ppo/ppo.py b/rllib/algorithms/ppo/ppo.py
index ab39972849d7..dedd086c34bc 100644
--- a/rllib/algorithms/ppo/ppo.py
+++ b/rllib/algorithms/ppo/ppo.py
@@ -290,17 +290,6 @@ def validate(self) -> None:
         # Call super's validation method.
         super().validate()
 
-        # Warn about new API stack on by default.
-        if self.enable_rl_module_and_learner:
-            logger.warning(
-                f"You are running {self.algo_class.__name__} on the new API stack! "
-                "This is the new default behavior for this algorithm. If you don't "
-                "want to use the new API stack, set `config.api_stack("
-                "enable_rl_module_and_learner=False,"
-                "enable_env_runner_and_connector_v2=False)`. For a detailed migration "
-                "guide, see here: https://docs.ray.io/en/master/rllib/new-api-stack-migration-guide.html"  # noqa
-            )
-
         # Synchronous sampling, on-policy/PPO algos -> Check mismatches between
         # `rollout_fragment_length` and `train_batch_size_per_learner` to avoid user
         # confusion.
@@ -350,8 +339,14 @@ def validate(self) -> None:
                 "batch_mode=complete_episodes."
             )
 
-        # Entropy coeff schedule checking.
+        # New API stack checks.
         if self.enable_rl_module_and_learner:
+            # `lr_schedule` checking.
+            if self.lr_schedule is not None:
+                raise ValueError(
+                    "`lr_schedule` is deprecated and must be None! Use the "
+                    "`lr` setting to set up a schedule."
+                )
             if self.entropy_coeff_schedule is not None:
                 raise ValueError(
                     "`entropy_coeff_schedule` is deprecated and must be None! Use the "
diff --git a/rllib/tuned_examples/sac/pendulum_sac.py b/rllib/tuned_examples/sac/pendulum_sac.py
index 21d9af89b94c..2a050378e14a 100644
--- a/rllib/tuned_examples/sac/pendulum_sac.py
+++ b/rllib/tuned_examples/sac/pendulum_sac.py
@@ -22,6 +22,7 @@
         actor_lr=2e-4 * (args.num_learners or 1) ** 0.5,
         critic_lr=8e-4 * (args.num_learners or 1) ** 0.5,
         alpha_lr=9e-4 * (args.num_learners or 1) ** 0.5,
+        # TODO (sven): Maybe go back to making this a dict of the sub-learning rates?
         lr=None,
         target_entropy="auto",
         n_step=(2, 5),
diff --git a/rllib/utils/schedules/scheduler.py b/rllib/utils/schedules/scheduler.py
index 231b25c71ea5..ea7eca808457 100644
--- a/rllib/utils/schedules/scheduler.py
+++ b/rllib/utils/schedules/scheduler.py
@@ -86,6 +86,7 @@ def validate(
         Raises:
             ValueError: In case, errors are found in the schedule's format.
         """
+        # Fixed (single) value.
        if (
             isinstance(fixed_value_or_schedule, (int, float))
             or fixed_value_or_schedule is None
@@ -97,17 +98,24 @@ def validate(
         ):
             raise ValueError(
                 f"Invalid `{setting_name}` ({fixed_value_or_schedule}) specified! "
-                f"Must be a list of at least 2 tuples, each of the form "
-                f"(`timestep`, `{description} to reach`), e.g. "
+                f"Must be a list of 2 or more tuples, each of the form "
+                f"(`timestep`, `{description} to reach`), for example "
                 "`[(0, 0.001), (1e6, 0.0001), (2e6, 0.00005)]`."
             )
         elif fixed_value_or_schedule[0][0] != 0:
             raise ValueError(
-                f"When providing a `{setting_name}`, the first timestep must be 0 "
-                f"and the corresponding lr value is the initial {description}! You "
-                f"provided ts={fixed_value_or_schedule[0][0]} {description}="
+                f"When providing a `{setting_name}` schedule, the first timestep must "
+                f"be 0 and the corresponding value is the initial {description}! "
+                f"You provided ts={fixed_value_or_schedule[0][0]} {description}="
                 f"{fixed_value_or_schedule[0][1]}."
             )
+        elif any(len(pair) != 2 for pair in fixed_value_or_schedule):
+            raise ValueError(
+                f"When providing a `{setting_name}` schedule, each tuple in the "
+                f"schedule list must have exactly 2 items of the form "
+                f"(`timestep`, `{description} to reach`), for example "
+                "`[(0, 0.001), (1e6, 0.0001), (2e6, 0.00005)]`."
+            )
 
     def get_current_value(self) -> TensorType:
         """Returns the current value (as a tensor variable).
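
For reference, a minimal sketch of the schedule format this patch validates (illustrative values, not part of the patch): on the new API stack, a schedule is passed directly through a setting such as `lr` or `entropy_coeff` as a list of 2 or more (timestep, value) pairs whose first timestep must be 0, replacing the deprecated `lr_schedule` / `entropy_coeff_schedule` settings.

    from ray.rllib.algorithms.ppo import PPOConfig

    config = (
        PPOConfig()
        .environment("CartPole-v1")
        .training(
            # Piecewise schedule: 2 or more (timestep, value) pairs; the first
            # timestep must be 0 and supplies the initial value.
            lr=[[0, 0.001], [1_000_000, 0.0001], [2_000_000, 0.00005]],
            # A fixed (single) value remains valid as well.
            entropy_coeff=0.01,
        )
    )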
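
Likewise, a sketch of the validation behavior itself, assuming `Scheduler.validate` is callable with the keyword arguments that appear in the hunks above (`fixed_value_or_schedule`, `setting_name`, `description`); the values are illustrative:

    from ray.rllib.utils.schedules.scheduler import Scheduler

    # Passes: first timestep is 0 and every pair has exactly 2 items.
    Scheduler.validate(
        fixed_value_or_schedule=[(0, 0.001), (1_000_000, 0.0001)],
        setting_name="lr",
        description="learning rate",
    )

    # Raises ValueError via the new `elif any(len(pair) != 2 ...)` branch:
    # the second pair has 3 items instead of 2.
    Scheduler.validate(
        fixed_value_or_schedule=[(0, 0.001), (1_000_000, 0.0001, 0.5)],
        setting_name="lr",
        description="learning rate",
    )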