From b70774601098585b5a8863e03ffa611312708c51 Mon Sep 17 00:00:00 2001 From: Jason Rupert <92821409+jrupert-unity@users.noreply.github.com> Date: Tue, 30 Nov 2021 16:24:58 -0800 Subject: [PATCH 01/10] Quick changes to support Elo as a curriculum measure. --- .../trainers/environment_parameter_manager.py | 2 ++ ml-agents/mlagents/trainers/settings.py | 13 +++++++++++-- ml-agents/mlagents/trainers/trainer_controller.py | 7 ++++++- 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/ml-agents/mlagents/trainers/environment_parameter_manager.py b/ml-agents/mlagents/trainers/environment_parameter_manager.py index 9c1bf981c4..3663e4afe7 100644 --- a/ml-agents/mlagents/trainers/environment_parameter_manager.py +++ b/ml-agents/mlagents/trainers/environment_parameter_manager.py @@ -137,6 +137,7 @@ def update_lessons( trainer_steps: Dict[str, int], trainer_max_steps: Dict[str, int], trainer_reward_buffer: Dict[str, List[float]], + trainer_elo_score: Dict[str, int], ) -> Tuple[bool, bool]: """ Given progress metrics, calculates if at least one environment parameter is @@ -169,6 +170,7 @@ def update_lessons( float(trainer_steps[behavior_to_consider]) / float(trainer_max_steps[behavior_to_consider]), trainer_reward_buffer[behavior_to_consider], + trainer_elo_score[behavior_to_consider] if trainer_elo_score else None, self._smoothed_values[param_name], ) self._smoothed_values[param_name] = new_smoothing diff --git a/ml-agents/mlagents/trainers/settings.py b/ml-agents/mlagents/trainers/settings.py index fe52fb838c..20a115ebea 100644 --- a/ml-agents/mlagents/trainers/settings.py +++ b/ml-agents/mlagents/trainers/settings.py @@ -490,6 +490,7 @@ class CompletionCriteriaSettings: class MeasureType(Enum): PROGRESS: str = "progress" REWARD: str = "reward" + ELO: str = "Elo" behavior: str measure: MeasureType = attr.ib(default=MeasureType.REWARD) @@ -515,7 +516,7 @@ def _check_threshold_value(self, attribute, value): ) def need_increment( - self, progress: float, reward_buffer: List[float], smoothing: float + self, progress: float, reward_buffer: List[float], elo_score: float, smoothing: float ) -> Tuple[bool, float]: """ Given measures, this method returns a boolean indicating if the lesson @@ -524,10 +525,11 @@ def need_increment( # Is the min number of episodes reached if len(reward_buffer) < self.min_lesson_length: return False, smoothing + #BUG? Shouldn't we check if the max number of steps has passed if it isn't a PROGRESS measure? if self.measure == CompletionCriteriaSettings.MeasureType.PROGRESS: if progress > self.threshold: return True, smoothing - if self.measure == CompletionCriteriaSettings.MeasureType.REWARD: + elif self.measure == CompletionCriteriaSettings.MeasureType.REWARD: if len(reward_buffer) < 1: return False, smoothing measure = np.mean(reward_buffer) @@ -538,6 +540,13 @@ def need_increment( smoothing = measure if measure > self.threshold: return True, smoothing + elif self.measure == CompletionCriteriaSettings.MeasureType.ELO: + if elo_score is None: + raise TrainerConfigError( + "Elo isn't a valid completion criteria measure if not using self-play." 
+ ) + if elo_score > self.threshold: + return True, smoothing return False, smoothing diff --git a/ml-agents/mlagents/trainers/trainer_controller.py b/ml-agents/mlagents/trainers/trainer_controller.py index 8700da3cbe..440c83b101 100644 --- a/ml-agents/mlagents/trainers/trainer_controller.py +++ b/ml-agents/mlagents/trainers/trainer_controller.py @@ -211,10 +211,15 @@ def reset_env_if_ready(self, env: EnvManager) -> None: reward_buff = {k: list(t.reward_buffer) for (k, t) in self.trainers.items()} curr_step = {k: int(t.get_step) for (k, t) in self.trainers.items()} max_step = {k: int(t.get_max_steps) for (k, t) in self.trainers.items()} + try: + curr_elo = {k: float(t.current_elo) for (k, t) in self.trainers.items()} + except AttributeError: + curr_elo = None + # Attempt to increment the lessons of the brains who # were ready. updated, param_must_reset = self.param_manager.update_lessons( - curr_step, max_step, reward_buff + curr_step, max_step, reward_buff, curr_elo ) if updated: for trainer in self.trainers.values(): From 0bb2e064b325dea619615c7f774c2a5d1903ed27 Mon Sep 17 00:00:00 2001 From: Jason Rupert <92821409+jrupert-unity@users.noreply.github.com> Date: Thu, 2 Dec 2021 10:54:31 -0800 Subject: [PATCH 02/10] Simple example of using Elo score as curriculum step completion criteria --- config/poca/SoccerTwosCurriculum.yaml | 46 +++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 config/poca/SoccerTwosCurriculum.yaml diff --git a/config/poca/SoccerTwosCurriculum.yaml b/config/poca/SoccerTwosCurriculum.yaml new file mode 100644 index 0000000000..0562a28a2c --- /dev/null +++ b/config/poca/SoccerTwosCurriculum.yaml @@ -0,0 +1,46 @@ +behaviors: + SoccerTwos: + trainer_type: poca + hyperparameters: + batch_size: 2048 + buffer_size: 20480 + learning_rate: 0.0003 + beta: 0.005 + epsilon: 0.2 + lambd: 0.95 + num_epoch: 3 + learning_rate_schedule: constant + network_settings: + normalize: false + hidden_units: 512 + num_layers: 2 + vis_encode_type: simple + reward_signals: + extrinsic: + gamma: 0.99 + strength: 1.0 + keep_checkpoints: 5 + max_steps: 50000000 + time_horizon: 1000 + summary_freq: 10000 + self_play: + save_steps: 50000 + team_change: 200000 + swap_steps: 2000 + window: 10 + play_against_latest_model_ratio: 0.5 + initial_elo: 1200.0 +environment_parameters: + ball_touch: + curriculum: + - name: Lesson0 # The '-' is important as this is a list + completion_criteria: + measure: Elo + behavior: SoccerTwos + signal_smoothing: false + min_lesson_length: 100 + threshold: 1250.0 + value: 1.0 + - name: Lesson1 # The '-' is important as this is a list + value: 0.0 + \ No newline at end of file From dcb7db81395a0d56f926d1f8f0350a16c34dca47 Mon Sep 17 00:00:00 2001 From: jrupert Date: Fri, 17 Dec 2021 10:48:05 -0800 Subject: [PATCH 03/10] WIP --- .../mlagents/trainers/tests/test_env_param_manager.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/ml-agents/mlagents/trainers/tests/test_env_param_manager.py b/ml-agents/mlagents/trainers/tests/test_env_param_manager.py index e5ac2e4f00..72ed2307ca 100644 --- a/ml-agents/mlagents/trainers/tests/test_env_param_manager.py +++ b/ml-agents/mlagents/trainers/tests/test_env_param_manager.py @@ -209,16 +209,19 @@ def test_curriculum_raises_all_completion_criteria_conversion(): trainer_steps={"fake_behavior": 500}, trainer_max_steps={"fake_behavior": 1000}, trainer_reward_buffer={"fake_behavior": [1000] * 101}, + trainer_elo_score={"fake_behavior": 1220.0}, #TODO: trainer_elo_scores aren't set properly for tests ) == 
(True, True) assert param_manager.update_lessons( trainer_steps={"fake_behavior": 500}, trainer_max_steps={"fake_behavior": 1000}, trainer_reward_buffer={"fake_behavior": [1000] * 101}, + trainer_elo_score={"fake_behavior": 1220.0}, ) == (True, True) assert param_manager.update_lessons( trainer_steps={"fake_behavior": 500}, trainer_max_steps={"fake_behavior": 1000}, trainer_reward_buffer={"fake_behavior": [1000] * 101}, + trainer_elo_score={"fake_behavior": 1220.0}, ) == (False, False) assert param_manager.get_current_lesson_number() == {"param_1": 2} @@ -279,17 +282,20 @@ def test_create_manager(): trainer_steps={"fake_behavior": 500}, trainer_max_steps={"fake_behavior": 1000}, trainer_reward_buffer={"fake_behavior": [1000] * 99}, + trainer_elo_score={"fake_behavior": 1220.0}, ) == (False, False) # Not enough episodes reward assert param_manager.update_lessons( trainer_steps={"fake_behavior": 500}, trainer_max_steps={"fake_behavior": 1000}, trainer_reward_buffer={"fake_behavior": [1] * 101}, + trainer_elo_score={"fake_behavior": 1220.0}, ) == (False, False) assert param_manager.update_lessons( trainer_steps={"fake_behavior": 500}, trainer_max_steps={"fake_behavior": 1000}, trainer_reward_buffer={"fake_behavior": [1000] * 101}, + trainer_elo_score={"fake_behavior": 1220.0}, ) == (True, True) assert param_manager.get_current_lesson_number() == { "param_1": 1, @@ -310,6 +316,7 @@ def test_create_manager(): trainer_steps={"fake_behavior": 700}, trainer_max_steps={"fake_behavior": 1000}, trainer_reward_buffer={"fake_behavior": [0] * 101}, + trainer_elo_score={"fake_behavior": 1220.0}, ) == (True, False) assert param_manager.get_current_samplers() == { "param_1": UniformSettings(seed=1337 + 2, min_value=1, max_value=3), From 8af0540bc5868c9b58fde5b7b4014859ff3d50d0 Mon Sep 17 00:00:00 2001 From: jrupert Date: Sun, 19 Dec 2021 16:41:53 -0800 Subject: [PATCH 04/10] Update tests to work with new parameter in curriculum update for Elo, and to test Elo curriculum functionality. --- .../trainers/environment_parameter_manager.py | 2 + .../trainers/tests/test_env_param_manager.py | 46 ++++++++++++++----- 2 files changed, 36 insertions(+), 12 deletions(-) diff --git a/ml-agents/mlagents/trainers/environment_parameter_manager.py b/ml-agents/mlagents/trainers/environment_parameter_manager.py index 3663e4afe7..0c5dd3746a 100644 --- a/ml-agents/mlagents/trainers/environment_parameter_manager.py +++ b/ml-agents/mlagents/trainers/environment_parameter_manager.py @@ -149,6 +149,8 @@ def update_lessons( of training steps this behavior's trainer has performed. :param trainer_reward_buffer: A dictionary from behavior_name to the list of the most recent episode returns for this behavior's trainer. + :trainer_elo_score: A Dictionary from behavior_name to the minimum Elo score + to be reached. 
:returns: A tuple of two booleans : (True if any lesson has changed, True if environment needs to reset) """ diff --git a/ml-agents/mlagents/trainers/tests/test_env_param_manager.py b/ml-agents/mlagents/trainers/tests/test_env_param_manager.py index 72ed2307ca..55a3648522 100644 --- a/ml-agents/mlagents/trainers/tests/test_env_param_manager.py +++ b/ml-agents/mlagents/trainers/tests/test_env_param_manager.py @@ -131,7 +131,7 @@ def test_curriculum_conversion(): assert lesson.value.max_value == 3 -test_bad_curriculum_no_competion_criteria_config_yaml = """ +test_bad_curriculum_no_completion_criteria_config_yaml = """ environment_parameters: param_1: curriculum: @@ -154,7 +154,7 @@ def test_curriculum_conversion(): """ -test_bad_curriculum_all_competion_criteria_config_yaml = """ +test_bad_curriculum_all_completion_criteria_config_yaml = """ environment_parameters: param_1: curriculum: @@ -174,6 +174,14 @@ def test_curriculum_conversion(): min_lesson_length: 100 require_reset: true value: 2 + - name: Lesson3 + completion_criteria: + measure: Elo + behavior: fake_behavior + threshold: 1300 + min_lesson_length: 100 + require_reset: true + value: 3 - name: Lesson3 completion_criteria: measure: reward @@ -192,14 +200,14 @@ def test_curriculum_conversion(): def test_curriculum_raises_no_completion_criteria_conversion(): with pytest.raises(TrainerConfigError): RunOptions.from_dict( - yaml.safe_load(test_bad_curriculum_no_competion_criteria_config_yaml) + yaml.safe_load(test_bad_curriculum_no_completion_criteria_config_yaml) ) def test_curriculum_raises_all_completion_criteria_conversion(): with pytest.warns(TrainerConfigWarning): run_options = RunOptions.from_dict( - yaml.safe_load(test_bad_curriculum_all_competion_criteria_config_yaml) + yaml.safe_load(test_bad_curriculum_all_completion_criteria_config_yaml) ) param_manager = EnvironmentParameterManager( @@ -209,22 +217,36 @@ def test_curriculum_raises_all_completion_criteria_conversion(): trainer_steps={"fake_behavior": 500}, trainer_max_steps={"fake_behavior": 1000}, trainer_reward_buffer={"fake_behavior": [1000] * 101}, - trainer_elo_score={"fake_behavior": 1220.0}, #TODO: trainer_elo_scores aren't set properly for tests + trainer_elo_score={"fake_behavior": 1200.0}, #TODO: trainer_elo_scores aren't set properly for tests ) == (True, True) assert param_manager.update_lessons( trainer_steps={"fake_behavior": 500}, trainer_max_steps={"fake_behavior": 1000}, trainer_reward_buffer={"fake_behavior": [1000] * 101}, - trainer_elo_score={"fake_behavior": 1220.0}, + trainer_elo_score={"fake_behavior": 1200.0}, ) == (True, True) + assert param_manager.get_current_lesson_number() == {"param_1": 2} assert param_manager.update_lessons( trainer_steps={"fake_behavior": 500}, trainer_max_steps={"fake_behavior": 1000}, trainer_reward_buffer={"fake_behavior": [1000] * 101}, - trainer_elo_score={"fake_behavior": 1220.0}, + trainer_elo_score={"fake_behavior": 1200.0}, ) == (False, False) assert param_manager.get_current_lesson_number() == {"param_1": 2} - + assert param_manager.update_lessons( + trainer_steps={"fake_behavior": 500}, + trainer_max_steps={"fake_behavior": 1000}, + trainer_reward_buffer={"fake_behavior": [1000] * 101}, + trainer_elo_score={"fake_behavior": 1500.0}, + ) == (True, True) + assert param_manager.get_current_lesson_number() == {"param_1": 3} + assert param_manager.update_lessons( + trainer_steps={"fake_behavior": 500}, + trainer_max_steps={"fake_behavior": 1000}, + trainer_reward_buffer={"fake_behavior": [1000] * 101}, + 
trainer_elo_score={"fake_behavior": 1200.0}, + ) == (False, False) + assert param_manager.get_current_lesson_number() == {"param_1": 3} test_everything_config_yaml = """ environment_parameters: @@ -282,20 +304,20 @@ def test_create_manager(): trainer_steps={"fake_behavior": 500}, trainer_max_steps={"fake_behavior": 1000}, trainer_reward_buffer={"fake_behavior": [1000] * 99}, - trainer_elo_score={"fake_behavior": 1220.0}, + trainer_elo_score={"fake_behavior": 1200.0}, ) == (False, False) # Not enough episodes reward assert param_manager.update_lessons( trainer_steps={"fake_behavior": 500}, trainer_max_steps={"fake_behavior": 1000}, trainer_reward_buffer={"fake_behavior": [1] * 101}, - trainer_elo_score={"fake_behavior": 1220.0}, + trainer_elo_score={"fake_behavior": 1200.0}, ) == (False, False) assert param_manager.update_lessons( trainer_steps={"fake_behavior": 500}, trainer_max_steps={"fake_behavior": 1000}, trainer_reward_buffer={"fake_behavior": [1000] * 101}, - trainer_elo_score={"fake_behavior": 1220.0}, + trainer_elo_score={"fake_behavior": 1200.0}, ) == (True, True) assert param_manager.get_current_lesson_number() == { "param_1": 1, @@ -316,7 +338,7 @@ def test_create_manager(): trainer_steps={"fake_behavior": 700}, trainer_max_steps={"fake_behavior": 1000}, trainer_reward_buffer={"fake_behavior": [0] * 101}, - trainer_elo_score={"fake_behavior": 1220.0}, + trainer_elo_score={"fake_behavior": 1200.0}, ) == (True, False) assert param_manager.get_current_samplers() == { "param_1": UniformSettings(seed=1337 + 2, min_value=1, max_value=3), From 0ef3141a13408b8a6e788003574e197c280a7dd6 Mon Sep 17 00:00:00 2001 From: jrupert Date: Sun, 19 Dec 2021 17:05:08 -0800 Subject: [PATCH 05/10] Remove comment about possible bug (no bug). Fix test for not advancing to non-existent next state. As it was it would have failed because of the curriculum test and not the reason we want to check. --- ml-agents/mlagents/trainers/settings.py | 1 - ml-agents/mlagents/trainers/tests/test_env_param_manager.py | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/ml-agents/mlagents/trainers/settings.py b/ml-agents/mlagents/trainers/settings.py index 20a115ebea..ac2b41b437 100644 --- a/ml-agents/mlagents/trainers/settings.py +++ b/ml-agents/mlagents/trainers/settings.py @@ -525,7 +525,6 @@ def need_increment( # Is the min number of episodes reached if len(reward_buffer) < self.min_lesson_length: return False, smoothing - #BUG? Shouldn't we check if the max number of steps has passed if it isn't a PROGRESS measure? 
if self.measure == CompletionCriteriaSettings.MeasureType.PROGRESS: if progress > self.threshold: return True, smoothing diff --git a/ml-agents/mlagents/trainers/tests/test_env_param_manager.py b/ml-agents/mlagents/trainers/tests/test_env_param_manager.py index 55a3648522..c1f8ca881b 100644 --- a/ml-agents/mlagents/trainers/tests/test_env_param_manager.py +++ b/ml-agents/mlagents/trainers/tests/test_env_param_manager.py @@ -244,8 +244,8 @@ def test_curriculum_raises_all_completion_criteria_conversion(): trainer_steps={"fake_behavior": 500}, trainer_max_steps={"fake_behavior": 1000}, trainer_reward_buffer={"fake_behavior": [1000] * 101}, - trainer_elo_score={"fake_behavior": 1200.0}, - ) == (False, False) + trainer_elo_score={"fake_behavior": 1500.0}, + ) == (False, False) # No step to advance to assert param_manager.get_current_lesson_number() == {"param_1": 3} test_everything_config_yaml = """ From 1aa57b0d6ce58c6623ed975cae231fe2a29ef1f0 Mon Sep 17 00:00:00 2001 From: jrupert Date: Mon, 20 Dec 2021 15:45:02 -0800 Subject: [PATCH 06/10] Update changelog and fix curriculum test name. --- com.unity.ml-agents/CHANGELOG.md | 1 + ml-agents/mlagents/trainers/tests/test_env_param_manager.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/com.unity.ml-agents/CHANGELOG.md b/com.unity.ml-agents/CHANGELOG.md index b91f9ae2bb..da35994f6a 100755 --- a/com.unity.ml-agents/CHANGELOG.md +++ b/com.unity.ml-agents/CHANGELOG.md @@ -29,6 +29,7 @@ and this project adheres to 1. env_params.max_lifetime_restarts (--max-lifetime-restarts) [default=10] 2. env_params.restarts_rate_limit_n (--restarts-rate-limit-n) [default=1] 3. env_params.restarts_rate_limit_period_s (--restarts-rate-limit-period-s) [default=60] +-Added support for Elo as a curriculum learning completion criteria. ### Bug Fixes - Fixed a bug where the critics were not being normalized during training. (#5595) diff --git a/ml-agents/mlagents/trainers/tests/test_env_param_manager.py b/ml-agents/mlagents/trainers/tests/test_env_param_manager.py index c1f8ca881b..7a881c9ad8 100644 --- a/ml-agents/mlagents/trainers/tests/test_env_param_manager.py +++ b/ml-agents/mlagents/trainers/tests/test_env_param_manager.py @@ -182,7 +182,7 @@ def test_curriculum_conversion(): min_lesson_length: 100 require_reset: true value: 3 - - name: Lesson3 + - name: Lesson4 completion_criteria: measure: reward behavior: fake_behavior From c0ca235ae7b729883e5de033198a29af8712ea2d Mon Sep 17 00:00:00 2001 From: jrupert Date: Mon, 20 Dec 2021 15:58:59 -0800 Subject: [PATCH 07/10] Update changelog. --- com.unity.ml-agents/CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/com.unity.ml-agents/CHANGELOG.md b/com.unity.ml-agents/CHANGELOG.md index da35994f6a..2185cdcc79 100755 --- a/com.unity.ml-agents/CHANGELOG.md +++ b/com.unity.ml-agents/CHANGELOG.md @@ -29,7 +29,7 @@ and this project adheres to 1. env_params.max_lifetime_restarts (--max-lifetime-restarts) [default=10] 2. env_params.restarts_rate_limit_n (--restarts-rate-limit-n) [default=1] 3. env_params.restarts_rate_limit_period_s (--restarts-rate-limit-period-s) [default=60] --Added support for Elo as a curriculum learning completion criteria. +-Added support for Elo as a curriculum learning completion criteria. (#5646) ### Bug Fixes - Fixed a bug where the critics were not being normalized during training. (#5595) From 6d2ec7a700d84bc975f3f94561af26b50075a627 Mon Sep 17 00:00:00 2001 From: jrupert Date: Mon, 20 Dec 2021 15:58:59 -0800 Subject: [PATCH 08/10] Update changelog. 
Remove changelog note for now. --- com.unity.ml-agents/CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/com.unity.ml-agents/CHANGELOG.md b/com.unity.ml-agents/CHANGELOG.md index da35994f6a..2185cdcc79 100755 --- a/com.unity.ml-agents/CHANGELOG.md +++ b/com.unity.ml-agents/CHANGELOG.md @@ -29,7 +29,7 @@ and this project adheres to 1. env_params.max_lifetime_restarts (--max-lifetime-restarts) [default=10] 2. env_params.restarts_rate_limit_n (--restarts-rate-limit-n) [default=1] 3. env_params.restarts_rate_limit_period_s (--restarts-rate-limit-period-s) [default=60] --Added support for Elo as a curriculum learning completion criteria. +-Added support for Elo as a curriculum learning completion criteria. (#5646) ### Bug Fixes - Fixed a bug where the critics were not being normalized during training. (#5595) From 15136c57ff51098b6bb2e366288c494e48bd35db Mon Sep 17 00:00:00 2001 From: Jason Rupert <92821409+jrupert-unity@users.noreply.github.com> Date: Tue, 18 Jan 2022 15:18:43 -0800 Subject: [PATCH 09/10] Removed previous changelog note (for now). --- com.unity.ml-agents/CHANGELOG.md | 1 - 1 file changed, 1 deletion(-) diff --git a/com.unity.ml-agents/CHANGELOG.md b/com.unity.ml-agents/CHANGELOG.md index 2185cdcc79..b91f9ae2bb 100755 --- a/com.unity.ml-agents/CHANGELOG.md +++ b/com.unity.ml-agents/CHANGELOG.md @@ -29,7 +29,6 @@ and this project adheres to 1. env_params.max_lifetime_restarts (--max-lifetime-restarts) [default=10] 2. env_params.restarts_rate_limit_n (--restarts-rate-limit-n) [default=1] 3. env_params.restarts_rate_limit_period_s (--restarts-rate-limit-period-s) [default=60] --Added support for Elo as a curriculum learning completion criteria. (#5646) ### Bug Fixes - Fixed a bug where the critics were not being normalized during training. (#5595) From 192ea10e398bd5f04314535cc1cd3e41affd3b18 Mon Sep 17 00:00:00 2001 From: Jason Rupert <92821409+jrupert-unity@users.noreply.github.com> Date: Tue, 18 Jan 2022 15:42:58 -0800 Subject: [PATCH 10/10] Update changelog. --- com.unity.ml-agents/CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/com.unity.ml-agents/CHANGELOG.md b/com.unity.ml-agents/CHANGELOG.md index 951dcb5691..4655f4d8fa 100755 --- a/com.unity.ml-agents/CHANGELOG.md +++ b/com.unity.ml-agents/CHANGELOG.md @@ -13,6 +13,8 @@ and this project adheres to ### Minor Changes #### com.unity.ml-agents / com.unity.ml-agents.extensions (C#) #### ml-agents / ml-agents-envs / gym-unity (Python) +-Added support for Elo as a curriculum learning completion criteria. (#5646) + ### Bug Fixes #### com.unity.ml-agents / com.unity.ml-agents.extensions (C#) #### ml-agents / ml-agents-envs / gym-unity (Python)
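
For reference, the smallest configuration that exercises the new measure is a completion_criteria block with measure: Elo. The sketch below mirrors config/poca/SoccerTwosCurriculum.yaml from patch 02; the environment parameter name (ball_touch), behavior name, and threshold are illustrative and assume the SoccerTwos behavior is trained with self_play enabled.

    # Minimal sketch: advance from Lesson0 to Lesson1 once the SoccerTwos
    # trainer's Elo rating exceeds 1250. "Elo" is the new MeasureType added
    # to CompletionCriteriaSettings in settings.py and only applies when the
    # named behavior is trained with self_play.
    environment_parameters:
      ball_touch:
        curriculum:
          - name: Lesson0
            completion_criteria:
              measure: Elo
              behavior: SoccerTwos
              signal_smoothing: false
              min_lesson_length: 100
              threshold: 1250.0
            value: 1.0
          - name: Lesson1
            value: 0.0

As implemented in these patches, trainer_controller passes None for the Elo scores when a trainer has no current_elo attribute (i.e. self_play is not configured), and need_increment then raises a TrainerConfigError for an Elo-based criterion rather than silently never advancing the lesson.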