From b70774601098585b5a8863e03ffa611312708c51 Mon Sep 17 00:00:00 2001 From: Jason Rupert <92821409+jrupert-unity@users.noreply.github.com> Date: Tue, 30 Nov 2021 16:24:58 -0800 Subject: [PATCH 01/10] Quick changes to support Elo as a curriculum measure. --- .../trainers/environment_parameter_manager.py | 2 ++ ml-agents/mlagents/trainers/settings.py | 13 +++++++++++-- ml-agents/mlagents/trainers/trainer_controller.py | 7 ++++++- 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/ml-agents/mlagents/trainers/environment_parameter_manager.py b/ml-agents/mlagents/trainers/environment_parameter_manager.py index 9c1bf981c4..3663e4afe7 100644 --- a/ml-agents/mlagents/trainers/environment_parameter_manager.py +++ b/ml-agents/mlagents/trainers/environment_parameter_manager.py @@ -137,6 +137,7 @@ def update_lessons( trainer_steps: Dict[str, int], trainer_max_steps: Dict[str, int], trainer_reward_buffer: Dict[str, List[float]], + trainer_elo_score: Dict[str, int], ) -> Tuple[bool, bool]: """ Given progress metrics, calculates if at least one environment parameter is @@ -169,6 +170,7 @@ def update_lessons( float(trainer_steps[behavior_to_consider]) / float(trainer_max_steps[behavior_to_consider]), trainer_reward_buffer[behavior_to_consider], + trainer_elo_score[behavior_to_consider] if trainer_elo_score else None, self._smoothed_values[param_name], ) self._smoothed_values[param_name] = new_smoothing diff --git a/ml-agents/mlagents/trainers/settings.py b/ml-agents/mlagents/trainers/settings.py index fe52fb838c..20a115ebea 100644 --- a/ml-agents/mlagents/trainers/settings.py +++ b/ml-agents/mlagents/trainers/settings.py @@ -490,6 +490,7 @@ class CompletionCriteriaSettings: class MeasureType(Enum): PROGRESS: str = "progress" REWARD: str = "reward" + ELO: str = "Elo" behavior: str measure: MeasureType = attr.ib(default=MeasureType.REWARD) @@ -515,7 +516,7 @@ def _check_threshold_value(self, attribute, value): ) def need_increment( - self, progress: float, reward_buffer: List[float], smoothing: float + self, progress: float, reward_buffer: List[float], elo_score: float, smoothing: float ) -> Tuple[bool, float]: """ Given measures, this method returns a boolean indicating if the lesson @@ -524,10 +525,11 @@ def need_increment( # Is the min number of episodes reached if len(reward_buffer) < self.min_lesson_length: return False, smoothing + #BUG? Shouldn't we check if the max number of steps has passed if it isn't a PROGRESS measure? if self.measure == CompletionCriteriaSettings.MeasureType.PROGRESS: if progress > self.threshold: return True, smoothing - if self.measure == CompletionCriteriaSettings.MeasureType.REWARD: + elif self.measure == CompletionCriteriaSettings.MeasureType.REWARD: if len(reward_buffer) < 1: return False, smoothing measure = np.mean(reward_buffer) @@ -538,6 +540,13 @@ def need_increment( smoothing = measure if measure > self.threshold: return True, smoothing + elif self.measure == CompletionCriteriaSettings.MeasureType.ELO: + if elo_score is None: + raise TrainerConfigError( + "Elo isn't a valid completion criteria measure if not using self-play." 
+ ) + if elo_score > self.threshold: + return True, smoothing return False, smoothing diff --git a/ml-agents/mlagents/trainers/trainer_controller.py b/ml-agents/mlagents/trainers/trainer_controller.py index 8700da3cbe..440c83b101 100644 --- a/ml-agents/mlagents/trainers/trainer_controller.py +++ b/ml-agents/mlagents/trainers/trainer_controller.py @@ -211,10 +211,15 @@ def reset_env_if_ready(self, env: EnvManager) -> None: reward_buff = {k: list(t.reward_buffer) for (k, t) in self.trainers.items()} curr_step = {k: int(t.get_step) for (k, t) in self.trainers.items()} max_step = {k: int(t.get_max_steps) for (k, t) in self.trainers.items()} + try: + curr_elo = {k: float(t.current_elo) for (k, t) in self.trainers.items()} + except AttributeError: + curr_elo = None + # Attempt to increment the lessons of the brains who # were ready. updated, param_must_reset = self.param_manager.update_lessons( - curr_step, max_step, reward_buff + curr_step, max_step, reward_buff, curr_elo ) if updated: for trainer in self.trainers.values(): From 0bb2e064b325dea619615c7f774c2a5d1903ed27 Mon Sep 17 00:00:00 2001 From: Jason Rupert <92821409+jrupert-unity@users.noreply.github.com> Date: Thu, 2 Dec 2021 10:54:31 -0800 Subject: [PATCH 02/10] Simple example of using Elo score as curriculum step completion criteria --- config/poca/SoccerTwosCurriculum.yaml | 46 +++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 config/poca/SoccerTwosCurriculum.yaml diff --git a/config/poca/SoccerTwosCurriculum.yaml b/config/poca/SoccerTwosCurriculum.yaml new file mode 100644 index 0000000000..0562a28a2c --- /dev/null +++ b/config/poca/SoccerTwosCurriculum.yaml @@ -0,0 +1,46 @@ +behaviors: + SoccerTwos: + trainer_type: poca + hyperparameters: + batch_size: 2048 + buffer_size: 20480 + learning_rate: 0.0003 + beta: 0.005 + epsilon: 0.2 + lambd: 0.95 + num_epoch: 3 + learning_rate_schedule: constant + network_settings: + normalize: false + hidden_units: 512 + num_layers: 2 + vis_encode_type: simple + reward_signals: + extrinsic: + gamma: 0.99 + strength: 1.0 + keep_checkpoints: 5 + max_steps: 50000000 + time_horizon: 1000 + summary_freq: 10000 + self_play: + save_steps: 50000 + team_change: 200000 + swap_steps: 2000 + window: 10 + play_against_latest_model_ratio: 0.5 + initial_elo: 1200.0 +environment_parameters: + ball_touch: + curriculum: + - name: Lesson0 # The '-' is important as this is a list + completion_criteria: + measure: Elo + behavior: SoccerTwos + signal_smoothing: false + min_lesson_length: 100 + threshold: 1250.0 + value: 1.0 + - name: Lesson1 # The '-' is important as this is a list + value: 0.0 + \ No newline at end of file From dcb7db81395a0d56f926d1f8f0350a16c34dca47 Mon Sep 17 00:00:00 2001 From: jrupert Date: Fri, 17 Dec 2021 10:48:05 -0800 Subject: [PATCH 03/10] WIP --- .../mlagents/trainers/tests/test_env_param_manager.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/ml-agents/mlagents/trainers/tests/test_env_param_manager.py b/ml-agents/mlagents/trainers/tests/test_env_param_manager.py index e5ac2e4f00..72ed2307ca 100644 --- a/ml-agents/mlagents/trainers/tests/test_env_param_manager.py +++ b/ml-agents/mlagents/trainers/tests/test_env_param_manager.py @@ -209,16 +209,19 @@ def test_curriculum_raises_all_completion_criteria_conversion(): trainer_steps={"fake_behavior": 500}, trainer_max_steps={"fake_behavior": 1000}, trainer_reward_buffer={"fake_behavior": [1000] * 101}, + trainer_elo_score={"fake_behavior": 1220.0}, #TODO: trainer_elo_scores aren't set properly for tests ) == 
(True, True) assert param_manager.update_lessons( trainer_steps={"fake_behavior": 500}, trainer_max_steps={"fake_behavior": 1000}, trainer_reward_buffer={"fake_behavior": [1000] * 101}, + trainer_elo_score={"fake_behavior": 1220.0}, ) == (True, True) assert param_manager.update_lessons( trainer_steps={"fake_behavior": 500}, trainer_max_steps={"fake_behavior": 1000}, trainer_reward_buffer={"fake_behavior": [1000] * 101}, + trainer_elo_score={"fake_behavior": 1220.0}, ) == (False, False) assert param_manager.get_current_lesson_number() == {"param_1": 2} @@ -279,17 +282,20 @@ def test_create_manager(): trainer_steps={"fake_behavior": 500}, trainer_max_steps={"fake_behavior": 1000}, trainer_reward_buffer={"fake_behavior": [1000] * 99}, + trainer_elo_score={"fake_behavior": 1220.0}, ) == (False, False) # Not enough episodes reward assert param_manager.update_lessons( trainer_steps={"fake_behavior": 500}, trainer_max_steps={"fake_behavior": 1000}, trainer_reward_buffer={"fake_behavior": [1] * 101}, + trainer_elo_score={"fake_behavior": 1220.0}, ) == (False, False) assert param_manager.update_lessons( trainer_steps={"fake_behavior": 500}, trainer_max_steps={"fake_behavior": 1000}, trainer_reward_buffer={"fake_behavior": [1000] * 101}, + trainer_elo_score={"fake_behavior": 1220.0}, ) == (True, True) assert param_manager.get_current_lesson_number() == { "param_1": 1, @@ -310,6 +316,7 @@ def test_create_manager(): trainer_steps={"fake_behavior": 700}, trainer_max_steps={"fake_behavior": 1000}, trainer_reward_buffer={"fake_behavior": [0] * 101}, + trainer_elo_score={"fake_behavior": 1220.0}, ) == (True, False) assert param_manager.get_current_samplers() == { "param_1": UniformSettings(seed=1337 + 2, min_value=1, max_value=3), From 8af0540bc5868c9b58fde5b7b4014859ff3d50d0 Mon Sep 17 00:00:00 2001 From: jrupert Date: Sun, 19 Dec 2021 16:41:53 -0800 Subject: [PATCH 04/10] Update tests to work with new parameter in curriculum update for Elo, and to test Elo curriculum functionality. --- .../trainers/environment_parameter_manager.py | 2 + .../trainers/tests/test_env_param_manager.py | 46 ++++++++++++++----- 2 files changed, 36 insertions(+), 12 deletions(-) diff --git a/ml-agents/mlagents/trainers/environment_parameter_manager.py b/ml-agents/mlagents/trainers/environment_parameter_manager.py index 3663e4afe7..0c5dd3746a 100644 --- a/ml-agents/mlagents/trainers/environment_parameter_manager.py +++ b/ml-agents/mlagents/trainers/environment_parameter_manager.py @@ -149,6 +149,8 @@ def update_lessons( of training steps this behavior's trainer has performed. :param trainer_reward_buffer: A dictionary from behavior_name to the list of the most recent episode returns for this behavior's trainer. + :trainer_elo_score: A Dictionary from behavior_name to the minimum Elo score + to be reached. 
:returns: A tuple of two booleans : (True if any lesson has changed, True if environment needs to reset) """ diff --git a/ml-agents/mlagents/trainers/tests/test_env_param_manager.py b/ml-agents/mlagents/trainers/tests/test_env_param_manager.py index 72ed2307ca..55a3648522 100644 --- a/ml-agents/mlagents/trainers/tests/test_env_param_manager.py +++ b/ml-agents/mlagents/trainers/tests/test_env_param_manager.py @@ -131,7 +131,7 @@ def test_curriculum_conversion(): assert lesson.value.max_value == 3 -test_bad_curriculum_no_competion_criteria_config_yaml = """ +test_bad_curriculum_no_completion_criteria_config_yaml = """ environment_parameters: param_1: curriculum: @@ -154,7 +154,7 @@ def test_curriculum_conversion(): """ -test_bad_curriculum_all_competion_criteria_config_yaml = """ +test_bad_curriculum_all_completion_criteria_config_yaml = """ environment_parameters: param_1: curriculum: @@ -174,6 +174,14 @@ def test_curriculum_conversion(): min_lesson_length: 100 require_reset: true value: 2 + - name: Lesson3 + completion_criteria: + measure: Elo + behavior: fake_behavior + threshold: 1300 + min_lesson_length: 100 + require_reset: true + value: 3 - name: Lesson3 completion_criteria: measure: reward @@ -192,14 +200,14 @@ def test_curriculum_conversion(): def test_curriculum_raises_no_completion_criteria_conversion(): with pytest.raises(TrainerConfigError): RunOptions.from_dict( - yaml.safe_load(test_bad_curriculum_no_competion_criteria_config_yaml) + yaml.safe_load(test_bad_curriculum_no_completion_criteria_config_yaml) ) def test_curriculum_raises_all_completion_criteria_conversion(): with pytest.warns(TrainerConfigWarning): run_options = RunOptions.from_dict( - yaml.safe_load(test_bad_curriculum_all_competion_criteria_config_yaml) + yaml.safe_load(test_bad_curriculum_all_completion_criteria_config_yaml) ) param_manager = EnvironmentParameterManager( @@ -209,22 +217,36 @@ def test_curriculum_raises_all_completion_criteria_conversion(): trainer_steps={"fake_behavior": 500}, trainer_max_steps={"fake_behavior": 1000}, trainer_reward_buffer={"fake_behavior": [1000] * 101}, - trainer_elo_score={"fake_behavior": 1220.0}, #TODO: trainer_elo_scores aren't set properly for tests + trainer_elo_score={"fake_behavior": 1200.0}, #TODO: trainer_elo_scores aren't set properly for tests ) == (True, True) assert param_manager.update_lessons( trainer_steps={"fake_behavior": 500}, trainer_max_steps={"fake_behavior": 1000}, trainer_reward_buffer={"fake_behavior": [1000] * 101}, - trainer_elo_score={"fake_behavior": 1220.0}, + trainer_elo_score={"fake_behavior": 1200.0}, ) == (True, True) + assert param_manager.get_current_lesson_number() == {"param_1": 2} assert param_manager.update_lessons( trainer_steps={"fake_behavior": 500}, trainer_max_steps={"fake_behavior": 1000}, trainer_reward_buffer={"fake_behavior": [1000] * 101}, - trainer_elo_score={"fake_behavior": 1220.0}, + trainer_elo_score={"fake_behavior": 1200.0}, ) == (False, False) assert param_manager.get_current_lesson_number() == {"param_1": 2} - + assert param_manager.update_lessons( + trainer_steps={"fake_behavior": 500}, + trainer_max_steps={"fake_behavior": 1000}, + trainer_reward_buffer={"fake_behavior": [1000] * 101}, + trainer_elo_score={"fake_behavior": 1500.0}, + ) == (True, True) + assert param_manager.get_current_lesson_number() == {"param_1": 3} + assert param_manager.update_lessons( + trainer_steps={"fake_behavior": 500}, + trainer_max_steps={"fake_behavior": 1000}, + trainer_reward_buffer={"fake_behavior": [1000] * 101}, + 
trainer_elo_score={"fake_behavior": 1200.0}, + ) == (False, False) + assert param_manager.get_current_lesson_number() == {"param_1": 3} test_everything_config_yaml = """ environment_parameters: @@ -282,20 +304,20 @@ def test_create_manager(): trainer_steps={"fake_behavior": 500}, trainer_max_steps={"fake_behavior": 1000}, trainer_reward_buffer={"fake_behavior": [1000] * 99}, - trainer_elo_score={"fake_behavior": 1220.0}, + trainer_elo_score={"fake_behavior": 1200.0}, ) == (False, False) # Not enough episodes reward assert param_manager.update_lessons( trainer_steps={"fake_behavior": 500}, trainer_max_steps={"fake_behavior": 1000}, trainer_reward_buffer={"fake_behavior": [1] * 101}, - trainer_elo_score={"fake_behavior": 1220.0}, + trainer_elo_score={"fake_behavior": 1200.0}, ) == (False, False) assert param_manager.update_lessons( trainer_steps={"fake_behavior": 500}, trainer_max_steps={"fake_behavior": 1000}, trainer_reward_buffer={"fake_behavior": [1000] * 101}, - trainer_elo_score={"fake_behavior": 1220.0}, + trainer_elo_score={"fake_behavior": 1200.0}, ) == (True, True) assert param_manager.get_current_lesson_number() == { "param_1": 1, @@ -316,7 +338,7 @@ def test_create_manager(): trainer_steps={"fake_behavior": 700}, trainer_max_steps={"fake_behavior": 1000}, trainer_reward_buffer={"fake_behavior": [0] * 101}, - trainer_elo_score={"fake_behavior": 1220.0}, + trainer_elo_score={"fake_behavior": 1200.0}, ) == (True, False) assert param_manager.get_current_samplers() == { "param_1": UniformSettings(seed=1337 + 2, min_value=1, max_value=3), From 0ef3141a13408b8a6e788003574e197c280a7dd6 Mon Sep 17 00:00:00 2001 From: jrupert Date: Sun, 19 Dec 2021 17:05:08 -0800 Subject: [PATCH 05/10] Remove comment about possible bug (no bug). Fix test for not advancing to non-existent next state. As it was it would have failed because of the curriculum test and not the reason we want to check. --- ml-agents/mlagents/trainers/settings.py | 1 - ml-agents/mlagents/trainers/tests/test_env_param_manager.py | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/ml-agents/mlagents/trainers/settings.py b/ml-agents/mlagents/trainers/settings.py index 20a115ebea..ac2b41b437 100644 --- a/ml-agents/mlagents/trainers/settings.py +++ b/ml-agents/mlagents/trainers/settings.py @@ -525,7 +525,6 @@ def need_increment( # Is the min number of episodes reached if len(reward_buffer) < self.min_lesson_length: return False, smoothing - #BUG? Shouldn't we check if the max number of steps has passed if it isn't a PROGRESS measure? 
if self.measure == CompletionCriteriaSettings.MeasureType.PROGRESS: if progress > self.threshold: return True, smoothing diff --git a/ml-agents/mlagents/trainers/tests/test_env_param_manager.py b/ml-agents/mlagents/trainers/tests/test_env_param_manager.py index 55a3648522..c1f8ca881b 100644 --- a/ml-agents/mlagents/trainers/tests/test_env_param_manager.py +++ b/ml-agents/mlagents/trainers/tests/test_env_param_manager.py @@ -244,8 +244,8 @@ def test_curriculum_raises_all_completion_criteria_conversion(): trainer_steps={"fake_behavior": 500}, trainer_max_steps={"fake_behavior": 1000}, trainer_reward_buffer={"fake_behavior": [1000] * 101}, - trainer_elo_score={"fake_behavior": 1200.0}, - ) == (False, False) + trainer_elo_score={"fake_behavior": 1500.0}, + ) == (False, False) # No step to advance to assert param_manager.get_current_lesson_number() == {"param_1": 3} test_everything_config_yaml = """ From 1aa57b0d6ce58c6623ed975cae231fe2a29ef1f0 Mon Sep 17 00:00:00 2001 From: jrupert Date: Mon, 20 Dec 2021 15:45:02 -0800 Subject: [PATCH 06/10] Update changelog and fix curriculum test name. --- com.unity.ml-agents/CHANGELOG.md | 1 + ml-agents/mlagents/trainers/tests/test_env_param_manager.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/com.unity.ml-agents/CHANGELOG.md b/com.unity.ml-agents/CHANGELOG.md index b91f9ae2bb..da35994f6a 100755 --- a/com.unity.ml-agents/CHANGELOG.md +++ b/com.unity.ml-agents/CHANGELOG.md @@ -29,6 +29,7 @@ and this project adheres to 1. env_params.max_lifetime_restarts (--max-lifetime-restarts) [default=10] 2. env_params.restarts_rate_limit_n (--restarts-rate-limit-n) [default=1] 3. env_params.restarts_rate_limit_period_s (--restarts-rate-limit-period-s) [default=60] +-Added support for Elo as a curriculum learning completion criteria. ### Bug Fixes - Fixed a bug where the critics were not being normalized during training. (#5595) diff --git a/ml-agents/mlagents/trainers/tests/test_env_param_manager.py b/ml-agents/mlagents/trainers/tests/test_env_param_manager.py index c1f8ca881b..7a881c9ad8 100644 --- a/ml-agents/mlagents/trainers/tests/test_env_param_manager.py +++ b/ml-agents/mlagents/trainers/tests/test_env_param_manager.py @@ -182,7 +182,7 @@ def test_curriculum_conversion(): min_lesson_length: 100 require_reset: true value: 3 - - name: Lesson3 + - name: Lesson4 completion_criteria: measure: reward behavior: fake_behavior From c0ca235ae7b729883e5de033198a29af8712ea2d Mon Sep 17 00:00:00 2001 From: jrupert Date: Mon, 20 Dec 2021 15:58:59 -0800 Subject: [PATCH 07/10] Update changelog. --- com.unity.ml-agents/CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/com.unity.ml-agents/CHANGELOG.md b/com.unity.ml-agents/CHANGELOG.md index da35994f6a..2185cdcc79 100755 --- a/com.unity.ml-agents/CHANGELOG.md +++ b/com.unity.ml-agents/CHANGELOG.md @@ -29,7 +29,7 @@ and this project adheres to 1. env_params.max_lifetime_restarts (--max-lifetime-restarts) [default=10] 2. env_params.restarts_rate_limit_n (--restarts-rate-limit-n) [default=1] 3. env_params.restarts_rate_limit_period_s (--restarts-rate-limit-period-s) [default=60] --Added support for Elo as a curriculum learning completion criteria. +-Added support for Elo as a curriculum learning completion criteria. (#5646) ### Bug Fixes - Fixed a bug where the critics were not being normalized during training. (#5595) From 6d2ec7a700d84bc975f3f94561af26b50075a627 Mon Sep 17 00:00:00 2001 From: jrupert Date: Mon, 20 Dec 2021 15:58:59 -0800 Subject: [PATCH 08/10] Update changelog. 
Remove changelog note for now. --- com.unity.ml-agents/CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/com.unity.ml-agents/CHANGELOG.md b/com.unity.ml-agents/CHANGELOG.md index da35994f6a..2185cdcc79 100755 --- a/com.unity.ml-agents/CHANGELOG.md +++ b/com.unity.ml-agents/CHANGELOG.md @@ -29,7 +29,7 @@ and this project adheres to 1. env_params.max_lifetime_restarts (--max-lifetime-restarts) [default=10] 2. env_params.restarts_rate_limit_n (--restarts-rate-limit-n) [default=1] 3. env_params.restarts_rate_limit_period_s (--restarts-rate-limit-period-s) [default=60] --Added support for Elo as a curriculum learning completion criteria. +-Added support for Elo as a curriculum learning completion criteria. (#5646) ### Bug Fixes - Fixed a bug where the critics were not being normalized during training. (#5595) From 15136c57ff51098b6bb2e366288c494e48bd35db Mon Sep 17 00:00:00 2001 From: Jason Rupert <92821409+jrupert-unity@users.noreply.github.com> Date: Tue, 18 Jan 2022 15:18:43 -0800 Subject: [PATCH 09/10] Removed previous changelog note (for now). --- com.unity.ml-agents/CHANGELOG.md | 1 - 1 file changed, 1 deletion(-) diff --git a/com.unity.ml-agents/CHANGELOG.md b/com.unity.ml-agents/CHANGELOG.md index 2185cdcc79..b91f9ae2bb 100755 --- a/com.unity.ml-agents/CHANGELOG.md +++ b/com.unity.ml-agents/CHANGELOG.md @@ -29,7 +29,6 @@ and this project adheres to 1. env_params.max_lifetime_restarts (--max-lifetime-restarts) [default=10] 2. env_params.restarts_rate_limit_n (--restarts-rate-limit-n) [default=1] 3. env_params.restarts_rate_limit_period_s (--restarts-rate-limit-period-s) [default=60] --Added support for Elo as a curriculum learning completion criteria. (#5646) ### Bug Fixes - Fixed a bug where the critics were not being normalized during training. (#5595) From 192ea10e398bd5f04314535cc1cd3e41affd3b18 Mon Sep 17 00:00:00 2001 From: Jason Rupert <92821409+jrupert-unity@users.noreply.github.com> Date: Tue, 18 Jan 2022 15:42:58 -0800 Subject: [PATCH 10/10] Update changelog. --- com.unity.ml-agents/CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/com.unity.ml-agents/CHANGELOG.md b/com.unity.ml-agents/CHANGELOG.md index 951dcb5691..4655f4d8fa 100755 --- a/com.unity.ml-agents/CHANGELOG.md +++ b/com.unity.ml-agents/CHANGELOG.md @@ -13,6 +13,8 @@ and this project adheres to ### Minor Changes #### com.unity.ml-agents / com.unity.ml-agents.extensions (C#) #### ml-agents / ml-agents-envs / gym-unity (Python) +-Added support for Elo as a curriculum learning completion criteria. (#5646) + ### Bug Fixes #### com.unity.ml-agents / com.unity.ml-agents.extensions (C#) #### ml-agents / ml-agents-envs / gym-unity (Python)
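
For reference, the smallest configuration that exercises the new measure is a completion_criteria block with measure: Elo. The sketch below mirrors config/poca/SoccerTwosCurriculum.yaml from patch 02; the environment parameter name (ball_touch), behavior name, and threshold are illustrative and assume the SoccerTwos behavior is trained with self_play enabled.

    # Minimal sketch: advance from Lesson0 to Lesson1 once the SoccerTwos
    # trainer's Elo rating exceeds 1250. "Elo" is the new MeasureType added
    # to CompletionCriteriaSettings in settings.py and only applies when the
    # named behavior is trained with self_play.
    environment_parameters:
      ball_touch:
        curriculum:
          - name: Lesson0
            completion_criteria:
              measure: Elo
              behavior: SoccerTwos
              signal_smoothing: false
              min_lesson_length: 100
              threshold: 1250.0
            value: 1.0
          - name: Lesson1
            value: 0.0

As implemented in these patches, trainer_controller passes None for the Elo scores when a trainer has no current_elo attribute (i.e. self_play is not configured), and need_increment then raises a TrainerConfigError for an Elo-based criterion rather than silently never advancing the lesson.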