docs: fix name, requirements, installation, doc formatting

PKU-Alignment · Jul 25, 2024 · b1e576f · b1e576f
2 parents 691b2f3 + 6b2ad4b
commit b1e576f
Show file tree

Hide file tree

Showing 128 changed files with 3,852 additions and 6,519 deletions.
diff --git a/LICENSE b/LICENSE
@@ -1,6 +1,6 @@
 MIT License
 
-Copyright (c) 2024 ProgressAlign Research Team
+Copyright (c) 2024 ProgressGym Research Team
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal

diff --git a/benchmark/README.md b/benchmark/README.md
@@ -1 +1 @@
-# ProgressAlign/`benchmark`
+# ProgressGym/`benchmark`
diff --git a/benchmark/framework.py b/benchmark/framework.py
@@ -8,9 +8,11 @@
 
 
 class JudgeBase(ABC):
-    """JudgeBase is the base class for all judges.
+    """
+    JudgeBase is the base class for all judges.
     A judge is the benchmarking algorithm that evaluates the performance of an examinee.
-    Each judge class corresponds to a challenge."""
+    Each judge class corresponds to a challenge.
+    """
 
     """Evaluation results"""
     examinee_model_history: List[Model]
@@ -38,11 +40,12 @@ def __init__(self, **kwargs):
 
     @abstractmethod
     def reset(self, **kwargs) -> None:
-        """Reset the internal state of the judge to start a new evaluation. This method is called before each test.
-
+        """
+        Reset the internal state of the judge to start a new evaluation. This method is called before each test.
         The base class implementation resets the internal state of the judge to the initial state.
         Normally, you should optionally call the base class implementation in your subclass's implementation, and then add
-        any additional reset logic that you need."""
+        any additional reset logic that you need.
+        """
 
         """Find the list of all models, sorted by timestep."""
         self.template_type = (
@@ -106,22 +109,24 @@ def reset(self, **kwargs) -> None:
 
     @abstractmethod
     def eval_snapshot(self, examinee: "ExamineeBase") -> None:
-        """Evaluate the examinee's performance at the current snapshot. This method is called by the judge at every iteration.
-
+        """
+        Evaluate the examinee's performance at the current snapshot. This method is called by the judge at every iteration.
         The base class implementation only does logging. It is recommended to do your own eval and then call the base class
-        implementation to perform logging."""
+        implementation to perform logging.
+        """
 
         self.eval_times += 1
         self.examinee_model_history.append(examinee.get_current_model())
         self.judge_model_history.append(self.current_model)
 
     @abstractmethod
     def tick(self) -> None:
-        """Move the internal state of the judge to the next timestep. This method is called by the judge at every iteration.
-
+        """
+        Move the internal state of the judge to the next timestep. This method is called by the judge at every iteration.
         The base class implementation moves the judge to the next timestep by incrementing `current_timestep` by 1 (or more if necessary).
         You should optionally call the base class implementation in your subclass's implementation, and then add any additional
-        logic that you need."""
+        logic that you need.
+        """
 
         self.current_timestep += 1
         if self.current_timestep >= len(self.model_list):
@@ -135,12 +140,13 @@ def tick(self) -> None:
     def query_from_examinee(
         self, prompt: Union[str, Data, List[Dict]], model: Model = None
     ) -> Union[str, Data, List[Dict]]:
-        """This method is called by the examinee to query the judge, which the judge will answer according to human preferences at the current timestep.
+        """
+        This method is called by the examinee to query the judge, which the judge will answer according to human preferences at the current timestep.
         The examinee will use this information to learn about the latest human preference, and update its language model accordingly.
-
         The base class implementation answers the prompt by directly querying `self.current_model`.
         You could either call the base class implementation in your subclass's implementation (possibly supplying a different `model`),
-        or override it if necessary."""
+        or override it if necessary.
+        """
 
         model = model or self.current_model
 
@@ -209,9 +215,9 @@ def query_from_examinee(
 
     @abstractmethod
     def produce_final_result(self) -> Dict[str, Any]:
-        """Return the final result of the evaluation. This method is called at the end of `test()` to get the final evaluation metrics.
+        """
+        Return the final result of the evaluation. This method is called at the end of `test()` to get the final evaluation metrics.
         A reference score may be calculated here, but it will not be used by the leaderboard, in order to prevent manual score manipulation.
-
         The base class implementation only performs logging. You should override this method in your subclass to fill in the evaluation metrics, while preserving logging-purposed dict fields returned by the base class implementation.
         """
 
@@ -234,11 +240,11 @@ def interpret_result(cls, result: Dict[str, Any]) -> float:
         raise NotImplementedError
 
     def test(self, examinee: "ExamineeBase", **kwargs) -> Dict[str, Any]:
-        """Run the examinee and evaluate its performance. This method is called by the user to evaluate the examinee.
+        """
+        Run the examinee and evaluate its performance. This method is called by the user to evaluate the examinee.
         The method returns a dictionary of evaluation metrics. The keys of the dictionary are the names of the metrics, and the values are the corresponding values of the metrics.
         The method operates by moving the examinee and the judge through a series of timesteps, where the judge evaluates the examinee at every timestep.
         Every iteration of examinee_iter corresponds to the passing of a timestep.
-
         Normally, you should not override this method in your subclass. Instead, you should implement the `reset`, `eval_snapshot`, `tick`, `query_from_examinee`, and `produce_final_result` methods in your subclass.
         """
 
@@ -270,7 +276,8 @@ def test_loop() -> Dict[str, Any]:
 
 
 class ExamineeBase(ABC):
-    """ExamineeBase is the base class for all examinees.
+    """
+    ExamineeBase is the base class for all examinees.
     An examinee is an alignment algorithm (in combination with a language model operated upon by the algorithm) that is benchmarked by a judge.
     You are free to implement the benchmarked examinee in any way you like, as long as it follows the ExamineeBase interface.
     In most cases, you need to re-implement most or all of the methods in your subclass. Base implementations are only provided as an example.
@@ -293,11 +300,12 @@ def __init__(self, **kwargs):
 
     @abstractmethod
     def reset(self, **kwargs) -> None:
-        """Initialize the examinee, including endowing it with a language model.
-
+        """
+        Initialize the examinee, including endowing it with a language model.
         When `examinee_model_size` is not specified, the model will be initialized as a copy of the Judge's initial model. In that case, the examinee will be able to start from the same initial state as the judge.
         Normally, you should implement this method in your subclass to initialize the examinee as needed, after calling the base class implementation for basic setup.
         """
+
         if "model_name" not in kwargs:
             self.model_size = (
                 int(kwargs["examinee_model_size"].lower().replace("b", "").strip())
@@ -344,8 +352,8 @@ def reset(self, **kwargs) -> None:
     def query_from_judge(
         self, prompt: Union[str, Data, List[Dict]], model: Model = None
     ) -> Union[str, Data, List[Dict]]:
-        """This method is called by the judge to query the examinee for a response to a prompt.
-
+        """
+        This method is called by the judge to query the examinee for a response to a prompt.
         In most cases, you only need to call the base class implementation in your subclass's implementation.
         """
 
@@ -407,8 +415,8 @@ def query_from_judge(
 
     @abstractmethod
     def get_current_model(self) -> Model:
-        """Return the current model that the examinee is using at this timestep.
-
+        """
+        Return the current model that the examinee is using at this timestep.
         The base class implementation returns the `current_model` attribute.
         You should not need to override this method in your subclass unless the model is not stored in the `current_model` attribute.
         """
@@ -417,7 +425,8 @@ def get_current_model(self) -> Model:
 
     @abstractmethod
     def run(self, judge: JudgeBase) -> Iterable:
-        """This method is called by the judge to start the examinee.
+        """
+        This method is called by the judge to start the examinee.
         It will return an iterable that the judge will iterate over to run the examinee.
         Every iteration corresponds to the passing of a timestep.
         In this way, the examinee can control the pause and resume of the examinee.

diff --git a/dataset/README.md b/dataset/README.md
@@ -1 +1 @@
-# ProgressAlign/`dataset`
+# ProgressGym/`dataset`
diff --git a/doc_generation/build/.doctrees/Data.doctree b/doc_generation/build/.doctrees/Data.doctree
diff --git a/doc_generation/build/.doctrees/Examinee.doctree b/doc_generation/build/.doctrees/Examinee.doctree
diff --git a/doc_generation/build/.doctrees/Judge.doctree b/doc_generation/build/.doctrees/Judge.doctree
diff --git a/doc_generation/build/.doctrees/Model.doctree b/doc_generation/build/.doctrees/Model.doctree
diff --git a/doc_generation/build/.doctrees/documentation.doctree b/doc_generation/build/.doctrees/documentation.doctree
diff --git a/doc_generation/build/.doctrees/index.doctree b/doc_generation/build/.doctrees/index.doctree
diff --git a/doc_generation/build/.doctrees/running.doctree b/doc_generation/build/.doctrees/running.doctree
diff --git a/doc_generation/build/.doctrees/usage.doctree b/doc_generation/build/.doctrees/usage.doctree
diff --git a/...neration/build/html/_static/alabaster.css → doc_generation/build/_static/alabaster.css b/...neration/build/html/_static/alabaster.css → doc_generation/build/_static/alabaster.css
diff --git a/doc_generation/build/html/_static/basic.css → doc_generation/build/_static/basic.css b/doc_generation/build/html/_static/basic.css → doc_generation/build/_static/basic.css
diff --git a/doc_generation/build/html/_static/custom.css → doc_generation/build/_static/custom.css b/doc_generation/build/html/_static/custom.css → doc_generation/build/_static/custom.css
diff --git a/doc_generation/build/html/_static/debug.css → doc_generation/build/_static/debug.css b/doc_generation/build/html/_static/debug.css → doc_generation/build/_static/debug.css
diff --git a/...generation/build/html/_static/doctools.js → doc_generation/build/_static/doctools.js b/...generation/build/html/_static/doctools.js → doc_generation/build/_static/doctools.js
diff --git a/...ild/html/_static/documentation_options.js → ...on/build/_static/documentation_options.js b/...ild/html/_static/documentation_options.js → ...on/build/_static/documentation_options.js
diff --git a/doc_generation/build/html/_static/file.png → doc_generation/build/_static/file.png b/doc_generation/build/html/_static/file.png → doc_generation/build/_static/file.png
diff --git a/...ation/build/html/_static/language_data.js → ...generation/build/_static/language_data.js b/...ation/build/html/_static/language_data.js → ...generation/build/_static/language_data.js
diff --git a/doc_generation/build/html/_static/minus.png → doc_generation/build/_static/minus.png b/doc_generation/build/html/_static/minus.png → doc_generation/build/_static/minus.png
diff --git a/doc_generation/build/html/_static/plus.png → doc_generation/build/_static/plus.png b/doc_generation/build/html/_static/plus.png → doc_generation/build/_static/plus.png
diff --git a/...eneration/build/html/_static/pygments.css → doc_generation/build/_static/pygments.css b/...eneration/build/html/_static/pygments.css → doc_generation/build/_static/pygments.css
diff --git a/...d/html/_static/scripts/furo-extensions.js → .../build/_static/scripts/furo-extensions.js b/...d/html/_static/scripts/furo-extensions.js → .../build/_static/scripts/furo-extensions.js
diff --git a/...ration/build/html/_static/scripts/furo.js → doc_generation/build/_static/scripts/furo.js b/...ration/build/html/_static/scripts/furo.js → doc_generation/build/_static/scripts/furo.js
diff --git a/.../html/_static/scripts/furo.js.LICENSE.txt → ...build/_static/scripts/furo.js.LICENSE.txt b/.../html/_static/scripts/furo.js.LICENSE.txt → ...build/_static/scripts/furo.js.LICENSE.txt
diff --git a/...on/build/html/_static/scripts/furo.js.map → ...eration/build/_static/scripts/furo.js.map b/...on/build/html/_static/scripts/furo.js.map → ...eration/build/_static/scripts/furo.js.map
diff --git a/...eration/build/html/_static/searchtools.js → doc_generation/build/_static/searchtools.js b/...eration/build/html/_static/searchtools.js → doc_generation/build/_static/searchtools.js
diff --git a/...eneration/build/html/_static/skeleton.css → doc_generation/build/_static/skeleton.css b/...eneration/build/html/_static/skeleton.css → doc_generation/build/_static/skeleton.css
diff --git a/...on/build/html/_static/sphinx_highlight.js → ...eration/build/_static/sphinx_highlight.js b/...on/build/html/_static/sphinx_highlight.js → ...eration/build/_static/sphinx_highlight.js
diff --git a/...d/html/_static/styles/furo-extensions.css → .../build/_static/styles/furo-extensions.css b/...d/html/_static/styles/furo-extensions.css → .../build/_static/styles/furo-extensions.css
diff --git a/...ml/_static/styles/furo-extensions.css.map → ...ld/_static/styles/furo-extensions.css.map b/...ml/_static/styles/furo-extensions.css.map → ...ld/_static/styles/furo-extensions.css.map
diff --git a/...ration/build/html/_static/styles/furo.css → doc_generation/build/_static/styles/furo.css b/...ration/build/html/_static/styles/furo.css → doc_generation/build/_static/styles/furo.css
diff --git a/...on/build/html/_static/styles/furo.css.map → ...eration/build/_static/styles/furo.css.map b/...on/build/html/_static/styles/furo.css.map → ...eration/build/_static/styles/furo.css.map
diff --git a/doc_generation/build/doctrees/Data.doctree b/doc_generation/build/doctrees/Data.doctree
diff --git a/doc_generation/build/doctrees/Examinee.doctree b/doc_generation/build/doctrees/Examinee.doctree
diff --git a/doc_generation/build/doctrees/Judge.doctree b/doc_generation/build/doctrees/Judge.doctree
diff --git a/doc_generation/build/doctrees/Model.doctree b/doc_generation/build/doctrees/Model.doctree
diff --git a/doc_generation/build/doctrees/documentation.doctree b/doc_generation/build/doctrees/documentation.doctree
diff --git a/doc_generation/build/doctrees/index.doctree b/doc_generation/build/doctrees/index.doctree
diff --git a/doc_generation/build/doctrees/running.doctree b/doc_generation/build/doctrees/running.doctree
diff --git a/doc_generation/build/doctrees/usage.doctree b/doc_generation/build/doctrees/usage.doctree
diff --git a/doc_generation/build/html/.buildinfo b/doc_generation/build/html/.buildinfo
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		# ProgressAlign/`benchmark`
		# ProgressGym/`benchmark`