From c40fd44f67d9e987d977e7985ef7575dcff153f6 Mon Sep 17 00:00:00 2001
From: Ikechukwu Uchendu <iuchendu@g.harvard.edu>
Date: Thu, 29 Aug 2024 16:36:56 -0400
Subject: [PATCH] Updated training and inference tutorials along with small
 fixes

---
 a2perf/domains/tfa/suite_gym.py               |  43 +++----
 .../dog_pace/inference.gin                    |   6 +-
 .../dog_spin/inference.gin                    |   1 +
 .../dog_trot/inference.gin                    |   1 +
 .../configs/quadruped_locomotion/train.gin    |   2 +-
 a2perf/submission/main_submission.py          |   8 --
 a2perf/submission/submission_util.py          |  32 +++--
 docs/content/tutorials/inference.md           | 121 +++++++++++++++++-
 docs/content/tutorials/training.md            |  52 ++++----
 xm_launch.py                                  |   3 +-
 10 files changed, 192 insertions(+), 77 deletions(-)

diff --git a/a2perf/domains/tfa/suite_gym.py b/a2perf/domains/tfa/suite_gym.py
index afa21d2..cfd2e02 100644
--- a/a2perf/domains/tfa/suite_gym.py
+++ b/a2perf/domains/tfa/suite_gym.py
@@ -22,31 +22,24 @@
 for the final step of an episode. To prevent that we extract the step limit
 from the environment specs and utilize our TimeLimit wrapper.
 """
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
+from __future__ import absolute_import, division, print_function
 
 import json
 import os
-from typing import Any
-from typing import Callable
-from typing import Dict
-from typing import Optional
-from typing import Sequence
-from typing import Text
+from typing import Any, Callable, Dict, Optional, Sequence, Text
 
 import gin
 import gymnasium as gym
 import numpy as np
 from absl import logging
-from tf_agents.environments import py_environment
-from tf_agents.environments import wrappers
+from tf_agents.environments import py_environment, wrappers
 from tf_agents.typing import types
 
 from a2perf.domains import circuit_training  # noqa: F401
 from a2perf.domains import quadruped_locomotion  # noqa: F401
 from a2perf.domains import web_navigation  # noqa: F401
 from a2perf.domains.tfa import gym_wrapper
+from a2perf.domains.web_navigation.gwob.CoDE import vocabulary_node
 
 TimeLimitWrapperType = Callable[
     [py_environment.PyEnvironment, int], py_environment.PyEnvironment
@@ -184,8 +177,6 @@ def create_domain(
 ):
     if env_name in WEB_NAVIGATION_ENVS:
         # noinspection PyUnresolvedReferences
-        from a2perf.domains import web_navigation  # noqa: F401
-        from a2perf.domains.web_navigation.gwob.CoDE import vocabulary_node
 
         save_vocab_dir = os.path.join(root_dir, "vocabulary")
         reload_vocab = env_kwargs.pop("reload_vocab", True)
@@ -193,7 +184,7 @@ def create_domain(
         if vocab_type == "threaded":
             global_vocab = vocabulary_node.LockedThreadedVocabulary()
         elif vocab_type == "unlocked":
-            global_vocab = vocabulary_node.UnlockedVocabulary()
+            vocabulary_node.UnlockedVocabulary()
         elif vocab_type == "multiprocessing":
             global_vocab = vocabulary_node.LockedMultiprocessingVocabulary()
         else:
@@ -209,14 +200,14 @@ def create_domain(
                     global_vocab.restore(state=global_vocab_dict)
         seed = int(os.environ.get("SEED", None))
         num_websites = int(os.environ.get("NUM_WEBSITES", None))
-        difficulty = int(os.environ.get("DIFFICULTY_LEVEL", None))
+        # difficulty = int(os.environ.get("DIFFICULTY_LEVEL", None))
 
         env_kwargs.update(
             {
                 "global_vocabulary": global_vocab,
                 "seed": seed,
                 "num_websites": num_websites,
-                "difficulty": difficulty,
+                # "difficulty": difficulty,
                 "browser_args": dict(
                     threading=False,
                     chrome_options={
@@ -230,30 +221,26 @@ def create_domain(
         )
         env_wrappers = [wrappers.ActionClipWrapper] + list(env_wrappers)
     elif env_name in CIRCUIT_TRAINING_ENVS:
-        # noinspection PyUnresolvedReferences
-        from a2perf.domains import circuit_training  # noqa: F401
 
         env_kwargs.pop("netlist", None)
-        netlist_file_path = os.environ.get("NETLIST_PATH", None)
+        # netlist_file_path = os.environ.get("NETLIST_PATH", None)
         seed = int(os.environ.get("SEED", None))
-        init_placement_file_path = os.environ.get("INIT_PLACEMENT_PATH", None)
-        std_cell_placer_mode = os.environ.get("STD_CELL_PLACER_MODE", None)
+        # init_placement_file_path = os.environ.get("INIT_PLACEMENT_PATH", None)
+        # std_cell_placer_mode = os.environ.get("STD_CELL_PLACER_MODE", None)
         env_kwargs.update(
             {
                 "global_seed": seed,
-                "netlist_file": netlist_file_path,
-                "init_placement": init_placement_file_path,
+                # "netlist_file": netlist_file_path,
+                # "init_placement": init_placement_file_path,
                 "output_plc_file": os.path.join(root_dir, "output.plc"),
-                "std_cell_placer_mode": std_cell_placer_mode,
+                # "std_cell_placer_mode": std_cell_placer_mode,
             }
         )
         env_wrappers = [wrappers.ActionClipWrapper] + list(env_wrappers)
     elif env_name in QUADRUPED_LOCOMOTION_ENVS:
-        # noinspection PyUnresolvedReferences
-        from a2perf.domains import quadruped_locomotion  # noqa: F401
 
-        motion_file_path = os.environ.get("MOTION_FILE_PATH", None)
-        env_kwargs["motion_files"] = [motion_file_path]
+        # motion_file_path = os.environ.get("MOTION_FILE_PATH", None)
+        # env_kwargs["motion_files"] = [motion_file_path]
         env_wrappers = [wrappers.ActionClipWrapper] + list(env_wrappers)
     else:
         raise NotImplementedError(f"Unknown environment: {env_name}")
diff --git a/a2perf/submission/configs/quadruped_locomotion/dog_pace/inference.gin b/a2perf/submission/configs/quadruped_locomotion/dog_pace/inference.gin
index 4a0196b..9e6f7f0 100644
--- a/a2perf/submission/configs/quadruped_locomotion/dog_pace/inference.gin
+++ b/a2perf/submission/configs/quadruped_locomotion/dog_pace/inference.gin
@@ -10,12 +10,13 @@ import a2perf.domains.tfa.suite_gym
 # Set up submission object
 Submission.mode = %BenchmarkMode.INFERENCE
 Submission.domain = %BenchmarkDomain.QUADRUPED_LOCOMOTION
-#Submission.run_offline_metrics_only = True
+Submission.run_offline_metrics_only = False
 Submission.measure_emissions = True
 
 ####################################
 # Set up domain
 ####################################
+suite_gym.create_domain.env_name = "QuadrupedLocomotion-DogPace-v0"
 suite_gym.create_domain.mode='test'
 suite_gym.create_domain.num_parallel_envs=1
 
@@ -30,7 +31,7 @@ Submission.time_participant_code = True
 # SYSTEM METRICS SETUP
 # ----------------------
 # Set up codecarbon for system metrics
-track_emissions_decorator.project_name = 'a2perf_quadruped_locomotion_inference_debug'
+track_emissions_decorator.project_name = 'a2perf_quadruped_locomotion_inference'
 track_emissions_decorator.measure_power_secs = 1
 track_emissions_decorator.save_to_file = True  # Save data to file
 track_emissions_decorator.save_to_logger = False  # Do not save data to logger
@@ -38,3 +39,4 @@ track_emissions_decorator.gpu_ids = None  # Enter a list of specific GPU IDs to
 track_emissions_decorator.log_level = 'info'  # Log level set to 'info'
 track_emissions_decorator.country_iso_code = 'USA'
 track_emissions_decorator.region = 'Massachusetts'
+track_emissions_decorator.offline = True
diff --git a/a2perf/submission/configs/quadruped_locomotion/dog_spin/inference.gin b/a2perf/submission/configs/quadruped_locomotion/dog_spin/inference.gin
index 4a0196b..1e455f7 100644
--- a/a2perf/submission/configs/quadruped_locomotion/dog_spin/inference.gin
+++ b/a2perf/submission/configs/quadruped_locomotion/dog_spin/inference.gin
@@ -16,6 +16,7 @@ Submission.measure_emissions = True
 ####################################
 # Set up domain
 ####################################
+suite_gym.create_domain.env_name = "QuadrupedLocomotion-DogSpin-v0"
 suite_gym.create_domain.mode='test'
 suite_gym.create_domain.num_parallel_envs=1
 
diff --git a/a2perf/submission/configs/quadruped_locomotion/dog_trot/inference.gin b/a2perf/submission/configs/quadruped_locomotion/dog_trot/inference.gin
index 4a0196b..ad227a7 100644
--- a/a2perf/submission/configs/quadruped_locomotion/dog_trot/inference.gin
+++ b/a2perf/submission/configs/quadruped_locomotion/dog_trot/inference.gin
@@ -16,6 +16,7 @@ Submission.measure_emissions = True
 ####################################
 # Set up domain
 ####################################
+suite_gym.create_domain.env_name = "QuadrupedLocomotion-DogTrot-v0"
 suite_gym.create_domain.mode='test'
 suite_gym.create_domain.num_parallel_envs=1
 
diff --git a/a2perf/submission/configs/quadruped_locomotion/train.gin b/a2perf/submission/configs/quadruped_locomotion/train.gin
index 9b905ac..5ad9d28 100644
--- a/a2perf/submission/configs/quadruped_locomotion/train.gin
+++ b/a2perf/submission/configs/quadruped_locomotion/train.gin
@@ -10,7 +10,7 @@ import a2perf.submission.submission_util
 # Set up submission object
 Submission.mode = %a2perf.constants.BenchmarkMode.TRAIN
 Submission.domain = %a2perf.constants.BenchmarkDomain.QUADRUPED_LOCOMOTION
-Submission.run_offline_metrics_only=False
+Submission.run_offline_metrics_only = False
 Submission.measure_emissions=True
 
 
diff --git a/a2perf/submission/main_submission.py b/a2perf/submission/main_submission.py
index 0f65b02..d55dfce 100644
--- a/a2perf/submission/main_submission.py
+++ b/a2perf/submission/main_submission.py
@@ -4,7 +4,6 @@
 import gin
 from absl import app, flags, logging
 
-from a2perf.constants import BenchmarkMode
 from a2perf.submission import submission_util
 
 _GIN_CONFIG = flags.DEFINE_string(
@@ -32,12 +31,6 @@
 _RUN_OFFLINE_METRICS_ONLY = flags.DEFINE_bool(
     "run-offline-metrics-only", False, "Whether to run offline metrics only."
 )
-_MODE = flags.DEFINE_enum(
-    "mode",
-    "train",
-    ["train", "inference", "generalization"],
-    "Mode of the submission. train, inference, or generalization.",
-)
 
 
 def main(_):
@@ -54,7 +47,6 @@ def main(_):
         logging.info("Adding extra gin binding: %s", binding)
 
     submission = submission_util.Submission(
-        mode=BenchmarkMode(_MODE.value),
         root_dir=_ROOT_DIR.value,
         metric_values_dir=_METRIC_VALUES_DIR.value,
         participant_module_path=_PARTICIPANT_MODULE_PATH.value,
diff --git a/a2perf/submission/submission_util.py b/a2perf/submission/submission_util.py
index 1febf22..47ab426 100644
--- a/a2perf/submission/submission_util.py
+++ b/a2perf/submission/submission_util.py
@@ -142,13 +142,13 @@ def _load_module(module_path, filename):
     return module, spec
 
 
-def _load_policy(module_path, env):
+def _load_policy(module_path, env, participant_args=None):
     """Loads the policy from the participant's module."""
     with working_directory(module_path):
         participant_module, participant_module_spec = _load_module(
             module_path, "inference.py"
         )
-        policy = participant_module.load_policy(env)
+        policy = participant_module.load_policy(env, **(participant_args or {}))
     return policy, participant_module
 
 
@@ -159,6 +159,7 @@ def perform_rollouts(
     gin_config_str=None,
     absl_flags=None,
     rollout_rewards_queue=None,
+    participant_args=None,
 ):
     """Performs rollouts using the given policy.
 
@@ -175,7 +176,11 @@ def perform_rollouts(
     """
     setup_subprocess_env(gin_config_str, absl_flags)
     env = create_domain_fn()
-    policy, participant_module = _load_policy(module_path, env)
+    if participant_args is None:
+        participant_args = {}
+    policy, participant_module = _load_policy(
+        module_path, env, participant_args=participant_args
+    )
     episode_reward_metric = py_metrics.AverageReturnMetric()
     rollout_actor = actor.Actor(
         env=env,
@@ -284,9 +289,7 @@ def _perform_rollout_task(
     for key, value in generalization_env_vars.items():
         os.environ[key] = value
 
-    create_domain_fn = functools.partial(
-        suite_gym.create_domain, env_name=domain.value, root_dir=root_dir
-    )
+    create_domain_fn = functools.partial(suite_gym.create_domain, root_dir=root_dir)
     all_rewards = perform_rollouts(
         module_path=participant_module_path,
         create_domain_fn=create_domain_fn,
@@ -456,7 +459,10 @@ def _perform_rollouts(
         setup_subprocess_env(self.gin_config_str, self.absl_flags)
 
         create_domain_fn = functools.partial(
-            suite_gym.create_domain, env_name=self.domain.value, root_dir=self.root_dir
+            suite_gym.create_domain,
+            # env_name=self.domain.value,
+            root_dir=self.root_dir,
+            # load_kwargs=self.participant_args,
         )
         if measure_emissions:
 
@@ -473,6 +479,7 @@ def perform_rollouts_and_track_emissions():
                         self.gin_config_str,
                         self.absl_flags,
                         rollout_rewards_queue,
+                        self.participant_args,
                     ),
                 )
                 rollout_process.start()
@@ -486,6 +493,7 @@ def perform_rollouts_and_track_emissions():
                 module_path=self.participant_module_path,
                 gin_config_str=self.gin_config_str,
                 absl_flags=self.absl_flags,
+                participant_args=self.participant_args,
             )
 
     def _run_training_benchmark(self):
@@ -552,10 +560,8 @@ def _run_generalization_benchmark(self):
 
     def _run_inference_benchmark(self):
         if not self.run_offline_metrics_only:
-            logging.info("Creating Gymnasium domain...")
-            env = suite_gym.create_domain(
-                env_name=self.domain.value, root_dir=self.root_dir
-            )
+            logging.info("Creating Gymnasium environment...")
+            env = suite_gym.create_domain(root_dir=self.root_dir)
             logging.info("Successfully created domain")
 
             logging.info("Generating inference data...")
@@ -566,7 +572,9 @@ def _run_inference_benchmark(self):
 
             logging.info("Loading the policy for inference...")
             participant_policy, participant_module = _load_policy(
-                module_path=self.participant_module_path, env=env
+                module_path=self.participant_module_path,
+                env=env,
+                participant_args=self.participant_args,
             )
 
             # Only include time_step_spec if the participant policy has it as an
diff --git a/docs/content/tutorials/inference.md b/docs/content/tutorials/inference.md
index 29025ef..f61595b 100644
--- a/docs/content/tutorials/inference.md
+++ b/docs/content/tutorials/inference.md
@@ -6,4 +6,123 @@ firstpage:
 
 # Benchmarking Inference
 
-This tutorial is coming soon. Stay tuned!
+## Prerequisites
+
+Before you begin, ensure you have done the following:
+
+### Install A2Perf
+
+For detailed instructions, please refer to
+our [Installation Guide](../basic_usage.md#Installation).
+
+### Benchmarking Training Tutorial
+
+Please refer to the [Benchmarking Training Tutorial](training.md) for
+instructions on how to train your agent. We will use the artifacts generated
+from the training tutorial for this inference tutorial.
+
+### Update the `a2perf_benchmark_submission` Submodule
+
+If you have not already done so for the training tutorial, update
+the `a2perf_benchmark_submission` submodule to the `baselines-local` branch:
+
+```bash
+cd a2perf/a2perf_benchmark_submission
+git fetch origin
+git checkout baselines-local
+git pull origin baselines-local
+cd ../..
+```
+
+## Running the Inference Benchmark
+
+After running the training benchmark, you will have a directory with the trained
+agent and other artifacts. We will use these for the inference benchmark.
+
+### Running locally with XManager (Docker)
+
+#### Running the Benchmark
+
+```bash
+xmanager launch xm_launch.py -- \
+  --experiment-name=test_inference \
+  --root-dir=~/gcs/a2perf/experiments/ \
+  --experiment-id=<experiment-id> \
+  --domain=QuadrupedLocomotion-DogPace-v0  \
+  --submission-gin-config-path=a2perf/submission/configs/quadruped_locomotion/dog_pace/inference.gin \
+  --user=$USER \
+  --participant-module-path=a2perf/a2perf_benchmark_submission \
+  --participant-args="root_dir=/experiment_dir,policy_name=greedy_policy"
+```
+
+#### Command line arguments
+
+- **`root-dir`**: Specifies the directory where experiment logs and artifacts
+  will be saved.
+- **`experiment-id`**: The ID of the training experiment from which to load the
+  trained agent.
+- **`submission-gin-config-path`**: Points to the Gin configuration file for
+  inference in the Dog Pace environment.
+- **`participant-module-path`**: Indicates the path to the directory containing
+  the submission code.
+- **`participant-args`**: Provides additional arguments for the participant's
+  code, including the path to the trained agent and the policy name to use.
+
+XManager will automatically launch a Docker container with the necessary
+dependencies installed. It will create a new experiment directory for the
+inference results.
+
+### Running Locally Without Docker
+
+If you prefer to run the benchmark locally without using Docker, follow these
+steps:
+
+#### Installing Dependencies
+
+If you have not already done so for the training tutorial, install the required
+Python dependencies:
+
+```bash
+pip install -r A2Perf/a2perf/a2perf_benchmark_submission/requirements.txt
+```
+
+#### Running the Benchmark
+
+Once the dependencies are installed, you can run the inference benchmark with
+the following command:
+
+```bash
+cd A2Perf
+export A2PERF_ROOT=$(pwd)
+python a2perf/launch/entrypoint.py \
+  --root-dir=~/gcs/a2perf/experiments/<experiment-id>/test/1 \
+  --submission-gin-config-path=$A2PERF_ROOT/a2perf/submission/configs/quadruped_locomotion/dog_pace/inference.gin \
+  --participant-module-path=$A2PERF_ROOT/a2perf/a2perf_benchmark_submission \
+  --participant-args="root_dir=~/gcs/a2perf/experiments/<experiment-id>/test/1,policy_name=greedy_policy"
+```
+
+Note: Replace `<experiment-id>` with the actual ID of your training experiment.
+This ID is unique for each run and can be found in the output of your training
+command or in the experiment directory structure.
+
+#### Command line arguments
+
+The command line arguments are similar to those used in the Docker version, but
+adapted for local execution:
+
+- **`root-dir`**: Specifies the directory where the training artifacts are
+  located and where inference results will be saved.
+- **`submission-gin-config-path`**: Points to the Gin configuration file for
+  inference in the Dog Pace environment.
+- **`participant-module-path`**: Indicates the path to the directory containing
+  the submission code.
+- **`participant-args`**: Provides additional arguments for the participant's
+  code, including the path to the trained agent and the policy name to use.
+
+Make sure to adjust the paths according to your setup if they differ from the
+example provided.
+
+After running the inference benchmark, you will find the results in the
+specified root directory. These results will include metrics on the agent's
+performance during inference, such as average returns, inference time, and
+resource usage.
diff --git a/docs/content/tutorials/training.md b/docs/content/tutorials/training.md
index a582566..6f5ab2e 100644
--- a/docs/content/tutorials/training.md
+++ b/docs/content/tutorials/training.md
@@ -46,20 +46,20 @@ You can clone this repository and modify it to fit your specific implementation.
 
 - `inference.py`
   Next, the `inference.py` file is subsequently used for benchmarking the
-  trained model.
+  trained agent.
   This file includes several key functions.\
   \
   __`load_model(env)`:__
-  This function loads and returns the trained model. A2Perf passes the
+  This function loads and returns the trained agent. A2Perf passes the
   environment that is being tested via the `env` parameter. This allows the
-  model loading logic to use any context needed, such as the environment name.
+  agent loading logic to use any context needed, such as the environment object.
   \
   __`preprocess_observation(observation)`:__
-  Preprocesses the observation before feeding it to the model. If no
+  Preprocesses the observation before feeding it to the agent. If no
   preprocessing is required, simply return the initial observation.
   \
   __`infer_once(model, observation)`:__
-  Passes a single observation to the loaded model and returns the predicted
+  Passes a single observation to the loaded agent and returns the predicted
   action. This function performs a single inference step.
 
 - `requirements.txt`:
@@ -78,30 +78,30 @@ branch: `baselines-local`
 
 ### Navigate to the Submodule Directory
 
-   ```bash
-   cd a2perf/a2perf_benchmark_submission
-   ```
+ ```bash
+ cd a2perf/a2perf_benchmark_submission
+ ```
 
 ### Checkout the branch with code for baselines
 
-   ```bash
-   git fetch origin
-   git checkout baselines-local
-   ```
+ ```bash
+ git fetch origin
+ git checkout baselines-local
+ ```
 
 ### Pull Latest Changes
 
-    ```bash
-    git pull origin baselines-local
-    ```
+```bash
+git pull origin baselines-local
+```
 
 ### Back to the Main Directory
 
-Return to the main directory of the `A2Perf` repository:
+Return to the root directory of the `A2Perf` repository:
 
-      ```bash
-      cd ../../..
-      ```
+```bash
+cd ../..
+```
 
 ---
 
@@ -113,7 +113,7 @@ Return to the main directory of the `A2Perf` repository:
 
 ```bash
 xmanager launch xm_launch.py -- \
-  --experiment-name=test \ 
+  --experiment-name=test \
   --root-dir=~/gcs/a2perf/experiments/ \
   --domain=QuadrupedLocomotion-DogPace-v0  \
   --submission-gin-config-path=a2perf/submission/configs/quadruped_locomotion/train.gin \
@@ -137,13 +137,18 @@ xmanager launch xm_launch.py -- \
 [XManager](https://github.com/google-deepmind/xmanager) will automatically
 launch a Docker container with the necessary dependencies installed. It will
 also create a new experiment directory
-at `~/gcs/a2perf/experiments/<experiment-number>/test/1/`. The number `1` is
+at `~/gcs/a2perf/experiments/<experiment-id>/test/1/`. The number `1` is
 included because we are running a single work unit in the experiment. For more
 details on work units, refer
 to [XManager's documentation](https://github.com/google-deepmind/xmanager).
 
+**Important**: Make note of the `<experiment-id>` in your experiment directory
+path. You will need this ID when running the inference benchmark later.
+The `<experiment-id>` is a unique identifier for your training run and is
+typically a long string of numbers.
+
 The experiment directory will contain all logs and artifacts generated during
-the benchmark. Here is how the directory structure will look at the end of the
+the benchmark. Here is how the directory structure will look at the end of
 training:
 
 ```plaintext
@@ -154,6 +159,7 @@ training:
         ├── metrics
         ├── policies
         ├── submission_config.gin
+        ├── training_complete
         └── train
 ```
 
@@ -188,6 +194,7 @@ training:
   ├── collect_policy
   ├── greedy_policy
   └── policy
+  ```
 
 - **`train/`**: Contains additional checkpoint information and TensorBoard logs
   from the training process, which are useful for monitoring training progress
@@ -220,7 +227,6 @@ python a2perf/launch/entrypoint.py \
   --submission-gin-config-path=$A2PERF_ROOT/a2perf/submission/configs/quadruped_locomotion/train.gin \
   --participant-module-path=$A2PERF_ROOT/a2perf/a2perf_benchmark_submission \
   --participant-args="gin_config_path=configs/quadruped_locomotion/dog_pace/ppo.gin"
-
 ```
 
 #### Command line arguments
diff --git a/xm_launch.py b/xm_launch.py
index b95c337..dc4171c 100644
--- a/xm_launch.py
+++ b/xm_launch.py
@@ -5,8 +5,7 @@
 from absl import app, flags
 from xmanager import xm, xm_local
 
-from a2perf.constants import BenchmarkDomain
-from a2perf.constants import ENV_NAMES
+from a2perf.constants import ENV_NAMES, BenchmarkDomain
 from a2perf.launch.docker_utils import (
     DOCKER_EXPERIMENT_DIR,
     DOCKER_PARTICIPANT_DIR,