From c40fd44f67d9e987d977e7985ef7575dcff153f6 Mon Sep 17 00:00:00 2001 From: Ikechukwu Uchendu Date: Thu, 29 Aug 2024 16:36:56 -0400 Subject: [PATCH] Updated training and inference tutorials along with small fixes --- a2perf/domains/tfa/suite_gym.py | 43 +++---- .../dog_pace/inference.gin | 6 +- .../dog_spin/inference.gin | 1 + .../dog_trot/inference.gin | 1 + .../configs/quadruped_locomotion/train.gin | 2 +- a2perf/submission/main_submission.py | 8 -- a2perf/submission/submission_util.py | 32 +++-- docs/content/tutorials/inference.md | 121 +++++++++++++++++- docs/content/tutorials/training.md | 52 ++++---- xm_launch.py | 3 +- 10 files changed, 192 insertions(+), 77 deletions(-) diff --git a/a2perf/domains/tfa/suite_gym.py b/a2perf/domains/tfa/suite_gym.py index afa21d2..cfd2e02 100644 --- a/a2perf/domains/tfa/suite_gym.py +++ b/a2perf/domains/tfa/suite_gym.py @@ -22,31 +22,24 @@ for the final step of an episode. To prevent that we extract the step limit from the environment specs and utilize our TimeLimit wrapper. 
""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function +from __future__ import absolute_import, division, print_function import json import os -from typing import Any -from typing import Callable -from typing import Dict -from typing import Optional -from typing import Sequence -from typing import Text +from typing import Any, Callable, Dict, Optional, Sequence, Text import gin import gymnasium as gym import numpy as np from absl import logging -from tf_agents.environments import py_environment -from tf_agents.environments import wrappers +from tf_agents.environments import py_environment, wrappers from tf_agents.typing import types from a2perf.domains import circuit_training # noqa: F401 from a2perf.domains import quadruped_locomotion # noqa: F401 from a2perf.domains import web_navigation # noqa: F401 from a2perf.domains.tfa import gym_wrapper +from a2perf.domains.web_navigation.gwob.CoDE import vocabulary_node TimeLimitWrapperType = Callable[ [py_environment.PyEnvironment, int], py_environment.PyEnvironment @@ -184,8 +177,6 @@ def create_domain( ): if env_name in WEB_NAVIGATION_ENVS: # noinspection PyUnresolvedReferences - from a2perf.domains import web_navigation # noqa: F401 - from a2perf.domains.web_navigation.gwob.CoDE import vocabulary_node save_vocab_dir = os.path.join(root_dir, "vocabulary") reload_vocab = env_kwargs.pop("reload_vocab", True) @@ -193,7 +184,7 @@ def create_domain( if vocab_type == "threaded": global_vocab = vocabulary_node.LockedThreadedVocabulary() elif vocab_type == "unlocked": - global_vocab = vocabulary_node.UnlockedVocabulary() + vocabulary_node.UnlockedVocabulary() elif vocab_type == "multiprocessing": global_vocab = vocabulary_node.LockedMultiprocessingVocabulary() else: @@ -209,14 +200,14 @@ def create_domain( global_vocab.restore(state=global_vocab_dict) seed = int(os.environ.get("SEED", None)) num_websites = int(os.environ.get("NUM_WEBSITES", None)) - difficulty = 
int(os.environ.get("DIFFICULTY_LEVEL", None)) + # difficulty = int(os.environ.get("DIFFICULTY_LEVEL", None)) env_kwargs.update( { "global_vocabulary": global_vocab, "seed": seed, "num_websites": num_websites, - "difficulty": difficulty, + # "difficulty": difficulty, "browser_args": dict( threading=False, chrome_options={ @@ -230,30 +221,26 @@ def create_domain( ) env_wrappers = [wrappers.ActionClipWrapper] + list(env_wrappers) elif env_name in CIRCUIT_TRAINING_ENVS: - # noinspection PyUnresolvedReferences - from a2perf.domains import circuit_training # noqa: F401 env_kwargs.pop("netlist", None) - netlist_file_path = os.environ.get("NETLIST_PATH", None) + # netlist_file_path = os.environ.get("NETLIST_PATH", None) seed = int(os.environ.get("SEED", None)) - init_placement_file_path = os.environ.get("INIT_PLACEMENT_PATH", None) - std_cell_placer_mode = os.environ.get("STD_CELL_PLACER_MODE", None) + # init_placement_file_path = os.environ.get("INIT_PLACEMENT_PATH", None) + # std_cell_placer_mode = os.environ.get("STD_CELL_PLACER_MODE", None) env_kwargs.update( { "global_seed": seed, - "netlist_file": netlist_file_path, - "init_placement": init_placement_file_path, + # "netlist_file": netlist_file_path, + # "init_placement": init_placement_file_path, "output_plc_file": os.path.join(root_dir, "output.plc"), - "std_cell_placer_mode": std_cell_placer_mode, + # "std_cell_placer_mode": std_cell_placer_mode, } ) env_wrappers = [wrappers.ActionClipWrapper] + list(env_wrappers) elif env_name in QUADRUPED_LOCOMOTION_ENVS: - # noinspection PyUnresolvedReferences - from a2perf.domains import quadruped_locomotion # noqa: F401 - motion_file_path = os.environ.get("MOTION_FILE_PATH", None) - env_kwargs["motion_files"] = [motion_file_path] + # motion_file_path = os.environ.get("MOTION_FILE_PATH", None) + # env_kwargs["motion_files"] = [motion_file_path] env_wrappers = [wrappers.ActionClipWrapper] + list(env_wrappers) else: raise NotImplementedError(f"Unknown environment: {env_name}") 
diff --git a/a2perf/submission/configs/quadruped_locomotion/dog_pace/inference.gin b/a2perf/submission/configs/quadruped_locomotion/dog_pace/inference.gin index 4a0196b..9e6f7f0 100644 --- a/a2perf/submission/configs/quadruped_locomotion/dog_pace/inference.gin +++ b/a2perf/submission/configs/quadruped_locomotion/dog_pace/inference.gin @@ -10,12 +10,13 @@ import a2perf.domains.tfa.suite_gym # Set up submission object Submission.mode = %BenchmarkMode.INFERENCE Submission.domain = %BenchmarkDomain.QUADRUPED_LOCOMOTION -#Submission.run_offline_metrics_only = True +Submission.run_offline_metrics_only = False Submission.measure_emissions = True #################################### # Set up domain #################################### +suite_gym.create_domain.env_name = "QuadrupedLocomotion-DogPace-v0" suite_gym.create_domain.mode='test' suite_gym.create_domain.num_parallel_envs=1 @@ -30,7 +31,7 @@ Submission.time_participant_code = True # SYSTEM METRICS SETUP # ---------------------- # Set up codecarbon for system metrics -track_emissions_decorator.project_name = 'a2perf_quadruped_locomotion_inference_debug' +track_emissions_decorator.project_name = 'a2perf_quadruped_locomotion_inference' track_emissions_decorator.measure_power_secs = 1 track_emissions_decorator.save_to_file = True # Save data to file track_emissions_decorator.save_to_logger = False # Do not save data to logger @@ -38,3 +39,4 @@ track_emissions_decorator.gpu_ids = None # Enter a list of specific GPU IDs to track_emissions_decorator.log_level = 'info' # Log level set to 'info' track_emissions_decorator.country_iso_code = 'USA' track_emissions_decorator.region = 'Massachusetts' +track_emissions_decorator.offline = True diff --git a/a2perf/submission/configs/quadruped_locomotion/dog_spin/inference.gin b/a2perf/submission/configs/quadruped_locomotion/dog_spin/inference.gin index 4a0196b..1e455f7 100644 --- a/a2perf/submission/configs/quadruped_locomotion/dog_spin/inference.gin +++ 
b/a2perf/submission/configs/quadruped_locomotion/dog_spin/inference.gin @@ -16,6 +16,7 @@ Submission.measure_emissions = True #################################### # Set up domain #################################### +suite_gym.create_domain.env_name = "QuadrupedLocomotion-DogSpin-v0" suite_gym.create_domain.mode='test' suite_gym.create_domain.num_parallel_envs=1 diff --git a/a2perf/submission/configs/quadruped_locomotion/dog_trot/inference.gin b/a2perf/submission/configs/quadruped_locomotion/dog_trot/inference.gin index 4a0196b..ad227a7 100644 --- a/a2perf/submission/configs/quadruped_locomotion/dog_trot/inference.gin +++ b/a2perf/submission/configs/quadruped_locomotion/dog_trot/inference.gin @@ -16,6 +16,7 @@ Submission.measure_emissions = True #################################### # Set up domain #################################### +suite_gym.create_domain.env_name = "QuadrupedLocomotion-DogTrot-v0" suite_gym.create_domain.mode='test' suite_gym.create_domain.num_parallel_envs=1 diff --git a/a2perf/submission/configs/quadruped_locomotion/train.gin b/a2perf/submission/configs/quadruped_locomotion/train.gin index 9b905ac..5ad9d28 100644 --- a/a2perf/submission/configs/quadruped_locomotion/train.gin +++ b/a2perf/submission/configs/quadruped_locomotion/train.gin @@ -10,7 +10,7 @@ import a2perf.submission.submission_util # Set up submission object Submission.mode = %a2perf.constants.BenchmarkMode.TRAIN Submission.domain = %a2perf.constants.BenchmarkDomain.QUADRUPED_LOCOMOTION -Submission.run_offline_metrics_only=False +Submission.run_offline_metrics_only = False Submission.measure_emissions=True diff --git a/a2perf/submission/main_submission.py b/a2perf/submission/main_submission.py index 0f65b02..d55dfce 100644 --- a/a2perf/submission/main_submission.py +++ b/a2perf/submission/main_submission.py @@ -4,7 +4,6 @@ import gin from absl import app, flags, logging -from a2perf.constants import BenchmarkMode from a2perf.submission import submission_util _GIN_CONFIG = 
flags.DEFINE_string( @@ -32,12 +31,6 @@ _RUN_OFFLINE_METRICS_ONLY = flags.DEFINE_bool( "run-offline-metrics-only", False, "Whether to run offline metrics only." ) -_MODE = flags.DEFINE_enum( - "mode", - "train", - ["train", "inference", "generalization"], - "Mode of the submission. train, inference, or generalization.", -) def main(_): @@ -54,7 +47,6 @@ def main(_): logging.info("Adding extra gin binding: %s", binding) submission = submission_util.Submission( - mode=BenchmarkMode(_MODE.value), root_dir=_ROOT_DIR.value, metric_values_dir=_METRIC_VALUES_DIR.value, participant_module_path=_PARTICIPANT_MODULE_PATH.value, diff --git a/a2perf/submission/submission_util.py b/a2perf/submission/submission_util.py index 1febf22..47ab426 100644 --- a/a2perf/submission/submission_util.py +++ b/a2perf/submission/submission_util.py @@ -142,13 +142,13 @@ def _load_module(module_path, filename): return module, spec -def _load_policy(module_path, env): +def _load_policy(module_path, env, participant_args=None): """Loads the policy from the participant's module.""" with working_directory(module_path): participant_module, participant_module_spec = _load_module( module_path, "inference.py" ) - policy = participant_module.load_policy(env) + policy = participant_module.load_policy(env, **(participant_args or {})) return policy, participant_module @@ -159,6 +159,7 @@ def perform_rollouts( gin_config_str=None, absl_flags=None, rollout_rewards_queue=None, + participant_args=None, ): """Performs rollouts using the given policy. 
@@ -175,7 +176,11 @@ def perform_rollouts( """ setup_subprocess_env(gin_config_str, absl_flags) env = create_domain_fn() - policy, participant_module = _load_policy(module_path, env) + if participant_args is None: + participant_args = {} + policy, participant_module = _load_policy( + module_path, env, participant_args=participant_args + ) episode_reward_metric = py_metrics.AverageReturnMetric() rollout_actor = actor.Actor( env=env, @@ -284,9 +289,7 @@ def _perform_rollout_task( for key, value in generalization_env_vars.items(): os.environ[key] = value - create_domain_fn = functools.partial( - suite_gym.create_domain, env_name=domain.value, root_dir=root_dir - ) + create_domain_fn = functools.partial(suite_gym.create_domain, root_dir=root_dir) all_rewards = perform_rollouts( module_path=participant_module_path, create_domain_fn=create_domain_fn, @@ -456,7 +459,10 @@ def _perform_rollouts( setup_subprocess_env(self.gin_config_str, self.absl_flags) create_domain_fn = functools.partial( - suite_gym.create_domain, env_name=self.domain.value, root_dir=self.root_dir + suite_gym.create_domain, + # env_name=self.domain.value, + root_dir=self.root_dir, + # load_kwargs=self.participant_args, ) if measure_emissions: @@ -473,6 +479,7 @@ def perform_rollouts_and_track_emissions(): self.gin_config_str, self.absl_flags, rollout_rewards_queue, + self.participant_args, ), ) rollout_process.start() @@ -486,6 +493,7 @@ def perform_rollouts_and_track_emissions(): module_path=self.participant_module_path, gin_config_str=self.gin_config_str, absl_flags=self.absl_flags, + participant_args=self.participant_args, ) def _run_training_benchmark(self): @@ -552,10 +560,8 @@ def _run_generalization_benchmark(self): def _run_inference_benchmark(self): if not self.run_offline_metrics_only: - logging.info("Creating Gymnasium domain...") - env = suite_gym.create_domain( - env_name=self.domain.value, root_dir=self.root_dir - ) + logging.info("Creating Gymnasium environment...") + env = 
suite_gym.create_domain(root_dir=self.root_dir) logging.info("Successfully created domain") logging.info("Generating inference data...") @@ -566,7 +572,9 @@ def _run_inference_benchmark(self): logging.info("Loading the policy for inference...") participant_policy, participant_module = _load_policy( - module_path=self.participant_module_path, env=env + module_path=self.participant_module_path, + env=env, + participant_args=self.participant_args, ) # Only include time_step_spec if the participant policy has it as an diff --git a/docs/content/tutorials/inference.md b/docs/content/tutorials/inference.md index 29025ef..f61595b 100644 --- a/docs/content/tutorials/inference.md +++ b/docs/content/tutorials/inference.md @@ -6,4 +6,123 @@ firstpage: # Benchmarking Inference -This tutorial is coming soon. Stay tuned! +## Prerequisites + +Before you begin, ensure you have done the following: + +### Install A2Perf + +For detailed instructions, please refer to +our [Installation Guide](../basic_usage.md#Installation). + +### Benchmarking Training Tutorial + +Please refer to the [Benchmarking Training Tutorial](training.md) for +instructions on how to train your agent. We will use the artifacts generated +from the training tutorial for this inference tutorial. + +### Update the `a2perf_benchmark_submission` Submodule + +If you have not already done so for the training tutorial, update +the `a2perf_benchmark_submission` submodule to the `baselines-local` branch: + +```bash +cd a2perf/a2perf_benchmark_submission +git fetch origin +git checkout baselines-local +git pull origin baselines-local +cd ../.. +``` + +## Running the Inference Benchmark + +After running the training benchmark, you will have a directory with the trained +agent and other artifacts. We will use these for the inference benchmark. 
+ +### Running locally with XManager (Docker) + +#### Running the Benchmark + +```bash +xmanager launch xm_launch.py -- \ + --experiment-name=test_inference \ + --root-dir=~/gcs/a2perf/experiments/ \ + --experiment-id=<experiment_id> \ + --domain=QuadrupedLocomotion-DogPace-v0 \ + --submission-gin-config-path=a2perf/submission/configs/quadruped_locomotion/dog_pace/inference.gin \ + --user=$USER \ + --participant-module-path=a2perf/a2perf_benchmark_submission \ + --participant-args="root_dir=/experiment_dir,policy_name=greedy_policy" +``` + +#### Command line arguments + +- **`root-dir`**: Specifies the directory where experiment logs and artifacts + will be saved. +- **`experiment-id`**: The ID of the training experiment from which to load the + trained agent. +- **`submission-gin-config-path`**: Points to the Gin configuration file for + inference in the Dog Pace environment. +- **`participant-module-path`**: Indicates the path to the directory containing + the submission code. +- **`participant-args`**: Provides additional arguments for the participant's + code, including the path to the trained agent and the policy name to use. + +XManager will automatically launch a Docker container with the necessary +dependencies installed. It will create a new experiment directory for the +inference results. 
+ +### Running Locally Without Docker + +If you prefer to run the benchmark locally without using Docker, follow these +steps: + +#### Installing Dependencies + +If you have not already done so for the training tutorial, install the required +Python dependencies: + +```bash +pip install -r A2Perf/a2perf/a2perf_benchmark_submission/requirements.txt +``` + +#### Running the Benchmark + +Once the dependencies are installed, you can run the inference benchmark with +the following command: + +```bash +cd A2Perf +export A2PERF_ROOT=$(pwd) +python a2perf/launch/entrypoint.py \ + --root-dir=~/gcs/a2perf/experiments/<experiment_id>/test/1 \ + --submission-gin-config-path=$A2PERF_ROOT/a2perf/submission/configs/quadruped_locomotion/dog_pace/inference.gin \ + --participant-module-path=$A2PERF_ROOT/a2perf/a2perf_benchmark_submission \ + --participant-args="root_dir=~/gcs/a2perf/experiments/<experiment_id>/test/1,policy_name=greedy_policy" +``` + +Note: Replace `<experiment_id>` with the actual ID of your training experiment. +This ID is unique for each run and can be found in the output of your training +command or in the experiment directory structure. + +#### Command line arguments + +The command line arguments are similar to those used in the Docker version, but +adapted for local execution: + +- **`root-dir`**: Specifies the directory where the training artifacts are + located and where inference results will be saved. +- **`submission-gin-config-path`**: Points to the Gin configuration file for + inference in the Dog Pace environment. +- **`participant-module-path`**: Indicates the path to the directory containing + the submission code. +- **`participant-args`**: Provides additional arguments for the participant's + code, including the path to the trained agent and the policy name to use. + +Make sure to adjust the paths according to your setup if they differ from the +example provided. + +After running the inference benchmark, you will find the results in the +specified +root directory. 
These results will include metrics on the agent's performance +during inference, such as average returns, inference time, and resource usage. diff --git a/docs/content/tutorials/training.md b/docs/content/tutorials/training.md index a582566..6f5ab2e 100644 --- a/docs/content/tutorials/training.md +++ b/docs/content/tutorials/training.md @@ -46,20 +46,20 @@ You can clone this repository and modify it to fit your specific implementation. - `inference.py` Next, the `inference.py` file is subsequently used for benchmarking the - trained model. + trained agent. This file includes several key functions.\ \ __`load_model(env)`:__ - This function loads and returns the trained model. A2Perf passes the + This function loads and returns the trained agent. A2Perf passes the environment that is being tested via the `env` parameter. This allows the - model loading logic to use any context needed, such as the environment name. + agent loading logic to use any context needed, such as the environment object. \ __`preprocess_observation(observation)`:__ - Preprocesses the observation before feeding it to the model. If no + Preprocesses the observation before feeding it to the agent. If no preprocessing is required, simply return the initial observation. \ __`infer_once(model, observation)`:__ - Passes a single observation to the loaded model and returns the predicted + Passes a single observation to the loaded agent and returns the predicted action. This function performs a single inference step. 
- `requirements.txt`: @@ -78,30 +78,30 @@ branch: `baselines-local` ### Navigate to the Submodule Directory - ```bash - cd a2perf/a2perf_benchmark_submission - ``` + ```bash + cd a2perf/a2perf_benchmark_submission + ``` ### Checkout the branch with code for baselines - ```bash - git fetch origin - git checkout baselines-local - ``` + ```bash + git fetch origin + git checkout baselines-local + ``` ### Pull Latest Changes - ```bash - git pull origin baselines-local - ``` +```bash +git pull origin baselines-local +``` ### Back to the Main Directory -Return to the main directory of the `A2Perf` repository: +Return to the root directory of the `A2Perf` repository: - ```bash - cd ../../.. - ``` +```bash +cd ../.. +``` --- @@ -113,7 +113,7 @@ Return to the main directory of the `A2Perf` repository: ```bash xmanager launch xm_launch.py -- \ - --experiment-name=test \ + --experiment-name=test \ --root-dir=~/gcs/a2perf/experiments/ \ --domain=QuadrupedLocomotion-DogPace-v0 \ --submission-gin-config-path=a2perf/submission/configs/quadruped_locomotion/train.gin \ @@ -137,13 +137,18 @@ xmanager launch xm_launch.py -- \ [XManager](https://github.com/google-deepmind/xmanager) will automatically launch a Docker container with the necessary dependencies installed. It will also create a new experiment directory -at `~/gcs/a2perf/experiments//test/1/`. The number `1` is +at `~/gcs/a2perf/experiments/<experiment_id>/test/1/`. The number `1` is included because we are running a single work unit in the experiment. For more details on work units, refer to [XManager's documentation](https://github.com/google-deepmind/xmanager). +**Important**: Make note of the `<experiment_id>` in your experiment directory +path. You will need this ID when running the inference benchmark later. +The `<experiment_id>` is a unique identifier for your training run and is +typically a long string of numbers. + The experiment directory will contain all logs and artifacts generated during -the benchmark. 
Here is how the directory structure will look at the end of the +the benchmark. Here is how the directory structure will look at the end of training: ```plaintext @@ -154,6 +159,7 @@ training: ├── metrics ├── policies ├── submission_config.gin + ├── training_complete └── train ``` @@ -188,6 +194,7 @@ training: ├── collect_policy ├── greedy_policy └── policy + ``` - **`train/`**: Contains additional checkpoint information and TensorBoard logs from the training process, which are useful for monitoring training progress @@ -220,7 +227,6 @@ python a2perf/launch/entrypoint.py \ --submission-gin-config-path=$A2PERF_ROOT/a2perf/submission/configs/quadruped_locomotion/train.gin \ --participant-module-path=$A2PERF_ROOT/a2perf/a2perf_benchmark_submission \ --participant-args="gin_config_path=configs/quadruped_locomotion/dog_pace/ppo.gin" - ``` #### Command line arguments diff --git a/xm_launch.py b/xm_launch.py index b95c337..dc4171c 100644 --- a/xm_launch.py +++ b/xm_launch.py @@ -5,8 +5,7 @@ from absl import app, flags from xmanager import xm, xm_local -from a2perf.constants import BenchmarkDomain -from a2perf.constants import ENV_NAMES +from a2perf.constants import ENV_NAMES, BenchmarkDomain from a2perf.launch.docker_utils import ( DOCKER_EXPERIMENT_DIR, DOCKER_PARTICIPANT_DIR,