purdue-arc · Abuynits · Jun 12, 2022 · Jun 13, 2022 · Jun 13, 2022 · Jun 13, 2022
diff --git a/.gitignore b/.gitignore
@@ -142,3 +142,9 @@ dmypy.json
 
 # MATLAB
 *.asv
+
+#urdf removal:
+rktl_sim/urdf/walls.urdf
+rktl_sim/urdf/goal_a.urdf
+rktl_sim/urdf/goal_b.urdf
+rktl_sim/urdf/walls.urdf
diff --git a/rktl_autonomy/nodes/rocket_league_agent b/rktl_autonomy/nodes/rocket_league_agent
@@ -11,18 +11,20 @@ from stable_baselines3 import PPO
 from os.path import expanduser
 import rospy
 
-# create interface (and init ROS)
+# Create interface (and init ROS).
 env = RocketLeagueInterface(eval=True)
 
-# load the model
+# Load the model.
 weights = expanduser(rospy.get_param('~weights'))
 model = PPO.load(weights)
 
-# evaluate in real-time
+# Evaluate in real-time.
 obs = env.reset()
 while True:
+    # Predict the next action from the current observation.
     action, __ = model.predict(obs)
     try:
+        # Step the sim with the action from the model.
         obs, __, __, __ = env.step(action)
     except rospy.ROSInterruptException:
         exit()
diff --git a/rktl_autonomy/scripts/train_rocket_league.py b/rktl_autonomy/scripts/train_rocket_league.py
@@ -4,6 +4,7 @@
   BSD 3-Clause License
   Copyright (c) 2021, Autonomous Robotics Club of Purdue (Purdue ARC)
   All rights reserved.
+stable_baselines3 resource: https://stable-baselines3.readthedocs.io/_/downloads/en/master/pdf/
 """
 
 from rktl_autonomy import RocketLeagueInterface
@@ -15,32 +16,34 @@
 from os.path import expanduser
 import uuid
 
-if __name__ == '__main__':      # this is required due to forking processes
-    run_id = str(uuid.uuid4())  # ALL running environments must share this
+if __name__ == '__main__':
+    # The __main__ guard above is required due to forking processes.
+    # ALL running environments must share the run_id below.
+    run_id = str(uuid.uuid4())
     print(f"RUN ID: {run_id}")
 
-    # to pass launch args, add to env_kwargs: 'launch_args': ['render:=false', 'plot_log:=true']
-    env = make_vec_env(RocketLeagueInterface, env_kwargs={'run_id':run_id},
-            n_envs=24, vec_env_cls=SubprocVecEnv)
+    # Pass launch args by adding to env_kwargs: 'launch_args': ['render:=false', 'plot_log:=true'].
+    env = make_vec_env(RocketLeagueInterface, env_kwargs={'run_id': run_id},
+                       n_envs=24, vec_env_cls=SubprocVecEnv)
 
     model = PPO("MlpPolicy", env)
 
-    # log training progress as CSV
+    # Log training progress as CSV.
     log_dir = expanduser(f'~/catkin_ws/data/rocket_league/{run_id}')
     logger = configure(log_dir, ["stdout", "csv", "log"])
     model.set_logger(logger)
 
-    # log model weights
-    freq = 20833 # save 20 times
+    # Log model weights.
+    freq = 20833  # save 20 times
     # freq = steps / (n_saves * n_envs)
     callback = CheckpointCallback(save_freq=freq, save_path=log_dir)
 
-    # run training
-    steps = 240000000 # 240M (10M sequential)
+    # Run training.
+    steps = 240000000  # 240M (10M sequential)
     print(f"training on {steps} steps")
     model.learn(total_timesteps=steps, callback=callback)
 
-    # save final weights
+    # Save final weights.
     print("done training")
     model.save(log_dir + "/final_weights")
-    env.close() # this must be done to clean up other processes
+    env.close()  # This must be done to clean up other processes.
diff --git a/rktl_autonomy/src/rktl_autonomy/_ros_interface.py b/rktl_autonomy/src/rktl_autonomy/_ros_interface.py
@@ -15,16 +15,18 @@
 from rosgraph_msgs.msg import Clock
 from diagnostic_msgs.msg import DiagnosticStatus, KeyValue
 
+
 class SimTimeException(Exception):
     """For when advancing sim time does not go as planned."""
     pass
 
+
 class ROSInterface(Env):
     """Extension of the Gym environment class for all specific interfaces
     to extend. This class handles logic regarding timesteps in ROS, and
     allows users to treat any ROS system as a Gym environment once the
     interface is created.
-
+    IMPORTANT: all methods and properties marked with @abstractmethod below must be implemented by subclasses.
     All classes extending this for a particular environment must do the following:
         - implement all abstract properties:
             - action_space
@@ -40,13 +42,13 @@ class ROSInterface(Env):
     """
 
     def __init__(self, node_name='gym_interface', eval=False, launch_file=None, launch_args=[], run_id=None):
-        """init function
-        Params:
-            node_name: desired name of this node in the ROS network
-            eval: set true if evaluating an agent in an existing ROS env, set false if training an agent
-            launch_file: if training, launch file to be used (ex: ['rktl_autonomy', 'rocket_league_train.launch'])
-            launch_args: if training, arguments to be passed to roslaunch (ex: ['render:=true', rate:=10])
-            run_id: if training, used to prevent deadlocks. if logging, run_id describes where to save files. Default is randomly generated
+        """
+        Initializes the ROS interface.
+        @param node_name: desired name of this node in the ROS network
+        @param eval: set true if evaluating an agent in an existing ROS env, set false if training an agent
+        @param launch_file: if training, launch file to be used (ex: ['rktl_autonomy', 'rocket_league_train.launch'])
+        @param launch_args: if training, arguments to be passed to roslaunch (ex: ['render:=true', 'rate:=10'])
+        @param run_id: if training, used to prevent deadlocks. if logging, run_id describes where to save files.
         """
         super().__init__()
         self.__EVAL_MODE = eval
@@ -76,10 +78,11 @@ def __init__(self, node_name='gym_interface', eval=False, launch_file=None, laun
             ros_id = roslaunch.rlutil.get_or_generate_uuid(None, False)
             roslaunch.configure_logging(ros_id)
             launch_file = roslaunch.rlutil.resolve_launch_arguments(launch_file)[0]
-            launch_args = [f'render:={port==11311}', f'plot_log:={port==11311}'] + launch_args + [f'agent_name:={node_name}']
+            launch_args = [f'render:={port == 11311}', f'plot_log:={port == 11311}'] + launch_args + [
+                f'agent_name:={node_name}']
             launch = roslaunch.parent.ROSLaunchParent(ros_id, [(launch_file, launch_args)], port=port)
             launch.start()
-            self.close = lambda : launch.shutdown()
+            self.close = lambda: launch.shutdown()
             # initialize self
             os.environ['ROS_MASTER_URI'] = f'http://localhost:{port}'
             rospy.init_node(node_name)
@@ -91,6 +94,7 @@ def __init__(self, node_name='gym_interface', eval=False, launch_file=None, laun
 
         # private variables
         self._cond = Condition()
+        # TODO: why can't this be merged into the if statement above?
 
         # additional set up for training
         if not self.__EVAL_MODE:
@@ -116,72 +120,77 @@ def step(self, action):
         """
         Implementation of gym.Env.step. This function will intentionally block
         if the ROS environment is not ready.
-
-        Run one timestep of the environment's dynamics. When end of
-        episode is reached, you are responsible for calling `reset()`
-        to reset this environment's state.
-        Accepts an action and returns a tuple (observation, reward, done, info).
-        Args:
-            action (object): an action provided by the agent
-        Returns:
-            observation (object): agent's observation of the current environment
+        Run one timestep of the environment's dynamics.
+        When end of episode is reached, you are responsible for calling `reset()` to reset this environment's state.
+        @param action: (object) an action provided by the agent
+        @return: a tuple of the following:
+            observation (object): agent's observation of the current environment
             reward (float) : amount of reward returned after previous action
             done (bool): whether the episode has ended, in which case further step() calls will return undefined results
             info (dict): contains auxiliary diagnostic information (helpful for debugging, and sometimes learning)
+
         """
+
         self._clear_state()
         self._publish_action(action)
         self.__step_time_and_wait_for_state()
         state = self._get_state()
-        self.__net_reward += state[1]   # logging
+        self.__net_reward += state[1]  # logging
         return state
 
     def reset(self):
-        """Resets the environment to an initial state and returns an initial observation.
-
+        """
+        Resets the environment to an initial state and returns an initial observation.
         Note that this function should not reset the environment's random
-        number generator(s); random variables in the environment's state should
-        be sampled independently between multiple calls to `reset()`. In other
-        words, each call of `reset()` should yield an environment suitable for
-        a new episode, independent of previous episodes.
-        Returns:
-            observation (object): the initial observation.
+        number generator(s).
+        Random variables in the environment's state should
+        be sampled independently between multiple calls to `reset()`.
+        @return: the initial observation.
         """
+
+        # Checks if a new state is ready via: _has_state.
         if self._has_state():
-            # generate log
+            # Gathers the following information: episode #, net reward, duration of the episode.
+
+            # Generate a log.
             info = {
-                'episode'    : self.__episode,
-                'net_reward' : self.__net_reward,
-                'duration'   : (rospy.Time.now() - self.__start_time).to_sec()
+                'episode': self.__episode,
+                'net_reward': self.__net_reward,
+                'duration': (rospy.Time.now() - self.__start_time).to_sec()
             }
+            # Merge the auxiliary info dict from the current state into the log.
+
             info.update(self._get_state()[3])
-            # send message
+            # Send message.
             msg = DiagnosticStatus()
             msg.level = DiagnosticStatus.OK
             msg.name = 'ROS-Gym Interface'
             msg.message = 'log of episode data'
             msg.hardware_id = self.__LOG_ID
             msg.values = [KeyValue(key=key, value=str(value)) for key, value in info.items()]
             self.__log_pub.publish(msg)
-            # update variables (update time after reset)
             self.__episode += 1
             self.__net_reward = 0
 
-        # reset
         if not self.__EVAL_MODE:
             self._reset_env()
+        # Reset the ROS interface (abstract method).
         self._reset_self()
         self.__step_time_and_wait_for_state(5)
-        self.__start_time = rospy.Time.now()    # logging
+        self.__start_time = rospy.Time.now()  # logging
         return self._get_state()[0]
 
     def __step_time_and_wait_for_state(self, max_retries=1):
-        """Step time until a state is known."""
+        """
+        Wait for a new state. When training, first advance sim time and publish the clock, re-publishing on each retry.
+        @param max_retries: Maximum number of retries before SimTimeException is raised (only applies when training).
+        """
         if not self.__EVAL_MODE:
             self.__time += self.__DELTA_T
             self.__clock_pub.publish(self.__time)
             retries = 0
             while not self.__wait_once_for_state():
+
                 if retries >= max_retries:
                     rospy.logerr("Failed to get new state.")
                     raise SimTimeException
@@ -190,56 +199,58 @@ def __step_time_and_wait_for_state(self, max_retries=1):
                     self.__clock_pub.publish(self.__time)
                     retries += 1
         else:
+            # In eval mode, wait for a state with no retry limit.
             while not self.__wait_once_for_state():
-                pass    # idle wait
+                pass  # idle wait
 
     def __wait_once_for_state(self):
-        """Wait and allow other threads to run."""
+        """ Wait and allow other threads to run."""
         with self._cond:
             has_state = self._cond.wait_for(self._has_state, 0.25)
         if rospy.is_shutdown():
             raise rospy.ROSInterruptException()
         return has_state
 
-    # All the below abstract methods / properties must be implemented by subclasses
     @property
     @abstractmethod
     def action_space(self):
-        """The Space object corresponding to valid actions."""
+        """ The Space object corresponding to valid actions."""
+
         raise NotImplementedError
 
     @property
     @abstractmethod
     def observation_space(self):
-        """The Space object corresponding to valid observations."""
+        """ The Space object corresponding to valid observations."""
         raise NotImplementedError
 
     @abstractmethod
     def _reset_env(self):
-        """Reset environment for a new episode."""
+        """ Reset environment for a new episode."""
         raise NotImplementedError
 
     @abstractmethod
     def _reset_self(self):
-        """Reset internally for a new episode."""
+        """ Reset internally for a new episode."""
         raise NotImplementedError
 
     @abstractmethod
     def _has_state(self):
-        """Determine if the new state is ready."""
+        """ Determine if the new state is ready."""
+
         raise NotImplementedError
 
     @abstractmethod
     def _clear_state(self):
-        """Clear state variables / flags in preparation for new ones."""
+        """ Clear state variables / flags in preparation for new ones."""
         raise NotImplementedError
 
     @abstractmethod
     def _get_state(self):
-        """Get state tuple (observation, reward, done, info)."""
+        """ Get state tuple (observation, reward, done, info)."""
         raise NotImplementedError
 
     @abstractmethod
     def _publish_action(self, action):
-        """Publish an action to the ROS network."""
+        """ Publish an action to the ROS network."""
         raise NotImplementedError