diff --git a/docs/source/conf.py b/docs/source/conf.py index 4931e0c7..e40fb0d7 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -41,6 +41,7 @@ "sphinx_rtd_theme", "nbsphinx", "sphinx.ext.mathjax", + "sphinxcontrib.youtube", ] templates_path = ["_templates"] @@ -234,7 +235,6 @@ def generate_index(self, index_path, file_paths, dir_paths, source_dir): f.write(lines) def generate_autodoc(self, doc_path, source_file): - # Make header short_name = source_file.name.replace(".py", "") lines = "" diff --git a/docs/source/index.rst b/docs/source/index.rst index 1725e118..5f27b7b9 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -31,6 +31,8 @@ for working with these environments. A whitepaper on the design philosophy behind BSK-RL and an example use case can be :download:`downloaded here <_static/stephenson_bskrl_2024.pdf>`. +.. youtube:: 8qR-AGrCFQw + Quickstart ---------- Installation diff --git a/docs/source/release_notes.rst b/docs/source/release_notes.rst index 0b11ce97..c914fab8 100644 --- a/docs/source/release_notes.rst +++ b/docs/source/release_notes.rst @@ -26,7 +26,8 @@ Development Version * Improve performance of :class:`~bsk_rl.obs.Eclipse` observations by about 95%. * Logs a warning if the initial battery charge or buffer level is incompatible with its capacity. * Optimize communication when all satellites are communicating with each other. - +* Enable Vizard visualization of the environment by setting the ``vizard_dir`` and ``vizard_settings`` + options in the environment. 
Version 1.0.1 diff --git a/examples/continuous_orbit_manuevers.ipynb b/examples/continuous_orbit_manuevers.ipynb new file mode 100644 index 00000000..a733910b --- /dev/null +++ b/examples/continuous_orbit_manuevers.ipynb @@ -0,0 +1,464 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Getting Started\n", + "This tutorial demonstrates the configuration and use of a simple BSK-RL environment.\n", + "BSK-RL and dependencies should already be installed at this point (see [Installation](../install.rst)\n", + "if you haven't installed the package yet).\n", + "\n", + "## Load Modules\n", + "In this tutorial, the environment is created by instantiating `ConstellationTasking`\n", + "directly, so it is necessary to import it along with the other `bsk_rl` components used below." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "# %matplotlib qt # Uncomment to use interactive plotting, may need `pip install PyQt5`\n", + "\n", + "import numpy as np\n", + "from functools import partial\n", + "from bsk_rl import act, obs, sats, ConstellationTasking, comm\n", + "from bsk_rl.sim import dyn, fsw\n", + "from bsk_rl.utils.orbital import relative_to_chief, random_orbit, random_unit_vector\n", + "from bsk_rl.scene import FibonacciSphereRSOPoints\n", + "from bsk_rl.data import RSOInspectionReward, FuelPenalty\n", + "\n", + "from Basilisk.architecture import bskLogging\n", + "from Basilisk.utilities.RigidBodyKinematics import MRP2C\n", + "\n", + "bskLogging.setDefaultLogLevel(bskLogging.BSK_WARNING)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If no errors were raised, you have a functional installation of `bsk_rl`.\n", + "\n", + "## Configure the Satellite\n", + "[Satellites](../api_reference/sats/index.rst) are configurable agents in the environment.\n", + "To make a new environment, start by specifying the [observations](../api_reference/obs/index.rst)\n", + "and 
[actions](../api_reference/act/index.rst) of a satellite type, as well as the underlying\n", + "Basilisk [simulation](../api_reference/sim/index.rst) models used by the satellite." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import types\n", + "\n", + "\n", + "class TumbleSat(sats.Satellite):\n", + " observation_spec = [\n", + " obs.SatProperties(dict(prop=\"r_BN_N\"), dict(prop=\"sigma_BN\")),\n", + " ]\n", + " action_spec = [act.Drift()]\n", + " dyn_type = types.new_class(\"Dyn\", (dyn.ConjunctionDynModel, dyn.RSODynModel))\n", + " fsw_type = fsw.BasicFSWModel\n", + "\n", + "\n", + "class ThrustSat(sats.Satellite):\n", + " observation_spec = [\n", + " obs.SatProperties(\n", + " dict(prop=\"r_BN_N\"),\n", + " dict(prop=\"c_hat_N\"),\n", + " dict(prop=\"storage_level_fraction\"),\n", + " ),\n", + " obs.RelativeProperties(\n", + " dict(prop=\"r_DC_N\"),\n", + " chief_name=\"Tumbler\",\n", + " ),\n", + " ]\n", + " action_spec = [act.MagicThrust(max_dv=100, fsw_action=\"action_inspect_rso\")]\n", + " dyn_type = types.new_class(\"Dyn\", (dyn.ConjunctionDynModel, dyn.RSOImagingDynModel))\n", + " fsw_type = types.new_class(\n", + " \"FSW\",\n", + " (\n", + " fsw.SteeringFSWModel,\n", + " fsw.MagicOrbitalManeuverFSWModel,\n", + " fsw.RSOImagingFSWModel,\n", + " ),\n", + " )\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Making the Environment\n", + "For this example, we will be using the multi-agent [ConstellationTasking](../api_reference/index.rst) \n", + "environment. Along with passing the satellites that we configured, the environment takes\n", + "a [scenario](../api_reference/scene/index.rst), which defines the environment the\n", + "satellites are acting in, and a [rewarder](../api_reference/data/index.rst), which defines\n", + "how data collected from the scenario is rewarded." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'Tumbler': Discrete(1),\n", + " 'Thrust-1': Box([-100. -100. -100. 0.], [100. 100. 100. inf], (4,), float32),\n", + " 'Thrust-2': Box([-100. -100. -100. 0.], [100. 100. 100. inf], (4,), float32)}" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "scanner_sat_args = dict(\n", + " imageAttErrorRequirement=0.01,\n", + " imageRateErrorRequirement=0.1,\n", + " instrumentBaudRate=1,\n", + " dataStorageCapacity=1e6,\n", + " batteryStorageCapacity=1e9,\n", + " storedCharge_Init=1e9,\n", + " conjunction_radius=2.0,\n", + ")\n", + "\n", + "env = ConstellationTasking(\n", + " satellites=[\n", + " TumbleSat(\n", + " \"Tumbler\",\n", + " obs_type=dict,\n", + " sat_args=dict(\n", + " # sigma_init=np.zeros(3),\n", + " omega_init=np.zeros(3),\n", + " conjunction_radius=2.0,\n", + " ),\n", + " ),\n", + " ThrustSat(\"Thrust-1\", obs_type=dict, sat_args=scanner_sat_args),\n", + " ThrustSat(\"Thrust-2\", obs_type=dict, sat_args=scanner_sat_args),\n", + " ],\n", + " sat_arg_randomizer=relative_to_chief(\n", + " chief_name=\"Tumbler\",\n", + " chief_orbit=partial(random_orbit, i=0, Omega=0, omega=0, f=0),\n", + " deputy_relative_state={\n", + " \"Thrust-1\": lambda: np.concatenate(\n", + " (random_unit_vector() * 50, np.zeros(3))\n", + " ),\n", + " \"Thrust-2\": np.array([-50, 0, 0, 0, 0, 0]),\n", + " },\n", + " ),\n", + " scenario=FibonacciSphereRSOPoints(\n", + " n_points=100,\n", + " radius=1,\n", + " theta_min=np.radians(30),\n", + " ),\n", + " # communicator=comm.LOSCommunication(),\n", + " rewarder=(RSOInspectionReward(), FuelPenalty()),\n", + " time_limit=5700.0 * 3,\n", + " log_level=\"INFO\",\n", + ")\n", + "env.action_spaces" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Interacting with the Environment\n", + "\n", + "First, the environment is reset." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[90;3m2024-12-26 11:31:30,574 \u001b[0m\u001b[mgym \u001b[0m\u001b[mINFO \u001b[0m\u001b[mResetting environment with seed=624502672\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:30,771 \u001b[0m\u001b[36msats.satellite.Thrust-1.FSW \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<0.00> \u001b[0m\u001b[36mThrust-1: \u001b[0m\u001b[m >\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:30,772 \u001b[0m\u001b[92msats.satellite.Thrust-2.FSW \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<0.00> \u001b[0m\u001b[92mThrust-2: \u001b[0m\u001b[m >\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:30,813 \u001b[0m\u001b[mgym \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<0.00> \u001b[0m\u001b[mEnvironment reset\u001b[0m\n" + ] + } + ], + "source": [ + "observation, info = env.reset() # seed=0)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next, we step the environment several times with thrust actions. Each action commands an\n", + "inertial delta-V followed by a drift period during which the RSO is inspected. Note that\n", + "the logs show the inspection reward earned for the points inspected in each step, and a\n", + "fuel penalty whenever a satellite commands a nonzero thrust."
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[90;3m2024-12-26 11:31:30,818 \u001b[0m\u001b[mgym \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<0.00> \u001b[0m\u001b[93;1m=== STARTING STEP ===\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:30,819 \u001b[0m\u001b[34msats.satellite.Tumbler \u001b[0m\u001b[93mWARNING \u001b[0m\u001b[33m<0.00> \u001b[0m\u001b[34mTumbler: \u001b[0m\u001b[93mRequires retasking but received no task.\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:30,819 \u001b[0m\u001b[36msats.satellite.Thrust-1 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<0.00> \u001b[0m\u001b[36mThrust-1: \u001b[0m\u001b[mThrusting with inertial dV [0. 0. 0.] with 500.0 second drift.\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:30,820 \u001b[0m\u001b[36msats.satellite.Thrust-1 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<0.00> \u001b[0m\u001b[36mThrust-1: \u001b[0m\u001b[msetting timed terminal event at 500.0\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:30,821 \u001b[0m\u001b[36msats.satellite.Thrust-1 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<0.00> \u001b[0m\u001b[36mThrust-1: \u001b[0m\u001b[mFSW action action_inspect_rso activated.\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:30,821 \u001b[0m\u001b[92msats.satellite.Thrust-2 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<0.00> \u001b[0m\u001b[92mThrust-2: \u001b[0m\u001b[mThrusting with inertial dV [0. 0. 0.] 
with 500.0 second drift.\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:30,821 \u001b[0m\u001b[92msats.satellite.Thrust-2 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<0.00> \u001b[0m\u001b[92mThrust-2: \u001b[0m\u001b[msetting timed terminal event at 500.0\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:30,822 \u001b[0m\u001b[92msats.satellite.Thrust-2 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<0.00> \u001b[0m\u001b[92mThrust-2: \u001b[0m\u001b[mFSW action action_inspect_rso activated.\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:30,917 \u001b[0m\u001b[36msats.satellite.Thrust-1 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<500.00> \u001b[0m\u001b[36mThrust-1: \u001b[0m\u001b[mtimed termination at 500.0 \u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:30,918 \u001b[0m\u001b[92msats.satellite.Thrust-2 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<500.00> \u001b[0m\u001b[92mThrust-2: \u001b[0m\u001b[mtimed termination at 500.0 \u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:30,992 \u001b[0m\u001b[36msats.satellite.Thrust-1 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<500.00> \u001b[0m\u001b[36mThrust-1: \u001b[0m\u001b[mInspected 8 points this step\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:31,067 \u001b[0m\u001b[92msats.satellite.Thrust-2 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<500.00> \u001b[0m\u001b[92mThrust-2: \u001b[0m\u001b[mInspected 10 points this step\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:31,068 \u001b[0m\u001b[mdata.composition \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<500.00> \u001b[0m\u001b[mRSOInspectionReward reward: {'Thrust-1': 0.08, 'Thrust-2': 0.1}\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:31,068 \u001b[0m\u001b[mdata.base \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<500.00> \u001b[0m\u001b[mTotal reward: {'Thrust-1': 0.08, 'Thrust-2': 0.1}\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:31,068 \u001b[0m\u001b[34msats.satellite.Tumbler \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<500.00> \u001b[0m\u001b[34mTumbler: \u001b[0m\u001b[mSatellite Tumbler requires 
retasking\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:31,069 \u001b[0m\u001b[36msats.satellite.Thrust-1 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<500.00> \u001b[0m\u001b[36mThrust-1: \u001b[0m\u001b[mSatellite Thrust-1 requires retasking\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:31,069 \u001b[0m\u001b[92msats.satellite.Thrust-2 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<500.00> \u001b[0m\u001b[92mThrust-2: \u001b[0m\u001b[mSatellite Thrust-2 requires retasking\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:31,071 \u001b[0m\u001b[mgym \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<500.00> \u001b[0m\u001b[mStep reward: {'Thrust-1': 0.08, 'Thrust-2': 0.1}\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:31,071 \u001b[0m\u001b[mgym \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<500.00> \u001b[0m\u001b[93;1m=== STARTING STEP ===\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:31,072 \u001b[0m\u001b[34msats.satellite.Tumbler \u001b[0m\u001b[93mWARNING \u001b[0m\u001b[33m<500.00> \u001b[0m\u001b[34mTumbler: \u001b[0m\u001b[93mRequires retasking but received no task.\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:31,072 \u001b[0m\u001b[36msats.satellite.Thrust-1 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<500.00> \u001b[0m\u001b[36mThrust-1: \u001b[0m\u001b[mThrusting with inertial dV [0.01 0. 0. 
] with 500.0 second drift.\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:31,073 \u001b[0m\u001b[36msats.satellite.Thrust-1 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<500.00> \u001b[0m\u001b[36mThrust-1: \u001b[0m\u001b[msetting timed terminal event at 1000.0\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:31,073 \u001b[0m\u001b[36msats.satellite.Thrust-1 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<500.00> \u001b[0m\u001b[36mThrust-1: \u001b[0m\u001b[mFSW action action_inspect_rso activated.\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:31,074 \u001b[0m\u001b[92msats.satellite.Thrust-2 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<500.00> \u001b[0m\u001b[92mThrust-2: \u001b[0m\u001b[mThrusting with inertial dV [0.01 0. 0. ] with 500.0 second drift.\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:31,074 \u001b[0m\u001b[92msats.satellite.Thrust-2 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<500.00> \u001b[0m\u001b[92mThrust-2: \u001b[0m\u001b[msetting timed terminal event at 1000.0\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:31,074 \u001b[0m\u001b[92msats.satellite.Thrust-2 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<500.00> \u001b[0m\u001b[92mThrust-2: \u001b[0m\u001b[mFSW action action_inspect_rso activated.\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:31,174 \u001b[0m\u001b[36msats.satellite.Thrust-1 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<1000.00> \u001b[0m\u001b[36mThrust-1: \u001b[0m\u001b[mtimed termination at 1000.0 \u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:31,174 \u001b[0m\u001b[92msats.satellite.Thrust-2 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<1000.00> \u001b[0m\u001b[92mThrust-2: \u001b[0m\u001b[mtimed termination at 1000.0 \u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:31,250 \u001b[0m\u001b[36msats.satellite.Thrust-1 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<1000.00> \u001b[0m\u001b[36mThrust-1: \u001b[0m\u001b[mInspected 9 points this step\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:31,327 \u001b[0m\u001b[92msats.satellite.Thrust-2 \u001b[0m\u001b[mINFO 
\u001b[0m\u001b[33m<1000.00> \u001b[0m\u001b[92mThrust-2: \u001b[0m\u001b[mInspected 9 points this step\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:31,328 \u001b[0m\u001b[mdata.composition \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<1000.00> \u001b[0m\u001b[mRSOInspectionReward reward: {'Thrust-1': 0.03, 'Thrust-2': 0.02}\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:31,328 \u001b[0m\u001b[mdata.composition \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<1000.00> \u001b[0m\u001b[mFuelPenalty reward: {'Thrust-1': -0.010000000000005116, 'Thrust-2': -0.010000000000005116}\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:31,329 \u001b[0m\u001b[mdata.base \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<1000.00> \u001b[0m\u001b[mTotal reward: {'Thrust-1': 0.019999999999994883, 'Thrust-2': 0.009999999999994885}\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:31,329 \u001b[0m\u001b[34msats.satellite.Tumbler \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<1000.00> \u001b[0m\u001b[34mTumbler: \u001b[0m\u001b[mSatellite Tumbler requires retasking\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:31,329 \u001b[0m\u001b[36msats.satellite.Thrust-1 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<1000.00> \u001b[0m\u001b[36mThrust-1: \u001b[0m\u001b[mSatellite Thrust-1 requires retasking\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:31,330 \u001b[0m\u001b[92msats.satellite.Thrust-2 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<1000.00> \u001b[0m\u001b[92mThrust-2: \u001b[0m\u001b[mSatellite Thrust-2 requires retasking\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:31,331 \u001b[0m\u001b[mgym \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<1000.00> \u001b[0m\u001b[mStep reward: {'Thrust-1': 0.019999999999994883, 'Thrust-2': 0.009999999999994885}\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:31,332 \u001b[0m\u001b[mgym \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<1000.00> \u001b[0m\u001b[93;1m=== STARTING STEP ===\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:31,332 \u001b[0m\u001b[34msats.satellite.Tumbler \u001b[0m\u001b[93mWARNING 
\u001b[0m\u001b[33m<1000.00> \u001b[0m\u001b[34mTumbler: \u001b[0m\u001b[93mRequires retasking but received no task.\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:31,333 \u001b[0m\u001b[36msats.satellite.Thrust-1 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<1000.00> \u001b[0m\u001b[36mThrust-1: \u001b[0m\u001b[mThrusting with inertial dV [0.01 0. 0. ] with 500.0 second drift.\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:31,333 \u001b[0m\u001b[36msats.satellite.Thrust-1 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<1000.00> \u001b[0m\u001b[36mThrust-1: \u001b[0m\u001b[msetting timed terminal event at 1500.0\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:31,334 \u001b[0m\u001b[36msats.satellite.Thrust-1 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<1000.00> \u001b[0m\u001b[36mThrust-1: \u001b[0m\u001b[mFSW action action_inspect_rso activated.\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:31,334 \u001b[0m\u001b[92msats.satellite.Thrust-2 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<1000.00> \u001b[0m\u001b[92mThrust-2: \u001b[0m\u001b[mThrusting with inertial dV [0.01 0. 0. 
] with 500.0 second drift.\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:31,334 \u001b[0m\u001b[92msats.satellite.Thrust-2 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<1000.00> \u001b[0m\u001b[92mThrust-2: \u001b[0m\u001b[msetting timed terminal event at 1500.0\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:31,335 \u001b[0m\u001b[92msats.satellite.Thrust-2 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<1000.00> \u001b[0m\u001b[92mThrust-2: \u001b[0m\u001b[mFSW action action_inspect_rso activated.\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:31,426 \u001b[0m\u001b[36msats.satellite.Thrust-1 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<1500.00> \u001b[0m\u001b[36mThrust-1: \u001b[0m\u001b[mtimed termination at 1500.0 \u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:31,427 \u001b[0m\u001b[92msats.satellite.Thrust-2 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<1500.00> \u001b[0m\u001b[92mThrust-2: \u001b[0m\u001b[mtimed termination at 1500.0 \u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:31,502 \u001b[0m\u001b[36msats.satellite.Thrust-1 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<1500.00> \u001b[0m\u001b[36mThrust-1: \u001b[0m\u001b[mInspected 8 points this step\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:31,578 \u001b[0m\u001b[92msats.satellite.Thrust-2 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<1500.00> \u001b[0m\u001b[92mThrust-2: \u001b[0m\u001b[mInspected 9 points this step\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:31,579 \u001b[0m\u001b[mdata.composition \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<1500.00> \u001b[0m\u001b[mRSOInspectionReward reward: {'Thrust-1': 0.02, 'Thrust-2': 0.03}\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:31,579 \u001b[0m\u001b[mdata.composition \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<1500.00> \u001b[0m\u001b[mFuelPenalty reward: {'Thrust-1': -0.010000000000005116, 'Thrust-2': -0.010000000000005116}\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:31,579 \u001b[0m\u001b[mdata.base \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<1500.00> \u001b[0m\u001b[mTotal reward: 
{'Thrust-1': 0.009999999999994885, 'Thrust-2': 0.019999999999994883}\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:31,580 \u001b[0m\u001b[34msats.satellite.Tumbler \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<1500.00> \u001b[0m\u001b[34mTumbler: \u001b[0m\u001b[mSatellite Tumbler requires retasking\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:31,580 \u001b[0m\u001b[36msats.satellite.Thrust-1 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<1500.00> \u001b[0m\u001b[36mThrust-1: \u001b[0m\u001b[mSatellite Thrust-1 requires retasking\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:31,580 \u001b[0m\u001b[92msats.satellite.Thrust-2 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<1500.00> \u001b[0m\u001b[92mThrust-2: \u001b[0m\u001b[mSatellite Thrust-2 requires retasking\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:31,582 \u001b[0m\u001b[mgym \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<1500.00> \u001b[0m\u001b[mStep reward: {'Thrust-1': 0.009999999999994885, 'Thrust-2': 0.019999999999994883}\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:31,583 \u001b[0m\u001b[mgym \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<1500.00> \u001b[0m\u001b[93;1m=== STARTING STEP ===\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:31,584 \u001b[0m\u001b[34msats.satellite.Tumbler \u001b[0m\u001b[93mWARNING \u001b[0m\u001b[33m<1500.00> \u001b[0m\u001b[34mTumbler: \u001b[0m\u001b[93mRequires retasking but received no task.\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:31,584 \u001b[0m\u001b[36msats.satellite.Thrust-1 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<1500.00> \u001b[0m\u001b[36mThrust-1: \u001b[0m\u001b[mThrusting with inertial dV [0. 0. 0.] 
with 500.0 second drift.\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:31,584 \u001b[0m\u001b[36msats.satellite.Thrust-1 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<1500.00> \u001b[0m\u001b[36mThrust-1: \u001b[0m\u001b[msetting timed terminal event at 2000.0\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:31,585 \u001b[0m\u001b[36msats.satellite.Thrust-1 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<1500.00> \u001b[0m\u001b[36mThrust-1: \u001b[0m\u001b[mFSW action action_inspect_rso activated.\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:31,585 \u001b[0m\u001b[92msats.satellite.Thrust-2 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<1500.00> \u001b[0m\u001b[92mThrust-2: \u001b[0m\u001b[mThrusting with inertial dV [0. 0. 0.] with 500.0 second drift.\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:31,586 \u001b[0m\u001b[92msats.satellite.Thrust-2 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<1500.00> \u001b[0m\u001b[92mThrust-2: \u001b[0m\u001b[msetting timed terminal event at 2000.0\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:31,586 \u001b[0m\u001b[92msats.satellite.Thrust-2 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<1500.00> \u001b[0m\u001b[92mThrust-2: \u001b[0m\u001b[mFSW action action_inspect_rso activated.\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:31,672 \u001b[0m\u001b[36msats.satellite.Thrust-1 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<2000.00> \u001b[0m\u001b[36mThrust-1: \u001b[0m\u001b[mtimed termination at 2000.0 \u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:31,672 \u001b[0m\u001b[92msats.satellite.Thrust-2 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<2000.00> \u001b[0m\u001b[92mThrust-2: \u001b[0m\u001b[mtimed termination at 2000.0 \u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:31,746 \u001b[0m\u001b[36msats.satellite.Thrust-1 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<2000.00> \u001b[0m\u001b[36mThrust-1: \u001b[0m\u001b[mInspected 7 points this step\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:31,820 \u001b[0m\u001b[92msats.satellite.Thrust-2 \u001b[0m\u001b[mINFO 
\u001b[0m\u001b[33m<2000.00> \u001b[0m\u001b[92mThrust-2: \u001b[0m\u001b[mInspected 9 points this step\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:31,820 \u001b[0m\u001b[mdata.composition \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<2000.00> \u001b[0m\u001b[mRSOInspectionReward reward: {'Thrust-1': 0.02, 'Thrust-2': 0.03}\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:31,821 \u001b[0m\u001b[mdata.base \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<2000.00> \u001b[0m\u001b[mTotal reward: {'Thrust-1': 0.02, 'Thrust-2': 0.03}\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:31,821 \u001b[0m\u001b[34msats.satellite.Tumbler \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<2000.00> \u001b[0m\u001b[34mTumbler: \u001b[0m\u001b[mSatellite Tumbler requires retasking\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:31,821 \u001b[0m\u001b[36msats.satellite.Thrust-1 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<2000.00> \u001b[0m\u001b[36mThrust-1: \u001b[0m\u001b[mSatellite Thrust-1 requires retasking\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:31,822 \u001b[0m\u001b[92msats.satellite.Thrust-2 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<2000.00> \u001b[0m\u001b[92mThrust-2: \u001b[0m\u001b[mSatellite Thrust-2 requires retasking\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:31,823 \u001b[0m\u001b[mgym \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<2000.00> \u001b[0m\u001b[mStep reward: {'Thrust-1': 0.02, 'Thrust-2': 0.03}\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:31,824 \u001b[0m\u001b[mgym \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<2000.00> \u001b[0m\u001b[93;1m=== STARTING STEP ===\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:31,824 \u001b[0m\u001b[34msats.satellite.Tumbler \u001b[0m\u001b[93mWARNING \u001b[0m\u001b[33m<2000.00> \u001b[0m\u001b[34mTumbler: \u001b[0m\u001b[93mRequires retasking but received no task.\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:31,824 \u001b[0m\u001b[36msats.satellite.Thrust-1 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<2000.00> \u001b[0m\u001b[36mThrust-1: 
\u001b[0m\u001b[mThrusting with inertial dV [0.01 0. 0. ] with 500.0 second drift.\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:31,825 \u001b[0m\u001b[36msats.satellite.Thrust-1 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<2000.00> \u001b[0m\u001b[36mThrust-1: \u001b[0m\u001b[msetting timed terminal event at 2500.0\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:31,825 \u001b[0m\u001b[36msats.satellite.Thrust-1 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<2000.00> \u001b[0m\u001b[36mThrust-1: \u001b[0m\u001b[mFSW action action_inspect_rso activated.\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:31,826 \u001b[0m\u001b[92msats.satellite.Thrust-2 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<2000.00> \u001b[0m\u001b[92mThrust-2: \u001b[0m\u001b[mThrusting with inertial dV [0.01 0. 0. ] with 500.0 second drift.\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:31,826 \u001b[0m\u001b[92msats.satellite.Thrust-2 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<2000.00> \u001b[0m\u001b[92mThrust-2: \u001b[0m\u001b[msetting timed terminal event at 2500.0\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:31,826 \u001b[0m\u001b[92msats.satellite.Thrust-2 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<2000.00> \u001b[0m\u001b[92mThrust-2: \u001b[0m\u001b[mFSW action action_inspect_rso activated.\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:31,969 \u001b[0m\u001b[36msats.satellite.Thrust-1 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<2500.00> \u001b[0m\u001b[36mThrust-1: \u001b[0m\u001b[mtimed termination at 2500.0 \u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:31,969 \u001b[0m\u001b[92msats.satellite.Thrust-2 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<2500.00> \u001b[0m\u001b[92mThrust-2: \u001b[0m\u001b[mtimed termination at 2500.0 \u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:32,041 \u001b[0m\u001b[36msats.satellite.Thrust-1 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<2500.00> \u001b[0m\u001b[36mThrust-1: \u001b[0m\u001b[mInspected 10 points this step\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:32,113 
\u001b[0m\u001b[92msats.satellite.Thrust-2 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<2500.00> \u001b[0m\u001b[92mThrust-2: \u001b[0m\u001b[mInspected 10 points this step\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:32,114 \u001b[0m\u001b[mdata.composition \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<2500.00> \u001b[0m\u001b[mRSOInspectionReward reward: {'Thrust-1': 0.04, 'Thrust-2': 0.04}\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:32,114 \u001b[0m\u001b[mdata.composition \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<2500.00> \u001b[0m\u001b[mFuelPenalty reward: {'Thrust-1': -0.010000000000005116, 'Thrust-2': -0.010000000000005116}\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:32,114 \u001b[0m\u001b[mdata.base \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<2500.00> \u001b[0m\u001b[mTotal reward: {'Thrust-1': 0.029999999999994885, 'Thrust-2': 0.029999999999994885}\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:32,114 \u001b[0m\u001b[34msats.satellite.Tumbler \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<2500.00> \u001b[0m\u001b[34mTumbler: \u001b[0m\u001b[mSatellite Tumbler requires retasking\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:32,115 \u001b[0m\u001b[36msats.satellite.Thrust-1 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<2500.00> \u001b[0m\u001b[36mThrust-1: \u001b[0m\u001b[mSatellite Thrust-1 requires retasking\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:32,115 \u001b[0m\u001b[92msats.satellite.Thrust-2 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<2500.00> \u001b[0m\u001b[92mThrust-2: \u001b[0m\u001b[mSatellite Thrust-2 requires retasking\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:32,116 \u001b[0m\u001b[mgym \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<2500.00> \u001b[0m\u001b[mStep reward: {'Thrust-1': 0.029999999999994885, 'Thrust-2': 0.029999999999994885}\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:32,117 \u001b[0m\u001b[mgym \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<2500.00> \u001b[0m\u001b[93;1m=== STARTING STEP ===\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:32,118 
\u001b[0m\u001b[34msats.satellite.Tumbler \u001b[0m\u001b[93mWARNING \u001b[0m\u001b[33m<2500.00> \u001b[0m\u001b[34mTumbler: \u001b[0m\u001b[93mRequires retasking but received no task.\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:32,118 \u001b[0m\u001b[36msats.satellite.Thrust-1 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<2500.00> \u001b[0m\u001b[36mThrust-1: \u001b[0m\u001b[mThrusting with inertial dV [0.01 0. 0. ] with 500.0 second drift.\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:32,118 \u001b[0m\u001b[36msats.satellite.Thrust-1 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<2500.00> \u001b[0m\u001b[36mThrust-1: \u001b[0m\u001b[msetting timed terminal event at 3000.0\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:32,118 \u001b[0m\u001b[36msats.satellite.Thrust-1 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<2500.00> \u001b[0m\u001b[36mThrust-1: \u001b[0m\u001b[mFSW action action_inspect_rso activated.\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:32,119 \u001b[0m\u001b[92msats.satellite.Thrust-2 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<2500.00> \u001b[0m\u001b[92mThrust-2: \u001b[0m\u001b[mThrusting with inertial dV [0. 0. 0.] 
with 500.0 second drift.\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:32,119 \u001b[0m\u001b[92msats.satellite.Thrust-2 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<2500.00> \u001b[0m\u001b[92mThrust-2: \u001b[0m\u001b[msetting timed terminal event at 3000.0\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:32,119 \u001b[0m\u001b[92msats.satellite.Thrust-2 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<2500.00> \u001b[0m\u001b[92mThrust-2: \u001b[0m\u001b[mFSW action action_inspect_rso activated.\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:32,210 \u001b[0m\u001b[36msats.satellite.Thrust-1 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<3000.00> \u001b[0m\u001b[36mThrust-1: \u001b[0m\u001b[mtimed termination at 3000.0 \u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:32,211 \u001b[0m\u001b[92msats.satellite.Thrust-2 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<3000.00> \u001b[0m\u001b[92mThrust-2: \u001b[0m\u001b[mtimed termination at 3000.0 \u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:32,282 \u001b[0m\u001b[36msats.satellite.Thrust-1 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<3000.00> \u001b[0m\u001b[36mThrust-1: \u001b[0m\u001b[mInspected 10 points this step\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:32,360 \u001b[0m\u001b[92msats.satellite.Thrust-2 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<3000.00> \u001b[0m\u001b[92mThrust-2: \u001b[0m\u001b[mInspected 11 points this step\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:32,361 \u001b[0m\u001b[mdata.composition \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<3000.00> \u001b[0m\u001b[mRSOInspectionReward reward: {'Thrust-1': 0.04, 'Thrust-2': 0.04}\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:32,361 \u001b[0m\u001b[mdata.composition \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<3000.00> \u001b[0m\u001b[mFuelPenalty reward: {'Thrust-1': -0.010000000000005116}\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:32,361 \u001b[0m\u001b[mdata.base \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<3000.00> \u001b[0m\u001b[mTotal reward: {'Thrust-1': 0.029999999999994885, 
'Thrust-2': 0.04}\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:32,362 \u001b[0m\u001b[34msats.satellite.Tumbler \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<3000.00> \u001b[0m\u001b[34mTumbler: \u001b[0m\u001b[mSatellite Tumbler requires retasking\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:32,362 \u001b[0m\u001b[36msats.satellite.Thrust-1 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<3000.00> \u001b[0m\u001b[36mThrust-1: \u001b[0m\u001b[mSatellite Thrust-1 requires retasking\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:32,362 \u001b[0m\u001b[92msats.satellite.Thrust-2 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<3000.00> \u001b[0m\u001b[92mThrust-2: \u001b[0m\u001b[mSatellite Thrust-2 requires retasking\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:32,364 \u001b[0m\u001b[mgym \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<3000.00> \u001b[0m\u001b[mStep reward: {'Thrust-1': 0.029999999999994885, 'Thrust-2': 0.04}\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:32,364 \u001b[0m\u001b[mgym \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<3000.00> \u001b[0m\u001b[93;1m=== STARTING STEP ===\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:32,365 \u001b[0m\u001b[34msats.satellite.Tumbler \u001b[0m\u001b[93mWARNING \u001b[0m\u001b[33m<3000.00> \u001b[0m\u001b[34mTumbler: \u001b[0m\u001b[93mRequires retasking but received no task.\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:32,365 \u001b[0m\u001b[36msats.satellite.Thrust-1 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<3000.00> \u001b[0m\u001b[36mThrust-1: \u001b[0m\u001b[mThrusting with inertial dV [0.01 0. 0. 
] with 500.0 second drift.\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:32,366 \u001b[0m\u001b[36msats.satellite.Thrust-1 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<3000.00> \u001b[0m\u001b[36mThrust-1: \u001b[0m\u001b[msetting timed terminal event at 3500.0\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:32,366 \u001b[0m\u001b[36msats.satellite.Thrust-1 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<3000.00> \u001b[0m\u001b[36mThrust-1: \u001b[0m\u001b[mFSW action action_inspect_rso activated.\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:32,366 \u001b[0m\u001b[92msats.satellite.Thrust-2 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<3000.00> \u001b[0m\u001b[92mThrust-2: \u001b[0m\u001b[mThrusting with inertial dV [0.01 0. 0. ] with 500.0 second drift.\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:32,367 \u001b[0m\u001b[92msats.satellite.Thrust-2 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<3000.00> \u001b[0m\u001b[92mThrust-2: \u001b[0m\u001b[msetting timed terminal event at 3500.0\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:32,368 \u001b[0m\u001b[92msats.satellite.Thrust-2 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<3000.00> \u001b[0m\u001b[92mThrust-2: \u001b[0m\u001b[mFSW action action_inspect_rso activated.\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:32,461 \u001b[0m\u001b[36msats.satellite.Thrust-1 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<3500.00> \u001b[0m\u001b[36mThrust-1: \u001b[0m\u001b[mtimed termination at 3500.0 \u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:32,462 \u001b[0m\u001b[92msats.satellite.Thrust-2 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<3500.00> \u001b[0m\u001b[92mThrust-2: \u001b[0m\u001b[mtimed termination at 3500.0 \u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:32,534 \u001b[0m\u001b[36msats.satellite.Thrust-1 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<3500.00> \u001b[0m\u001b[36mThrust-1: \u001b[0m\u001b[mInspected 11 points this step\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:32,606 \u001b[0m\u001b[92msats.satellite.Thrust-2 \u001b[0m\u001b[mINFO 
\u001b[0m\u001b[33m<3500.00> \u001b[0m\u001b[92mThrust-2: \u001b[0m\u001b[mInspected 10 points this step\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:32,607 \u001b[0m\u001b[mdata.composition \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<3500.00> \u001b[0m\u001b[mRSOInspectionReward reward: {'Thrust-1': 0.02, 'Thrust-2': 0.03}\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:32,607 \u001b[0m\u001b[mdata.composition \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<3500.00> \u001b[0m\u001b[mFuelPenalty reward: {'Thrust-1': -0.010000000000005116, 'Thrust-2': -0.010000000000005116}\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:32,607 \u001b[0m\u001b[mdata.base \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<3500.00> \u001b[0m\u001b[mTotal reward: {'Thrust-1': 0.009999999999994885, 'Thrust-2': 0.019999999999994883}\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:32,608 \u001b[0m\u001b[34msats.satellite.Tumbler \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<3500.00> \u001b[0m\u001b[34mTumbler: \u001b[0m\u001b[mSatellite Tumbler requires retasking\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:32,608 \u001b[0m\u001b[36msats.satellite.Thrust-1 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<3500.00> \u001b[0m\u001b[36mThrust-1: \u001b[0m\u001b[mSatellite Thrust-1 requires retasking\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:32,608 \u001b[0m\u001b[92msats.satellite.Thrust-2 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<3500.00> \u001b[0m\u001b[92mThrust-2: \u001b[0m\u001b[mSatellite Thrust-2 requires retasking\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:32,610 \u001b[0m\u001b[mgym \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<3500.00> \u001b[0m\u001b[mStep reward: {'Thrust-1': 0.009999999999994885, 'Thrust-2': 0.019999999999994883}\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:32,611 \u001b[0m\u001b[mgym \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<3500.00> \u001b[0m\u001b[93;1m=== STARTING STEP ===\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:32,611 \u001b[0m\u001b[34msats.satellite.Tumbler \u001b[0m\u001b[93mWARNING 
\u001b[0m\u001b[33m<3500.00> \u001b[0m\u001b[34mTumbler: \u001b[0m\u001b[93mRequires retasking but received no task.\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:32,611 \u001b[0m\u001b[36msats.satellite.Thrust-1 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<3500.00> \u001b[0m\u001b[36mThrust-1: \u001b[0m\u001b[mThrusting with inertial dV [0. 0. 0.] with 500.0 second drift.\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:32,612 \u001b[0m\u001b[36msats.satellite.Thrust-1 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<3500.00> \u001b[0m\u001b[36mThrust-1: \u001b[0m\u001b[msetting timed terminal event at 4000.0\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:32,612 \u001b[0m\u001b[36msats.satellite.Thrust-1 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<3500.00> \u001b[0m\u001b[36mThrust-1: \u001b[0m\u001b[mFSW action action_inspect_rso activated.\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:32,613 \u001b[0m\u001b[92msats.satellite.Thrust-2 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<3500.00> \u001b[0m\u001b[92mThrust-2: \u001b[0m\u001b[mThrusting with inertial dV [0. 0. 0.] 
with 500.0 second drift.\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:32,613 \u001b[0m\u001b[92msats.satellite.Thrust-2 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<3500.00> \u001b[0m\u001b[92mThrust-2: \u001b[0m\u001b[msetting timed terminal event at 4000.0\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:32,614 \u001b[0m\u001b[92msats.satellite.Thrust-2 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<3500.00> \u001b[0m\u001b[92mThrust-2: \u001b[0m\u001b[mFSW action action_inspect_rso activated.\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:32,708 \u001b[0m\u001b[36msats.satellite.Thrust-1 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<4000.00> \u001b[0m\u001b[36mThrust-1: \u001b[0m\u001b[mtimed termination at 4000.0 \u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:32,709 \u001b[0m\u001b[92msats.satellite.Thrust-2 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<4000.00> \u001b[0m\u001b[92mThrust-2: \u001b[0m\u001b[mtimed termination at 4000.0 \u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:32,781 \u001b[0m\u001b[36msats.satellite.Thrust-1 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<4000.00> \u001b[0m\u001b[36mThrust-1: \u001b[0m\u001b[mInspected 11 points this step\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:32,853 \u001b[0m\u001b[92msats.satellite.Thrust-2 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<4000.00> \u001b[0m\u001b[92mThrust-2: \u001b[0m\u001b[mInspected 10 points this step\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:32,854 \u001b[0m\u001b[mdata.composition \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<4000.00> \u001b[0m\u001b[mRSOInspectionReward reward: {'Thrust-2': 0.02}\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:32,854 \u001b[0m\u001b[mdata.base \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<4000.00> \u001b[0m\u001b[mTotal reward: {'Thrust-2': 0.02}\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:32,854 \u001b[0m\u001b[34msats.satellite.Tumbler \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<4000.00> \u001b[0m\u001b[34mTumbler: \u001b[0m\u001b[mSatellite Tumbler requires retasking\u001b[0m\n", + 
"\u001b[90;3m2024-12-26 11:31:32,855 \u001b[0m\u001b[36msats.satellite.Thrust-1 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<4000.00> \u001b[0m\u001b[36mThrust-1: \u001b[0m\u001b[mSatellite Thrust-1 requires retasking\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:32,855 \u001b[0m\u001b[92msats.satellite.Thrust-2 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<4000.00> \u001b[0m\u001b[92mThrust-2: \u001b[0m\u001b[mSatellite Thrust-2 requires retasking\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:32,856 \u001b[0m\u001b[mgym \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<4000.00> \u001b[0m\u001b[mStep reward: {'Thrust-2': 0.02}\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:32,857 \u001b[0m\u001b[mgym \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<4000.00> \u001b[0m\u001b[93;1m=== STARTING STEP ===\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:32,858 \u001b[0m\u001b[34msats.satellite.Tumbler \u001b[0m\u001b[93mWARNING \u001b[0m\u001b[33m<4000.00> \u001b[0m\u001b[34mTumbler: \u001b[0m\u001b[93mRequires retasking but received no task.\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:32,858 \u001b[0m\u001b[36msats.satellite.Thrust-1 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<4000.00> \u001b[0m\u001b[36mThrust-1: \u001b[0m\u001b[mThrusting with inertial dV [0. 0. 0.] with 500.0 second drift.\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:32,859 \u001b[0m\u001b[36msats.satellite.Thrust-1 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<4000.00> \u001b[0m\u001b[36mThrust-1: \u001b[0m\u001b[msetting timed terminal event at 4500.0\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:32,859 \u001b[0m\u001b[36msats.satellite.Thrust-1 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<4000.00> \u001b[0m\u001b[36mThrust-1: \u001b[0m\u001b[mFSW action action_inspect_rso activated.\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:32,860 \u001b[0m\u001b[92msats.satellite.Thrust-2 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<4000.00> \u001b[0m\u001b[92mThrust-2: \u001b[0m\u001b[mThrusting with inertial dV [0.01 0. 0. 
] with 500.0 second drift.\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:32,860 \u001b[0m\u001b[92msats.satellite.Thrust-2 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<4000.00> \u001b[0m\u001b[92mThrust-2: \u001b[0m\u001b[msetting timed terminal event at 4500.0\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:32,860 \u001b[0m\u001b[92msats.satellite.Thrust-2 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<4000.00> \u001b[0m\u001b[92mThrust-2: \u001b[0m\u001b[mFSW action action_inspect_rso activated.\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:32,951 \u001b[0m\u001b[36msats.satellite.Thrust-1 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<4500.00> \u001b[0m\u001b[36mThrust-1: \u001b[0m\u001b[mtimed termination at 4500.0 \u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:32,952 \u001b[0m\u001b[92msats.satellite.Thrust-2 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<4500.00> \u001b[0m\u001b[92mThrust-2: \u001b[0m\u001b[mtimed termination at 4500.0 \u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:33,024 \u001b[0m\u001b[36msats.satellite.Thrust-1 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<4500.00> \u001b[0m\u001b[36mThrust-1: \u001b[0m\u001b[mInspected 13 points this step\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:33,097 \u001b[0m\u001b[92msats.satellite.Thrust-2 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<4500.00> \u001b[0m\u001b[92mThrust-2: \u001b[0m\u001b[mInspected 10 points this step\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:33,098 \u001b[0m\u001b[mdata.composition \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<4500.00> \u001b[0m\u001b[mRSOInspectionReward reward: {'Thrust-1': 0.01, 'Thrust-2': 0.01}\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:33,098 \u001b[0m\u001b[mdata.composition \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<4500.00> \u001b[0m\u001b[mFuelPenalty reward: {'Thrust-2': -0.010000000000005116}\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:33,099 \u001b[0m\u001b[mdata.base \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<4500.00> \u001b[0m\u001b[mTotal reward: {'Thrust-1': 0.01, 'Thrust-2': 
-5.115699530655604e-15}\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:33,099 \u001b[0m\u001b[34msats.satellite.Tumbler \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<4500.00> \u001b[0m\u001b[34mTumbler: \u001b[0m\u001b[mSatellite Tumbler requires retasking\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:33,099 \u001b[0m\u001b[36msats.satellite.Thrust-1 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<4500.00> \u001b[0m\u001b[36mThrust-1: \u001b[0m\u001b[mSatellite Thrust-1 requires retasking\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:33,099 \u001b[0m\u001b[92msats.satellite.Thrust-2 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<4500.00> \u001b[0m\u001b[92mThrust-2: \u001b[0m\u001b[mSatellite Thrust-2 requires retasking\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:33,101 \u001b[0m\u001b[mgym \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<4500.00> \u001b[0m\u001b[mStep reward: {'Thrust-1': 0.01, 'Thrust-2': -5.115699530655604e-15}\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:33,102 \u001b[0m\u001b[mgym \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<4500.00> \u001b[0m\u001b[93;1m=== STARTING STEP ===\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:33,102 \u001b[0m\u001b[34msats.satellite.Tumbler \u001b[0m\u001b[93mWARNING \u001b[0m\u001b[33m<4500.00> \u001b[0m\u001b[34mTumbler: \u001b[0m\u001b[93mRequires retasking but received no task.\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:33,102 \u001b[0m\u001b[36msats.satellite.Thrust-1 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<4500.00> \u001b[0m\u001b[36mThrust-1: \u001b[0m\u001b[mThrusting with inertial dV [0. 0. 0.] 
with 500.0 second drift.\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:33,103 \u001b[0m\u001b[36msats.satellite.Thrust-1 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<4500.00> \u001b[0m\u001b[36mThrust-1: \u001b[0m\u001b[msetting timed terminal event at 5000.0\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:33,103 \u001b[0m\u001b[36msats.satellite.Thrust-1 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<4500.00> \u001b[0m\u001b[36mThrust-1: \u001b[0m\u001b[mFSW action action_inspect_rso activated.\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:33,104 \u001b[0m\u001b[92msats.satellite.Thrust-2 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<4500.00> \u001b[0m\u001b[92mThrust-2: \u001b[0m\u001b[mThrusting with inertial dV [0.01 0. 0. ] with 500.0 second drift.\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:33,104 \u001b[0m\u001b[92msats.satellite.Thrust-2 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<4500.00> \u001b[0m\u001b[92mThrust-2: \u001b[0m\u001b[msetting timed terminal event at 5000.0\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:33,105 \u001b[0m\u001b[92msats.satellite.Thrust-2 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<4500.00> \u001b[0m\u001b[92mThrust-2: \u001b[0m\u001b[mFSW action action_inspect_rso activated.\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:33,194 \u001b[0m\u001b[36msats.satellite.Thrust-1 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<5000.00> \u001b[0m\u001b[36mThrust-1: \u001b[0m\u001b[mtimed termination at 5000.0 \u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:33,195 \u001b[0m\u001b[92msats.satellite.Thrust-2 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<5000.00> \u001b[0m\u001b[92mThrust-2: \u001b[0m\u001b[mtimed termination at 5000.0 \u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:33,267 \u001b[0m\u001b[36msats.satellite.Thrust-1 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<5000.00> \u001b[0m\u001b[36mThrust-1: \u001b[0m\u001b[mInspected 10 points this step\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:33,339 \u001b[0m\u001b[92msats.satellite.Thrust-2 \u001b[0m\u001b[mINFO 
\u001b[0m\u001b[33m<5000.00> \u001b[0m\u001b[92mThrust-2: \u001b[0m\u001b[mInspected 12 points this step\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:33,340 \u001b[0m\u001b[mdata.composition \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<5000.00> \u001b[0m\u001b[mRSOInspectionReward reward: {'Thrust-2': 0.01}\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:33,340 \u001b[0m\u001b[mdata.composition \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<5000.00> \u001b[0m\u001b[mFuelPenalty reward: {'Thrust-2': -0.010000000000005116}\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:33,340 \u001b[0m\u001b[mdata.base \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<5000.00> \u001b[0m\u001b[mTotal reward: {'Thrust-2': -5.115699530655604e-15}\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:33,341 \u001b[0m\u001b[34msats.satellite.Tumbler \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<5000.00> \u001b[0m\u001b[34mTumbler: \u001b[0m\u001b[mSatellite Tumbler requires retasking\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:33,341 \u001b[0m\u001b[36msats.satellite.Thrust-1 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<5000.00> \u001b[0m\u001b[36mThrust-1: \u001b[0m\u001b[mSatellite Thrust-1 requires retasking\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:33,341 \u001b[0m\u001b[92msats.satellite.Thrust-2 \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<5000.00> \u001b[0m\u001b[92mThrust-2: \u001b[0m\u001b[mSatellite Thrust-2 requires retasking\u001b[0m\n", + "\u001b[90;3m2024-12-26 11:31:33,343 \u001b[0m\u001b[mgym \u001b[0m\u001b[mINFO \u001b[0m\u001b[33m<5000.00> \u001b[0m\u001b[mStep reward: {'Thrust-2': -5.115699530655604e-15}\u001b[0m\n" + ] + } + ], + "source": [ + "for _ in range(10):\n", + " actions = {\n", + " \"Thrust-1\": np.array([0.01 * np.round(np.random.rand()), 0, 0, 500]),\n", + " \"Thrust-2\": np.array([0.01 * np.round(np.random.rand()), 0, 0, 500]),\n", + " }\n", + " # for sat in env.satellites:\n", + " # if sat.requires_retasking:\n", + " # if isinstance(sat, TumbleSat):\n", + " # actions[sat.name] = 0\n", + " 
# else:\n", + " # actions[sat.name] = np.concatenate(\n", + " # (np.random.uniform(-20, 20, 3), np.random.uniform(0, 200, 1))\n", + " # )\n", + "\n", + " observation, reward, terminated, truncated, info = env.step(actions)\n", + "\n", + " # print(env.satellites[1].data_store.data.point_inspect_status)\n", + " # print(\"storage:\", observation[\"Thrust-1\"][\"sat_props\"][\"storage_level_fraction\"])\n", + "\n", + "# BN = MRP2C(observation[\"Tumbler\"][\"sat_props\"][\"sigma_BN\"])\n", + "\n", + "# fig, ax = plt.subplots(1, 1, subplot_kw=dict(projection=\"3d\"))\n", + "# for point, inspected in env.satellites[1].data_store.data.point_inspect_status.items():\n", + "# ax.scatter(*(BN.T @ point.r_PB_B), color=\"tab:green\" if inspected else \"tab:red\")\n", + "\n", + "# # ax.scatter(\n", + "# # *observation[\"Thrust-1\"][\"rel_props\"][\"r_DC_N\"], color=\"tab:blue\", marker=\"x\"\n", + "# # )\n", + "# ax.set_aspect(\"equal\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/pyproject.toml b/pyproject.toml index 8d4b2989..2eda19ff 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,8 +28,8 @@ dependencies = [ ] [project.optional-dependencies] -docs = ["ipykernel", "ipywidgets", "nbdime", "nbsphinx", "sphinx-rtd-theme"] -rllib = ["dm_tree", "pyarrow", "ray[rllib]", "scikit-image", "torch", "typer"] +docs = ["ipykernel", "ipywidgets", "nbdime", "nbsphinx", "sphinx-rtd-theme", 'sphinxcontrib-youtube'] +rllib = ["dm_tree", "pyarrow", "ray[rllib]==2.35.0", "scikit-image", "torch", "typer"] [project.scripts] finish_install = "bsk_rl.finish_install:pck_install" diff --git 
a/src/bsk_rl/act/__init__.py b/src/bsk_rl/act/__init__.py index 6e49bec7..1240ec60 100644 --- a/src/bsk_rl/act/__init__.py +++ b/src/bsk_rl/act/__init__.py @@ -38,9 +38,25 @@ class MyActionSatellite(Satellite): +----------------------------+---------+-------------------------------------------------------------------------------------------------------+ | :class:`Scan` | 1 | Scan nadir, collecting data when pointing within a threshold. | +----------------------------+---------+-------------------------------------------------------------------------------------------------------+ + +Continuous Actions +------------------- + +Use :class:`ContinuousAction` for actions with a continuous action space. Currently, satellites +can only have a single continuous action in their ``action_spec``. + ++----------------------------+-------------+-------------------------------------------------------------------------------------------------------+ +| **Action** |**Dimension**| **Description** | ++----------------------------+-------------+-------------------------------------------------------------------------------------------------------+ +| :class:`MagicThrust` | 4 | Instantaneously change the satellite's velocity, and drift for some duration. 
| ++----------------------------+-------------+-------------------------------------------------------------------------------------------------------+ + + + """ from bsk_rl.act.actions import Action +from bsk_rl.act.continuous_action import ContinuousAction, MagicThrust from bsk_rl.act.discrete_actions import ( Charge, Desat, @@ -63,4 +79,6 @@ class MyActionSatellite(Satellite): "Downlink", "Image", "Scan", + "ContinuousAction", + "MagicThrust", ] diff --git a/src/bsk_rl/act/actions.py b/src/bsk_rl/act/actions.py index 8fd245c4..7ca5d73a 100644 --- a/src/bsk_rl/act/actions.py +++ b/src/bsk_rl/act/actions.py @@ -30,7 +30,6 @@ def select_action_builder(satellite: "Satellite") -> "ActionBuilder": class ActionBuilder(ABC): - def __init__(self, satellite: "Satellite") -> None: """Base class for all action builders. diff --git a/src/bsk_rl/act/continuous_action.py b/src/bsk_rl/act/continuous_action.py new file mode 100644 index 00000000..74d1af6a --- /dev/null +++ b/src/bsk_rl/act/continuous_action.py @@ -0,0 +1,133 @@ +"""Continuous actions set satellite behavior based on some continuous value.""" + +import logging +from abc import abstractmethod +from typing import TYPE_CHECKING, Optional + +import numpy as np +from gymnasium import spaces + +from bsk_rl.act.actions import Action, ActionBuilder + +if TYPE_CHECKING: # pragma: no cover + from bsk_rl.sats import Satellite + from bsk_rl.scene.targets import Target + +logger = logging.getLogger(__name__) + + +class ContinuousActionBuilder(ActionBuilder): + def __init__(self, satellite: "Satellite") -> None: + """Processes actions for a continuous action space. + + Args: + satellite: Satellite to create actions for. + """ + self.action_spec: list[ContinuousAction] + super().__init__(satellite) + assert len(self.action_spec) == 1, "Only one continuous action is supported." 
+ + @property + def _action(self) -> "ContinuousAction": + return self.action_spec[0] + + @property + def action_space(self) -> spaces.Box: + """Continuous action space.""" + return self._action.space + + @property + def action_description(self) -> list[str]: + """Return a human-readable description of the continuous action space.""" + return self._action.action_description() + + def set_action(self, action: np.ndarray) -> None: + """Activate the action by setting the continuous value.""" + self._action.set_action(action) + + +class ContinuousAction(Action): + builder_type = ContinuousActionBuilder + + def __init__(self, name: str = "discrete_act") -> None: + """Base class for discrete, integer-indexable actions. + + Args: + name: Name of the action. + """ + super().__init__(name=name) + + @property + @abstractmethod + def space(self) -> spaces.Box: + """Return the action space.""" + pass + + @property + @abstractmethod + def action_description(self) -> list[str]: + """Return a description of the action space.""" + pass + + @abstractmethod + def set_action(self, action: np.ndarray) -> None: + """Activate an action by a continuous value.""" + pass + + +class MagicThrust(ContinuousAction): + # TODO set the fsw mode to carry out after action + def __init__( + self, + name: str = "thrust_act", + max_dv: float = float("inf"), + max_duration: float = float("inf"), + fsw_action: Optional[str] = None, + ) -> None: + """Instantaneously change the satellite's velocity, and drift for some duration. + + TODO: Support specifying frame of thrust. + + Args: + name: Name of the action. + max_dv: Maximum delta-V that can be applied. 
[m/s] + """ + super().__init__(name) + self.max_dv = max_dv + self.max_duration = max_duration + self.fsw_action = fsw_action + + @property + def space(self) -> spaces.Box: + """Return the action space.""" + return spaces.Box( + low=np.array( + [-self.max_dv, -self.max_dv, -self.max_dv, self.simulator.sim_rate] + ), + high=np.array([self.max_dv, self.max_dv, self.max_dv, self.max_duration]), + shape=(4,), + dtype=np.float32, + ) + + @property + def action_description(self) -> list[str]: + """Description of the continuous action space.""" + return ["dV_N_x", "dV_N_y", "dV_N_z", "duration"] + + def set_action(self, action: np.ndarray) -> None: + """Thrust the satellite with a given inertial delta-V and drift for some duration.""" + assert len(action) == 4, "Action must have 4 elements." + dv_N = action[0:3] + dt = action[3] + + self.satellite.log_info( + f"Thrusting with inertial dV {dv_N} with {dt} second drift." + ) + self.satellite.fsw.action_magic_thrust(dv_N) + self.satellite.update_timed_terminal_event( + self.satellite.simulator.sim_time + dt + ) + + # Activate the FSW action for the drift period + getattr(self.satellite.fsw, self.fsw_action)() + self.satellite.log_info(f"FSW action {self.fsw_action} activated.") diff --git a/src/bsk_rl/data/__init__.py b/src/bsk_rl/data/__init__.py index b86d2ab5..0195e6fd 100644 --- a/src/bsk_rl/data/__init__.py +++ b/src/bsk_rl/data/__init__.py @@ -74,11 +74,26 @@ ... ) +Multiple reward systems can be added to the environment by instead passing an iterable of +reward systems to the ``data`` field of the environment constructor: + +.. code-block:: python + + env = ConstellationTasking( + ..., + data=(ScanningTimeReward(), SomeOtherReward()), + ... + ) + +On the backend, this creates a :class:`~bsk_rl.data.composition.ComposedDataStore` that +handles the combination of multiple reward systems. 
""" from bsk_rl.data.base import GlobalReward +from bsk_rl.data.fuel_penalty import FuelPenalty from bsk_rl.data.nadir_data import ScanningTimeReward from bsk_rl.data.no_data import NoReward +from bsk_rl.data.rso_data import RSOInspectionReward from bsk_rl.data.unique_image_data import UniqueImageReward __doc_title__ = "Data & Reward" @@ -87,4 +102,6 @@ "NoReward", "UniqueImageReward", "ScanningTimeReward", + "RSOInspectionReward", + "FuelPenalty", ] diff --git a/src/bsk_rl/data/base.py b/src/bsk_rl/data/base.py index d9e9d609..d4986b0f 100644 --- a/src/bsk_rl/data/base.py +++ b/src/bsk_rl/data/base.py @@ -193,7 +193,7 @@ def reward(self, new_data_dict: dict[str, Data]) -> dict[str, float]: self.data += new_data nonzero_reward = {k: v for k, v in reward.items() if v != 0} - logger.info(f"Data reward: {nonzero_reward}") + logger.info(f"Total reward: {nonzero_reward}") return reward
diff --git a/src/bsk_rl/data/composition.py b/src/bsk_rl/data/composition.py
new file mode 100644
index 00000000..4f59bb73
--- /dev/null
+++ b/src/bsk_rl/data/composition.py
@@ -0,0 +1,254 @@
+"""Data composition classes."""
+
+import logging
+from typing import TYPE_CHECKING, Optional
+
+from bsk_rl.data.base import Data, DataStore, GlobalReward
+from bsk_rl.sats import Satellite
+from bsk_rl.scene.scenario import Scenario
+
+if TYPE_CHECKING:
+    from bsk_rl.sats import Satellite
+
+logger = logging.getLogger(__name__)
+
+
+class ComposedData(Data):
+    """Data for composed data types."""
+
+    def __init__(self, *data: Data) -> None:
+        """Data for composed data types.
+
+        Args:
+            data: Data types to compose.
+        """
+        self.data = data
+
+    def __add__(self, other: "ComposedData") -> "ComposedData":
+        """Combine two units of composed data.
+
+        Args:
+            other: Another unit of composed data to combine with this one.
+
+        Returns:
+            Combined unit of composed data.
+        """
+        # An empty operand (no sub-data) acts as an identity: each element of the
+        # other operand is added to a default-constructed instance of its own type.
+        if len(self.data) == 0 and len(other.data) == 0:
+            data = []
+        elif len(self.data) == 0:
+            data = [type(d)() + d for d in other.data]
+        elif len(other.data) == 0:
+            data = [d + type(d)() for d in self.data]
+        elif len(self.data) == len(other.data):
+            data = [d1 + d2 for d1, d2 in zip(self.data, other.data)]
+        else:
+            raise ValueError(
+                "ComposedData units must have the same number of data types."
+            )
+        return ComposedData(*data)
+
+    def __getattr__(self, name: str):
+        """Search for an attribute in the datas."""
+        # __getattr__ only runs after normal lookup fails. If ``data`` itself is
+        # missing (e.g. on an instance created without running __init__, as copy
+        # and pickle do), reading ``self.data`` below would recurse without bound.
+        if name == "data":
+            raise AttributeError(name)
+        for data in self.data:
+            if hasattr(data, name):
+                return getattr(data, name)
+        raise AttributeError(f"No Data in ComposedData has attribute '{name}'")
+
+
+class ComposedDataStore(DataStore):
+    data_type = ComposedData
+
+    def pass_data(self) -> None:
+        """Pass data to the sub-datastores.
+
+        :meta private:
+        """
+        for ds, data in zip(self.datastores, self.data.data):
+            ds.data = data
+
+    def __init__(
+        self,
+        satellite: "Satellite",
+        *datastore_types: type[DataStore],
+        initial_data: Optional[ComposedData] = None,
+    ):
+        """DataStore for composed data types.
+
+        Args:
+            satellite: Satellite which data is being stored for.
+            datastore_types: DataStore types to compose.
+            initial_data: Initial data to start the store with. Usually comes from
+                :class:`~bsk_rl.data.GlobalReward.initial_data`.
+        """
+        self.data: ComposedData
+        super().__init__(satellite, initial_data)
+        self.datastores = tuple(ds(satellite) for ds in datastore_types)
+        self.pass_data()
+
+    def __getattr__(self, name: str):
+        """Search for an attribute in the datastores."""
+        # Avoid unbounded recursion if ``datastores`` has not been assigned yet.
+        if name == "datastores":
+            raise AttributeError(name)
+        for datastore in self.datastores:
+            if hasattr(datastore, name):
+                return getattr(datastore, name)
+        raise AttributeError(
+            f"No DataStore in ComposedDataStore has attribute '{name}'"
+        )
+
+    def get_log_state(self) -> list:
+        """Pull information used in determining current data contribution."""
+        return [ds.get_log_state() for ds in self.datastores]
+
+    def compare_log_states(self, prev_state: list, new_state: list) -> Data:
+        """Generate a unit of composed data based on previous step and current step logs."""
+        data = [
+            ds.compare_log_states(prev, new)
+            for ds, prev, new in zip(self.datastores, prev_state, new_state)
+        ]
+        return ComposedData(*data)
+
+    def update_from_logs(self) -> Data:
+        """Update the data store based on collected information."""
+        new_data = super().update_from_logs()
+        self.pass_data()
+        return new_data
+
+    def update_with_communicated_data(self) -> None:
+        """Update the data store based on collected information from other satellites."""
+        super().update_with_communicated_data()
+        self.pass_data()
+
+
+class ComposedReward(GlobalReward):
+    datastore_type = ComposedDataStore
+
+    def pass_data(self) -> None:
+        """Pass data to the sub-rewarders.
+
+        :meta private:
+        """
+        for rewarder, data in zip(self.rewarders, self.data.data):
+            rewarder.data = data
+
+    def __init__(self, *rewarders: GlobalReward) -> None:
+        """Rewarder for composed data types.
+
+        This type can be automatically constructed by passing a tuple of rewarders to
+        the environment constructor's ``data`` argument.
+
+        Args:
+            rewarders: Global rewarders to compose.
+        """
+        super().__init__()
+        self.rewarders = rewarders
+
+    def __getattr__(self, name: str):
+        """Search for an attribute in the rewarders."""
+        # Avoid unbounded recursion if ``rewarders`` has not been assigned yet.
+        if name == "rewarders":
+            raise AttributeError(name)
+        for rewarder in self.rewarders:
+            if hasattr(rewarder, name):
+                return getattr(rewarder, name)
+        raise AttributeError(
+            f"No GlobalReward in ComposedReward has attribute '{name}'"
+        )
+
+    def reset_pre_sim_init(self) -> None:
+        """Handle resetting for all rewarders."""
+        super().reset_pre_sim_init()
+        for rewarder in self.rewarders:
+            rewarder.reset_pre_sim_init()
+
+    def reset_during_sim_init(self) -> None:
+        """Handle resetting for all rewarders."""
+        super().reset_during_sim_init()
+        for rewarder in self.rewarders:
+            rewarder.reset_during_sim_init()
+
+    def reset_post_sim_init(self) -> None:
+        """Handle resetting for all rewarders."""
+        super().reset_post_sim_init()
+        for rewarder in self.rewarders:
+            rewarder.reset_post_sim_init()
+
+    def reset_overwrite_previous(self) -> None:
+        """Handle resetting for all rewarders."""
+        super().reset_overwrite_previous()
+        for rewarder in self.rewarders:
+            rewarder.reset_overwrite_previous()
+
+    def link_scenario(self, scenario: Scenario) -> None:
+        """Link the rewarder to the scenario."""
+        super().link_scenario(scenario)
+        for rewarder in self.rewarders:
+            rewarder.link_scenario(scenario)
+
+    def initial_data(self, satellite: Satellite) -> ComposedData:
+        """Furnish the datastore with :class:`ComposedData`."""
+        return ComposedData(
+            *[rewarder.initial_data(satellite) for rewarder in self.rewarders]
+        )
+
+    def create_data_store(self, satellite: Satellite) -> None:
+        """Create a :class:`ComposedDataStore` for a satellite."""
+        # TODO support passing kwargs
+        satellite.data_store = ComposedDataStore(
+            satellite,
+            *[r.datastore_type for r in self.rewarders],
+            initial_data=self.initial_data(satellite),
+        )
+        self.cum_reward[satellite.name] = 0.0
+        for rewarder in self.rewarders:
+            rewarder.cum_reward[satellite.name] = 0.0
+
+    def calculate_reward(
+        self, new_data_dict: dict[str, ComposedData]
+    ) -> dict[str, float]:
+        """Calculate reward for each data type and combine them."""
+        # Nothing to combine when no data was reported; taking the first value
+        # below would otherwise fail on an empty mapping.
+        if not new_data_dict:
+            return {}
+
+        data_len = len(next(iter(new_data_dict.values())).data)
+
+        for data in new_data_dict.values():
+            assert len(data.data) == data_len
+
+        reward = {}
+        # A length of zero means the ComposedData units hold no sub-data to reward.
+        if data_len != 0:
+            for i, rewarder in enumerate(self.rewarders):
+                reward_i = rewarder.calculate_reward(
+                    {sat_id: data.data[i] for sat_id, data in new_data_dict.items()}
+                )
+
+                # Logging
+                nonzero_reward = {k: v for k, v in reward_i.items() if v != 0}
+                if len(nonzero_reward) > 0:
+                    logger.info(f"{type(rewarder).__name__} reward: {nonzero_reward}")
+
+                for sat_id, sat_reward in reward_i.items():
+                    reward[sat_id] = reward.get(sat_id, 0.0) + sat_reward
+                    rewarder.cum_reward[sat_id] += sat_reward
+        return reward
+
+    def reward(self, new_data_dict: dict[str, ComposedData]) -> dict[str, float]:
+        """Return combined reward calculation and update data."""
+        reward = super().reward(new_data_dict)
+        self.pass_data()
+        return reward
+
+
+__doc_title__ = "Data Composition"
+__all__ = ["ComposedReward", "ComposedDataStore", "ComposedData"]
diff --git a/src/bsk_rl/data/fuel_penalty.py b/src/bsk_rl/data/fuel_penalty.py
new file mode 100644
index 00000000..ad04b193
--- /dev/null
+++ b/src/bsk_rl/data/fuel_penalty.py
@@ -0,0 +1,78 @@
+"""Data system for penalizing fuel usage."""
+
+import logging
+from typing import TYPE_CHECKING, Callable, Optional
+
+import numpy as np
+
+from bsk_rl.data.base import Data, DataStore, GlobalReward
+
+if TYPE_CHECKING:
+    from bsk_rl.sats import Satellite
+    from bsk_rl.scene.targets import Target
+
+logger = logging.getLogger(__name__)
+
+
+class FuelData(Data):
+    """Data for fuel usage."""
+
+    def __init__(self, fuel_used: float = 0.0) -> None:
+        """Construct fuel data.
+
+        Args:
+            fuel_used: Amount of fuel used.
+        """
+        self.fuel_used = fuel_used
+
+    def __add__(self, other: "FuelData") -> "FuelData":
+        """Combine two units of fuel data.
+
+        Args:
+            other: Another unit of fuel data to combine with this one.
class FuelDataStore(DataStore):
    """DataStore that turns changes in available delta-V into :class:`FuelData`."""

    data_type = FuelData

    def __init__(self, *args, **kwargs):
        """Forward all arguments unchanged to :class:`DataStore`."""
        super().__init__(*args, **kwargs)

    def get_log_state(self) -> float:
        """Return the satellite's ``fsw.dv_available``, or 0.0 if it is unavailable."""
        try:
            remaining = self.satellite.fsw.dv_available  # TODO update for other fuel models
        except AttributeError:
            # Any missing link in satellite -> fsw -> dv_available lands here.
            return 0.0
        return remaining

    def compare_log_states(self, prev_state: np.ndarray, new_state: np.ndarray) -> Data:
        """Build the fuel data for one step.

        Args:
            prev_state: Value of :meth:`get_log_state` at the previous step.
            new_state: Value of :meth:`get_log_state` at the current step.

        Returns:
            :class:`FuelData` holding ``prev_state - new_state``.
        """
        return FuelData(prev_state - new_state)


class FuelPenalty(GlobalReward):
    """Global penalty for fuel usage."""

    datastore_type = FuelDataStore

    def __init__(self, penalty_weight: float = 1.0) -> None:
        """Construct fuel penalty.

        Args:
            penalty_weight: Scaling factor to apply to fuel penalty.
        """
        super().__init__()
        self.penalty_weight = penalty_weight

    def calculate_reward(self, new_data_dict: dict[str, FuelData]) -> dict[str, float]:
        """Return, per satellite, the negated fuel used scaled by ``penalty_weight``."""
        penalty = {}
        for sat_name, data in new_data_dict.items():
            penalty[sat_name] = -data.fuel_used * self.penalty_weight
        return penalty
class RSOInspectionDataStore(DataStore):
    """DataStore recording which RSO points a satellite inspected each step.

    A satellite whose ``dyn_type`` subclasses :class:`RSOImagingDynModel` takes
    the ``OBSERVER`` role; any other satellite takes the ``RSO`` role and never
    produces inspection data.
    """

    data_type = RSOInspectionData

    def __init__(self, *args, **kwargs):
        """Create the data store and determine the satellite's role."""
        super().__init__(*args, **kwargs)
        self.point_access_recorders = []
        self.storage_recorder = None

        if issubclass(self.satellite.dyn_type, RSOImagingDynModel):
            self.role = OBSERVER
        else:
            self.role = RSO

    def set_storage_recorder(self, recorder):
        """Register the storage-level recorder and add it to the dynamics task."""
        self.storage_recorder = recorder
        self.satellite.simulator.AddModelToTask(
            self.satellite.dynamics.task_name, recorder, ModelPriority=1000
        )

    def add_point_access_recorder(self, recorder):
        """Register an access recorder for one RSO point and add it to the dynamics task."""
        self.point_access_recorders.append(recorder)
        self.satellite.simulator.AddModelToTask(
            self.satellite.dynamics.task_name, recorder, ModelPriority=1000
        )

    def clear_recorders(self):
        """Clear the storage recorder (if set) and all point access recorders."""
        # Explicit None check: whether a recorder was registered should not
        # depend on the recorder object's own truthiness.
        if self.storage_recorder is not None:
            self.storage_recorder.clear()
        for recorder in self.point_access_recorders:
            recorder.clear()

    def get_log_state(self) -> Optional[list[list[bool]]]:
        """Log the storage unit state and point access state for all times in the step.

        Returns:
            ``None`` for a satellite in the RSO role. Otherwise one list per
            point access recorder, in registration order, whose entries are
            true where the storage level changed and the point had access.
            The recorders are cleared before returning.
        """
        if self.role == RSO:
            return None

        log_len = len(self.storage_recorder.storageLevel)
        if log_len <= 1:
            imaging_req = np.zeros(log_len)
        else:
            # A non-zero change in storage level marks a sample where data was
            # being written; repeat the last difference so the array is as
            # long as the log again.
            imaging_req = np.diff(self.storage_recorder.storageLevel)
            imaging_req = np.concatenate((imaging_req, [imaging_req[-1]]))

        inspected_logs = []
        for recorder in self.point_access_recorders:
            inspected = np.logical_and(imaging_req, recorder.hasAccess)
            inspected_logs.append(list(np.array(inspected)))

        self.clear_recorders()

        return inspected_logs

    def compare_log_states(self, _, inspected_logs) -> Data:
        """Build the inspection data for the step from the current log state.

        Args:
            _: Previous log state; unused because the recorders are cleared
                every time the log state is read.
            inspected_logs: Output of :meth:`get_log_state`.

        Returns:
            :class:`RSOInspectionData` containing only the points inspected
            this step (empty for a satellite in the RSO role).
        """
        if self.role == RSO:
            return RSOInspectionData()

        # Logs are matched to points by position: recorder order is paired
        # with the key order of the stored point_inspect_status.
        point_inspect_status = {
            rso_point: True
            for rso_point, log in zip(
                self.data.point_inspect_status.keys(), inspected_logs
            )
            if any(log)
        }

        self.update_point_colors(list(point_inspect_status))

        if len(point_inspect_status) > 0:
            self.satellite.logger.info(
                f"Inspected {len(point_inspect_status)} points this step"
            )

        return RSOInspectionData(point_inspect_status)

    @vizard.visualize
    def update_point_colors(self, rso_points, vizInstance=None, vizSupport=None):
        """Update target colors in Vizard."""
        # Build the name lookup once instead of once per location.
        inspected_names = {str(point) for point in rso_points}
        for location in vizInstance.locations:
            if location.stationName in inspected_names:
                location.color = vizSupport.toRGBA255("tab:green", alpha=0.5)
def calculate_reward(self, new_data_dict: dict[str, Data]) -> dict[str, float]:
    """Reward satellites for newly inspected points and award the completion bonus.

    Each satellite with inspection data receives
    ``new_points / total_points * inspection_reward_scale``, where a point is
    new if it is not yet marked inspected in this rewarder's data. When the
    previously inspected points plus the distinct points newly inspected this
    step cover every scenario point, ``completion_bonus`` is added once for
    every satellite in ``cum_reward``.

    Args:
        new_data_dict: New inspection data, keyed by satellite.

    Returns:
        Reward for each satellite that reported data or received the bonus.
    """
    total_points = len(self.scenario.rso_points)
    already_inspected = self.data.point_inspect_status
    reward = {}
    # Track distinct points, not a running count: a point first inspected by
    # two satellites in the same step must count once toward completion.
    newly_inspected = set()
    for satellite_id, data in new_data_dict.items():
        if len(data.point_inspect_status) == 0:
            continue

        new_points = [
            point
            for point, access in data.point_inspect_status.items()
            if access and not already_inspected.get(point, False)
        ]

        if new_points:
            logger.info(f"{satellite_id} inspected {len(new_points)} new points.")

        reward[satellite_id] = (
            len(new_points) / total_points * self.inspection_reward_scale
        )
        newly_inspected.update(new_points)

    if (
        not self.bonus_reward_yielded
        and sum(already_inspected.values()) + len(newly_inspected) == total_points
    ):
        logger.info("All points inspected! Awarding completion bonus.")
        for satellite_id in self.cum_reward:
            reward[satellite_id] = (
                reward.get(satellite_id, 0.0) + self.completion_bonus
            )
        self.bonus_reward_yielded = True

    return reward
Optional[Scenario] = None, - rewarder: Optional[GlobalReward] = None, + rewarder: Optional[Union[GlobalReward, list[GlobalReward]]] = None, world_type: Optional[type[WorldModel]] = None, world_args: Optional[dict[str, Any]] = None, communicator: Optional[CommunicationMethod] = None, @@ -49,6 +50,8 @@ def __init__( generate_obs_retasking_only: bool = False, log_level: Union[int, str] = logging.WARNING, log_dir: Optional[str] = None, + vizard_dir: Optional[str] = None, + vizard_settings: Optional[dict[str, Any]] = None, render_mode=None, ) -> None: """A `Gymnasium `_ environment adaptable to a wide range satellite tasking problems. @@ -68,7 +71,8 @@ def __init__( scenario: Environment the satellite is acting in; contains information about targets, etc. See :ref:`bsk_rl.scene`. rewarder: Handles recording and rewarding for data collection towards - objectives. See :ref:`bsk_rl.data`. + objectives. Can be a single rewarder or a tuple of multiple rewarders. + See :ref:`bsk_rl.data`. communicator: Manages communication between satellites. See :ref:`bsk_rl.comm`. sat_arg_randomizer: For correlated randomization of satellites arguments. Should be a function that takes a list of satellites and returns a dictionary that @@ -91,10 +95,22 @@ def __init__( zeros. log_level: Logging level for the environment. Default is ``WARNING``. log_dir: Directory to write logs to in addition to the console. + vizard_dir: Path to save Vizard visualization files. If None, no Vizard-related + modules will be imported. + vizard_settings: Settings for Vizard visualization. Set in ``vizIstance.settings``. + Additionally, the key ``vizard_rate`` can be set to the rate at which Vizard updates. + Valid setting can be found `here `_. render_mode: Unused. """ self.seed = None self._configure_logging(log_level, log_dir) + if vizard_dir is not None: + vizard.VIZARD_PATH = vizard_dir + if vizard_settings is not None: + logger.warning( + "Vizard settings provided but Vizard is not enabled. 
Ignoring settings." + ) + self.vizard_settings = vizard_settings if vizard_settings is not None else {} if isinstance(satellites, Satellite): satellites = [satellites] @@ -125,8 +141,6 @@ def __init__( if scenario is None: scenario = Scenario() - if rewarder is None: - rewarder = NoReward() if world_type is None: world_type = self._minimum_world_model() @@ -137,7 +151,16 @@ def __init__( self.scenario = deepcopy(scenario) self.scenario.link_satellites(self.satellites) - self.rewarder = deepcopy(rewarder) + + rewarder = deepcopy(rewarder) + if rewarder is None: + rewarder = NoReward() + if ( + isinstance(rewarder, Iterable) + and not type(rewarder).__name__ == "MagicMock" + ): + rewarder = ComposedReward(*rewarder) + self.rewarder = rewarder self.rewarder.link_scenario(self.scenario) if communicator is None: @@ -250,8 +273,9 @@ def reset( self.scenario.reset_overwrite_previous() self.rewarder.reset_overwrite_previous() self.communicator.reset_overwrite_previous() - for satellite in self.satellites: + for i, satellite in enumerate(self.satellites): satellite.reset_overwrite_previous() + satellite.create_vizard_data(color=vizard.get_color(i)) self.latest_step_duration = 0.0 self._generate_world_args() @@ -280,6 +304,13 @@ def reset( max_step_duration=self.max_step_duration, time_limit=self.time_limit, ) + self.simulator.setup_vizard(**self.vizard_settings) + + self.scenario.reset_during_sim_init() + self.rewarder.reset_during_sim_init() + self.communicator.reset_during_sim_init() + + self.simulator.finish_init() self.scenario.reset_post_sim_init() self.rewarder.reset_post_sim_init() @@ -389,7 +420,10 @@ def _step(self, actions: MultiSatAct) -> None: raise ValueError("There must be the same number of actions and satellites") for satellite, action in zip(self.satellites, actions): satellite.info = [] # reset satellite info log - if action is not None and action != NO_ACTION: + if action is not None and ( + not isinstance(action, int) + or action != NO_ACTION # TODO 
improve for non-discrete actions + ): satellite.requires_retasking = False satellite.set_action(action) if not satellite.is_alive(): diff --git a/src/bsk_rl/obs/__init__.py b/src/bsk_rl/obs/__init__.py index cf2dc75d..98bb5594 100644 --- a/src/bsk_rl/obs/__init__.py +++ b/src/bsk_rl/obs/__init__.py @@ -40,6 +40,14 @@ class MyObservationSatellite(Satellite): SatProperties, Time, ) +from bsk_rl.obs.relative_observations import RelativeProperties __doc_title__ = "Observations" -__all__ = ["Observation", "SatProperties", "Time", "OpportunityProperties", "Eclipse"] +__all__ = [ + "Observation", + "SatProperties", + "Time", + "OpportunityProperties", + "Eclipse", + "RelativeProperties", +] diff --git a/src/bsk_rl/obs/observations.py b/src/bsk_rl/obs/observations.py index 43a48256..1417dfb0 100644 --- a/src/bsk_rl/obs/observations.py +++ b/src/bsk_rl/obs/observations.py @@ -48,7 +48,6 @@ def nested_obs_to_space(obs_dict): class ObservationBuilder: - def __init__(self, satellite: "Satellite", obs_type: type = np.ndarray) -> None: """Satellite subclass for composing observations. 
def r_DC_N(deputy, chief):
    """Relative position of the deputy satellite to the chief satellite in inertial frame."""
    return np.array(deputy.dynamics.r_BN_N) - np.array(chief.dynamics.r_BN_N)


def r_DC_C(deputy, chief):
    """Relative position of the deputy satellite to the chief satellite in chief body frame.

    Raises:
        NotImplementedError: Always; this property is not implemented yet.
    """
    # Fail loudly instead of returning None, which would only surface later
    # as an opaque TypeError when the observation is normalized.
    raise NotImplementedError("r_DC_C is not implemented yet")


def r_DC_D(deputy, chief):
    """Relative position of the deputy satellite to the chief satellite in deputy body frame.

    Raises:
        NotImplementedError: Always; this property is not implemented yet.
    """
    raise NotImplementedError("r_DC_D is not implemented yet")


def r_DC_Hc(deputy, chief):
    """Relative position of the deputy satellite to the chief satellite in chief Hill frame."""
    HcN = chief.dynamics.HN
    return HcN @ r_DC_N(deputy, chief)


def r_DC_Hd(deputy, chief):
    """Relative position of the deputy satellite to the chief satellite in deputy Hill frame.

    Raises:
        NotImplementedError: Always; this property is not implemented yet.
    """
    raise NotImplementedError("r_DC_Hd is not implemented yet")


def v_DC_N(deputy, chief):
    """Relative velocity of the deputy satellite to the chief satellite in inertial frame."""
    return np.array(deputy.dynamics.v_BN_N) - np.array(chief.dynamics.v_BN_N)


def v_DC_C(deputy, chief):
    """Relative velocity of the deputy satellite to the chief satellite in chief body frame.

    Raises:
        NotImplementedError: Always; this property is not implemented yet.
    """
    raise NotImplementedError("v_DC_C is not implemented yet")


def v_DC_D(deputy, chief):
    """Relative velocity of the deputy satellite to the chief satellite in deputy body frame.

    Raises:
        NotImplementedError: Always; this property is not implemented yet.
    """
    raise NotImplementedError("v_DC_D is not implemented yet")
class RelativeProperties(Observation):
    """Add arbitrary properties relative to some other satellite."""

    def __init__(
        self, *rel_properties: dict[str, Any], chief_name: str, name="rel_props"
    ) -> None:
        """Include properties relative to another satellite.

        .. code-block:: python

            obs.RelativeProperties(
                dict(prop="r_DC_N", norm=1e3),
                dict(prop="v_DC_N", norm=1e3),
                chief_name="ChiefSat",
            ),

        Args:
            rel_properties: Property specifications. Properties are optionally
                normalized by some factor. Each observation is a dictionary with the keys:

                * ``prop``: Name of function in :class:`~bsk_rl.obs.relative_observations`.
                * ``norm`` `optional`: Value to normalize property by. Defaults to 1.0.
                * ``name`` `optional`: Name of the observation element. Defaults to the value of
                  ``prop``, with ``_normd`` appended when ``norm`` is not 1.0.
                * ``fn`` `optional`: Alternatively, call a function that takes the deputy (self) and chief (other)
                  as arguments.
            chief_name: Name of the satellite to compare against.
            name: Name of the observation.

        Raises:
            ValueError: If a specification has an unknown key, names a ``prop``
                that is not defined in this module without giving ``fn``, or
                lacks the keys needed to determine its function or name.
        """
        super().__init__(name=name)

        resolved = []
        for rel_property in rel_properties:
            for key in rel_property:
                if key not in ["prop", "norm", "name", "fn"]:
                    raise ValueError(f"Invalid property key: {key}")

            # Fill in defaults on a copy so the caller's dictionaries are not
            # modified as a side effect of constructing the observation.
            spec = dict(rel_property)
            spec.setdefault("norm", 1.0)

            if "fn" not in spec:
                if "prop" not in spec:
                    raise ValueError("Property must specify either `prop` or `fn`.")
                try:
                    spec["fn"] = globals()[spec["prop"]]
                except KeyError:
                    raise ValueError(
                        f"Property prop={spec['prop']} is not predefined and no `fn` was provided."
                    ) from None

            if "name" not in spec:
                if "prop" not in spec:
                    raise ValueError(
                        "Property defined only by `fn` must also provide a `name`."
                    )
                spec["name"] = spec["prop"]
                # NOTE(review): suffix applied only to defaulted names, on the
                # assumption that explicit names are used verbatim - confirm.
                if spec["norm"] != 1.0:
                    spec["name"] += "_normd"

            resolved.append(spec)

        self.rel_properties = tuple(resolved)
        self.chief_name = chief_name

    def reset_post_sim_init(self) -> None:
        """Connect to the chief satellite.

        :meta private:
        """
        for sat in self.satellite.simulator.satellites:
            if sat.name == self.chief_name:
                self.chief = sat
                return
        raise ValueError(f"Chief satellite {self.chief_name} not found")

    def get_obs(self) -> dict[str, Any]:
        """Return the observation.

        :meta private:
        """
        obs = {}
        for rel_property in self.rel_properties:
            value = rel_property["fn"](self.satellite, self.chief)
            if isinstance(value, list):
                # Lists do not support element-wise division.
                value = np.array(value)
            obs[rel_property["name"]] = value / rel_property["norm"]
        return obs
+ ) + self.target_line = vizSupport.targetLineList[-1] + self.target_line.toBodyName = target.name + vizSupport.updateTargetLineList(vizInstance) + + @vizard.visualize + def remove_imaging_line(self, vizSupport=None, vizInstance=None): + """Remove the imaging line from Vizard.""" + if hasattr(self, "target_line"): + self.target_line.toBodyName = self.name + vizSupport.updateTargetLineList(vizInstance) diff --git a/src/bsk_rl/sats/satellite.py b/src/bsk_rl/sats/satellite.py index ea482d11..cc31d577 100644 --- a/src/bsk_rl/sats/satellite.py +++ b/src/bsk_rl/sats/satellite.py @@ -14,6 +14,7 @@ from bsk_rl.act.actions import select_action_builder from bsk_rl.obs.observations import ObservationBuilder from bsk_rl.sim import dyn, fsw +from bsk_rl.utils import vizard from bsk_rl.utils.functional import ( AbstractClassProperty, Resetable, @@ -123,7 +124,7 @@ def generate_sat_args(self, **kwargs) -> None: for k, v in kwargs.items(): if k not in self.sat_args: raise KeyError(f"{k} not a valid key for sat_args") - if self.sat_args[k] != v: + if np.any(self.sat_args[k] != v): self.logger.debug( f"Overwriting {k}={self.sat_args[k]} in sat_args with {v}" ) @@ -138,6 +139,14 @@ def reset_overwrite_previous(self) -> None: self._is_alive = True self.time_of_death = None + @vizard.visualize + def create_vizard_data(self, color, vizSupport=None) -> None: + """Create a location to store data to be passed to enableUnityVisualization.""" + self.vizard_color = color + self.vizard_data = dict( + spriteList=vizSupport.setSprite("SQUARE", color=color), + ) + def reset_pre_sim_init(self) -> None: """Called during environment reset, before Basilisk simulation initialization.""" self.trajectory = TrajectorySimulator( diff --git a/src/bsk_rl/scene/__init__.py b/src/bsk_rl/scene/__init__.py index 47bb17d2..e0d8e5ad 100644 --- a/src/bsk_rl/scene/__init__.py +++ b/src/bsk_rl/scene/__init__.py @@ -9,6 +9,9 @@ """ from bsk_rl.scene.scenario import Scenario, UniformNadirScanning + +pass # Other 
@dataclass(eq=False)
class RSOPoint:
    """A single inspection point on an RSO.

    Instances compare and hash by identity, so they can be used as dictionary
    keys. Field-wise equality is disabled: the generated ``__eq__`` would
    compare numpy arrays, which raises ``ValueError``, and would be
    inconsistent with the identity-based hash.
    """

    # Field meanings are inferred from the names and from how the points are
    # generated and visualized in this file - confirm against the dynamics model.
    r_PB_B: np.ndarray  # position of the point
    n_B: np.ndarray  # direction associated with the point (a unit vector as generated here)
    theta_min: float  # angle limit; passed to Vizard as the field of view
    range: float  # range limit; passed to Vizard as the location range

    def __hash__(self) -> int:
        """Hash the point by object identity."""
        return hash(id(self))

    def __str__(self) -> str:
        """Return the point's label, built from its position vector."""
        return f"RSOPoint_{self.r_PB_B}"
class FibonacciSphereRSOPoints(RSOPoints):
    """RSO points spread over a sphere using a Fibonacci (golden angle) lattice."""

    def __init__(
        self,
        n_points: int = 100,
        radius: float = 1.0,
        theta_min: float = np.radians(45),
        range: float = -1,
        # incidence_min: float = np.radians(60),  # TODO handle
    ):
        """Configure the point distribution.

        Args:
            n_points: Number of points to generate.
            radius: Radius of the sphere the points are placed on.
            theta_min: Value stored as ``theta_min`` on every point; the
                default is 45 degrees expressed in radians.
            range: Value stored as ``range`` on every point.
        """
        self.n_points = n_points
        self.radius = radius
        self.theta_min = theta_min
        self.range = range
        # self.incidence_min = incidence_min

    def generate_points(self) -> list[RSOPoint]:
        """Generate ``n_points`` points on a sphere of radius ``radius``.

        Returns:
            One :class:`RSOPoint` per lattice site, located at the unit
            direction scaled by ``radius`` and with that unit direction as its
            normal. Empty if ``n_points`` is 0.
        """
        if self.n_points == 0:
            # The lattice spacing below divides by n_points.
            return []

        # https://gist.github.com/Seanmatthews/a51ac697db1a4f58a6bca7996d75f68c
        golden_angle = (3 - np.sqrt(5)) * np.pi
        theta = golden_angle * np.arange(self.n_points)
        z = np.linspace(1 / self.n_points - 1, 1 - 1 / self.n_points, self.n_points)
        ring_radius = np.sqrt(1 - z * z)
        directions = np.column_stack(
            (ring_radius * np.cos(theta), ring_radius * np.sin(theta), z)
        )

        # Give every point its own arrays rather than views into the shared
        # directions matrix.
        return [
            RSOPoint(
                direction * self.radius,
                direction.copy(),
                self.theta_min,
                self.range,
            )
            for direction in directions
        ]
@property
def HN(self):
    """Rotation matrix of the Hill frame relative to the inertial frame.

    Built by ``rv2HN`` from the body's current inertial position and velocity.
    """
    position_N = self.r_BN_N
    velocity_N = self.v_BN_N
    return rv2HN(position_N, velocity_N)
class ConjunctionDynModel(BasicDynamicsModel):
    """Dynamics model that records close approaches with other satellites."""

    def __init__(self, *args, **kwargs) -> None:
        """Initialize the base dynamics and start with no recorded conjunctions."""
        super().__init__(*args, **kwargs)
        self.conjunctions = []

    def _setup_dynamics_objects(self, **kwargs) -> None:
        # Base dynamics objects first, then the pairwise conjunction events.
        super()._setup_dynamics_objects(**kwargs)
        self.setup_conjunctions(**kwargs)

    @aliveness_checker
    def conjunction_valid(self) -> bool:
        """Check that no conjunction has been recorded."""
        return not self.conjunctions

    @default_args(conjunction_radius=10)
    def setup_conjunctions(self, conjunction_radius: float, **kwargs) -> None:
        """Register a terminal event against every other conjunction-aware satellite.

        The event fires when the distance between the two satellites is at most
        the sum of their conjunction radii; both satellites then log the collision
        and append each other to their ``conjunctions`` lists.

        Args:
            conjunction_radius: [m] Minimum distance for a conjunction.
            kwargs: Passed to other setup functions.
        """
        self.conjunction_radius = conjunction_radius

        counterparts = [
            other
            for other in self.simulator.dynamics_list.values()
            if other != self and isinstance(other, ConjunctionDynModel)
        ]
        for other in counterparts:
            own_sat = self.satellite
            other_sat = other.satellite
            event_name = valid_func_name(
                f"conjunction_{own_sat.name}_{other_sat.name}"
            )
            event_rate = macros.sec2nano(self.simulator.sim_rate)

            # The condition and actions are source strings handed to the simulator.
            own_cmd = own_sat._satellite_command
            other_cmd = other_sat._satellite_command
            separation = (
                f"np.linalg.norm(np.array({own_cmd}.dynamics.r_BN_N)"
                f" - np.array({other_cmd}.dynamics.r_BN_N))"
            )
            threshold = (
                f"{own_cmd}.dynamics.conjunction_radius"
                f" + {other_cmd}.dynamics.conjunction_radius"
            )
            actions = [
                own_sat._info_command(f"collided with {other_sat.name}"),
                other_sat._info_command(f"collided with {own_sat.name}"),
                f"{own_cmd}.dynamics.conjunctions.append({other_cmd})",
                f"{other_cmd}.dynamics.conjunctions.append({own_cmd})",
            ]

            self.simulator.createNewEvent(
                event_name,
                event_rate,
                True,
                [f"{separation} <= {threshold}"],
                actions,
                terminal=True,
            )


class RSODynModel(BasicDynamicsModel):
    """Dynamics for an RSO that carries body-fixed inspection points."""

    def __init__(self, *args, **kwargs) -> None:
        """Allow for body fixed inspection points to be added to a spacecraft.

        Works with :class:`~bsk_rl.sats.RSOImagingSatellite`.
        """
        super().__init__(*args, **kwargs)

    def _setup_dynamics_objects(self, **kwargs) -> None:
        # Inspection-point models run in their own process/task at the dynamics rate.
        super()._setup_dynamics_objects(**kwargs)

        self.rso_dyn_proc = self.simulator.CreateNewProcess("RSODynProcess", 1)
        self.rso_task_name = "RSODynTask"
        rso_task = self.simulator.CreateNewTask(
            self.rso_task_name, macros.sec2nano(self.dyn_rate)
        )
        self.rso_dyn_proc.addTask(rso_task)

        self.rso_points = []

    def add_rso_point(self, r_LB_B, aHat_B, theta, range):
        """Create a ``SpacecraftLocation`` model for one inspection point.

        The model is wired to this spacecraft's state and to the planet, sun and
        eclipse messages, added to the RSO task, and remembered in
        ``self.rso_points``.

        Args:
            r_LB_B: Assigned to the model's ``r_LB_B``.
            aHat_B: Assigned to the model's ``aHat_B``.
            theta: Assigned to the model's ``theta``.
            range: Assigned to the model's ``maximumRange``.

        Returns:
            The newly created location model.
        """
        location = spacecraftLocation.SpacecraftLocation()
        location.primaryScStateInMsg.subscribeTo(self.scObject.scStateOutMsg)

        location.planetInMsg.subscribeTo(
            self.world.gravFactory.spiceObject.planetStateOutMsgs[self.world.body_index]
        )
        equatorial_radius = self.simulator.world.planet.radEquator
        location.rEquator = equatorial_radius
        location.rPolar = equatorial_radius * 0.98

        location.sunInMsg.subscribeTo(
            self.world.gravFactory.spiceObject.planetStateOutMsgs[self.world.sun_index]
        )
        location.eclipseInMsg.subscribeTo(
            self.world.eclipseObject.eclipseOutMsgs[self.eclipse_index]
        )

        location.r_LB_B = r_LB_B
        location.aHat_B = aHat_B
        location.theta = theta
        location.theta_solar = 0.9 * np.pi / 2
        location.shadow_factor_limit = 0.1
        location.maximumRange = range
        self.simulator.AddModelToTask(self.rso_task_name, location, ModelPriority=1)

        self.rso_points.append(location)
        return location


class RSOImagingDynModel(ContinuousImagingDynModel):
    """Dynamics for a satellite that observes points on an RSO."""

    def __init__(self, *args, **kwargs) -> None:
        """Allow a satellite to observe points in a RSO.

        Works with :class:`~bsk_rl.sats.RSOImagingSatellite`.
        """
        super().__init__(*args, **kwargs)

    def add_rso_point(self, rso_point_model):
        """Register this spacecraft's state message with an inspection-point model."""
        observer_state_msg = self.scObject.scStateOutMsg
        rso_point_model.addSpacecraftToModel(observer_state_msg)
class MagicOrbitalManeuverFSWModel(BasicFSWModel):
    """FSW model whose maneuvers change the velocity state instantaneously."""

    def __init__(self, *args, **kwargs) -> None:
        """Initialize the base FSW, the fuel budget, and the thrust counter."""
        super().__init__(*args, **kwargs)
        self.setup_fuel(**kwargs)
        self.thrust_count = 0

    @property
    def dv_available(self):
        """Remaining Delta-V budget of the satellite."""
        return self._dv_available

    @aliveness_checker
    def fuel_remaining(self) -> bool:
        """Check that the Delta-V budget is still positive."""
        return self.dv_available > 0

    @default_args(dv_available_init=100.0)
    def setup_fuel(self, dv_available_init: float, **kwargs):
        """Set the initial Delta-V budget.

        # TODO: may adjust names for consistency with modelled fuel take in future.

        Args:
            dv_available_init: [m/s] Initial fuel level.
            kwargs: Passed to other setup functions.
        """
        self._dv_available = dv_available_init

    @action
    def action_magic_thrust(self, dv_N: np.ndarray) -> None:
        """Add an inertial Delta V directly to the hub velocity state.

        A maneuver larger than the remaining budget is still applied; a warning is
        logged and the budget goes negative.

        Args:
            dv_N: [m/s] Inertial Delta V.
        """
        dv_magnitude = np.linalg.norm(dv_N)
        if dv_magnitude > self.dv_available:
            self.satellite.logger.warning(
                f"Maneuver exceeds available Delta V ({dv_magnitude}/{self.dv_available} m/s)."
            )

        self._dv_available -= dv_magnitude

        spacecraft = self.dynamics.scObject
        velocity_state = spacecraft.dynManager.getStateObject(
            spacecraft.hub.nameOfHubVelocity
        )
        velocity_state.setState(list(np.array(self.dynamics.v_BN_N) + np.array(dv_N)))

        self.thrust_count += 1


class RSOImagingFSWModel(ContinuousImagingFSWModel):
    """FSW model for pointing an instrument at an RSO and inspecting it."""

    def set_target_rso(self, rso: "Satellite") -> None:
        """Subscribe the pointing module's target input to the RSO's navigation output."""
        rso_nav = rso.dynamics.simpleNavObject
        self.locPoint.scTargetInMsg.subscribeTo(rso_nav.transOutMsg)

    class LocPointTask(ContinuousImagingFSWModel.LocPointTask):
        """Task to point at the RSO and trigger the instrument."""

        def __init__(self, *args, **kwargs) -> None:
            """Task to point at the RSO and trigger the instrument."""
            super().__init__(*args, **kwargs)

        @default_args(inst_pHat_B=[0, 0, 1])  # TODO
        def setup_location_pointing(
            self, inst_pHat_B: Iterable[float], **kwargs
        ) -> None:
            """Configure the location pointing guidance module for an RSO target.

            ``set_target_rso`` must be called externally to connect the RSO to the pointing module.

            Args:
                inst_pHat_B: Instrument pointing direction.
                kwargs: Passed to other setup functions.
            """
            pointing = self.locPoint
            own_nav = self.fsw.dynamics.simpleNavObject

            pointing.pHat_B = inst_pHat_B
            pointing.scAttInMsg.subscribeTo(own_nav.attOutMsg)
            pointing.scTransInMsg.subscribeTo(
                self.fsw.dynamics.simpleNavObject.transOutMsg
            )
            pointing.useBoresightRateDamping = 1
            messaging.AttGuidMsg_C_addAuthor(
                pointing.attGuidOutMsg, self.fsw.attGuidMsg
            )

            self._add_model_to_task(pointing, priority=1198)

        @default_args(imageAttErrorRequirement=0.01, imageRateErrorRequirement=None)
        def setup_instrument_controller(  # TODO
            self,
            imageAttErrorRequirement: float,
            imageRateErrorRequirement: float,
            **kwargs,
        ) -> None:
            """Configure the instrument controller tolerances for scanning.

            The controller is fed a constant access message with ``hasAccess`` set,
            so only the pointing tolerances gate the instrument.

            Args:
                imageAttErrorRequirement: [MRP norm] Pointing attitude error tolerance
                    for imaging.
                imageRateErrorRequirement: [rad/s] Rate tolerance for imaging. Disable
                    with None.
                kwargs: Passed to other setup functions.
            """
            controller = self.insControl
            controller.attErrTolerance = imageAttErrorRequirement
            rate_check_enabled = imageRateErrorRequirement is not None
            if rate_check_enabled:
                controller.useRateTolerance = 1
                controller.rateErrTolerance = imageRateErrorRequirement
            controller.attGuidInMsg.subscribeTo(self.fsw.attGuidMsg)

            # Only use this module to check for pointing requirements
            always_accessible = messaging.AccessMsgPayload()
            self.access_msg = messaging.AccessMsg()
            always_accessible.hasAccess = 1
            self.access_msg.write(always_accessible)
            controller.accessInMsg.subscribeTo(self.access_msg)

            self._add_model_to_task(controller, priority=987)

    @action
    def action_inspect_rso(self) -> None:
        """Enable the instrument and the pointing task to inspect the RSO."""
        instrument = self.dynamics.instrument
        instrument.nodeStatusInMsg.subscribeTo(self.insControl.deviceCmdOutMsg)
        self.insControl.controllerStatus = 1
        self.dynamics.instrumentPowerSink.powerStatus = 1
        instrument.nodeDataName = "inspect_rso"
        pointing_task_name = self.LocPointTask.name + self.satellite.name
        self.simulator.enableTask(pointing_task_name)
@vizard.visualize
def setup_vizard(self, vizard_rate=None, vizSupport=None, **vizard_settings):
    """Create the Vizard process/task and enable Unity visualization output.

    Args:
        vizard_rate: Task time step in seconds; the simulator rate is used if None.
        vizSupport: Supplied by the ``vizard.visualize`` decorator.
        vizard_settings: Attribute name/value pairs set on the viz instance settings.
    """
    output_dir = Path(vizard.VIZARD_PATH)
    if not output_dir.exists():
        output_dir.mkdir(parents=True, exist_ok=True)

    viz_process = self.CreateNewProcess("VizProcess", priority=400)

    # Task name and time step for the visualization task
    task_name = "viz_task_name"
    task_step = self.sim_rate if vizard_rate is None else vizard_rate
    viz_process.addTask(self.CreateNewTask(task_name, mc.sec2nano(task_step)))

    # One entry per satellite (None where a satellite provides nothing).
    per_satellite_lists = {
        customizer: [sat.vizard_data.get(customizer, None) for sat in self.satellites]
        for customizer in ("spriteList", "genericSensorList")
    }
    self.vizInstance = vizSupport.enableUnityVisualization(
        self,
        task_name,
        scList=[sat.dynamics.scObject for sat in self.satellites],
        **per_satellite_lists,
        saveFile=output_dir / f"viz_{time()}",
    )
    for setting_name, setting_value in vizard_settings.items():
        setattr(self.vizInstance.settings, setting_name, setting_value)
    vizard.VIZINSTANCE = self.vizInstance


@vizard.visualize
def set_vizard_epoch(self, vizInstance=None):
    """Subscribe the viz instance's epoch input to the world's epoch message."""
    subscribe_epoch = vizInstance.epochInMsg.subscribeTo
    subscribe_epoch(self.world.gravFactory.epochMsg)
def random_unit_vector() -> np.ndarray:
    """Generate a random unit vector.

    Three standard-normal components are drawn from numpy's global random state
    and the result is normalized.

    Returns:
        Random unit vector
    """
    vec = np.random.randn(3)
    return vec / np.linalg.norm(vec)


def relative_to_chief(
    chief_name: str,
    chief_orbit: "Union[ClassicElements, Callable]",
    deputy_relative_state: dict[str, Union[np.ndarray, Callable]],
):
    """Build a function that sets deputy orbits relative to a chief satellite.

    Args:
        chief_name: Name of the chief satellite.
        chief_orbit: Orbital elements of the chief, or a zero-argument callable
            returning them.
        deputy_relative_state: Maps deputy satellite names to a six-element
            Hill-frame state (relative position followed by its Hill-frame
            derivative), or to a zero-argument callable returning one.

    Returns:
        A function taking the satellites and returning ``{satellite: {"oe": ...}}``
        for the chief and every listed deputy; other satellites map to ``{}``.
    """

    def relative_to_chief_arg_setup(satellites):
        args = {sat: {} for sat in satellites}

        def find_satellite(name):
            for satellite in satellites:
                if satellite.name == name:
                    return satellite
            # Same exception type as the former ``[...][0]`` lookup, with context.
            raise IndexError(
                f"No satellite named {name!r} among the provided satellites."
            )

        chief = find_satellite(chief_name)
        mu = chief.sat_args_generator["mu"]

        if isinstance(chief_orbit, ClassicElements):
            chief_oe = chief_orbit
        else:
            chief_oe = chief_orbit()
        args[chief]["oe"] = chief_oe

        rc_N, vc_N = elem2rv(mu, chief_oe)

        for deputy_name, relative_state in deputy_relative_state.items():
            deputy = find_satellite(deputy_name)

            # Any non-callable sequence (list, tuple, ndarray) is used as given.
            if callable(relative_state):
                relative_state = relative_state()

            rho_H = relative_state[0:3]
            rho_deriv_H = relative_state[3:6]
            rd_N, vd_N = hill2cd(rc_N, vc_N, rho_H, rho_deriv_H)
            args[deputy]["oe"] = rv2elem(mu, rd_N, vd_N)

        return args

    return relative_to_chief_arg_setup


def cd2hill(
    rc_N: np.ndarray, vc_N: np.ndarray, rd_N: np.ndarray, vd_N: np.ndarray
) -> Tuple[np.ndarray, np.ndarray]:
    """Convert chief and deputy inertial states to a Hill-frame relative state.

    The Hill frame is the one returned by ``rv2HN`` for the chief state.

    Args:
        rc_N: Chief inertial position.
        vc_N: Chief inertial velocity.
        rd_N: Deputy inertial position.
        vd_N: Deputy inertial velocity.

    Returns:
        Tuple ``(rho_H, rho_deriv_H)``: the relative position in the Hill frame and
        its derivative as seen in the Hill frame.
    """
    # Accept any array-like input; the subtractions below need arrays.
    rc_N, vc_N, rd_N, vd_N = (
        np.asarray(vector, dtype=float) for vector in (rc_N, vc_N, rd_N, vd_N)
    )

    h_N = np.cross(rc_N, vc_N)
    o_h_N = h_N / np.linalg.norm(h_N)
    ON = rv2HN(rc_N, vc_N)

    # Frame angular velocity: rate |h| / |r|^2 about the orbit normal.
    f_dot = np.linalg.norm(h_N) / np.linalg.norm(rc_N) ** 2
    omega_ON_N = f_dot * o_h_N

    rho_H = ON @ (rd_N - rc_N)
    # Remove the frame rotation term to get the derivative seen in the Hill frame.
    rho_deriv_H = ON @ (vd_N - vc_N) - np.cross(ON @ omega_ON_N, rho_H)

    return rho_H, rho_deriv_H
def hill2cd(
    rc_N: np.ndarray, vc_N: np.ndarray, rho_H: np.ndarray, rho_deriv_H: np.ndarray
) -> Tuple[np.ndarray, np.ndarray]:
    """Convert a Hill-frame relative state to the deputy's inertial state.

    The Hill frame is the one returned by ``rv2HN`` for the chief state.

    Args:
        rc_N: Chief inertial position.
        vc_N: Chief inertial velocity.
        rho_H: Deputy position relative to the chief, in the Hill frame.
        rho_deriv_H: Derivative of ``rho_H`` as seen in the Hill frame.

    Returns:
        Tuple ``(rd_N, vd_N)`` of the deputy inertial position and velocity.
    """
    momentum_N = np.cross(rc_N, vc_N)
    momentum_norm = np.linalg.norm(momentum_N)
    orbit_normal_N = momentum_N / momentum_norm
    HN = rv2HN(rc_N, vc_N)

    # Frame angular velocity: rate |h| / |r|^2 about the orbit normal.
    rotation_rate = momentum_norm / np.linalg.norm(rc_N) ** 2
    omega_HN_N = rotation_rate * orbit_normal_N

    NH = HN.T
    frame_rotation_term_H = np.cross(HN @ omega_HN_N, rho_H)
    deputy_position_N = rc_N + NH @ rho_H
    deputy_velocity_N = vc_N + NH @ (rho_deriv_H + frame_rotation_term_H)

    return deputy_position_N, deputy_velocity_N
:class:`CondenseMultiStepActions`). """ -from typing import Any, List, Optional +from typing import Any, List, Literal, Optional import numpy as np from ray.rllib.algorithms.ppo.ppo_learner import PPOLearner @@ -116,7 +116,7 @@ def __call__( ): last_action = NO_ACTION for action in reversed(episode.actions): - if last_action == NO_ACTION: + if isinstance(action, int) and last_action == NO_ACTION: last_action = action else: break @@ -233,12 +233,15 @@ def compute_value_targets_time_discounted( step_durations, gamma: float, lambda_: float, + reward_time: Literal["step_start", "step_end"] = "step_end", ): """Computes value function (vf) targets given vf predictions and rewards. Note that advantages can then easily be computed via the formula: advantages = targets - vf_predictions """ + assert reward_time in ["step_start", "step_end"] + # Shift step durations to associate with previous timestep # delta_t->t+1 comes with t+1's info, but should be used with t step_durations = np.concatenate((step_durations[1:], [step_durations[-1]])) @@ -248,8 +251,14 @@ def compute_value_targets_time_discounted( flat_values = np.append(flat_values, 0.0) # intermediates = rewards + gamma * (1 - lambda_) * flat_values[1:] - # intermediates = rewards + gamma**step_durations * (1 - lambda_) * flat_values[1:] - intermediates = gamma**step_durations * (rewards + (1 - lambda_) * flat_values[1:]) + if reward_time == "step_start": + intermediates = ( + rewards + gamma**step_durations * (1 - lambda_) * flat_values[1:] + ) + elif reward_time == "step_end": + intermediates = gamma**step_durations * ( + rewards + (1 - lambda_) * flat_values[1:] + ) continues = 1.0 - terminateds Rs = [] @@ -333,6 +342,7 @@ def _compute_gae_from_episodes( ) # Compute value targets. 
"""Utilities for Vizard visualization."""

import inspect
import logging
from functools import wraps

logger = logging.getLogger(__name__)

# Visualization is disabled while VIZARD_PATH is None. Both values are read at
# call time, so other modules may assign them after decoration.
VIZARD_PATH = None
VIZINSTANCE = None


def visualize(func):
    """Decorator for functions that enable Vizard.

    The wrapped function becomes a no-op returning ``None`` when ``VIZARD_PATH`` is
    unset or when ``vizSupport.vizFound`` is false. Otherwise, any of the parameters
    ``vizInstance``, ``vizSupport`` and ``vizInterface`` that ``func`` declares are
    filled in before it is called.
    """
    # The signature never changes, so inspect it once instead of on every call.
    declared = inspect.signature(func).parameters
    wants_instance = "vizInstance" in declared
    wants_support = "vizSupport" in declared
    wants_interface = "vizInterface" in declared

    @wraps(func)
    def wrapper(*args, **kwargs):
        if VIZARD_PATH is None:
            return None

        # Imported lazily so Basilisk's viz modules load only when enabled.
        from Basilisk.simulation import vizInterface
        from Basilisk.utilities import vizSupport

        if not vizSupport.vizFound:
            logger.warning("Vizard not found, disabling visualization")
            return None

        if wants_instance:
            kwargs["vizInstance"] = VIZINSTANCE
        if wants_support:
            kwargs["vizSupport"] = vizSupport
        if wants_interface:
            kwargs["vizInterface"] = vizInterface

        return func(*args, **kwargs)

    return wrapper


@visualize
def get_color(index):
    """Return a Tableau color name for ``index``, cycling through the palette.

    Because of ``visualize``, this returns ``None`` when visualization is disabled.
    """
    from matplotlib.colors import TABLEAU_COLORS

    color_names = list(TABLEAU_COLORS)
    return color_names[index % len(color_names)]


__doc_title__ = "Vizard"
__all__ = ["visualize", "VIZARD_PATH"]
logger = logging.getLogger(__name__)


def sanitize_nan(value, replace_with=0, warn=True):
    """Replace NaN values with a given value.

    Arrays are modified in place and returned; dicts have their values replaced in
    place; tuples and lists are rebuilt. Values of any other type are returned
    unchanged, except NaN float scalars, which are replaced.

    Args:
        value: Array, list, tuple, dict, scalar, or a nesting of these.
        replace_with: Value substituted for each NaN.
        warn: Log a warning whenever a NaN is replaced, at every nesting level.

    Returns:
        The sanitized value.
    """
    if isinstance(value, np.ndarray):
        # Only float/complex arrays can hold NaN; np.isnan rejects str/object arrays.
        if value.dtype.kind in "fc":
            nan_mask = np.isnan(value)
            if nan_mask.any():
                if warn:
                    logger.warning(
                        f"Replacing NaN values in array with {replace_with}. Array: {value}"
                    )
                value[nan_mask] = replace_with
        return value

    if isinstance(value, list):
        try:
            as_array = np.array(value)
        except ValueError:
            # Ragged nesting cannot form a regular array; handle items one by one.
            as_array = None
        if as_array is None or as_array.dtype.kind not in "biufc":
            return [sanitize_nan(item, replace_with, warn) for item in value]
        return sanitize_nan(as_array, replace_with, warn).tolist()

    if isinstance(value, dict):
        for key, item in value.items():
            value[key] = sanitize_nan(item, replace_with, warn)
        return value

    if isinstance(value, tuple):
        return tuple(sanitize_nan(item, replace_with, warn) for item in value)

    if isinstance(value, (float, np.floating)) and np.isnan(value):
        if warn:
            logger.warning(f"Replacing NaN value with {replace_with}.")
        return replace_with

    return value
def _make_datastore(log_state=None):
    """Return ``(datastore, datastore_type)`` mocks; calling the type yields the datastore.

    When ``log_state`` is given, the datastore's ``get_log_state`` returns it.
    """
    if log_state is None:
        datastore = MagicMock()
    else:
        datastore = MagicMock(get_log_state=MagicMock(return_value=log_state))
    return datastore, MagicMock(return_value=datastore)


class TestComposedData:
    """Addition and attribute lookup on ``ComposedData``."""

    @pytest.mark.parametrize(
        "data1, data2, expected",
        [
            ((1, 2), (3, 4), (4, 6)),
            ((), (3, 4), (3, 4)),
            ((1, 2), (), (1, 2)),
            ((), (), ()),
        ],
    )
    def test_add(self, data1, data2, expected):
        # An empty operand yields the other operand's data.
        total = ComposedData(*data1) + ComposedData(*data2)
        assert total.data == expected

    def test_add_different_lengths(self):
        left = ComposedData(1, 2)
        right = ComposedData(3)
        with pytest.raises(ValueError):
            left + right

    def test_getattr(self):
        # Deleting an attribute from a MagicMock makes access raise AttributeError.
        first = MagicMock()
        first.a = 1
        del first.b
        del first.c
        second = MagicMock()
        second.b = 2
        del second.a
        del second.c
        composed = ComposedData(first, second)
        assert composed.a == 1
        assert composed.b == 2
        with pytest.raises(AttributeError):
            _ = composed.c


class TestComposedDataStore:
    """Delegation from ``ComposedDataStore`` to its member datastores."""

    def test_pass_data(self):
        ds1, ds1_type = _make_datastore()
        ds2, ds2_type = _make_datastore()
        store = ComposedDataStore(MagicMock(), ds1_type, ds2_type)
        store.data = ComposedData(1, 2)
        store.pass_data()
        assert ds1.data == 1
        assert ds2.data == 2

    def test_getattr(self):
        ds1, ds1_type = _make_datastore()
        _, ds2_type = _make_datastore()
        store = ComposedDataStore(MagicMock(), ds1_type, ds2_type)
        ds1.a = 1
        assert store.a == 1

    def test_get_log_state(self):
        ds1, ds1_type = _make_datastore(log_state=1)
        ds2, ds2_type = _make_datastore(log_state=2)
        store = ComposedDataStore(MagicMock(), ds1_type, ds2_type)

        log_states = store.get_log_state()
        ds1.get_log_state.assert_called_once()
        ds2.get_log_state.assert_called_once()
        assert log_states == [1, 2]

    def test_compare_log_states(self):
        ds1, ds1_type = _make_datastore(log_state=1)
        ds2, ds2_type = _make_datastore(log_state=2)
        store = ComposedDataStore(MagicMock(), ds1_type, ds2_type)

        store.compare_log_states([1, 2], [3, 4])
        ds1.compare_log_states.assert_called_once_with(1, 3)
        ds2.compare_log_states.assert_called_once_with(2, 4)


class TestComposedReward:
    """Delegation from ``ComposedReward`` to its member rewarders."""

    def test_pass_data(self):
        first, second = MagicMock(), MagicMock()
        composed = ComposedReward(first, second)
        composed.data = ComposedData(1, 2)
        composed.pass_data()
        assert first.data == 1
        assert second.data == 2

    @pytest.mark.parametrize(
        "function",
        [
            "reset_pre_sim_init",
            "reset_during_sim_init",
            "reset_post_sim_init",
            "reset_overwrite_previous",
        ],
    )
    def test_resetable(self, function):
        members = (MagicMock(), MagicMock())
        composed = ComposedReward(*members)
        getattr(composed, function)()
        for member in members:
            getattr(member, function).assert_called_once()

    def test_initial_data(self):
        first = MagicMock(initial_data=MagicMock(return_value=1))
        second = MagicMock(initial_data=MagicMock(return_value=2))
        composed = ComposedReward(first, second)
        assert composed.initial_data("sat").data == (1, 2)

    def test_data_store(self):
        sat = MagicMock()
        ds1, ds1_type = _make_datastore(log_state=1)
        ds2, ds2_type = _make_datastore(log_state=2)
        first = MagicMock(datastore_type=ds1_type)
        second = MagicMock(datastore_type=ds2_type)
        composed = ComposedReward(first, second)
        composed.create_data_store(sat)
        assert sat.data_store.datastores == (ds1, ds2)

    def test_calculate_reward(self):
        first = MagicMock(
            calculate_reward=MagicMock(return_value={"sat1": 1, "sat2": 2})
        )
        second = MagicMock(
            calculate_reward=MagicMock(return_value={"sat1": 3, "sat2": 4})
        )
        composed = ComposedReward(first, second)

        new_data = {
            "sat1": MagicMock(data=["d11", "d21"]),
            "sat2": MagicMock(data=["d12", "d22"]),
        }
        reward = composed.calculate_reward(new_data)

        # Per-satellite rewards from both members are summed.
        assert reward == {"sat1": 4, "sat2": 6}