diff --git a/.gitignore b/.gitignore index 44bbb13..d7c5768 100644 --- a/.gitignore +++ b/.gitignore @@ -159,3 +159,5 @@ cython_debug/ # option (not recommended) you can uncomment the following to ignore the entire idea folder. .idea/ .vscode/ +sandbox/ +*.mp4 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 89ad4b3..b0f0e9b 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -27,7 +27,7 @@ repos: args: - '--per-file-ignores=*/__init__.py:F401' - --ignore=E203,W503,E741 - - --max-complexity=30 + - --max-complexity=45 - --max-line-length=456 - --show-source - --statistics diff --git a/pyproject.toml b/pyproject.toml index 71a6292..fe1987f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,6 +28,7 @@ dependencies = [ "mujoco-py<2.2,>=2.1", "numpy>=1.21.0", "gymnasium>=0.26", + "moviepy>=1.0.3", ] dynamic = ["version"] diff --git a/rrls/__init__.py b/rrls/__init__.py index 2829cfe..0a7f310 100644 --- a/rrls/__init__.py +++ b/rrls/__init__.py @@ -57,6 +57,42 @@ def register_robotics_envs(): order_enforce=False, disable_env_checker=True, ) + register( + id="rrls/force-ant-v0", + entry_point="rrls.envs.ant:ForceAnt", + order_enforce=False, + disable_env_checker=True, + ) + register( + id="rrls/force-halfcheetah-v0", + entry_point="rrls.envs.half_cheetah:ForceHalfCheetah", + order_enforce=False, + disable_env_checker=True, + ) + register( + id="rrls/force-hopper-v0", + entry_point="rrls.envs.hopper:ForceHopper", + order_enforce=False, + disable_env_checker=True, + ) + register( + id="rrls/force-humanoidstandup-v0", + entry_point="rrls.envs.humanoid:ForceHumanoidStandUp", + order_enforce=False, + disable_env_checker=True, + ) + register( + id="rrls/force-invertedpendulum-v0", + entry_point="rrls.envs.pendulum:ForceInvertedPendulum", + order_enforce=False, + disable_env_checker=True, + ) + register( + id="rrls/force-walker-v0", + entry_point="rrls.envs.walker:ForceWalker2d", + order_enforce=False, + disable_env_checker=True, + ) # 
Advserarial environments # HalfCheetah @@ -127,6 +163,73 @@ def register_robotics_envs(): "params_bound": envs.AntParamsBound.ONE_DIM.value, }, ) + register( + id="rrls/robust-ant-adversarial-forces-v0", + entry_point=make_wrapped_env, # type: ignore + order_enforce=False, + disable_env_checker=True, + kwargs={ + "cls_env": envs.ForceAnt, + "wrapper": wrappers.DynamicAdversarial, + "params_bound": envs.AntParamsBound.RARL.value, + }, + ) + register( + id="rrls/robust-halfcheetah-adversarial-forces-v0", + entry_point=make_wrapped_env, # type: ignore + order_enforce=False, + disable_env_checker=True, + kwargs={ + "cls_env": envs.ForceHalfCheetah, + "wrapper": wrappers.DynamicAdversarial, + "params_bound": envs.HalfCheetahParamsBound.RARL.value, + }, + ) + register( + id="rrls/robust-hopper-adversarial-forces-v0", + entry_point=make_wrapped_env, # type: ignore + order_enforce=False, + disable_env_checker=True, + kwargs={ + "cls_env": envs.ForceHopper, + "wrapper": wrappers.DynamicAdversarial, + "params_bound": envs.HopperParamsBound.RARL.value, + }, + ) + register( + id="rrls/robust-humanoidstandup-adversarial-forces-v0", + entry_point=make_wrapped_env, # type: ignore + order_enforce=False, + disable_env_checker=True, + kwargs={ + "cls_env": envs.ForceHumanoidStandUp, + "wrapper": wrappers.DynamicAdversarial, + "params_bound": envs.HumanoidStandupParamsBound.RARL.value, + }, + ) + register( + id="rrls/robust-invertedpendulum-adversarial-forces-v0", + entry_point=make_wrapped_env, # type: ignore + order_enforce=False, + disable_env_checker=True, + kwargs={ + "cls_env": envs.ForceInvertedPendulum, + "wrapper": wrappers.DynamicAdversarial, + "params_bound": envs.InvertedPendulumParamsBound.RARL.value, + }, + ) + register( + id="rrls/robust-walker-adversarial-forces-v0", + entry_point=make_wrapped_env, # type: ignore + order_enforce=False, + disable_env_checker=True, + kwargs={ + "cls_env": envs.ForceWalker2d, + "wrapper": wrappers.DynamicAdversarial, + "params_bound": 
envs.Walker2dParamsBound.RARL.value, + }, + ) + # Hopper register( id="rrls/robust-hopper-adversarial-3d-v0", diff --git a/rrls/envs/__init__.py b/rrls/envs/__init__.py index c910c33..844f10c 100644 --- a/rrls/envs/__init__.py +++ b/rrls/envs/__init__.py @@ -1,11 +1,19 @@ from __future__ import annotations -from .ant import AntParamsBound, RobustAnt -from .half_cheetah import HalfCheetahParamsBound, RobustHalfCheetah -from .hopper import HopperParamsBound, RobustHopper -from .humanoid import HumanoidStandupParamsBound, RobustHumanoidStandUp -from .pendulum import InvertedPendulumParamsBound, RobustInvertedPendulum -from .walker import RobustWalker2d, Walker2dParamsBound +from .ant import AntParamsBound, ForceAnt, RobustAnt +from .half_cheetah import ForceHalfCheetah, HalfCheetahParamsBound, RobustHalfCheetah +from .hopper import ForceHopper, HopperParamsBound, RobustHopper +from .humanoid import ( + ForceHumanoidStandUp, + HumanoidStandupParamsBound, + RobustHumanoidStandUp, +) +from .pendulum import ( + ForceInvertedPendulum, + InvertedPendulumParamsBound, + RobustInvertedPendulum, +) +from .walker import ForceWalker2d, RobustWalker2d, Walker2dParamsBound __all__ = [ "AntParamsBound", @@ -20,4 +28,10 @@ "RobustHumanoidStandUp", "RobustInvertedPendulum", "RobustWalker2d", + "ForceAnt", + "ForceHalfCheetah", + "ForceHopper", + "ForceHumanoidStandUp", + "ForceInvertedPendulum", + "ForceWalker2d", ] diff --git a/rrls/envs/ant.py b/rrls/envs/ant.py index 5f37e3f..59bca0a 100644 --- a/rrls/envs/ant.py +++ b/rrls/envs/ant.py @@ -1,6 +1,7 @@ from __future__ import annotations from enum import Enum +from typing import Any import gymnasium as gym from gymnasium import Wrapper @@ -19,6 +20,14 @@ class AntParamsBound(Enum): "frontleftlegmass": [0.01, 3.0], "frontrightlegmass": [0.01, 3.0], } + RARL = { + "torsoforce_x": [-3.0, 3.0], + "torsoforce_y": [-3.0, 3.0], + "frontleftlegforce_x": [-3.0, 3.0], + "frontleftlegforce_y": [-3.0, 3.0], + "frontrightlegforce_x": [-3.0, 3.0], 
class ForceAnt(Wrapper):
    """Ant-v5 wrapper that applies external Cartesian forces to the robot bodies.

    Each force component is a parameter named ``<body>force_<axis>`` where
    ``<axis>`` is ``x``, ``y`` or ``z`` and ``<body>`` is one of::

        torso,
        frontleftleg,  frontleftlegaux,  frontleftlegankle,
        frontrightleg, frontrightlegaux, frontrightlegankle,
        backleftleg,   backleftlegaux,   backleftlegankle,
        backrightleg,  backrightlegaux,  backrightlegankle

    which gives 39 parameters (e.g. ``torsoforce_x``,
    ``backrightlegankleforce_z``). Parameters are set with :meth:`set_params`
    or through the ``options`` dict of :meth:`reset`; :meth:`_change_params`
    writes them into ``data.xfrc_applied`` of the wrapped MuJoCo environment.
    A parameter left to ``None`` means "no force on that component".
    """

    # HACK: This is a hack to avoid the following error:
    # gymnasium.error.InvalidMetadata: Expect the environment metadata to be dict
    metadata = {
        "render_modes": [
            "human",
            "rgb_array",
            "depth_array",
        ],
    }

    # Row of ``data.xfrc_applied`` written for each body part.
    _BODY_ROWS = {
        "torso": 1,
        "frontleftleg": 2,
        "frontleftlegaux": 3,
        "frontleftlegankle": 4,
        "frontrightleg": 5,
        "frontrightlegaux": 6,
        "frontrightlegankle": 7,
        "backleftleg": 8,
        "backleftlegaux": 9,
        "backleftlegankle": 10,
        "backrightleg": 11,
        "backrightlegaux": 12,
        "backrightlegankle": 13,
    }
    # Column of ``data.xfrc_applied`` written for each force axis.
    _AXIS_COLUMNS = {"x": 0, "y": 1, "z": 2}

    def __init__(self, **kwargs: Any):
        """Create the wrapped ``Ant-v5`` environment.

        Args:
            **kwargs: Forwarded unchanged to ``gym.make("Ant-v5", ...)``.
        """
        super().__init__(env=gym.make("Ant-v5", **kwargs))
        self.set_params()

    def set_params(
        self,
        torsoforce_x: float | None = None,
        torsoforce_y: float | None = None,
        torsoforce_z: float | None = None,
        frontleftlegforce_x: float | None = None,
        frontleftlegforce_y: float | None = None,
        frontleftlegforce_z: float | None = None,
        frontleftlegauxforce_x: float | None = None,
        frontleftlegauxforce_y: float | None = None,
        frontleftlegauxforce_z: float | None = None,
        frontleftlegankleforce_x: float | None = None,
        frontleftlegankleforce_y: float | None = None,
        frontleftlegankleforce_z: float | None = None,
        frontrightlegforce_x: float | None = None,
        frontrightlegforce_y: float | None = None,
        frontrightlegforce_z: float | None = None,
        frontrightlegauxforce_x: float | None = None,
        frontrightlegauxforce_y: float | None = None,
        frontrightlegauxforce_z: float | None = None,
        frontrightlegankleforce_x: float | None = None,
        frontrightlegankleforce_y: float | None = None,
        frontrightlegankleforce_z: float | None = None,
        backleftlegforce_x: float | None = None,
        backleftlegforce_y: float | None = None,
        backleftlegforce_z: float | None = None,
        backleftlegauxforce_x: float | None = None,
        backleftlegauxforce_y: float | None = None,
        backleftlegauxforce_z: float | None = None,
        backleftlegankleforce_x: float | None = None,
        backleftlegankleforce_y: float | None = None,
        backleftlegankleforce_z: float | None = None,
        backrightlegforce_x: float | None = None,
        backrightlegforce_y: float | None = None,
        backrightlegforce_z: float | None = None,
        backrightlegauxforce_x: float | None = None,
        backrightlegauxforce_y: float | None = None,
        backrightlegauxforce_z: float | None = None,
        backrightlegankleforce_x: float | None = None,
        backrightlegankleforce_y: float | None = None,
        backrightlegankleforce_z: float | None = None,
    ) -> None:
        """Replace the whole set of applied forces.

        Every parameter is stored as an attribute of the same name and then
        pushed to the simulation. A component that is not passed is reset to
        ``None`` (no force); it does not keep its previous value.
        """
        # Must stay the first statement: at this point ``locals()`` holds only
        # ``self`` and the keyword arguments of this call.
        requested = locals()
        for body in self._BODY_ROWS:
            for axis in self._AXIS_COLUMNS:
                name = f"{body}force_{axis}"
                setattr(self, name, requested[name])
        self._change_params()

    def get_params(self) -> dict[str, float | None]:
        """Return the current value of every force parameter, keyed by name."""
        params = {}
        for body in self._BODY_ROWS:
            for axis in self._AXIS_COLUMNS:
                name = f"{body}force_{axis}"
                params[name] = getattr(self, name)
        return params

    def _change_params(self) -> None:
        """Write the stored force parameters into ``data.xfrc_applied``.

        ``None`` is written as ``0.0`` so that a component that was set earlier
        and is now unset does not keep acting on the body; this keeps the
        simulation consistent with :meth:`get_params`.
        """
        xfrc_applied = self.unwrapped.data.xfrc_applied
        for body, row in self._BODY_ROWS.items():
            for axis, column in self._AXIS_COLUMNS.items():
                force = getattr(self, f"{body}force_{axis}")
                xfrc_applied[row, column] = 0.0 if force is None else force

    def reset(self, *, seed: int | None = None, options: dict | None = None):
        """Reset the wrapped environment and (re-)apply the force parameters.

        Args:
            seed: Forwarded to the wrapped environment's ``reset``.
            options: If given, passed as keyword arguments to
                :meth:`set_params` (and forwarded to the wrapped ``reset``).

        Returns:
            ``(obs, info)`` from the wrapped environment, with the force
            parameters merged into ``info``.
        """
        if options is not None:
            self.set_params(**options)
        obs, info = self.env.reset(seed=seed, options=options)
        # Gymnasium's MuJoCo environments reset the simulation data on reset
        # (``mj_resetData``), which clears ``xfrc_applied``. Write the forces
        # again so they are really in effect for the new episode.
        self._change_params()
        info.update(self.get_params())
        return obs, info

    def step(self, action):
        """Step the wrapped environment and add the force parameters to ``info``."""
        obs, reward, terminated, truncated, info = self.env.step(action)
        info.update(self.get_params())
        return obs, reward, terminated, truncated, info
[-3.0, 3.0], + } class RobustHalfCheetah(Wrapper): @@ -56,8 +65,9 @@ def __init__( forwardthighmass: float | None = None, forwardshinmass: float | None = None, forwardfootmass: float | None = None, + **kwargs: dict[str, Any], ): - super().__init__(env=gym.make("HalfCheetah-v5")) + super().__init__(env=gym.make("HalfCheetah-v5", **kwargs)) self.set_params( worldfriction=worldfriction, @@ -135,109 +145,170 @@ def _change_params(self): self.unwrapped.model.body_mass[7] = self.forwardfootmass -# class RobustHalfCheetah(HalfCheetahEnv): -# """ -# Robust HalfCheetah environment. You can change the parameters of the environment using options in -# the reset method or by using the set_params method. The parameters are changed by calling -# the change_params method. The parameters are: -# - worldfriction -# - torsomass -# - backthighmass -# - backshinmass -# - backfootmass -# - forwardthighmass -# - forwardshinmass -# - forwardfootmass -# """ - -# def __init__( -# self, -# worldfriction: float | None = None, -# torsomass: float | None = None, -# backthighmass: float | None = None, -# backshinmass: float | None = None, -# backfootmass: float | None = None, -# forwardthighmass: float | None = None, -# forwardshinmass: float | None = None, -# forwardfootmass: float | None = None, -# ): -# self.worldfriction = worldfriction -# self.torsomass = torsomass -# self.backthighmass = backthighmass -# super().__init__() - -# self.set_params( -# worldfriction=worldfriction, -# torsomass=torsomass, -# backthighmass=backthighmass, -# backshinmass=backshinmass, -# backfootmass=backfootmass, -# forwardthighmass=forwardthighmass, -# forwardshinmass=forwardshinmass, -# forwardfootmass=forwardfootmass, -# ) -# self._change_params() - -# def set_params( -# self, -# worldfriction: float | None = None, -# torsomass: float | None = None, -# backthighmass: float | None = None, -# backshinmass: float | None = None, -# backfootmass: float | None = None, -# forwardthighmass: float | None = None, -# 
class ForceHalfCheetah(Wrapper):
    """HalfCheetah-v5 wrapper that applies external Cartesian forces to the robot bodies.

    Each force component is a parameter named ``<body>force_<axis>`` where
    ``<axis>`` is ``x``, ``y`` or ``z`` and ``<body>`` is one of::

        torso,
        backthigh,    backshin,    backfoot,
        forwardthigh, forwardshin, forwardfoot

    which gives 21 parameters (e.g. ``torsoforce_x``, ``forwardfootforce_z``).
    Parameters are set with :meth:`set_params` or through the ``options`` dict
    of :meth:`reset`; :meth:`_change_params` writes them into
    ``data.xfrc_applied`` of the wrapped MuJoCo environment. A parameter left
    to ``None`` means "no force on that component".
    """

    # Declared explicitly, like the other Force* wrappers of this package.
    metadata = {
        "render_modes": [
            "human",
            "rgb_array",
            "depth_array",
        ],
    }

    # Row of ``data.xfrc_applied`` written for each body part.
    _BODY_ROWS = {
        "torso": 1,
        "backthigh": 2,
        "backshin": 3,
        "backfoot": 4,
        "forwardthigh": 5,
        "forwardshin": 6,
        "forwardfoot": 7,
    }
    # Column of ``data.xfrc_applied`` written for each force axis.
    _AXIS_COLUMNS = {"x": 0, "y": 1, "z": 2}

    def __init__(self, **kwargs: Any):
        """Create the wrapped ``HalfCheetah-v5`` environment.

        Args:
            **kwargs: Forwarded unchanged to ``gym.make("HalfCheetah-v5", ...)``.
        """
        super().__init__(env=gym.make("HalfCheetah-v5", **kwargs))
        # set_params() already pushes the values to the simulation, so no
        # separate _change_params() call is needed here.
        self.set_params()

    def set_params(
        self,
        torsoforce_x: float | None = None,
        torsoforce_y: float | None = None,
        torsoforce_z: float | None = None,
        backthighforce_x: float | None = None,
        backthighforce_y: float | None = None,
        backthighforce_z: float | None = None,
        backshinforce_x: float | None = None,
        backshinforce_y: float | None = None,
        backshinforce_z: float | None = None,
        backfootforce_x: float | None = None,
        backfootforce_y: float | None = None,
        backfootforce_z: float | None = None,
        forwardthighforce_x: float | None = None,
        forwardthighforce_y: float | None = None,
        forwardthighforce_z: float | None = None,
        forwardshinforce_x: float | None = None,
        forwardshinforce_y: float | None = None,
        forwardshinforce_z: float | None = None,
        forwardfootforce_x: float | None = None,
        forwardfootforce_y: float | None = None,
        forwardfootforce_z: float | None = None,
    ) -> None:
        """Replace the whole set of applied forces.

        Every parameter is stored as an attribute of the same name and then
        pushed to the simulation. A component that is not passed is reset to
        ``None`` (no force); it does not keep its previous value.
        """
        # Must stay the first statement: at this point ``locals()`` holds only
        # ``self`` and the keyword arguments of this call.
        requested = locals()
        for body in self._BODY_ROWS:
            for axis in self._AXIS_COLUMNS:
                name = f"{body}force_{axis}"
                setattr(self, name, requested[name])
        self._change_params()

    def get_params(self) -> dict[str, float | None]:
        """Return the current value of every force parameter, keyed by name."""
        params = {}
        for body in self._BODY_ROWS:
            for axis in self._AXIS_COLUMNS:
                name = f"{body}force_{axis}"
                params[name] = getattr(self, name)
        return params

    def _change_params(self) -> None:
        """Write the stored force parameters into ``data.xfrc_applied``.

        ``None`` is written as ``0.0`` so that a component that was set earlier
        and is now unset does not keep acting on the body; this keeps the
        simulation consistent with :meth:`get_params`.
        """
        xfrc_applied = self.unwrapped.data.xfrc_applied
        for body, row in self._BODY_ROWS.items():
            for axis, column in self._AXIS_COLUMNS.items():
                force = getattr(self, f"{body}force_{axis}")
                xfrc_applied[row, column] = 0.0 if force is None else force

    def reset(self, *, seed: int | None = None, options: dict | None = None):
        """Reset the wrapped environment and (re-)apply the force parameters.

        Args:
            seed: Forwarded to the wrapped environment's ``reset``.
            options: If given, passed as keyword arguments to
                :meth:`set_params` (and forwarded to the wrapped ``reset``).

        Returns:
            ``(obs, info)`` from the wrapped environment, with the force
            parameters merged into ``info``.
        """
        if options is not None:
            self.set_params(**options)
        obs, info = self.env.reset(seed=seed, options=options)
        # Gymnasium's MuJoCo environments reset the simulation data on reset
        # (``mj_resetData``), which clears ``xfrc_applied``. Write the forces
        # again so they are really in effect for the new episode.
        self._change_params()
        info.update(self.get_params())
        return obs, info

    def step(self, action):
        """Step the wrapped environment and add the force parameters to ``info``."""
        obs, reward, terminated, truncated, info = self.env.step(action)
        info.update(self.get_params())
        return obs, reward, terminated, truncated, info
The parameters are: -# - worldfriction -# - torsomass -# - thighmass -# - legmass -# - footmass -# """ - -# def __init__( -# self, -# worldfriction: float | None = None, -# torsomass: float | None = None, -# thighmass: float | None = None, -# legmass: float | None = None, -# footmass: float | None = None, -# ): -# super().__init__() - -# self.set_params( -# worldfriction=worldfriction, -# torsomass=torsomass, -# thighmass=thighmass, -# legmass=legmass, -# footmass=footmass, -# ) -# self._change_params() - -# def set_params( -# self, -# worldfriction: float | None = None, -# torsomass: float | None = None, -# thighmass: float | None = None, -# legmass: float | None = None, -# footmass: float | None = None, -# ): -# self.worldfriction = worldfriction -# self.torsomass = torsomass -# self.thighmass = thighmass -# self.legmass = legmass -# self.footmass = footmass -# self._change_params() - -# def get_params(self): -# return { -# "worldfriction": self.worldfriction, -# "torsomass": self.torsomass, -# "thighmass": self.thighmass, -# "legmass": self.legmass, -# "footmass": self.footmass, -# } - -# def reset(self, *, seed: int | None = None, options: dict | None = None): -# if options is not None: -# self.set_params(**options) -# obs, info = super().reset(seed=seed, options=options) -# info.update(self.get_params()) -# return obs, info - -# def step(self, action): -# obs, reward, terminated, truncated, info = super().step(action) -# info.update(self.get_params()) -# return obs, reward, terminated, truncated, info - -# def _change_params(self): -# if self.worldfriction is not None: -# self.model.geom_friction[0, 0] = self.worldfriction - -# if self.torsomass is not None: -# self.model.body_mass[1] = self.torsomass - -# if self.thighmass is not None: -# self.model.body_mass[2] = self.thighmass - -# if self.legmass is not None: -# self.model.body_mass[3] = self.legmass - -# if self.footmass is not None: -# self.model.body_mass[4] = self.footmass +class ForceHopper(Wrapper): 
+ """ + Force Hopper environment. You can apply forces to the environment using the set_params method. + The parameters are applied by the private _change_params method, which set_params calls. The parameters are: + - torsoforce_x + - torsoforce_y + - torsoforce_z + - thighforce_x + - thighforce_y + - thighforce_z + - legforce_x + - legforce_y + - legforce_z + - footforce_x + - footforce_y + - footforce_z + """ + + metadata = { + "render_modes": [ + "human", + "rgb_array", + "depth_array", + ], + } + + def __init__(self, **kwargs: dict[str, Any]): + super().__init__(env=gym.make("Hopper-v5", **kwargs)) + self.set_params() + + def set_params( + self, + torsoforce_x: float | None = None, + torsoforce_y: float | None = None, + torsoforce_z: float | None = None, + thighforce_x: float | None = None, + thighforce_y: float | None = None, + thighforce_z: float | None = None, + legforce_x: float | None = None, + legforce_y: float | None = None, + legforce_z: float | None = None, + footforce_x: float | None = None, + footforce_y: float | None = None, + footforce_z: float | None = None, + ): + self.torsoforce_x = torsoforce_x + self.torsoforce_y = torsoforce_y + self.torsoforce_z = torsoforce_z + self.thighforce_x = thighforce_x + self.thighforce_y = thighforce_y + self.thighforce_z = thighforce_z + self.legforce_x = legforce_x + self.legforce_y = legforce_y + self.legforce_z = legforce_z + self.footforce_x = footforce_x + self.footforce_y = footforce_y + self.footforce_z = footforce_z + self._change_params() + + def get_params(self): + return { + "torsoforce_x": self.torsoforce_x, + "torsoforce_y": self.torsoforce_y, + "torsoforce_z": self.torsoforce_z, + "thighforce_x": self.thighforce_x, + "thighforce_y": self.thighforce_y, + "thighforce_z": self.thighforce_z, + "legforce_x": self.legforce_x, + "legforce_y": self.legforce_y, + "legforce_z": self.legforce_z, + "footforce_x": self.footforce_x, + "footforce_y": self.footforce_y, + "footforce_z": self.footforce_z, + } + + def _change_params(self): + 
if self.torsoforce_x is not None: + self.unwrapped.data.xfrc_applied[1, 0] = self.torsoforce_x + + if self.torsoforce_y is not None: + self.unwrapped.data.xfrc_applied[1, 1] = self.torsoforce_y + + if self.torsoforce_z is not None: + self.unwrapped.data.xfrc_applied[1, 2] = self.torsoforce_z + + if self.thighforce_x is not None: + self.unwrapped.data.xfrc_applied[2, 0] = self.thighforce_x + + if self.thighforce_y is not None: + self.unwrapped.data.xfrc_applied[2, 1] = self.thighforce_y + + if self.thighforce_z is not None: + self.unwrapped.data.xfrc_applied[2, 2] = self.thighforce_z + + if self.legforce_x is not None: + self.unwrapped.data.xfrc_applied[3, 0] = self.legforce_x + + if self.legforce_y is not None: + self.unwrapped.data.xfrc_applied[3, 1] = self.legforce_y + + if self.legforce_z is not None: + self.unwrapped.data.xfrc_applied[3, 2] = self.legforce_z + + if self.footforce_x is not None: + self.unwrapped.data.xfrc_applied[4, 0] = self.footforce_x + + if self.footforce_y is not None: + self.unwrapped.data.xfrc_applied[4, 1] = self.footforce_y + + if self.footforce_z is not None: + self.unwrapped.data.xfrc_applied[4, 2] = self.footforce_z + + def reset(self, *, seed: int | None = None, options: dict | None = None): + obs, info = self.env.reset(seed=seed, options=options) + # The MuJoCo reset (mj_resetData) zeroes data.xfrc_applied, so forces must be (re)applied after it, not before. + self.set_params(**(self.get_params() if options is None else options)) + info.update(self.get_params()) + return obs, info + + def step(self, action): + obs, reward, terminated, truncated, info = self.env.step(action) + info.update(self.get_params()) + return obs, reward, terminated, truncated, info diff --git a/rrls/envs/humanoid.py b/rrls/envs/humanoid.py index b2a5b5e..c2d9a03 100644 --- a/rrls/envs/humanoid.py +++ b/rrls/envs/humanoid.py @@ -1,6 +1,7 @@ from __future__ import annotations from enum import Enum +from typing import Any import gymnasium as gym from gymnasium import Wrapper @@ -19,6 +20,14 @@ class HumanoidStandupParamsBound(Enum): "leftthighmass": [0.1, 5.0], "rightfootmass": [0.1, 8.0], 
} + RARL = { + "torsoforce_x": [-3.0, 3.0], + "torsoforce_y": [-3.0, 3.0], + "rightthighforce_x": [-3.0, 3.0], + "rightthighforce_y": [-3.0, 3.0], + "leftfootforce_x": [-3.0, 3.0], + "leftfootforce_y": [-3.0, 3.0], + } class RobustHumanoidStandUp(Wrapper): @@ -64,8 +73,9 @@ def __init__( rightlowerarmmass: float | None = None, leftupperarmmass: float | None = None, leftlowerarmmass: float | None = None, + **kwargs: dict[str, Any], ): - super().__init__(env=gym.make("HumanoidStandup-v5")) + super().__init__(env=gym.make("HumanoidStandup-v5", **kwargs)) self.set_params( torsomass=torsomass, @@ -184,3 +194,317 @@ def _change_params(self): if self.leftlowerarmmass is not None: self.unwrapped.model.body_mass[13] = self.leftlowerarmmass + + +class ForceHumanoidStandUp(Wrapper): + """ + Force HumanoidStandUp environment. You can apply forces to the environment using the set_params method. + The parameters are applied by the private _change_params method, which set_params calls. The parameters are: + - torsoforce_x + - torsoforce_y + - torsoforce_z + - lwaisforce_x + - lwaisforce_y + - lwaisforce_z + - pelvisforce_x + - pelvisforce_y + - pelvisforce_z + - rightthighforce_x + - rightthighforce_y + - rightthighforce_z + - rightshinforce_x + - rightshinforce_y + - rightshinforce_z + - rightfootforce_x + - rightfootforce_y + - rightfootforce_z + - leftthighforce_x + - leftthighforce_y + - leftthighforce_z + - leftshinforce_x + - leftshinforce_y + - leftshinforce_z + - leftfootforce_x + - leftfootforce_y + - leftfootforce_z + - rightupperarmforce_x + - rightupperarmforce_y + - rightupperarmforce_z + - rightlowerarmforce_x + - rightlowerarmforce_y + - rightlowerarmforce_z + - leftupperarmforce_x + - leftupperarmforce_y + - leftupperarmforce_z + - leftlowerarmforce_x + - leftlowerarmforce_y + - leftlowerarmforce_z + """ + + metadata = { + "render_modes": [ + "human", + "rgb_array", + "depth_array", + ], + } + + def __init__(self, **kwargs: dict[str, Any]): + 
super().__init__(env=gym.make("HumanoidStandup-v5", **kwargs)) + self.set_params() + + def set_params( + self, + torsoforce_x: float | None = None, + torsoforce_y: float | None = None, + torsoforce_z: float | None = None, + lwaisforce_x: float | None = None, + lwaisforce_y: float | None = None, + lwaisforce_z: float | None = None, + pelvisforce_x: float | None = None, + pelvisforce_y: float | None = None, + pelvisforce_z: float | None = None, + rightthighforce_x: float | None = None, + rightthighforce_y: float | None = None, + rightthighforce_z: float | None = None, + rightshinforce_x: float | None = None, + rightshinforce_y: float | None = None, + rightshinforce_z: float | None = None, + rightfootforce_x: float | None = None, + rightfootforce_y: float | None = None, + rightfootforce_z: float | None = None, + leftthighforce_x: float | None = None, + leftthighforce_y: float | None = None, + leftthighforce_z: float | None = None, + leftshinforce_x: float | None = None, + leftshinforce_y: float | None = None, + leftshinforce_z: float | None = None, + leftfootforce_x: float | None = None, + leftfootforce_y: float | None = None, + leftfootforce_z: float | None = None, + rightupperarmforce_x: float | None = None, + rightupperarmforce_y: float | None = None, + rightupperarmforce_z: float | None = None, + rightlowerarmforce_x: float | None = None, + rightlowerarmforce_y: float | None = None, + rightlowerarmforce_z: float | None = None, + leftupperarmforce_x: float | None = None, + leftupperarmforce_y: float | None = None, + leftupperarmforce_z: float | None = None, + leftlowerarmforce_x: float | None = None, + leftlowerarmforce_y: float | None = None, + leftlowerarmforce_z: float | None = None, + ): + self.torsoforce_x = torsoforce_x + self.torsoforce_y = torsoforce_y + self.torsoforce_z = torsoforce_z + self.lwaisforce_x = lwaisforce_x + self.lwaisforce_y = lwaisforce_y + self.lwaisforce_z = lwaisforce_z + self.pelvisforce_x = pelvisforce_x + self.pelvisforce_y = 
pelvisforce_y + self.pelvisforce_z = pelvisforce_z + self.rightthighforce_x = rightthighforce_x + self.rightthighforce_y = rightthighforce_y + self.rightthighforce_z = rightthighforce_z + self.rightshinforce_x = rightshinforce_x + self.rightshinforce_y = rightshinforce_y + self.rightshinforce_z = rightshinforce_z + self.rightfootforce_x = rightfootforce_x + self.rightfootforce_y = rightfootforce_y + self.rightfootforce_z = rightfootforce_z + self.leftthighforce_x = leftthighforce_x + self.leftthighforce_y = leftthighforce_y + self.leftthighforce_z = leftthighforce_z + self.leftshinforce_x = leftshinforce_x + self.leftshinforce_y = leftshinforce_y + self.leftshinforce_z = leftshinforce_z + self.leftfootforce_x = leftfootforce_x + self.leftfootforce_y = leftfootforce_y + self.leftfootforce_z = leftfootforce_z + self.rightupperarmforce_x = rightupperarmforce_x + self.rightupperarmforce_y = rightupperarmforce_y + self.rightupperarmforce_z = rightupperarmforce_z + self.rightlowerarmforce_x = rightlowerarmforce_x + self.rightlowerarmforce_y = rightlowerarmforce_y + self.rightlowerarmforce_z = rightlowerarmforce_z + self.leftupperarmforce_x = leftupperarmforce_x + self.leftupperarmforce_y = leftupperarmforce_y + self.leftupperarmforce_z = leftupperarmforce_z + self.leftlowerarmforce_x = leftlowerarmforce_x + self.leftlowerarmforce_y = leftlowerarmforce_y + self.leftlowerarmforce_z = leftlowerarmforce_z + self._change_params() + + def get_params(self): + return { + "torsoforce_x": self.torsoforce_x, + "torsoforce_y": self.torsoforce_y, + "torsoforce_z": self.torsoforce_z, + "lwaisforce_x": self.lwaisforce_x, + "lwaisforce_y": self.lwaisforce_y, + "lwaisforce_z": self.lwaisforce_z, + "pelvisforce_x": self.pelvisforce_x, + "pelvisforce_y": self.pelvisforce_y, + "pelvisforce_z": self.pelvisforce_z, + "rightthighforce_x": self.rightthighforce_x, + "rightthighforce_y": self.rightthighforce_y, + "rightthighforce_z": self.rightthighforce_z, + "rightshinforce_x": 
self.rightshinforce_x, + "rightshinforce_y": self.rightshinforce_y, + "rightshinforce_z": self.rightshinforce_z, + "rightfootforce_x": self.rightfootforce_x, + "rightfootforce_y": self.rightfootforce_y, + "rightfootforce_z": self.rightfootforce_z, + "leftthighforce_x": self.leftthighforce_x, + "leftthighforce_y": self.leftthighforce_y, + "leftthighforce_z": self.leftthighforce_z, + "leftshinforce_x": self.leftshinforce_x, + "leftshinforce_y": self.leftshinforce_y, + "leftshinforce_z": self.leftshinforce_z, + "leftfootforce_x": self.leftfootforce_x, + "leftfootforce_y": self.leftfootforce_y, + "leftfootforce_z": self.leftfootforce_z, + "rightupperarmforce_x": self.rightupperarmforce_x, + "rightupperarmforce_y": self.rightupperarmforce_y, + "rightupperarmforce_z": self.rightupperarmforce_z, + "rightlowerarmforce_x": self.rightlowerarmforce_x, + "rightlowerarmforce_y": self.rightlowerarmforce_y, + "rightlowerarmforce_z": self.rightlowerarmforce_z, + "leftupperarmforce_x": self.leftupperarmforce_x, + "leftupperarmforce_y": self.leftupperarmforce_y, + "leftupperarmforce_z": self.leftupperarmforce_z, + "leftlowerarmforce_x": self.leftlowerarmforce_x, + "leftlowerarmforce_y": self.leftlowerarmforce_y, + "leftlowerarmforce_z": self.leftlowerarmforce_z, + } + + def _change_params(self): + if self.torsoforce_x is not None: + self.unwrapped.data.xfrc_applied[1][0] = self.torsoforce_x + + if self.torsoforce_y is not None: + self.unwrapped.data.xfrc_applied[1][1] = self.torsoforce_y + + if self.torsoforce_z is not None: + self.unwrapped.data.xfrc_applied[1][2] = self.torsoforce_z + + if self.lwaisforce_x is not None: + self.unwrapped.data.xfrc_applied[2][0] = self.lwaisforce_x + + if self.lwaisforce_y is not None: + self.unwrapped.data.xfrc_applied[2][1] = self.lwaisforce_y + + if self.lwaisforce_z is not None: + self.unwrapped.data.xfrc_applied[2][2] = self.lwaisforce_z + + if self.pelvisforce_x is not None: + self.unwrapped.data.xfrc_applied[3][0] = self.pelvisforce_x + + if 
self.pelvisforce_y is not None: + self.unwrapped.data.xfrc_applied[3][1] = self.pelvisforce_y + + if self.pelvisforce_z is not None: + self.unwrapped.data.xfrc_applied[3][2] = self.pelvisforce_z + + if self.rightthighforce_x is not None: + self.unwrapped.data.xfrc_applied[4][0] = self.rightthighforce_x + + if self.rightthighforce_y is not None: + self.unwrapped.data.xfrc_applied[4][1] = self.rightthighforce_y + + if self.rightthighforce_z is not None: + self.unwrapped.data.xfrc_applied[4][2] = self.rightthighforce_z + + if self.rightshinforce_x is not None: + self.unwrapped.data.xfrc_applied[5][0] = self.rightshinforce_x + + if self.rightshinforce_y is not None: + self.unwrapped.data.xfrc_applied[5][1] = self.rightshinforce_y + + if self.rightshinforce_z is not None: + self.unwrapped.data.xfrc_applied[5][2] = self.rightshinforce_z + + if self.rightfootforce_x is not None: + self.unwrapped.data.xfrc_applied[6][0] = self.rightfootforce_x + + if self.rightfootforce_y is not None: + self.unwrapped.data.xfrc_applied[6][1] = self.rightfootforce_y + + if self.rightfootforce_z is not None: + self.unwrapped.data.xfrc_applied[6][2] = self.rightfootforce_z + + if self.leftthighforce_x is not None: + self.unwrapped.data.xfrc_applied[7][0] = self.leftthighforce_x + + if self.leftthighforce_y is not None: + self.unwrapped.data.xfrc_applied[7][1] = self.leftthighforce_y + + if self.leftthighforce_z is not None: + self.unwrapped.data.xfrc_applied[7][2] = self.leftthighforce_z + + if self.leftshinforce_x is not None: + self.unwrapped.data.xfrc_applied[8][0] = self.leftshinforce_x + + if self.leftshinforce_y is not None: + self.unwrapped.data.xfrc_applied[8][1] = self.leftshinforce_y + + if self.leftshinforce_z is not None: + self.unwrapped.data.xfrc_applied[8][2] = self.leftshinforce_z + + if self.leftfootforce_x is not None: + self.unwrapped.data.xfrc_applied[9][0] = self.leftfootforce_x + + if self.leftfootforce_y is not None: + self.unwrapped.data.xfrc_applied[9][1] = 
self.leftfootforce_y + + if self.leftfootforce_z is not None: + self.unwrapped.data.xfrc_applied[9][2] = self.leftfootforce_z + + if self.rightupperarmforce_x is not None: + self.unwrapped.data.xfrc_applied[10][0] = self.rightupperarmforce_x + + if self.rightupperarmforce_y is not None: + self.unwrapped.data.xfrc_applied[10][1] = self.rightupperarmforce_y + + if self.rightupperarmforce_z is not None: + self.unwrapped.data.xfrc_applied[10][2] = self.rightupperarmforce_z + + if self.rightlowerarmforce_x is not None: + self.unwrapped.data.xfrc_applied[11][0] = self.rightlowerarmforce_x + + if self.rightlowerarmforce_y is not None: + self.unwrapped.data.xfrc_applied[11][1] = self.rightlowerarmforce_y + + if self.rightlowerarmforce_z is not None: + self.unwrapped.data.xfrc_applied[11][2] = self.rightlowerarmforce_z + + if self.leftupperarmforce_x is not None: + self.unwrapped.data.xfrc_applied[12][0] = self.leftupperarmforce_x + + if self.leftupperarmforce_y is not None: + self.unwrapped.data.xfrc_applied[12][1] = self.leftupperarmforce_y + + if self.leftupperarmforce_z is not None: + self.unwrapped.data.xfrc_applied[12][2] = self.leftupperarmforce_z + + if self.leftlowerarmforce_x is not None: + self.unwrapped.data.xfrc_applied[13][0] = self.leftlowerarmforce_x + + if self.leftlowerarmforce_y is not None: + self.unwrapped.data.xfrc_applied[13][1] = self.leftlowerarmforce_y + + if self.leftlowerarmforce_z is not None: + self.unwrapped.data.xfrc_applied[13][2] = self.leftlowerarmforce_z + + def reset(self, *, seed: int | None = None, options: dict | None = None): + obs, info = self.env.reset(seed=seed, options=options) + # The MuJoCo reset (mj_resetData) zeroes data.xfrc_applied, so forces must be (re)applied after it, not before. + self.set_params(**(self.get_params() if options is None else options)) + info.update(self.get_params()) + return obs, info + + def step(self, action): + obs, reward, terminated, truncated, info = self.env.step(action) + info.update(self.get_params()) + return obs, reward, terminated, truncated, info diff --git a/rrls/envs/pendulum.py b/rrls/envs/pendulum.py index 
cc5441e..ca70f58 100644 --- a/rrls/envs/pendulum.py +++ b/rrls/envs/pendulum.py @@ -1,6 +1,7 @@ from __future__ import annotations from enum import Enum +from typing import Any import gymnasium as gym from gymnasium import Wrapper @@ -14,6 +15,10 @@ class InvertedPendulumParamsBound(Enum): "polemass": [1.0, 31.0], "cartmass": [1.0, 11.0], } + RARL = { + "poleforce_x": [-3.0, 3.0], + "poleforce_y": [-3.0, 3.0], + } class RobustInvertedPendulum(Wrapper): @@ -33,8 +38,13 @@ class RobustInvertedPendulum(Wrapper): ], } - def __init__(self, polemass: float | None = None, cartmass: float | None = None): - super().__init__(env=gym.make("InvertedPendulum-v5")) + def __init__( + self, + polemass: float | None = None, + cartmass: float | None = None, + **kwargs: dict[str, Any], + ): + super().__init__(env=gym.make("InvertedPendulum-v5", **kwargs)) self.set_params(polemass=polemass, cartmass=cartmass) def set_params(self, polemass: float | None = None, cartmass: float | None = None): @@ -65,3 +75,77 @@ def _change_params(self): self.unwrapped.model.body_mass[1] = self.cartmass if self.polemass is not None: self.unwrapped.model.body_mass[2] = self.polemass + + +class ForceInvertedPendulum(Wrapper): + """ + Force InvertedPendulum environment. You can apply forces to the environment using the set_params method. + The parameters are applied by the private _change_params method, which set_params calls. 
The parameters are: + - poleforce_x, poleforce_y, poleforce_z + - cartforce_x, cartforce_y, cartforce_z + """ + + metadata = { + "render_modes": [ + "human", + "rgb_array", + "depth_array", + ], + } + + def __init__(self, **kwargs: dict[str, Any]): + super().__init__(env=gym.make("InvertedPendulum-v5", **kwargs)) + self.set_params() + + def set_params( + self, + poleforce_x: float | None = None, + poleforce_y: float | None = None, + poleforce_z: float | None = None, + cartforce_x: float | None = None, + cartforce_y: float | None = None, + cartforce_z: float | None = None, + ): + self.poleforce_x = poleforce_x + self.poleforce_y = poleforce_y + self.poleforce_z = poleforce_z + self.cartforce_x = cartforce_x + self.cartforce_y = cartforce_y + self.cartforce_z = cartforce_z + self._change_params() + + def get_params(self): + return { + "poleforce_x": self.poleforce_x, + "poleforce_y": self.poleforce_y, + "poleforce_z": self.poleforce_z, + "cartforce_x": self.cartforce_x, + "cartforce_y": self.cartforce_y, + "cartforce_z": self.cartforce_z, + } + + def _change_params(self): + if self.cartforce_x is not None: + self.unwrapped.data.xfrc_applied[1, 0] = self.cartforce_x + if self.cartforce_y is not None: + self.unwrapped.data.xfrc_applied[1, 1] = self.cartforce_y + if self.cartforce_z is not None: + self.unwrapped.data.xfrc_applied[1, 2] = self.cartforce_z + if self.poleforce_x is not None: + self.unwrapped.data.xfrc_applied[2, 0] = self.poleforce_x + if self.poleforce_y is not None: + self.unwrapped.data.xfrc_applied[2, 1] = self.poleforce_y + if self.poleforce_z is not None: + self.unwrapped.data.xfrc_applied[2, 2] = self.poleforce_z + + def reset(self, *, seed: int | None = None, options: dict | None = None): + obs, info = self.env.reset(seed=seed, options=options) + # The MuJoCo reset (mj_resetData) zeroes data.xfrc_applied, so forces must be (re)applied after it, not before. + self.set_params(**(self.get_params() if options is None else options)) + info.update(self.get_params()) + return obs, info + + def step(self, action): + obs, reward, terminated, truncated, info = self.env.step(action) + info.update(self.get_params()) + return obs, reward, 
terminated, truncated, info diff --git a/rrls/envs/walker.py b/rrls/envs/walker.py index 0a927a0..1f9744d 100644 --- a/rrls/envs/walker.py +++ b/rrls/envs/walker.py @@ -1,6 +1,7 @@ from __future__ import annotations from enum import Enum +from typing import Any import gymnasium as gym from gymnasium import Wrapper @@ -19,6 +20,12 @@ class Walker2dParamsBound(Enum): "torsomass": [0.1, 5.0], "thighmass": [0.1, 6.0], } + RARL = { + "legforce_x": [-3.0, 3.0], + "legforce_y": [-3.0, 3.0], + "leftfootforce_x": [-3.0, 3.0], + "leftfootforce_y": [-3.0, 3.0], + } class RobustWalker2d(Wrapper): @@ -54,8 +61,9 @@ def __init__( leftthighmass: float | None = None, leftlegmass: float | None = None, leftfootmass: float | None = None, + **kwargs: dict[str, Any], ): - super().__init__(env=gym.make("Walker2d-v5")) + super().__init__(env=gym.make("Walker2d-v5", **kwargs)) self.set_params( worldfriction=worldfriction, torsomass=torsomass, @@ -137,3 +145,191 @@ def _change_params(self): if self.leftfootmass is not None: self.unwrapped.model.body_mass[7] = self.leftfootmass + + +class ForceWalker2d(Wrapper): + """ + Force Walker2d environment. You can apply forces to the environment using the set_params method. + The parameters are applied by the private _change_params method, which set_params calls. 
The parameters are: + - torsoforce_x + - torsoforce_y + - torsoforce_z + - thighforce_x + - thighforce_y + - thighforce_z + - legforce_x + - legforce_y + - legforce_z + - footforce_x + - footforce_y + - footforce_z + - leftthighforce_x + - leftthighforce_y + - leftthighforce_z + - leftlegforce_x + - leftlegforce_y + - leftlegforce_z + - leftfootforce_x + - leftfootforce_y + - leftfootforce_z + """ + + metadata = { + "render_modes": [ + "human", + "rgb_array", + "depth_array", + ], + } + + def __init__(self, **kwargs: dict[str, Any]): + super().__init__(env=gym.make("Walker2d-v5", **kwargs)) + self.set_params() + + def set_params( + self, + torsoforce_x: float | None = None, + torsoforce_y: float | None = None, + torsoforce_z: float | None = None, + thighforce_x: float | None = None, + thighforce_y: float | None = None, + thighforce_z: float | None = None, + legforce_x: float | None = None, + legforce_y: float | None = None, + legforce_z: float | None = None, + footforce_x: float | None = None, + footforce_y: float | None = None, + footforce_z: float | None = None, + leftthighforce_x: float | None = None, + leftthighforce_y: float | None = None, + leftthighforce_z: float | None = None, + leftlegforce_x: float | None = None, + leftlegforce_y: float | None = None, + leftlegforce_z: float | None = None, + leftfootforce_x: float | None = None, + leftfootforce_y: float | None = None, + leftfootforce_z: float | None = None, + ): + self.torsoforce_x = torsoforce_x + self.torsoforce_y = torsoforce_y + self.torsoforce_z = torsoforce_z + self.thighforce_x = thighforce_x + self.thighforce_y = thighforce_y + self.thighforce_z = thighforce_z + self.legforce_x = legforce_x + self.legforce_y = legforce_y + self.legforce_z = legforce_z + self.footforce_x = footforce_x + self.footforce_y = footforce_y + self.footforce_z = footforce_z + self.leftthighforce_x = leftthighforce_x + self.leftthighforce_y = leftthighforce_y + self.leftthighforce_z = leftthighforce_z + self.leftlegforce_x 
= leftlegforce_x + self.leftlegforce_y = leftlegforce_y + self.leftlegforce_z = leftlegforce_z + self.leftfootforce_x = leftfootforce_x + self.leftfootforce_y = leftfootforce_y + self.leftfootforce_z = leftfootforce_z + self._change_params() + + def get_params(self): + return { + "torsoforce_x": self.torsoforce_x, + "torsoforce_y": self.torsoforce_y, + "torsoforce_z": self.torsoforce_z, + "thighforce_x": self.thighforce_x, + "thighforce_y": self.thighforce_y, + "thighforce_z": self.thighforce_z, + "legforce_x": self.legforce_x, + "legforce_y": self.legforce_y, + "legforce_z": self.legforce_z, + "footforce_x": self.footforce_x, + "footforce_y": self.footforce_y, + "footforce_z": self.footforce_z, + "leftthighforce_x": self.leftthighforce_x, + "leftthighforce_y": self.leftthighforce_y, + "leftthighforce_z": self.leftthighforce_z, + "leftlegforce_x": self.leftlegforce_x, + "leftlegforce_y": self.leftlegforce_y, + "leftlegforce_z": self.leftlegforce_z, + "leftfootforce_x": self.leftfootforce_x, + "leftfootforce_y": self.leftfootforce_y, + "leftfootforce_z": self.leftfootforce_z, + } + + def _change_params(self): + if self.torsoforce_x is not None: + self.unwrapped.data.xfrc_applied[1, 0] = self.torsoforce_x + + if self.torsoforce_y is not None: + self.unwrapped.data.xfrc_applied[1, 1] = self.torsoforce_y + + if self.torsoforce_z is not None: + self.unwrapped.data.xfrc_applied[1, 2] = self.torsoforce_z + + if self.thighforce_x is not None: + self.unwrapped.data.xfrc_applied[2, 0] = self.thighforce_x + + if self.thighforce_y is not None: + self.unwrapped.data.xfrc_applied[2, 1] = self.thighforce_y + + if self.thighforce_z is not None: + self.unwrapped.data.xfrc_applied[2, 2] = self.thighforce_z + + if self.legforce_x is not None: + self.unwrapped.data.xfrc_applied[3, 0] = self.legforce_x + + if self.legforce_y is not None: + self.unwrapped.data.xfrc_applied[3, 1] = self.legforce_y + + if self.legforce_z is not None: + self.unwrapped.data.xfrc_applied[3, 2] = 
self.legforce_z + + if self.footforce_x is not None: + self.unwrapped.data.xfrc_applied[4, 0] = self.footforce_x + + if self.footforce_y is not None: + self.unwrapped.data.xfrc_applied[4, 1] = self.footforce_y + + if self.footforce_z is not None: + self.unwrapped.data.xfrc_applied[4, 2] = self.footforce_z + + if self.leftthighforce_x is not None: + self.unwrapped.data.xfrc_applied[5, 0] = self.leftthighforce_x + + if self.leftthighforce_y is not None: + self.unwrapped.data.xfrc_applied[5, 1] = self.leftthighforce_y + + if self.leftthighforce_z is not None: + self.unwrapped.data.xfrc_applied[5, 2] = self.leftthighforce_z + + if self.leftlegforce_x is not None: + self.unwrapped.data.xfrc_applied[6, 0] = self.leftlegforce_x + + if self.leftlegforce_y is not None: + self.unwrapped.data.xfrc_applied[6, 1] = self.leftlegforce_y + + if self.leftlegforce_z is not None: + self.unwrapped.data.xfrc_applied[6, 2] = self.leftlegforce_z + + if self.leftfootforce_x is not None: + self.unwrapped.data.xfrc_applied[7, 0] = self.leftfootforce_x + + if self.leftfootforce_y is not None: + self.unwrapped.data.xfrc_applied[7, 1] = self.leftfootforce_y + + if self.leftfootforce_z is not None: + self.unwrapped.data.xfrc_applied[7, 2] = self.leftfootforce_z + + def reset(self, *, seed: int | None = None, options: dict | None = None): + obs, info = self.env.reset(seed=seed, options=options) + # The MuJoCo reset (mj_resetData) zeroes data.xfrc_applied, so forces must be (re)applied after it, not before. + self.set_params(**(self.get_params() if options is None else options)) + info.update(self.get_params()) + return obs, info + + def step(self, action): + obs, reward, terminated, truncated, info = self.env.step(action) + info.update(self.get_params()) + return obs, reward, terminated, truncated, info diff --git a/test/test_copy.py b/test/test_copy.py index d8814cf..ff6695f 100644 --- a/test/test_copy.py +++ b/test/test_copy.py @@ -15,6 +15,12 @@ gym.make("rrls/robust-invertedpendulum-v0"), gym.make("rrls/robust-humanoidstandup-v0"), gym.make("rrls/robust-walker-v0"), + gym.make("rrls/force-ant-v0"), + 
gym.make("rrls/force-halfcheetah-v0"), + gym.make("rrls/force-hopper-v0"), + gym.make("rrls/force-invertedpendulum-v0"), + gym.make("rrls/force-humanoidstandup-v0"), + gym.make("rrls/force-walker-v0"), ] diff --git a/test/test_effective_parameters.py b/test/test_effective_parameters.py index 7c5342d..3c58a4c 100644 --- a/test/test_effective_parameters.py +++ b/test/test_effective_parameters.py @@ -16,6 +16,12 @@ gym.make("rrls/robust-invertedpendulum-v0"), gym.make("rrls/robust-humanoidstandup-v0"), gym.make("rrls/robust-walker-v0"), + gym.make("rrls/force-ant-v0"), + gym.make("rrls/force-halfcheetah-v0"), + gym.make("rrls/force-hopper-v0"), + gym.make("rrls/force-invertedpendulum-v0"), + gym.make("rrls/force-humanoidstandup-v0"), + gym.make("rrls/force-walker-v0"), ] bounds = [ @@ -25,6 +31,12 @@ rrls.envs.InvertedPendulumParamsBound.TWO_DIM.value, rrls.envs.HumanoidStandupParamsBound.THREE_DIM.value, rrls.envs.Walker2dParamsBound.THREE_DIM.value, + rrls.envs.AntParamsBound.RARL.value, + rrls.envs.HalfCheetahParamsBound.RARL.value, + rrls.envs.HopperParamsBound.RARL.value, + rrls.envs.InvertedPendulumParamsBound.RARL.value, + rrls.envs.HumanoidStandupParamsBound.RARL.value, + rrls.envs.Walker2dParamsBound.RARL.value, ] envs_and_bounds = zip(envs, bounds)