diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 00000000..996c690e --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,45 @@ +# Description + +Please include a summary of the change and which issue is fixed. Please also include relevant motivation and context. List any dependencies that are required for this change. + +Fixes # (issue) + +## Type of change + +Please delete options that are not relevant. + +- [ ] Bug fix (non-breaking change which fixes an issue) +- [ ] New feature (non-breaking change which adds functionality) +- [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected) +- [ ] This change requires a documentation update + +### Screenshots +Please attach before and after screenshots of the change if applicable. + + + +# Checklist: + +- [ ] I have run the [`pre-commit` checks](https://pre-commit.com/) with `pre-commit run --all-files` (see `CONTRIBUTING.md` instructions to set it up) +- [ ] I have commented my code, particularly in hard-to-understand areas +- [ ] I have made corresponding changes to the documentation +- [ ] My changes generate no new warnings +- [ ] I have added tests that prove my fix is effective or that my feature works +- [ ] New and existing unit tests pass locally with my changes + + diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index c169e25a..beb160e3 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -12,6 +12,6 @@ jobs: - run: | docker build -f py.Dockerfile \ --build-arg PYTHON_VERSION=${{ matrix.python-version }} \ - --tag gym-docker . + --tag gym-robotics-docker . 
- name: Run tests - run: docker run gym-docker pytest --forked --import-mode=append + run: docker run gym-robotics-docker pytest diff --git a/.github/workflows/lint_python.yml b/.github/workflows/lint_python.yml deleted file mode 100644 index 06768f67..00000000 --- a/.github/workflows/lint_python.yml +++ /dev/null @@ -1,34 +0,0 @@ -name: lint_python -on: [pull_request, push] -jobs: - lint_python: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - uses: actions/setup-python@v2 - - run: pip install isort mypy pytest pyupgrade safety - - run: isort --check-only --profile black . || true - - run: pip install -e . - - run: mypy --install-types --non-interactive . || true - - run: pytest . || true - - run: pytest --doctest-modules . || true - - run: shopt -s globstar && pyupgrade --py36-plus **/*.py || true - - pyright: - name: Check types with pyright - runs-on: ubuntu-latest - strategy: - matrix: - python-platform: [ "Linux" ] - python-version: [ "3.7"] - fail-fast: false - env: - PYRIGHT_VERSION: 1.1.183 - steps: - - uses: actions/checkout@v2 - - uses: jakebailey/pyright-action@v1 - with: - version: ${{ env.PYRIGHT_VERSION }} - python-platform: ${{ matrix.python-platform }} - python-version: ${{ matrix.python-version }} - no-comments: ${{ matrix.python-version != '3.9' || matrix.python-platform != 'Linux' }} # Having each job create the same comment is too noisy. diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a5c7cf61..8670f8d6 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,15 +1,7 @@ +# exclude: '^gym_robotics/' repos: - - repo: https://github.com/PyCQA/bandit/ - rev: 1.7.0 - hooks: - - id: bandit - args: - - --recursive - - --skip - - B101,B108,B301,B403,B404,B603 - - . 
- repo: https://github.com/python/black - rev: 21.7b0 + rev: 22.3.0 hooks: - id: black - repo: https://github.com/codespell-project/codespell @@ -19,12 +11,37 @@ repos: args: - --ignore-words-list=nd,reacher,thist,ths - repo: https://gitlab.com/pycqa/flake8 - rev: 3.9.2 + rev: 4.0.1 hooks: - id: flake8 args: - - --ignore=E203,E402,E712,E722,E731,E741,F401,F403,F405,F524,F841,W503 + - '--per-file-ignores=*/__init__.py:F401' + - --ignore=E203,W503 - --max-complexity=30 - --max-line-length=456 - --show-source - --statistics + - repo: https://github.com/PyCQA/isort + rev: 5.10.1 + hooks: + - id: isort + args: ["--profile", "black"] + - repo: https://github.com/asottile/pyupgrade + rev: v2.32.0 + hooks: + - id: pyupgrade + args: ["--py37-plus"] + - repo: local + hooks: + - id: pyright + exclude: ^gym_robotics/_version.py + name: pyright + entry: pyright + language: node + pass_filenames: false + types: [python] + additional_dependencies: ["pyright"] + args: + - --project=pyproject.toml + + diff --git a/CODE_OF_CONDUCT.rst b/CODE_OF_CONDUCT.rst index e208081c..ae6a17a5 100644 --- a/CODE_OF_CONDUCT.rst +++ b/CODE_OF_CONDUCT.rst @@ -1,12 +1,12 @@ -OpenAI Gym is dedicated to providing a harassment-free experience for +Farama Foundation is dedicated to providing a harassment-free experience for everyone, regardless of gender, gender identity and expression, sexual orientation, disability, physical appearance, body size, age, race, or religion. We do not tolerate harassment of participants in any form. -This code of conduct applies to all OpenAI Gym spaces (including Gist +This code of conduct applies to all Farama Foundation spaces (including Gist comments) both online and off. Anyone who violates this code of conduct may be sanctioned or expelled from these spaces at the -discretion of the OpenAI team. +discretion of the Farama Foundation team. We may add additional rules over time, which will be made clearly available to participants. 
Participants are responsible for knowing diff --git a/README.md b/README.md index 8224f928..d23d9433 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,21 @@ +[![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white)](https://pre-commit.com/) +[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)

+# Gym-Robotics Details and documentation on these robotics environments are available in OpenAI's [blog post](https://blog.openai.com/ingredients-for-robotics-research/) and the accompanying [technical report](https://arxiv.org/abs/1802.09464). +Requirements: +- Python 3.7 to 3.10 +- Gym v0.26 +- NumPy 1.18+ +- Mujoco 2.2.2 + If you use these environments, please cite the following paper: -``` +```bibtex @misc{1802.09464, Author = {Matthias Plappert and Marcin Andrychowicz and Alex Ray and Bob McGrew and Bowen Baker and Glenn Powell and Jonas Schneider and Josh Tobin and Maciek Chociej and Peter Welinder and Vikash Kumar and Wojciech Zaremba}, Title = {Multi-Goal Reinforcement Learning: Challenging Robotics Environments and Request for Research}, @@ -15,42 +24,71 @@ If you use these environments, please cite the following paper: } ``` +## New MuJoCo Python Bindings + +The latest version and future versions of the MuJoCo environments will no longer depend on `mujoco-py`. Instead the new [mujoco](https://mujoco.readthedocs.io/en/latest/python.html) python bindings will be the required dependency for future gym MuJoCo environment versions. Old gym MuJoCo environment versions that depend on `mujoco-py` will still be kept but unmaintained. +Dependencies for old MuJoCo environments can still be installed by `pip install gym_robotics[mujoco_py]`. + ## Fetch environments -[FetchReach-v0](https://gym.openai.com/envs/FetchReach-v0/): Fetch has to move its end-effector to the desired goal position. +[FetchReach-v3](https://gym.openai.com/envs/FetchReach-v0/): Fetch has to move its end-effector to the desired goal position. -[FetchSlide-v0](https://gym.openai.com/envs/FetchSlide-v0/): Fetch has to hit a puck across a long table such that it slides and comes to rest on the desired goal. +[FetchSlide-v2](https://gym.openai.com/envs/FetchSlide-v0/): Fetch has to hit a puck across a long table such that it slides and comes to rest on the desired goal. 
-[FetchPush-v0](https://gym.openai.com/envs/FetchPush-v0/): Fetch has to move a box by pushing it until it reaches a desired goal position. +[FetchPush-v2](https://gym.openai.com/envs/FetchPush-v0/): Fetch has to move a box by pushing it until it reaches a desired goal position. -[FetchPickAndPlace-v0](https://gym.openai.com/envs/FetchPickAndPlace-v0/): Fetch has to pick up a box from a table using its gripper and move it to a desired goal above the table. +[FetchPickAndPlace-v2](https://gym.openai.com/envs/FetchPickAndPlace-v0/): Fetch has to pick up a box from a table using its gripper and move it to a desired goal above the table. ## Shadow Dexterous Hand environments -[HandReach-v0](https://gym.openai.com/envs/HandReach-v0/): ShadowHand has to reach with its thumb and a selected finger until they meet at a desired goal position above the palm. +[HandReach-v1](https://gym.openai.com/envs/HandReach-v0/): ShadowHand has to reach with its thumb and a selected finger until they meet at a desired goal position above the palm. -[HandManipulateBlock-v0](https://gym.openai.com/envs/HandManipulateBlock-v0/): ShadowHand has to manipulate a block until it achieves a desired goal position and rotation. +[HandManipulateBlock-v1](https://gym.openai.com/envs/HandManipulateBlock-v0/): ShadowHand has to manipulate a block until it achieves a desired goal position and rotation. -[HandManipulateEgg-v0](https://gym.openai.com/envs/HandManipulateEgg-v0/): ShadowHand has to manipulate an egg until it achieves a desired goal position and rotation. +[HandManipulateEgg-v1](https://gym.openai.com/envs/HandManipulateEgg-v0/): ShadowHand has to manipulate an egg until it achieves a desired goal position and rotation. -[HandManipulatePen-v0](https://gym.openai.com/envs/HandManipulatePen-v0/): ShadowHand has to manipulate a pen until it achieves a desired goal position and rotation. 
+[HandManipulatePen-v1](https://gym.openai.com/envs/HandManipulatePen-v0/): ShadowHand has to manipulate a pen until it achieves a desired goal position and rotation. + +# Hand environments with Touch Sensors + +Touch sensor observations are also available in all Hand environments, with the exception of `HandReach`. These environments add to the palm of the hand and the phalanges of the fingers two types of touch sensors depending on the environment. These touch sensors are: +- **Boolean Touch Sensor**: the observations of each touch sensor can return a value of `0` if no contact is detected with an object, and `1` otherwise. +- **Continuous Touch Sensor**: the value returned by each touch sensor is a continuous value that represents the external force made by an object over the sensor. + +These environments are instantiated by adding the following strings to the Hand environment IDs: `_BooleanTouchSensor` or `_ContinuousTouchSensor`. For example, to add boolean touch sensors to `HandManipulateBlock-v1`, make the environment in the following way: +``` +env = gym.make('HandManipulateBlock_BooleanTouchSensor-v1') +``` + +If using these environments please also cite the following paper: + +```bibtex +@article{melnik2021using, + title={Using tactile sensing to improve the sample efficiency and performance of deep deterministic policy gradients for simulated in-hand manipulation tasks}, + author={Melnik, Andrew and Lach, Luca and Plappert, Matthias and Korthals, Timo and Haschke, Robert and Ritter, Helge}, + journal={Frontiers in Robotics and AI}, + pages={57}, + year={2021}, + publisher={Frontiers} +} +``` diff --git a/gym_robotics/__init__.py b/gym_robotics/__init__.py index 9f6b6e37..0b392a8b 100644 --- a/gym_robotics/__init__.py +++ b/gym_robotics/__init__.py @@ -2,6 +2,10 @@ from gym_robotics.core import GoalEnv +from . 
import _version + +__version__ = _version.get_versions()["version"] + def register_robotics_envs(): def _merge(a, b): @@ -17,28 +21,63 @@ def _merge(a, b): # Fetch register( id=f"FetchSlide{suffix}-v1", - entry_point="gym_robotics.envs:FetchSlideEnv", + entry_point="gym_robotics.envs:MujocoPyFetchSlideEnv", + kwargs=kwargs, + max_episode_steps=50, + ) + + register( + id=f"FetchSlide{suffix}-v2", + entry_point="gym_robotics.envs:MujocoFetchSlideEnv", kwargs=kwargs, max_episode_steps=50, ) register( id=f"FetchPickAndPlace{suffix}-v1", - entry_point="gym_robotics.envs:FetchPickAndPlaceEnv", + entry_point="gym_robotics.envs:MujocoPyFetchPickAndPlaceEnv", + kwargs=kwargs, + max_episode_steps=50, + ) + + register( + id=f"FetchPickAndPlace{suffix}-v2", + entry_point="gym_robotics.envs:MujocoFetchPickAndPlaceEnv", kwargs=kwargs, max_episode_steps=50, ) register( id=f"FetchReach{suffix}-v1", - entry_point="gym_robotics.envs:FetchReachEnv", + entry_point="gym_robotics.envs:MujocoPyFetchReachEnv", + kwargs=kwargs, + max_episode_steps=50, + ) + + register( + id=f"FetchReach{suffix}-v2", + entry_point="gym_robotics.envs:MujocoPyFetchReachEnv", + kwargs=kwargs, + max_episode_steps=50, + ) + + register( + id=f"FetchReach{suffix}-v3", + entry_point="gym_robotics.envs:MujocoFetchReachEnv", kwargs=kwargs, max_episode_steps=50, ) register( id=f"FetchPush{suffix}-v1", - entry_point="gym_robotics.envs:FetchPushEnv", + entry_point="gym_robotics.envs:MujocoPyFetchPushEnv", + kwargs=kwargs, + max_episode_steps=50, + ) + + register( + id=f"FetchPush{suffix}-v2", + entry_point="gym_robotics.envs:MujocoFetchPushEnv", kwargs=kwargs, max_episode_steps=50, ) @@ -46,23 +85,74 @@ def _merge(a, b): # Hand register( id=f"HandReach{suffix}-v0", - entry_point="gym_robotics.envs:HandReachEnv", + entry_point="gym_robotics.envs:MujocoPyHandReachEnv", + kwargs=kwargs, + max_episode_steps=50, + ) + + register( + id=f"HandReach{suffix}-v1", + entry_point="gym_robotics.envs:MujocoHandReachEnv", 
kwargs=kwargs, max_episode_steps=50, ) register( id=f"HandManipulateBlockRotateZ{suffix}-v0", - entry_point="gym_robotics.envs:HandBlockEnv", + entry_point="gym_robotics.envs:MujocoPyHandBlockEnv", + kwargs=_merge( + { + "target_position": "ignore", + "target_rotation": "z", + }, + kwargs, + ), + max_episode_steps=100, + ) + + register( + id=f"HandManipulateBlockRotateZ{suffix}-v1", + entry_point="gym_robotics.envs:MujocoHandBlockEnv", + kwargs=_merge( + { + "target_position": "ignore", + "target_rotation": "z", + }, + kwargs, + ), + max_episode_steps=100, + ) + + register( + id=f"HandManipulateBlockRotateZ_BooleanTouchSensors{suffix}-v0", + entry_point="gym_robotics.envs:MujocoPyHandBlockTouchSensorsEnv", + kwargs=_merge( + { + "target_position": "ignore", + "target_rotation": "z", + "touch_get_obs": "boolean", + }, + kwargs, + ), + max_episode_steps=100, + ) + + register( + id=f"HandManipulateBlockRotateZ{suffix}-v1", + entry_point="gym_robotics.envs:MujocoHandBlockEnv", kwargs=_merge( - {"target_position": "ignore", "target_rotation": "z"}, kwargs + { + "target_position": "ignore", + "target_rotation": "z", + }, + kwargs, ), max_episode_steps=100, ) register( - id=f"HandManipulateBlockRotateZTouchSensors{suffix}-v0", - entry_point="gym_robotics.envs:HandBlockTouchSensorsEnv", + id=f"HandManipulateBlockRotateZ_BooleanTouchSensors{suffix}-v1", + entry_point="gym_robotics.envs:MujocoHandBlockTouchSensorsEnv", kwargs=_merge( { "target_position": "ignore", @@ -75,8 +165,22 @@ def _merge(a, b): ) register( - id=f"HandManipulateBlockRotateZTouchSensors{suffix}-v1", - entry_point="gym_robotics.envs:HandBlockTouchSensorsEnv", + id=f"HandManipulateBlockRotateZ_ContinuousTouchSensors{suffix}-v0", + entry_point="gym_robotics.envs:MujocoPyHandBlockTouchSensorsEnv", + kwargs=_merge( + { + "target_position": "ignore", + "target_rotation": "z", + "touch_get_obs": "sensordata", + }, + kwargs, + ), + max_episode_steps=100, + ) + + register( + 
id=f"HandManipulateBlockRotateZ_ContinuousTouchSensors{suffix}-v1", + entry_point="gym_robotics.envs:MujocoHandBlockTouchSensorsEnv", kwargs=_merge( { "target_position": "ignore", @@ -90,16 +194,47 @@ def _merge(a, b): register( id=f"HandManipulateBlockRotateParallel{suffix}-v0", - entry_point="gym_robotics.envs:HandBlockEnv", + entry_point="gym_robotics.envs:MujocoPyHandBlockEnv", + kwargs=_merge( + { + "target_position": "ignore", + "target_rotation": "parallel", + }, + kwargs, + ), + max_episode_steps=100, + ) + + register( + id=f"HandManipulateBlockRotateParallel{suffix}-v1", + entry_point="gym_robotics.envs:MujocoHandBlockEnv", + kwargs=_merge( + { + "target_position": "ignore", + "target_rotation": "parallel", + }, + kwargs, + ), + max_episode_steps=100, + ) + + register( + id=f"HandManipulateBlockRotateParallel_BooleanTouchSensors{suffix}-v0", + entry_point="gym_robotics.envs:MujocoPyHandBlockTouchSensorsEnv", kwargs=_merge( - {"target_position": "ignore", "target_rotation": "parallel"}, kwargs + { + "target_position": "ignore", + "target_rotation": "parallel", + "touch_get_obs": "boolean", + }, + kwargs, ), max_episode_steps=100, ) register( - id=f"HandManipulateBlockRotateParallelTouchSensors{suffix}-v0", - entry_point="gym_robotics.envs:HandBlockTouchSensorsEnv", + id=f"HandManipulateBlockRotateParallel_BooleanTouchSensors{suffix}-v1", + entry_point="gym_robotics.envs:MujocoHandBlockTouchSensorsEnv", kwargs=_merge( { "target_position": "ignore", @@ -112,8 +247,22 @@ def _merge(a, b): ) register( - id=f"HandManipulateBlockRotateParallelTouchSensors{suffix}-v1", - entry_point="gym_robotics.envs:HandBlockTouchSensorsEnv", + id=f"HandManipulateBlockRotateParallel_ContinuousTouchSensors{suffix}-v0", + entry_point="gym_robotics.envs:MujocoPyHandBlockTouchSensorsEnv", + kwargs=_merge( + { + "target_position": "ignore", + "target_rotation": "parallel", + "touch_get_obs": "sensordata", + }, + kwargs, + ), + max_episode_steps=100, + ) + + register( + 
id=f"HandManipulateBlockRotateParallel_ContinuousTouchSensors{suffix}-v1", + entry_point="gym_robotics.envs:MujocoHandBlockTouchSensorsEnv", kwargs=_merge( { "target_position": "ignore", @@ -127,16 +276,47 @@ def _merge(a, b): register( id=f"HandManipulateBlockRotateXYZ{suffix}-v0", - entry_point="gym_robotics.envs:HandBlockEnv", + entry_point="gym_robotics.envs:MujocoPyHandBlockEnv", + kwargs=_merge( + { + "target_position": "ignore", + "target_rotation": "xyz", + }, + kwargs, + ), + max_episode_steps=100, + ) + + register( + id=f"HandManipulateBlockRotateXYZ{suffix}-v1", + entry_point="gym_robotics.envs:MujocoHandBlockEnv", + kwargs=_merge( + { + "target_position": "ignore", + "target_rotation": "xyz", + }, + kwargs, + ), + max_episode_steps=100, + ) + + register( + id=f"HandManipulateBlockRotateXYZ_BooleanTouchSensors{suffix}-v0", + entry_point="gym_robotics.envs:MujocoPyHandBlockTouchSensorsEnv", kwargs=_merge( - {"target_position": "ignore", "target_rotation": "xyz"}, kwargs + { + "target_position": "ignore", + "target_rotation": "xyz", + "touch_get_obs": "boolean", + }, + kwargs, ), max_episode_steps=100, ) register( - id=f"HandManipulateBlockRotateXYZTouchSensors{suffix}-v0", - entry_point="gym_robotics.envs:HandBlockTouchSensorsEnv", + id=f"HandManipulateBlockRotateXYZ_BooleanTouchSensors{suffix}-v1", + entry_point="gym_robotics.envs:MujocoHandBlockTouchSensorsEnv", kwargs=_merge( { "target_position": "ignore", @@ -149,8 +329,22 @@ def _merge(a, b): ) register( - id=f"HandManipulateBlockRotateXYZTouchSensors{suffix}-v1", - entry_point="gym_robotics.envs:HandBlockTouchSensorsEnv", + id=f"HandManipulateBlockRotateXYZ_ContinuousTouchSensors{suffix}-v0", + entry_point="gym_robotics.envs:MujocoPyHandBlockTouchSensorsEnv", + kwargs=_merge( + { + "target_position": "ignore", + "target_rotation": "xyz", + "touch_get_obs": "sensordata", + }, + kwargs, + ), + max_episode_steps=100, + ) + + register( + 
id=f"HandManipulateBlockRotateXYZ_ContinuousTouchSensors{suffix}-v1", + entry_point="gym_robotics.envs:MujocoHandBlockTouchSensorsEnv", kwargs=_merge( { "target_position": "ignore", @@ -164,9 +358,26 @@ def _merge(a, b): register( id=f"HandManipulateBlockFull{suffix}-v0", - entry_point="gym_robotics.envs:HandBlockEnv", + entry_point="gym_robotics.envs:MujocoPyHandBlockEnv", + kwargs=_merge( + { + "target_position": "random", + "target_rotation": "xyz", + }, + kwargs, + ), + max_episode_steps=100, + ) + + register( + id=f"HandManipulateBlockFull{suffix}-v1", + entry_point="gym_robotics.envs:MujocoHandBlockEnv", kwargs=_merge( - {"target_position": "random", "target_rotation": "xyz"}, kwargs + { + "target_position": "random", + "target_rotation": "xyz", + }, + kwargs, ), max_episode_steps=100, ) @@ -174,16 +385,33 @@ def _merge(a, b): # Alias for "Full" register( id=f"HandManipulateBlock{suffix}-v0", - entry_point="gym_robotics.envs:HandBlockEnv", + entry_point="gym_robotics.envs:MujocoPyHandBlockEnv", + kwargs=_merge( + { + "target_position": "random", + "target_rotation": "xyz", + }, + kwargs, + ), + max_episode_steps=100, + ) + + register( + id=f"HandManipulateBlock{suffix}-v1", + entry_point="gym_robotics.envs:MujocoHandBlockEnv", kwargs=_merge( - {"target_position": "random", "target_rotation": "xyz"}, kwargs + { + "target_position": "random", + "target_rotation": "xyz", + }, + kwargs, ), max_episode_steps=100, ) register( - id=f"HandManipulateBlockTouchSensors{suffix}-v0", - entry_point="gym_robotics.envs:HandBlockTouchSensorsEnv", + id=f"HandManipulateBlock_BooleanTouchSensors{suffix}-v0", + entry_point="gym_robotics.envs:MujocoPyHandBlockTouchSensorsEnv", kwargs=_merge( { "target_position": "random", @@ -196,8 +424,36 @@ def _merge(a, b): ) register( - id=f"HandManipulateBlockTouchSensors{suffix}-v1", - entry_point="gym_robotics.envs:HandBlockTouchSensorsEnv", + id=f"HandManipulateBlock_BooleanTouchSensors{suffix}-v1", + 
entry_point="gym_robotics.envs:MujocoHandBlockTouchSensorsEnv", + kwargs=_merge( + { + "target_position": "random", + "target_rotation": "xyz", + "touch_get_obs": "boolean", + }, + kwargs, + ), + max_episode_steps=100, + ) + + register( + id=f"HandManipulateBlock_ContinuousTouchSensors{suffix}-v0", + entry_point="gym_robotics.envs:MujocoPyHandBlockTouchSensorsEnv", + kwargs=_merge( + { + "target_position": "random", + "target_rotation": "xyz", + "touch_get_obs": "boolean", + }, + kwargs, + ), + max_episode_steps=100, + ) + + register( + id=f"HandManipulateBlock_ContinuousTouchSensors{suffix}-v1", + entry_point="gym_robotics.envs:MujocoHandBlockTouchSensorsEnv", kwargs=_merge( { "target_position": "random", @@ -211,16 +467,47 @@ def _merge(a, b): register( id=f"HandManipulateEggRotate{suffix}-v0", - entry_point="gym_robotics.envs:HandEggEnv", + entry_point="gym_robotics.envs:MujocoPyHandEggEnv", + kwargs=_merge( + { + "target_position": "ignore", + "target_rotation": "xyz", + }, + kwargs, + ), + max_episode_steps=100, + ) + + register( + id=f"HandManipulateEggRotate{suffix}-v1", + entry_point="gym_robotics.envs:MujocoHandEggEnv", + kwargs=_merge( + { + "target_position": "ignore", + "target_rotation": "xyz", + }, + kwargs, + ), + max_episode_steps=100, + ) + + register( + id=f"HandManipulateEggRotate_BooleanTouchSensors{suffix}-v0", + entry_point="gym_robotics.envs:MujocoPyHandEggTouchSensorsEnv", kwargs=_merge( - {"target_position": "ignore", "target_rotation": "xyz"}, kwargs + { + "target_position": "ignore", + "target_rotation": "xyz", + "touch_get_obs": "boolean", + }, + kwargs, ), max_episode_steps=100, ) register( - id=f"HandManipulateEggRotateTouchSensors{suffix}-v0", - entry_point="gym_robotics.envs:HandEggTouchSensorsEnv", + id=f"HandManipulateEggRotate_BooleanTouchSensors{suffix}-v1", + entry_point="gym_robotics.envs:MujocoHandEggTouchSensorsEnv", kwargs=_merge( { "target_position": "ignore", @@ -233,8 +520,22 @@ def _merge(a, b): ) register( - 
id=f"HandManipulateEggRotateTouchSensors{suffix}-v1", - entry_point="gym_robotics.envs:HandEggTouchSensorsEnv", + id=f"HandManipulateEggRotate_ContinuousTouchSensors{suffix}-v0", + entry_point="gym_robotics.envs:MujocoPyHandEggTouchSensorsEnv", + kwargs=_merge( + { + "target_position": "ignore", + "target_rotation": "xyz", + "touch_get_obs": "sensordata", + }, + kwargs, + ), + max_episode_steps=100, + ) + + register( + id=f"HandManipulateEggRotate_ContinuousTouchSensors{suffix}-v1", + entry_point="gym_robotics.envs:MujocoHandEggTouchSensorsEnv", kwargs=_merge( { "target_position": "ignore", @@ -248,9 +549,26 @@ def _merge(a, b): register( id=f"HandManipulateEggFull{suffix}-v0", - entry_point="gym_robotics.envs:HandEggEnv", + entry_point="gym_robotics.envs:MujocoPyHandEggEnv", kwargs=_merge( - {"target_position": "random", "target_rotation": "xyz"}, kwargs + { + "target_position": "random", + "target_rotation": "xyz", + }, + kwargs, + ), + max_episode_steps=100, + ) + + register( + id=f"HandManipulateEggFull{suffix}-v1", + entry_point="gym_robotics.envs:MujocoHandEggEnv", + kwargs=_merge( + { + "target_position": "random", + "target_rotation": "xyz", + }, + kwargs, ), max_episode_steps=100, ) @@ -258,16 +576,47 @@ def _merge(a, b): # Alias for "Full" register( id=f"HandManipulateEgg{suffix}-v0", - entry_point="gym_robotics.envs:HandEggEnv", + entry_point="gym_robotics.envs:MujocoPyHandEggEnv", + kwargs=_merge( + { + "target_position": "random", + "target_rotation": "xyz", + }, + kwargs, + ), + max_episode_steps=100, + ) + + register( + id=f"HandManipulateEgg{suffix}-v1", + entry_point="gym_robotics.envs:MujocoHandEggEnv", + kwargs=_merge( + { + "target_position": "random", + "target_rotation": "xyz", + }, + kwargs, + ), + max_episode_steps=100, + ) + + register( + id=f"HandManipulateEgg_BooleanTouchSensors{suffix}-v0", + entry_point="gym_robotics.envs:MujocoPyHandEggTouchSensorsEnv", kwargs=_merge( - {"target_position": "random", "target_rotation": "xyz"}, kwargs + 
{ + "target_position": "random", + "target_rotation": "xyz", + "touch_get_obs": "boolean", + }, + kwargs, ), max_episode_steps=100, ) register( - id=f"HandManipulateEggTouchSensors{suffix}-v0", - entry_point="gym_robotics.envs:HandEggTouchSensorsEnv", + id=f"HandManipulateEgg_BooleanTouchSensors{suffix}-v1", + entry_point="gym_robotics.envs:MujocoHandEggTouchSensorsEnv", kwargs=_merge( { "target_position": "random", @@ -280,8 +629,22 @@ def _merge(a, b): ) register( - id=f"HandManipulateEggTouchSensors{suffix}-v1", - entry_point="gym_robotics.envs:HandEggTouchSensorsEnv", + id=f"HandManipulateEgg_ContinuousTouchSensors{suffix}-v0", + entry_point="gym_robotics.envs:MujocoPyHandEggTouchSensorsEnv", + kwargs=_merge( + { + "target_position": "random", + "target_rotation": "xyz", + "touch_get_obs": "sensordata", + }, + kwargs, + ), + max_episode_steps=100, + ) + + register( + id=f"HandManipulateEgg_ContinuousTouchSensors{suffix}-v1", + entry_point="gym_robotics.envs:MujocoHandEggTouchSensorsEnv", kwargs=_merge( { "target_position": "random", @@ -295,16 +658,47 @@ def _merge(a, b): register( id=f"HandManipulatePenRotate{suffix}-v0", - entry_point="gym_robotics.envs:HandPenEnv", + entry_point="gym_robotics.envs:MujocoPyHandPenEnv", + kwargs=_merge( + { + "target_position": "ignore", + "target_rotation": "xyz", + }, + kwargs, + ), + max_episode_steps=100, + ) + + register( + id=f"HandManipulatePenRotate{suffix}-v1", + entry_point="gym_robotics.envs:MujocoHandPenEnv", + kwargs=_merge( + { + "target_position": "ignore", + "target_rotation": "xyz", + }, + kwargs, + ), + max_episode_steps=100, + ) + + register( + id=f"HandManipulatePenRotate_BooleanTouchSensors{suffix}-v0", + entry_point="gym_robotics.envs:MujocoPyHandPenTouchSensorsEnv", kwargs=_merge( - {"target_position": "ignore", "target_rotation": "xyz"}, kwargs + { + "target_position": "ignore", + "target_rotation": "xyz", + "touch_get_obs": "boolean", + }, + kwargs, ), max_episode_steps=100, ) register( - 
id=f"HandManipulatePenRotateTouchSensors{suffix}-v0", - entry_point="gym_robotics.envs:HandPenTouchSensorsEnv", + id=f"HandManipulatePenRotate_BooleanTouchSensors{suffix}-v1", + entry_point="gym_robotics.envs:MujocoHandPenTouchSensorsEnv", kwargs=_merge( { "target_position": "ignore", @@ -317,8 +711,22 @@ def _merge(a, b): ) register( - id=f"HandManipulatePenRotateTouchSensors{suffix}-v1", - entry_point="gym_robotics.envs:HandPenTouchSensorsEnv", + id=f"HandManipulatePenRotate_ContinuousTouchSensors{suffix}-v0", + entry_point="gym_robotics.envs:MujocoPyHandPenTouchSensorsEnv", + kwargs=_merge( + { + "target_position": "ignore", + "target_rotation": "xyz", + "touch_get_obs": "sensordata", + }, + kwargs, + ), + max_episode_steps=100, + ) + + register( + id=f"HandManipulatePenRotate_ContinuousTouchSensors{suffix}-v1", + entry_point="gym_robotics.envs:MujocoHandPenTouchSensorsEnv", kwargs=_merge( { "target_position": "ignore", @@ -332,26 +740,59 @@ def _merge(a, b): register( id=f"HandManipulatePenFull{suffix}-v0", - entry_point="gym_robotics.envs:HandPenEnv", + entry_point="gym_robotics.envs:MujocoPyHandPenEnv", kwargs=_merge( - {"target_position": "random", "target_rotation": "xyz"}, kwargs + { + "target_position": "random", + "target_rotation": "xyz", + }, + kwargs, + ), + max_episode_steps=100, + ) + + register( + id=f"HandManipulatePenFull{suffix}-v1", + entry_point="gym_robotics.envs:MujocoHandPenEnv", + kwargs=_merge( + { + "target_position": "random", + "target_rotation": "xyz", + }, + kwargs, ), max_episode_steps=100, ) - # Alias for "Full" register( id=f"HandManipulatePen{suffix}-v0", - entry_point="gym_robotics.envs:HandPenEnv", + entry_point="gym_robotics.envs:MujocoPyHandPenEnv", + kwargs=_merge( + { + "target_position": "random", + "target_rotation": "xyz", + }, + kwargs, + ), + max_episode_steps=100, + ) + + register( + id=f"HandManipulatePen{suffix}-v1", + entry_point="gym_robotics.envs:MujocoHandPenEnv", kwargs=_merge( - {"target_position": "random", 
"target_rotation": "xyz"}, kwargs + { + "target_position": "random", + "target_rotation": "xyz", + }, + kwargs, ), max_episode_steps=100, ) register( - id=f"HandManipulatePenTouchSensors{suffix}-v0", - entry_point="gym_robotics.envs:HandPenTouchSensorsEnv", + id=f"HandManipulatePen_BooleanTouchSensors{suffix}-v0", + entry_point="gym_robotics.envs:MujocoPyHandPenTouchSensorsEnv", kwargs=_merge( { "target_position": "random", @@ -364,8 +805,22 @@ def _merge(a, b): ) register( - id=f"HandManipulatePenTouchSensors{suffix}-v1", - entry_point="gym_robotics.envs:HandPenTouchSensorsEnv", + id=f"HandManipulatePen_BooleanTouchSensors{suffix}-v1", + entry_point="gym_robotics.envs:MujocoHandPenTouchSensorsEnv", + kwargs=_merge( + { + "target_position": "random", + "target_rotation": "xyz", + "touch_get_obs": "boolean", + }, + kwargs, + ), + max_episode_steps=100, + ) + + register( + id=f"HandManipulatePen_ContinuousTouchSensors{suffix}-v0", + entry_point="gym_robotics.envs:MujocoPyHandPenTouchSensorsEnv", kwargs=_merge( { "target_position": "random", @@ -377,7 +832,21 @@ def _merge(a, b): max_episode_steps=100, ) + register( + id=f"HandManipulatePen_ContinuousTouchSensors{suffix}-v1", + entry_point="gym_robotics.envs:MujocoHandPenTouchSensorsEnv", + kwargs=_merge( + { + "target_position": "random", + "target_rotation": "xyz", + "touch_get_obs": "sensordata", + }, + kwargs, + ), + max_episode_steps=100, + ) -from . import _version + +from . import _version # noqa __version__ = _version.get_versions()["version"] diff --git a/gym_robotics/_version.py b/gym_robotics/_version.py index cdc5ce83..e04291f8 100644 --- a/gym_robotics/_version.py +++ b/gym_robotics/_version.py @@ -4,12 +4,14 @@ # directories (produced by setup.py build) will contain a much shorter file # that just contains the computed version number. -# This file is released into the public domain. 
Generated by -# versioneer-0.21 (https://github.com/python-versioneer/python-versioneer) +# This file is released into the public domain. +# Generated by versioneer-0.26 +# https://github.com/python-versioneer/python-versioneer """Git implementation of _version.py.""" import errno +import functools import os import re import subprocess @@ -73,6 +75,14 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, env= """Call the given command(s).""" assert isinstance(commands, list) process = None + + popen_kwargs = {} + if sys.platform == "win32": + # This hides the console window if pythonw.exe is used + startupinfo = subprocess.STARTUPINFO() + startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW + popen_kwargs["startupinfo"] = startupinfo + for command in commands: try: dispcmd = str([command] + args) @@ -83,6 +93,7 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, env= env=env, stdout=subprocess.PIPE, stderr=(subprocess.PIPE if hide_stderr else None), + **popen_kwargs, ) break except OSError: @@ -95,7 +106,7 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, env= return None, None else: if verbose: - print("unable to find command, tried %s" % (commands,)) + print(f"unable to find command, tried {commands}") return None, None stdout = process.communicate()[0].strip().decode() if process.returncode != 0: @@ -145,7 +156,7 @@ def git_get_keywords(versionfile_abs): # _version.py. keywords = {} try: - with open(versionfile_abs, "r") as fobj: + with open(versionfile_abs) as fobj: for line in fobj: if line.strip().startswith("git_refnames ="): mo = re.search(r'=\s*"(.*)"', line) @@ -244,12 +255,17 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): version string, meaning we're inside a checked out source tree. 
""" GITS = ["git"] - TAG_PREFIX_REGEX = "*" if sys.platform == "win32": GITS = ["git.cmd", "git.exe"] - TAG_PREFIX_REGEX = r"\*" - _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=True) + # GIT_DIR can interfere with correct operation of Versioneer. + # It may be intended to be passed to the Versioneer-versioned project, + # but that should not change where we get our version from. + env = os.environ.copy() + env.pop("GIT_DIR", None) + runner = functools.partial(runner, env=env) + + _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=not verbose) if rc != 0: if verbose: print("Directory %s not under git control" % root) @@ -266,7 +282,7 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): "--always", "--long", "--match", - "%s%s" % (tag_prefix, TAG_PREFIX_REGEX), + f"{tag_prefix}[[:digit:]]*", ], cwd=root, ) @@ -342,7 +358,7 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): if verbose: fmt = "tag '%s' doesn't start with prefix '%s'" print(fmt % (full_tag, tag_prefix)) - pieces["error"] = "tag '%s' doesn't start with prefix '%s'" % ( + pieces["error"] = "tag '{}' doesn't start with prefix '{}'".format( full_tag, tag_prefix, ) @@ -358,8 +374,8 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): else: # HEX: no tags pieces["closest-tag"] = None - count_out, rc = runner(GITS, ["rev-list", "HEAD", "--count"], cwd=root) - pieces["distance"] = int(count_out) # total number of commits + out, rc = runner(GITS, ["rev-list", "HEAD", "--left-right"], cwd=root) + pieces["distance"] = len(out.split()) # total number of commits # commit date: see ISO-8601 comment in git_versions_from_keywords() date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip() diff --git a/gym_robotics/core.py b/gym_robotics/core.py index 861033ca..7aad66dc 100644 --- a/gym_robotics/core.py +++ b/gym_robotics/core.py @@ -6,15 +6,19 @@ class GoalEnv(gym.Env): - """A 
goal-based environment. It functions just as any regular OpenAI Gym environment but it + r"""A goal-based environment. It functions just as any regular Gym environment but it imposes a required structure on the observation_space. More concretely, the observation space is required to contain at least three elements, namely `observation`, `desired_goal`, and `achieved_goal`. Here, `desired_goal` specifies the goal that the agent should attempt to achieve. `achieved_goal` is the goal that it currently achieved instead. `observation` contains the actual observations of the environment as per usual. + + - :meth:`compute_reward` - Externalizes the reward function by taking the achieved and desired goal, as well as extra information. Returns reward. + - :meth:`compute_terminated` - Returns boolean termination depending on the achieved and desired goal, as well as extra information. + - :meth:`compute_truncated` - Returns boolean truncation depending on the achieved and desired goal, as well as extra information. """ - def reset(self, options=None, seed: Optional[int] = None, infos=None): + def reset(self, seed: Optional[int] = None): super().reset(seed=seed) # Enforce that each GoalEnv uses a Goal-compatible observation space. if not isinstance(self.observation_space, gym.spaces.Dict): @@ -45,7 +49,53 @@ def compute_reward(self, achieved_goal, desired_goal, info): float: The reward that corresponds to the provided achieved goal w.r.t. to the desired goal. Note that the following should always hold true: - ob, reward, done, info = env.step() + ob, reward, terminated, truncated, info = env.step() assert reward == env.compute_reward(ob['achieved_goal'], ob['desired_goal'], info) """ raise NotImplementedError + + @abstractmethod + def compute_terminated(self, achieved_goal, desired_goal, info): + """Compute the step termination. Allows to customize the termination states depending on the + desired and the achieved goal. 
If you wish to determine termination states independent of the goal, + you can include necessary values to derive it in 'info' and compute it accordingly. The environment reaches + a termination state when the current state leads to the episode ending, as in an episodic task. + More information can be found in: https://farama.org/New-Step-API#theory + + Termination states are those that end an episode as part of the task definition (e.g. reaching the goal). + + Args: + achieved_goal (object): the goal that was achieved during execution + desired_goal (object): the desired goal that we asked the agent to attempt to achieve + info (dict): an info dictionary with additional information + + Returns: + bool: The termination state that corresponds to the provided achieved goal w.r.t. the desired + goal. Note that the following should always hold true: + + ob, reward, terminated, truncated, info = env.step() + assert terminated == env.compute_terminated(ob['achieved_goal'], ob['desired_goal'], info) + """ + raise NotImplementedError + + @abstractmethod + def compute_truncated(self, achieved_goal, desired_goal, info): + """Compute the step truncation. Allows to customize the truncated states depending on the + desired and the achieved goal. If you wish to determine truncated states independent of the goal, + you can include necessary values to derive it in 'info' and compute it accordingly. Truncated states + are those that are out of the scope of the Markov Decision Process (MDP) such as time constraints in a + continuing task. More information can be found in: https://farama.org/New-Step-API#theory + + Args: + achieved_goal (object): the goal that was achieved during execution + desired_goal (object): the desired goal that we asked the agent to attempt to achieve + info (dict): an info dictionary with additional information + + Returns: + bool: The truncated state that corresponds to the provided achieved goal w.r.t. the desired + goal. 
Note that the following should always hold true: + + ob, reward, terminated, truncated, info = env.step() + assert truncated == env.compute_truncated(ob['achieved_goal'], ob['desired_goal'], info) + """ + raise NotImplementedError diff --git a/gym_robotics/envs/__init__.py b/gym_robotics/envs/__init__.py index 0936bac0..fbf37785 100644 --- a/gym_robotics/envs/__init__.py +++ b/gym_robotics/envs/__init__.py @@ -1,14 +1,24 @@ -from gym_robotics.envs.fetch_env import FetchEnv -from gym_robotics.envs.fetch.slide import FetchSlideEnv -from gym_robotics.envs.fetch.pick_and_place import FetchPickAndPlaceEnv -from gym_robotics.envs.fetch.push import FetchPushEnv -from gym_robotics.envs.fetch.reach import FetchReachEnv - -from gym_robotics.envs.hand.reach import HandReachEnv -from gym_robotics.envs.hand.manipulate import HandBlockEnv -from gym_robotics.envs.hand.manipulate import HandEggEnv -from gym_robotics.envs.hand.manipulate import HandPenEnv - -from gym_robotics.envs.hand.manipulate_touch_sensors import HandBlockTouchSensorsEnv -from gym_robotics.envs.hand.manipulate_touch_sensors import HandEggTouchSensorsEnv -from gym_robotics.envs.hand.manipulate_touch_sensors import HandPenTouchSensorsEnv +from gym_robotics.envs.fetch.pick_and_place import ( + MujocoFetchPickAndPlaceEnv, + MujocoPyFetchPickAndPlaceEnv, +) +from gym_robotics.envs.fetch.push import MujocoFetchPushEnv, MujocoPyFetchPushEnv +from gym_robotics.envs.fetch.reach import MujocoFetchReachEnv, MujocoPyFetchReachEnv +from gym_robotics.envs.fetch.slide import MujocoFetchSlideEnv, MujocoPyFetchSlideEnv +from gym_robotics.envs.hand.manipulate import ( + MujocoHandBlockEnv, + MujocoHandEggEnv, + MujocoHandPenEnv, + MujocoPyHandBlockEnv, + MujocoPyHandEggEnv, + MujocoPyHandPenEnv, +) +from gym_robotics.envs.hand.manipulate_touch_sensors import ( + MujocoHandBlockTouchSensorsEnv, + MujocoHandEggTouchSensorsEnv, + MujocoHandPenTouchSensorsEnv, + MujocoPyHandBlockTouchSensorsEnv, + MujocoPyHandEggTouchSensorsEnv, + 
MujocoPyHandPenTouchSensorsEnv, +) +from gym_robotics.envs.hand.reach import MujocoHandReachEnv, MujocoPyHandReachEnv diff --git a/gym_robotics/envs/assets/fetch/reach.xml b/gym_robotics/envs/assets/fetch/reach.xml index c73d6249..2ca621e4 100644 --- a/gym_robotics/envs/assets/fetch/reach.xml +++ b/gym_robotics/envs/assets/fetch/reach.xml @@ -21,6 +21,4 @@ - - diff --git a/gym_robotics/envs/assets/fetch/robot.xml b/gym_robotics/envs/assets/fetch/robot.xml index b627d492..b396ab5d 100644 --- a/gym_robotics/envs/assets/fetch/robot.xml +++ b/gym_robotics/envs/assets/fetch/robot.xml @@ -13,15 +13,15 @@ - + - + - + @@ -43,11 +43,11 @@ - + - + @@ -55,7 +55,7 @@ - + @@ -63,7 +63,7 @@ - + diff --git a/gym_robotics/envs/assets/hand/reach.xml b/gym_robotics/envs/assets/hand/reach.xml index 71f6dfe6..ddc87817 100644 --- a/gym_robotics/envs/assets/hand/reach.xml +++ b/gym_robotics/envs/assets/hand/reach.xml @@ -14,17 +14,17 @@ - - - - - + + + + + - - - - - + + + + + diff --git a/gym_robotics/envs/fetch/pick_and_place.py b/gym_robotics/envs/fetch/pick_and_place.py index a6cdcbed..7b8fe857 100644 --- a/gym_robotics/envs/fetch/pick_and_place.py +++ b/gym_robotics/envs/fetch/pick_and_place.py @@ -1,23 +1,50 @@ import os -from gym import utils -from gym_robotics.envs import fetch_env +from gym.utils.ezpickle import EzPickle + +from gym_robotics.envs.fetch_env import MujocoFetchEnv, MujocoPyFetchEnv -# Ensure we get the path separator correct on windows MODEL_XML_PATH = os.path.join("fetch", "pick_and_place.xml") -class FetchPickAndPlaceEnv(fetch_env.FetchEnv, utils.EzPickle): - def __init__(self, reward_type="sparse"): +class MujocoFetchPickAndPlaceEnv(MujocoFetchEnv, EzPickle): + def __init__(self, reward_type="sparse", **kwargs): + initial_qpos = { + "robot0:slide0": 0.405, + "robot0:slide1": 0.48, + "robot0:slide2": 0.0, + "object0:joint": [1.25, 0.53, 0.4, 1.0, 0.0, 0.0, 0.0], + } + MujocoFetchEnv.__init__( + self, + model_path=MODEL_XML_PATH, + has_object=True, + 
block_gripper=False, + n_substeps=20, + gripper_extra_height=0.2, + target_in_the_air=True, + target_offset=0.0, + obj_range=0.15, + target_range=0.15, + distance_threshold=0.05, + initial_qpos=initial_qpos, + reward_type=reward_type, + **kwargs, + ) + EzPickle.__init__(self, reward_type=reward_type, **kwargs) + + +class MujocoPyFetchPickAndPlaceEnv(MujocoPyFetchEnv, EzPickle): + def __init__(self, reward_type="sparse", **kwargs): initial_qpos = { "robot0:slide0": 0.405, "robot0:slide1": 0.48, "robot0:slide2": 0.0, "object0:joint": [1.25, 0.53, 0.4, 1.0, 0.0, 0.0, 0.0], } - fetch_env.FetchEnv.__init__( + MujocoPyFetchEnv.__init__( self, - MODEL_XML_PATH, + model_path=MODEL_XML_PATH, has_object=True, block_gripper=False, n_substeps=20, @@ -29,5 +56,6 @@ def __init__(self, reward_type="sparse"): distance_threshold=0.05, initial_qpos=initial_qpos, reward_type=reward_type, + **kwargs, ) - utils.EzPickle.__init__(self, reward_type=reward_type) + EzPickle.__init__(self, reward_type=reward_type, **kwargs) diff --git a/gym_robotics/envs/fetch/push.py b/gym_robotics/envs/fetch/push.py index 44d6d711..8188fb21 100644 --- a/gym_robotics/envs/fetch/push.py +++ b/gym_robotics/envs/fetch/push.py @@ -1,23 +1,51 @@ import os -from gym import utils -from gym_robotics.envs import fetch_env +from gym.utils.ezpickle import EzPickle + +from gym_robotics.envs.fetch_env import MujocoFetchEnv, MujocoPyFetchEnv # Ensure we get the path separator correct on windows MODEL_XML_PATH = os.path.join("fetch", "push.xml") -class FetchPushEnv(fetch_env.FetchEnv, utils.EzPickle): - def __init__(self, reward_type="sparse"): +class MujocoPyFetchPushEnv(MujocoPyFetchEnv, EzPickle): + def __init__(self, reward_type="sparse", **kwargs): + initial_qpos = { + "robot0:slide0": 0.405, + "robot0:slide1": 0.48, + "robot0:slide2": 0.0, + "object0:joint": [1.25, 0.53, 0.4, 1.0, 0.0, 0.0, 0.0], + } + MujocoPyFetchEnv.__init__( + self, + model_path=MODEL_XML_PATH, + has_object=True, + block_gripper=True, + 
n_substeps=20, + gripper_extra_height=0.0, + target_in_the_air=False, + target_offset=0.0, + obj_range=0.15, + target_range=0.15, + distance_threshold=0.05, + initial_qpos=initial_qpos, + reward_type=reward_type, + **kwargs, + ) + EzPickle.__init__(self, reward_type=reward_type, **kwargs) + + +class MujocoFetchPushEnv(MujocoFetchEnv, EzPickle): + def __init__(self, reward_type="sparse", **kwargs): initial_qpos = { "robot0:slide0": 0.405, "robot0:slide1": 0.48, "robot0:slide2": 0.0, "object0:joint": [1.25, 0.53, 0.4, 1.0, 0.0, 0.0, 0.0], } - fetch_env.FetchEnv.__init__( + MujocoFetchEnv.__init__( self, - MODEL_XML_PATH, + model_path=MODEL_XML_PATH, has_object=True, block_gripper=True, n_substeps=20, @@ -29,5 +57,6 @@ def __init__(self, reward_type="sparse"): distance_threshold=0.05, initial_qpos=initial_qpos, reward_type=reward_type, + **kwargs, ) - utils.EzPickle.__init__(self, reward_type=reward_type) + EzPickle.__init__(self, reward_type=reward_type, **kwargs) diff --git a/gym_robotics/envs/fetch/reach.py b/gym_robotics/envs/fetch/reach.py index d905d4fe..3f728e86 100644 --- a/gym_robotics/envs/fetch/reach.py +++ b/gym_robotics/envs/fetch/reach.py @@ -1,22 +1,49 @@ import os -from gym import utils -from gym_robotics.envs import fetch_env +from gym.utils.ezpickle import EzPickle + +from gym_robotics.envs.fetch_env import MujocoFetchEnv, MujocoPyFetchEnv # Ensure we get the path separator correct on windows MODEL_XML_PATH = os.path.join("fetch", "reach.xml") -class FetchReachEnv(fetch_env.FetchEnv, utils.EzPickle): - def __init__(self, reward_type="sparse"): +class MujocoPyFetchReachEnv(MujocoPyFetchEnv, EzPickle): + def __init__(self, reward_type: str = "sparse", **kwargs): + initial_qpos = { + "robot0:slide0": 0.4049, + "robot0:slide1": 0.48, + "robot0:slide2": 0.0, + } + MujocoPyFetchEnv.__init__( + self, + model_path=MODEL_XML_PATH, + has_object=False, + block_gripper=True, + n_substeps=20, + gripper_extra_height=0.2, + target_in_the_air=True, + 
target_offset=0.0, + obj_range=0.15, + target_range=0.15, + distance_threshold=0.05, + initial_qpos=initial_qpos, + reward_type=reward_type, + **kwargs, + ) + EzPickle.__init__(self, reward_type=reward_type, **kwargs) + + +class MujocoFetchReachEnv(MujocoFetchEnv, EzPickle): + def __init__(self, reward_type: str = "sparse", **kwargs): initial_qpos = { "robot0:slide0": 0.4049, "robot0:slide1": 0.48, "robot0:slide2": 0.0, } - fetch_env.FetchEnv.__init__( + MujocoFetchEnv.__init__( self, - MODEL_XML_PATH, + model_path=MODEL_XML_PATH, has_object=False, block_gripper=True, n_substeps=20, @@ -28,5 +55,6 @@ def __init__(self, reward_type="sparse"): distance_threshold=0.05, initial_qpos=initial_qpos, reward_type=reward_type, + **kwargs, ) - utils.EzPickle.__init__(self, reward_type=reward_type) + EzPickle.__init__(self, reward_type=reward_type, **kwargs) diff --git a/gym_robotics/envs/fetch/slide.py b/gym_robotics/envs/fetch/slide.py index 1326fd68..889b15a3 100644 --- a/gym_robotics/envs/fetch/slide.py +++ b/gym_robotics/envs/fetch/slide.py @@ -1,25 +1,52 @@ import os -import numpy as np -from gym import utils -from gym_robotics.envs import fetch_env +import numpy as np +from gym.utils.ezpickle import EzPickle +from gym_robotics.envs.fetch_env import MujocoFetchEnv, MujocoPyFetchEnv # Ensure we get the path separator correct on windows MODEL_XML_PATH = os.path.join("fetch", "slide.xml") -class FetchSlideEnv(fetch_env.FetchEnv, utils.EzPickle): - def __init__(self, reward_type="sparse"): +class MujocoPyFetchSlideEnv(MujocoPyFetchEnv, EzPickle): + def __init__(self, reward_type="sparse", **kwargs): + initial_qpos = { + "robot0:slide0": 0.05, + "robot0:slide1": 0.48, + "robot0:slide2": 0.0, + "object0:joint": [1.7, 1.1, 0.41, 1.0, 0.0, 0.0, 0.0], + } + MujocoPyFetchEnv.__init__( + self, + model_path=MODEL_XML_PATH, + has_object=True, + block_gripper=True, + n_substeps=20, + gripper_extra_height=-0.02, + target_in_the_air=False, + target_offset=np.array([0.4, 0.0, 0.0]), + 
obj_range=0.1, + target_range=0.3, + distance_threshold=0.05, + initial_qpos=initial_qpos, + reward_type=reward_type, + **kwargs, + ) + EzPickle.__init__(self, reward_type=reward_type, **kwargs) + + +class MujocoFetchSlideEnv(MujocoFetchEnv, EzPickle): + def __init__(self, reward_type="sparse", **kwargs): initial_qpos = { "robot0:slide0": 0.05, "robot0:slide1": 0.48, "robot0:slide2": 0.0, "object0:joint": [1.7, 1.1, 0.41, 1.0, 0.0, 0.0, 0.0], } - fetch_env.FetchEnv.__init__( + MujocoFetchEnv.__init__( self, - MODEL_XML_PATH, + model_path=MODEL_XML_PATH, has_object=True, block_gripper=True, n_substeps=20, @@ -31,5 +58,6 @@ def __init__(self, reward_type="sparse"): distance_threshold=0.05, initial_qpos=initial_qpos, reward_type=reward_type, + **kwargs, ) - utils.EzPickle.__init__(self, reward_type=reward_type) + EzPickle.__init__(self, reward_type=reward_type, **kwargs) diff --git a/gym_robotics/envs/fetch_env.py b/gym_robotics/envs/fetch_env.py index 049114d7..4f09db64 100644 --- a/gym_robotics/envs/fetch_env.py +++ b/gym_robotics/envs/fetch_env.py @@ -1,6 +1,9 @@ +from typing import Union + import numpy as np -from gym_robotics.envs import rotations, robot_env, utils +from gym_robotics.envs.robot_env import MujocoPyRobotEnv, MujocoRobotEnv +from gym_robotics.utils import rotations def goal_distance(goal_a, goal_b): @@ -8,71 +11,169 @@ def goal_distance(goal_a, goal_b): return np.linalg.norm(goal_a - goal_b, axis=-1) -class FetchEnv(robot_env.RobotEnv): - """Superclass for all Fetch environments.""" - - def __init__( - self, - model_path, - n_substeps, - gripper_extra_height, - block_gripper, - has_object, - target_in_the_air, - target_offset, - obj_range, - target_range, - distance_threshold, - initial_qpos, - reward_type, - ): - """Initializes a new Fetch environment. 
- - Args: - model_path (string): path to the environments XML file - n_substeps (int): number of substeps the simulation runs on every call to step - gripper_extra_height (float): additional height above the table when positioning the gripper - block_gripper (boolean): whether or not the gripper is blocked (i.e. not movable) or not - has_object (boolean): whether or not the environment has an object - target_in_the_air (boolean): whether or not the target should be in the air above the table or on the table surface - target_offset (float or array with 3 elements): offset of the target - obj_range (float): range of a uniform distribution for sampling initial object positions - target_range (float): range of a uniform distribution for sampling a target - distance_threshold (float): the threshold after which a goal is considered achieved - initial_qpos (dict): a dictionary of joint names and values that define the initial configuration - reward_type ('sparse' or 'dense'): the reward type, i.e. sparse or dense - """ - self.gripper_extra_height = gripper_extra_height - self.block_gripper = block_gripper - self.has_object = has_object - self.target_in_the_air = target_in_the_air - self.target_offset = target_offset - self.obj_range = obj_range - self.target_range = target_range - self.distance_threshold = distance_threshold - self.reward_type = reward_type - - super().__init__( - model_path=model_path, - n_substeps=n_substeps, - n_actions=4, - initial_qpos=initial_qpos, - ) +def get_base_fetch_env(RobotEnvClass: Union[MujocoPyRobotEnv, MujocoRobotEnv]): + """Factory function that returns a BaseFetchEnv class that inherits + from MujocoPyRobotEnv or MujocoRobotEnv depending on the mujoco python bindings. + """ - # GoalEnv methods - # ---------------------------- + class BaseFetchEnv(RobotEnvClass): + """Superclass for all Fetch environments.""" - def compute_reward(self, achieved_goal, goal, info): - # Compute distance between goal and the achieved goal. 
- d = goal_distance(achieved_goal, goal) - if self.reward_type == "sparse": - return -(d > self.distance_threshold).astype(np.float32) - else: - return -d + def __init__( + self, + gripper_extra_height, + block_gripper, + has_object: bool, + target_in_the_air, + target_offset, + obj_range, + target_range, + distance_threshold, + reward_type, + **kwargs + ): + """Initializes a new Fetch environment. + + Args: + model_path (string): path to the environments XML file + n_substeps (int): number of substeps the simulation runs on every call to step + gripper_extra_height (float): additional height above the table when positioning the gripper + block_gripper (boolean): whether or not the gripper is blocked (i.e. not movable) or not + has_object (boolean): whether or not the environment has an object + target_in_the_air (boolean): whether or not the target should be in the air above the table or on the table surface + target_offset (float or array with 3 elements): offset of the target + obj_range (float): range of a uniform distribution for sampling initial object positions + target_range (float): range of a uniform distribution for sampling a target + distance_threshold (float): the threshold after which a goal is considered achieved + initial_qpos (dict): a dictionary of joint names and values that define the initial configuration + reward_type ('sparse' or 'dense'): the reward type, i.e. sparse or dense + """ + + self.gripper_extra_height = gripper_extra_height + self.block_gripper = block_gripper + self.has_object = has_object + self.target_in_the_air = target_in_the_air + self.target_offset = target_offset + self.obj_range = obj_range + self.target_range = target_range + self.distance_threshold = distance_threshold + self.reward_type = reward_type + + super().__init__(n_actions=4, **kwargs) + + # GoalEnv methods + # ---------------------------- + + def compute_reward(self, achieved_goal, goal, info): + # Compute distance between goal and the achieved goal. 
+ d = goal_distance(achieved_goal, goal) + if self.reward_type == "sparse": + return -(d > self.distance_threshold).astype(np.float32) + else: + return -d + + # RobotEnv methods + # ---------------------------- + + def _set_action(self, action): + assert action.shape == (4,) + action = ( + action.copy() + ) # ensure that we don't change the action outside of this scope + pos_ctrl, gripper_ctrl = action[:3], action[3] + + pos_ctrl *= 0.05 # limit maximum change in position + rot_ctrl = [ + 1.0, + 0.0, + 1.0, + 0.0, + ] # fixed rotation of the end effector, expressed as a quaternion + gripper_ctrl = np.array([gripper_ctrl, gripper_ctrl]) + assert gripper_ctrl.shape == (2,) + if self.block_gripper: + gripper_ctrl = np.zeros_like(gripper_ctrl) + action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl]) + + return action + + def _get_obs(self): + ( + grip_pos, + object_pos, + object_rel_pos, + gripper_state, + object_rot, + object_velp, + object_velr, + grip_velp, + gripper_vel, + ) = self.generate_mujoco_observations() + + if not self.has_object: + achieved_goal = grip_pos.copy() + else: + achieved_goal = np.squeeze(object_pos.copy()) + + obs = np.concatenate( + [ + grip_pos, + object_pos.ravel(), + object_rel_pos.ravel(), + gripper_state, + object_rot.ravel(), + object_velp.ravel(), + object_velr.ravel(), + grip_velp, + gripper_vel, + ] + ) + + return { + "observation": obs.copy(), + "achieved_goal": achieved_goal.copy(), + "desired_goal": self.goal.copy(), + } + + def generate_mujoco_observations(self): + + raise NotImplementedError + + def get_gripper_xpos(self): + + raise NotImplementedError + + def _viewer_setup(self): + lookat = self.get_gripper_xpos() + for idx, value in enumerate(lookat): + self.viewer.cam.lookat[idx] = value + self.viewer.cam.distance = 2.5 + self.viewer.cam.azimuth = 132.0 + self.viewer.cam.elevation = -14.0 + + def _sample_goal(self): + if self.has_object: + goal = self.initial_gripper_xpos[:3] + self.np_random.uniform( + 
-self.target_range, self.target_range, size=3 + ) + goal += self.target_offset + goal[2] = self.height_offset + if self.target_in_the_air and self.np_random.uniform() < 0.5: + goal[2] += self.np_random.uniform(0, 0.45) + else: + goal = self.initial_gripper_xpos[:3] + self.np_random.uniform( + -self.target_range, self.target_range, size=3 + ) + return goal.copy() - # RobotEnv methods - # ---------------------------- + def _is_success(self, achieved_goal, desired_goal): + d = goal_distance(achieved_goal, desired_goal) + return (d < self.distance_threshold).astype(np.float32) + return BaseFetchEnv + + +class MujocoPyFetchEnv(get_base_fetch_env(MujocoPyRobotEnv)): def _step_callback(self): if self.block_gripper: self.sim.data.set_joint_qpos("robot0:l_gripper_finger_joint", 0.0) @@ -80,35 +181,20 @@ def _step_callback(self): self.sim.forward() def _set_action(self, action): - assert action.shape == (4,) - action = ( - action.copy() - ) # ensure that we don't change the action outside of this scope - pos_ctrl, gripper_ctrl = action[:3], action[3] - - pos_ctrl *= 0.05 # limit maximum change in position - rot_ctrl = [ - 1.0, - 0.0, - 1.0, - 0.0, - ] # fixed rotation of the end effector, expressed as a quaternion - gripper_ctrl = np.array([gripper_ctrl, gripper_ctrl]) - assert gripper_ctrl.shape == (2,) - if self.block_gripper: - gripper_ctrl = np.zeros_like(gripper_ctrl) - action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl]) + action = super()._set_action(action) # Apply action to simulation. 
- utils.ctrl_set_action(self.sim, action) - utils.mocap_set_action(self.sim, action) + self._utils.ctrl_set_action(self.sim, action) + self._utils.mocap_set_action(self.sim, action) - def _get_obs(self): + def generate_mujoco_observations(self): # positions grip_pos = self.sim.data.get_site_xpos("robot0:grip") + dt = self.sim.nsubsteps * self.sim.model.opt.timestep grip_velp = self.sim.data.get_site_xvelp("robot0:grip") * dt - robot_qpos, robot_qvel = utils.robot_get_obs(self.sim) + + robot_qpos, robot_qvel = self._utils.robot_get_obs(self.sim) if self.has_object: object_pos = self.sim.data.get_site_xpos("object0") # rotations @@ -124,42 +210,26 @@ def _get_obs(self): object_rot ) = object_velp = object_velr = object_rel_pos = np.zeros(0) gripper_state = robot_qpos[-2:] + gripper_vel = ( robot_qvel[-2:] * dt ) # change to a scalar if the gripper is made symmetric - if not self.has_object: - achieved_goal = grip_pos.copy() - else: - achieved_goal = np.squeeze(object_pos.copy()) - obs = np.concatenate( - [ - grip_pos, - object_pos.ravel(), - object_rel_pos.ravel(), - gripper_state, - object_rot.ravel(), - object_velp.ravel(), - object_velr.ravel(), - grip_velp, - gripper_vel, - ] + return ( + grip_pos, + object_pos, + object_rel_pos, + gripper_state, + object_rot, + object_velp, + object_velr, + grip_velp, + gripper_vel, ) - return { - "observation": obs.copy(), - "achieved_goal": achieved_goal.copy(), - "desired_goal": self.goal.copy(), - } - - def _viewer_setup(self): + def get_gripper_xpos(self): body_id = self.sim.model.body_name2id("robot0:gripper_link") - lookat = self.sim.data.body_xpos[body_id] - for idx, value in enumerate(lookat): - self.viewer.cam.lookat[idx] = value - self.viewer.cam.distance = 2.5 - self.viewer.cam.azimuth = 132.0 - self.viewer.cam.elevation = -14.0 + return self.sim.data.body_xpos[body_id] def _render_callback(self): # Visualize target. 
@@ -186,29 +256,10 @@ def _reset_sim(self): self.sim.forward() return True - def _sample_goal(self): - if self.has_object: - goal = self.initial_gripper_xpos[:3] + self.np_random.uniform( - -self.target_range, self.target_range, size=3 - ) - goal += self.target_offset - goal[2] = self.height_offset - if self.target_in_the_air and self.np_random.uniform() < 0.5: - goal[2] += self.np_random.uniform(0, 0.45) - else: - goal = self.initial_gripper_xpos[:3] + self.np_random.uniform( - -self.target_range, self.target_range, size=3 - ) - return goal.copy() - - def _is_success(self, achieved_goal, desired_goal): - d = goal_distance(achieved_goal, desired_goal) - return (d < self.distance_threshold).astype(np.float32) - def _env_setup(self, initial_qpos): for name, value in initial_qpos.items(): self.sim.data.set_joint_qpos(name, value) - utils.reset_mocap_welds(self.sim) + self._utils.reset_mocap_welds(self.sim) self.sim.forward() # Move end effector into position. @@ -226,5 +277,136 @@ def _env_setup(self, initial_qpos): if self.has_object: self.height_offset = self.sim.data.get_site_xpos("object0")[2] - def render(self, mode="human", width=500, height=500): - return super().render(mode, width, height) + +class MujocoFetchEnv(get_base_fetch_env(MujocoRobotEnv)): + def _step_callback(self): + if self.block_gripper: + self._utils.set_joint_qpos( + self.model, self.data, "robot0:l_gripper_finger_joint", 0.0 + ) + self._utils.set_joint_qpos( + self.model, self.data, "robot0:r_gripper_finger_joint", 0.0 + ) + self._mujoco.mj_forward(self.model, self.data) + + def _set_action(self, action): + action = super()._set_action(action) + + # Apply action to simulation. 
+ self._utils.ctrl_set_action(self.model, self.data, action) + self._utils.mocap_set_action(self.model, self.data, action) + + def generate_mujoco_observations(self): + # positions + grip_pos = self._utils.get_site_xpos(self.model, self.data, "robot0:grip") + + dt = self.n_substeps * self.model.opt.timestep + grip_velp = ( + self._utils.get_site_xvelp(self.model, self.data, "robot0:grip") * dt + ) + + robot_qpos, robot_qvel = self._utils.robot_get_obs( + self.model, self.data, self._model_names.joint_names + ) + if self.has_object: + object_pos = self._utils.get_site_xpos(self.model, self.data, "object0") + # rotations + object_rot = rotations.mat2euler( + self._utils.get_site_xmat(self.model, self.data, "object0") + ) + # velocities + object_velp = ( + self._utils.get_site_xvelp(self.model, self.data, "object0") * dt + ) + object_velr = ( + self._utils.get_site_xvelr(self.model, self.data, "object0") * dt + ) + # gripper state + object_rel_pos = object_pos - grip_pos + object_velp -= grip_velp + else: + object_pos = ( + object_rot + ) = object_velp = object_velr = object_rel_pos = np.zeros(0) + gripper_state = robot_qpos[-2:] + + gripper_vel = ( + robot_qvel[-2:] * dt + ) # change to a scalar if the gripper is made symmetric + + return ( + grip_pos, + object_pos, + object_rel_pos, + gripper_state, + object_rot, + object_velp, + object_velr, + grip_velp, + gripper_vel, + ) + + def get_gripper_xpos(self): + body_id = self._model_names.body_name2id["robot0:gripper_link"] + return self.data.xpos[body_id] + + def _render_callback(self): + # Visualize target. 
+ sites_offset = (self.data.site_xpos - self.model.site_pos).copy() + site_id = self._mujoco.mj_name2id( + self.model, self._mujoco.mjtObj.mjOBJ_SITE, "target0" + ) + self.model.site_pos[site_id] = self.goal - sites_offset[0] + self._mujoco.mj_forward(self.model, self.data) + + def _reset_sim(self): + self.data.time = self.initial_time + self.data.qpos[:] = np.copy(self.initial_qpos) + self.data.qvel[:] = np.copy(self.initial_qvel) + if self.model.na != 0: + self.data.act[:] = None + + # Randomize start position of object. + if self.has_object: + object_xpos = self.initial_gripper_xpos[:2] + while np.linalg.norm(object_xpos - self.initial_gripper_xpos[:2]) < 0.1: + object_xpos = self.initial_gripper_xpos[:2] + self.np_random.uniform( + -self.obj_range, self.obj_range, size=2 + ) + object_qpos = self._utils.get_joint_qpos( + self.model, self.data, "object0:joint" + ) + assert object_qpos.shape == (7,) + object_qpos[:2] = object_xpos + self._utils.set_joint_qpos( + self.model, self.data, "object0:joint", object_qpos + ) + + self._mujoco.mj_forward(self.model, self.data) + return True + + def _env_setup(self, initial_qpos): + for name, value in initial_qpos.items(): + self._utils.set_joint_qpos(self.model, self.data, name, value) + self._utils.reset_mocap_welds(self.model, self.data) + self._mujoco.mj_forward(self.model, self.data) + + # Move end effector into position. + gripper_target = np.array( + [-0.498, 0.005, -0.431 + self.gripper_extra_height] + ) + self._utils.get_site_xpos(self.model, self.data, "robot0:grip") + gripper_rotation = np.array([1.0, 0.0, 1.0, 0.0]) + self._utils.set_mocap_pos(self.model, self.data, "robot0:mocap", gripper_target) + self._utils.set_mocap_quat( + self.model, self.data, "robot0:mocap", gripper_rotation + ) + for _ in range(10): + self._mujoco.mj_step(self.model, self.data, nstep=self.n_substeps) + # Extract information for sampling goals. 
+ self.initial_gripper_xpos = self._utils.get_site_xpos( + self.model, self.data, "robot0:grip" + ).copy() + if self.has_object: + self.height_offset = self._utils.get_site_xpos( + self.model, self.data, "object0" + )[2] diff --git a/gym_robotics/envs/hand/manipulate.py b/gym_robotics/envs/hand/manipulate.py index fe9047fc..b73dc784 100644 --- a/gym_robotics/envs/hand/manipulate.py +++ b/gym_robotics/envs/hand/manipulate.py @@ -1,18 +1,12 @@ import os -import numpy as np +from typing import Union -from gym import utils, error -from gym_robotics.envs import rotations, hand_env -from gym_robotics.envs.utils import robot_get_obs +import numpy as np +from gym import error +from gym.utils.ezpickle import EzPickle -try: - import mujoco_py -except ImportError as e: - raise error.DependencyNotInstalled( - "{}. (HINT: you need to install mujoco_py, and also perform the setup instructions here: https://github.com/openai/mujoco-py/.)".format( - e - ) - ) +from gym_robotics.envs.hand_env import MujocoHandEnv, MujocoPyHandEnv +from gym_robotics.utils import rotations def quat_from_angle_and_axis(angle, axis): @@ -29,131 +23,314 @@ def quat_from_angle_and_axis(angle, axis): MANIPULATE_PEN_XML = os.path.join("hand", "manipulate_pen.xml") -class ManipulateEnv(hand_env.HandEnv): - def __init__( - self, - model_path, - target_position, - target_rotation, - target_position_range, - reward_type, - initial_qpos=None, - randomize_initial_position=True, - randomize_initial_rotation=True, - distance_threshold=0.01, - rotation_threshold=0.1, - n_substeps=20, - relative_control=False, - ignore_z_target_rotation=False, - ): - """Initializes a new Hand manipulation environment. - - Args: - model_path (string): path to the environments XML file - target_position (string): the type of target position: - - ignore: target position is fully ignored, i.e. 
the object can be positioned arbitrarily - - fixed: target position is set to the initial position of the object - - random: target position is fully randomized according to target_position_range - target_rotation (string): the type of target rotation: - - ignore: target rotation is fully ignored, i.e. the object can be rotated arbitrarily - - fixed: target rotation is set to the initial rotation of the object - - xyz: fully randomized target rotation around the X, Y and Z axis - - z: fully randomized target rotation around the Z axis - - parallel: fully randomized target rotation around Z and axis-aligned rotation around X, Y - ignore_z_target_rotation (boolean): whether or not the Z axis of the target rotation is ignored - target_position_range (np.array of shape (3, 2)): range of the target_position randomization - reward_type ('sparse' or 'dense'): the reward type, i.e. sparse or dense - initial_qpos (dict): a dictionary of joint names and values that define the initial configuration - randomize_initial_position (boolean): whether or not to randomize the initial position of the object - randomize_initial_rotation (boolean): whether or not to randomize the initial rotation of the object - distance_threshold (float, in meters): the threshold after which the position of a goal is considered achieved - rotation_threshold (float, in radians): the threshold after which the rotation of a goal is considered achieved - n_substeps (int): number of substeps the simulation runs on every call to step - relative_control (boolean): whether or not the hand is actuated in absolute joint positions or relative to the current state - """ - self.target_position = target_position - self.target_rotation = target_rotation - self.target_position_range = target_position_range - self.parallel_quats = [ - rotations.euler2quat(r) for r in rotations.get_parallel_rotations() - ] - self.randomize_initial_rotation = randomize_initial_rotation - self.randomize_initial_position = 
randomize_initial_position - self.distance_threshold = distance_threshold - self.rotation_threshold = rotation_threshold - self.reward_type = reward_type - self.ignore_z_target_rotation = ignore_z_target_rotation - - assert self.target_position in ["ignore", "fixed", "random"] - assert self.target_rotation in ["ignore", "fixed", "xyz", "z", "parallel"] - initial_qpos = initial_qpos or {} - - hand_env.HandEnv.__init__( +def get_base_manipulate_env(HandEnvClass: Union[MujocoHandEnv, MujocoPyHandEnv]): + """Factory function that returns a BaseManipulateEnv class that inherits + from MujocoPyHandEnv or MujocoHandEnv depending on the mujoco python bindings. + """ + + class BaseManipulateEnv(HandEnvClass): + def __init__( self, - model_path, - n_substeps=n_substeps, - initial_qpos=initial_qpos, - relative_control=relative_control, - ) + target_position, + target_rotation, + target_position_range, + reward_type, + initial_qpos=None, + randomize_initial_position=True, + randomize_initial_rotation=True, + distance_threshold=0.01, + rotation_threshold=0.1, + n_substeps=20, + relative_control=False, + ignore_z_target_rotation=False, + **kwargs, + ): + """Initializes a new Hand manipulation environment. + + Args: + model_path (string): path to the environments XML file + target_position (string): the type of target position: + - ignore: target position is fully ignored, i.e. the object can be positioned arbitrarily + - fixed: target position is set to the initial position of the object + - random: target position is fully randomized according to target_position_range + target_rotation (string): the type of target rotation: + - ignore: target rotation is fully ignored, i.e. 
the object can be rotated arbitrarily + - fixed: target rotation is set to the initial rotation of the object + - xyz: fully randomized target rotation around the X, Y and Z axis + - z: fully randomized target rotation around the Z axis + - parallel: fully randomized target rotation around Z and axis-aligned rotation around X, Y + ignore_z_target_rotation (boolean): whether or not the Z axis of the target rotation is ignored + target_position_range (np.array of shape (3, 2)): range of the target_position randomization + reward_type ('sparse' or 'dense'): the reward type, i.e. sparse or dense + initial_qpos (dict): a dictionary of joint names and values that define the initial configuration + randomize_initial_position (boolean): whether or not to randomize the initial position of the object + randomize_initial_rotation (boolean): whether or not to randomize the initial rotation of the object + distance_threshold (float, in meters): the threshold after which the position of a goal is considered achieved + rotation_threshold (float, in radians): the threshold after which the rotation of a goal is considered achieved + n_substeps (int): number of substeps the simulation runs on every call to step + relative_control (boolean): whether or not the hand is actuated in absolute joint positions or relative to the current state + """ + self.target_position = target_position + self.target_rotation = target_rotation + self.target_position_range = target_position_range + self.parallel_quats = [ + rotations.euler2quat(r) for r in rotations.get_parallel_rotations() + ] + self.randomize_initial_rotation = randomize_initial_rotation + self.randomize_initial_position = randomize_initial_position + self.distance_threshold = distance_threshold + self.rotation_threshold = rotation_threshold + self.reward_type = reward_type + self.ignore_z_target_rotation = ignore_z_target_rotation + + assert self.target_position in ["ignore", "fixed", "random"] + assert self.target_rotation in 
["ignore", "fixed", "xyz", "z", "parallel"] + initial_qpos = initial_qpos or {} + + super().__init__( + n_substeps=n_substeps, + initial_qpos=initial_qpos, + relative_control=relative_control, + **kwargs, + ) + + def _goal_distance(self, goal_a, goal_b): + assert goal_a.shape == goal_b.shape + assert goal_a.shape[-1] == 7 + + d_pos = np.zeros_like(goal_a[..., 0]) + d_rot = np.zeros_like(goal_b[..., 0]) + if self.target_position != "ignore": + delta_pos = goal_a[..., :3] - goal_b[..., :3] + d_pos = np.linalg.norm(delta_pos, axis=-1) + + if self.target_rotation != "ignore": + quat_a, quat_b = goal_a[..., 3:], goal_b[..., 3:] + + if self.ignore_z_target_rotation: + # Special case: We want to ignore the Z component of the rotation. + # This code here assumes Euler angles with xyz convention. We first transform + # to euler, then set the Z component to be equal between the two, and finally + # transform back into quaternions. + euler_a = rotations.quat2euler(quat_a) + euler_b = rotations.quat2euler(quat_b) + euler_a[2] = euler_b[2] + quat_a = rotations.euler2quat(euler_a) + + # Subtract quaternions and extract angle between them. + quat_diff = rotations.quat_mul(quat_a, rotations.quat_conjugate(quat_b)) + angle_diff = 2 * np.arccos(np.clip(quat_diff[..., 0], -1.0, 1.0)) + d_rot = angle_diff + assert d_pos.shape == d_rot.shape + return d_pos, d_rot + + # GoalEnv methods + # ---------------------------- + + def compute_reward(self, achieved_goal, goal, info): + if self.reward_type == "sparse": + success = self._is_success(achieved_goal, goal).astype(np.float32) + return success - 1.0 + else: + d_pos, d_rot = self._goal_distance(achieved_goal, goal) + # We weigh the difference in position to avoid that `d_pos` (in meters) is completely + # dominated by `d_rot` (in radians). 
+ return -(10.0 * d_pos + d_rot) + + # RobotEnv methods + # ---------------------------- + def _is_success(self, achieved_goal, desired_goal): + d_pos, d_rot = self._goal_distance(achieved_goal, desired_goal) + achieved_pos = (d_pos < self.distance_threshold).astype(np.float32) + achieved_rot = (d_rot < self.rotation_threshold).astype(np.float32) + achieved_both = achieved_pos * achieved_rot + return achieved_both + + return BaseManipulateEnv + + +class MujocoManipulateEnv(get_base_manipulate_env(MujocoHandEnv)): def _get_achieved_goal(self): - # Object position and rotation. - object_qpos = self.sim.data.get_joint_qpos("object:joint") + object_qpos = self._utils.get_joint_qpos(self.model, self.data, "object:joint") assert object_qpos.shape == (7,) return object_qpos - def _goal_distance(self, goal_a, goal_b): - assert goal_a.shape == goal_b.shape - assert goal_a.shape[-1] == 7 - - d_pos = np.zeros_like(goal_a[..., 0]) - d_rot = np.zeros_like(goal_b[..., 0]) - if self.target_position != "ignore": - delta_pos = goal_a[..., :3] - goal_b[..., :3] - d_pos = np.linalg.norm(delta_pos, axis=-1) - - if self.target_rotation != "ignore": - quat_a, quat_b = goal_a[..., 3:], goal_b[..., 3:] - - if self.ignore_z_target_rotation: - # Special case: We want to ignore the Z component of the rotation. - # This code here assumes Euler angles with xyz convention. We first transform - # to euler, then set the Z component to be equal between the two, and finally - # transform back into quaternions. - euler_a = rotations.quat2euler(quat_a) - euler_b = rotations.quat2euler(quat_b) - euler_a[2] = euler_b[2] - quat_a = rotations.euler2quat(euler_a) - - # Subtract quaternions and extract angle between them. 
- quat_diff = rotations.quat_mul(quat_a, rotations.quat_conjugate(quat_b)) - angle_diff = 2 * np.arccos(np.clip(quat_diff[..., 0], -1.0, 1.0)) - d_rot = angle_diff - assert d_pos.shape == d_rot.shape - return d_pos, d_rot - - # GoalEnv methods - # ---------------------------- - - def compute_reward(self, achieved_goal, goal, info): - if self.reward_type == "sparse": - success = self._is_success(achieved_goal, goal).astype(np.float32) - return success - 1.0 + def _env_setup(self, initial_qpos): + for name, value in initial_qpos.items(): + self.data.set_joint_qpos(name, value) + self._mujoco.mj_forward(self.model, self.data) + + def _reset_sim(self): + self.data.time = self.initial_time + self.data.qpos[:] = np.copy(self.initial_qpos) + self.data.qvel[:] = np.copy(self.initial_qvel) + if self.model.na != 0: + self.data.act[:] = None + + self._mujoco.mj_forward(self.model, self.data) + initial_qpos = self._utils.get_joint_qpos( + self.model, self.data, "object:joint" + ).copy() + + initial_pos, initial_quat = initial_qpos[:3], initial_qpos[3:] + assert initial_qpos.shape == (7,) + assert initial_pos.shape == (3,) + assert initial_quat.shape == (4,) + initial_qpos = None + + # Randomization initial rotation. 
+ if self.randomize_initial_rotation: + if self.target_rotation == "z": + angle = self.np_random.uniform(-np.pi, np.pi) + axis = np.array([0.0, 0.0, 1.0]) + offset_quat = quat_from_angle_and_axis(angle, axis) + initial_quat = rotations.quat_mul(initial_quat, offset_quat) + elif self.target_rotation == "parallel": + angle = self.np_random.uniform(-np.pi, np.pi) + axis = np.array([0.0, 0.0, 1.0]) + z_quat = quat_from_angle_and_axis(angle, axis) + parallel_quat = self.parallel_quats[ + self.np_random.integers(len(self.parallel_quats)) + ] + offset_quat = rotations.quat_mul(z_quat, parallel_quat) + initial_quat = rotations.quat_mul(initial_quat, offset_quat) + elif self.target_rotation in ["xyz", "ignore"]: + angle = self.np_random.uniform(-np.pi, np.pi) + axis = self.np_random.uniform(-1.0, 1.0, size=3) + offset_quat = quat_from_angle_and_axis(angle, axis) + initial_quat = rotations.quat_mul(initial_quat, offset_quat) + elif self.target_rotation == "fixed": + pass + else: + raise error.Error( + f'Unknown target_rotation option "{self.target_rotation}".' + ) + + # Randomize initial position. + if self.randomize_initial_position: + if self.target_position != "fixed": + initial_pos += self.np_random.normal(size=3, scale=0.005) + + initial_quat /= np.linalg.norm(initial_quat) + initial_qpos = np.concatenate([initial_pos, initial_quat]) + + self._utils.set_joint_qpos(self.model, self.data, "object:joint", initial_qpos) + + def is_on_palm(): + self._mujoco.mj_forward(self.model, self.data) + cube_middle_idx = self._model_names._site_name2id["object:center"] + cube_middle_pos = self.data.site_xpos[cube_middle_idx] + is_on_palm = cube_middle_pos[2] > 0.04 + return is_on_palm + + # Run the simulation for a bunch of timesteps to let everything settle in. 
+ for _ in range(10): + self._set_action(np.zeros(20)) + try: + self._mujoco.mj_step(self.model, self.data, nstep=self.n_substeps) + except Exception: + return False + + return is_on_palm() + + def _sample_goal(self): + # Select a goal for the object position. + target_pos = None + if self.target_position == "random": + assert self.target_position_range.shape == (3, 2) + offset = self.np_random.uniform( + self.target_position_range[:, 0], self.target_position_range[:, 1] + ) + assert offset.shape == (3,) + target_pos = ( + self._utils.get_joint_qpos(self.model, self.data, "object:joint")[:3] + + offset + ) + elif self.target_position in ["ignore", "fixed"]: + target_pos = self._utils.get_joint_qpos( + self.model, self.data, "object:joint" + )[:3] + else: + raise error.Error( + f'Unknown target_position option "{self.target_position}".' + ) + assert target_pos is not None + assert target_pos.shape == (3,) + + # Select a goal for the object rotation. + target_quat = None + if self.target_rotation == "z": + angle = self.np_random.uniform(-np.pi, np.pi) + axis = np.array([0.0, 0.0, 1.0]) + target_quat = quat_from_angle_and_axis(angle, axis) + elif self.target_rotation == "parallel": + angle = self.np_random.uniform(-np.pi, np.pi) + axis = np.array([0.0, 0.0, 1.0]) + target_quat = quat_from_angle_and_axis(angle, axis) + parallel_quat = self.parallel_quats[ + self.np_random.integers(len(self.parallel_quats)) + ] + target_quat = rotations.quat_mul(target_quat, parallel_quat) + elif self.target_rotation == "xyz": + angle = self.np_random.uniform(-np.pi, np.pi) + axis = self.np_random.uniform(-1.0, 1.0, size=3) + target_quat = quat_from_angle_and_axis(angle, axis) + elif self.target_rotation in ["ignore", "fixed"]: + target_quat = self.data.get_joint_qpos("object:joint") else: - d_pos, d_rot = self._goal_distance(achieved_goal, goal) - # We weigh the difference in position to avoid that `d_pos` (in meters) is completely - # dominated by `d_rot` (in radians). 
- return -(10.0 * d_pos + d_rot) + raise error.Error( + f'Unknown target_rotation option "{self.target_rotation}".' + ) + assert target_quat is not None + assert target_quat.shape == (4,) - # RobotEnv methods - # ---------------------------- + target_quat /= np.linalg.norm(target_quat) # normalized quaternion + goal = np.concatenate([target_pos, target_quat]) + return goal + + def _render_callback(self): + # Assign current state to target object but offset a bit so that the actual object + # is not obscured. + goal = self.goal.copy() + assert goal.shape == (7,) + if self.target_position == "ignore": + # Move the object to the side since we do not care about it's position. + goal[0] += 0.15 + + self._utils.set_joint_qpos(self.model, self.data, "target:joint", goal) + self._utils.set_joint_qvel(self.model, self.data, "target:joint", np.zeros(6)) + + if "object_hidden" in self._model_names.geom_names: + hidden_id = self._model_names.geom_name2id["object_hidden"] + self.model.geom_rgba[hidden_id, 3] = 1.0 + self._mujoco.mj_forward(self.model, self.data) + + def _get_obs(self): + robot_qpos, robot_qvel = self._utils.robot_get_obs( + self.model, self.data, self._model_names.joint_names + ) + object_qvel = self._utils.get_joint_qvel(self.model, self.data, "object:joint") + achieved_goal = ( + self._get_achieved_goal().ravel() + ) # this contains the object position + rotation + observation = np.concatenate( + [robot_qpos, robot_qvel, object_qvel, achieved_goal] + ) + return { + "observation": observation.copy(), + "achieved_goal": achieved_goal.copy(), + "desired_goal": self.goal.ravel().copy(), + } + + +class MujocoPyManipulateEnv(get_base_manipulate_env(MujocoPyHandEnv)): + def _get_achieved_goal(self): + # Object position and rotation. 
+ object_qpos = self.sim.data.get_joint_qpos("object:joint") - def _is_success(self, achieved_goal, desired_goal): - d_pos, d_rot = self._goal_distance(achieved_goal, desired_goal) - achieved_pos = (d_pos < self.distance_threshold).astype(np.float32) - achieved_rot = (d_rot < self.rotation_threshold).astype(np.float32) - achieved_both = achieved_pos * achieved_rot - return achieved_both + assert object_qpos.shape == (7,) + return object_qpos def _env_setup(self, initial_qpos): for name, value in initial_qpos.items(): @@ -165,6 +342,7 @@ def _reset_sim(self): self.sim.forward() initial_qpos = self.sim.data.get_joint_qpos("object:joint").copy() + initial_pos, initial_quat = initial_qpos[:3], initial_qpos[3:] assert initial_qpos.shape == (7,) assert initial_pos.shape == (3,) @@ -206,12 +384,14 @@ def _reset_sim(self): initial_quat /= np.linalg.norm(initial_quat) initial_qpos = np.concatenate([initial_pos, initial_quat]) + self.sim.data.set_joint_qpos("object:joint", initial_qpos) def is_on_palm(): self.sim.forward() cube_middle_idx = self.sim.model.site_name2id("object:center") cube_middle_pos = self.sim.data.site_xpos[cube_middle_idx] + is_on_palm = cube_middle_pos[2] > 0.04 return is_on_palm @@ -220,8 +400,9 @@ def is_on_palm(): self._set_action(np.zeros(20)) try: self.sim.step() - except mujoco_py.MujocoException: + except self._mujoco_py.MujocoException: return False + return is_on_palm() def _sample_goal(self): @@ -234,6 +415,7 @@ def _sample_goal(self): ) assert offset.shape == (3,) target_pos = self.sim.data.get_joint_qpos("object:joint")[:3] + offset + elif self.target_position in ["ignore", "fixed"]: target_pos = self.sim.data.get_joint_qpos("object:joint")[:3] else: @@ -291,8 +473,9 @@ def _render_callback(self): self.sim.forward() def _get_obs(self): - robot_qpos, robot_qvel = robot_get_obs(self.sim) + robot_qpos, robot_qvel = self._utils.robot_get_obs(self.sim) object_qvel = self.sim.data.get_joint_qvel("object:joint") + achieved_goal = ( 
self._get_achieved_goal().ravel() ) # this contains the object position + rotation @@ -306,42 +489,118 @@ def _get_obs(self): } -class HandBlockEnv(ManipulateEnv, utils.EzPickle): +class MujocoHandBlockEnv(MujocoManipulateEnv): + def __init__( + self, + target_position="random", + target_rotation="xyz", + reward_type="sparse", + **kwargs, + ): + MujocoManipulateEnv.__init__( + self, + model_path=MANIPULATE_BLOCK_XML, + target_position=target_position, + target_rotation=target_rotation, + target_position_range=np.array([(-0.04, 0.04), (-0.06, 0.02), (0.0, 0.06)]), + reward_type=reward_type, + **kwargs, + ) + EzPickle.__init__(self, target_position, target_rotation, reward_type, **kwargs) + + +class MujocoPyHandBlockEnv(MujocoPyManipulateEnv, EzPickle): def __init__( - self, target_position="random", target_rotation="xyz", reward_type="sparse" + self, + target_position="random", + target_rotation="xyz", + reward_type="sparse", + **kwargs, ): - utils.EzPickle.__init__(self, target_position, target_rotation, reward_type) - ManipulateEnv.__init__( + MujocoPyManipulateEnv.__init__( self, model_path=MANIPULATE_BLOCK_XML, target_position=target_position, target_rotation=target_rotation, target_position_range=np.array([(-0.04, 0.04), (-0.06, 0.02), (0.0, 0.06)]), reward_type=reward_type, + **kwargs, ) + EzPickle.__init__(self, target_position, target_rotation, reward_type, **kwargs) -class HandEggEnv(ManipulateEnv, utils.EzPickle): +class MujocoHandEggEnv(MujocoManipulateEnv, EzPickle): def __init__( - self, target_position="random", target_rotation="xyz", reward_type="sparse" + self, + target_position="random", + target_rotation="xyz", + reward_type="sparse", + **kwargs, ): - utils.EzPickle.__init__(self, target_position, target_rotation, reward_type) - ManipulateEnv.__init__( + MujocoManipulateEnv.__init__( self, model_path=MANIPULATE_EGG_XML, target_position=target_position, target_rotation=target_rotation, target_position_range=np.array([(-0.04, 0.04), (-0.06, 0.02), 
(0.0, 0.06)]), reward_type=reward_type, + **kwargs, ) + EzPickle.__init__(self, target_position, target_rotation, reward_type, **kwargs) -class HandPenEnv(ManipulateEnv, utils.EzPickle): +class MujocoPyHandEggEnv(MujocoPyManipulateEnv, EzPickle): def __init__( - self, target_position="random", target_rotation="xyz", reward_type="sparse" + self, + target_position="random", + target_rotation="xyz", + reward_type="sparse", + **kwargs, + ): + MujocoPyManipulateEnv.__init__( + self, + model_path=MANIPULATE_EGG_XML, + target_position=target_position, + target_rotation=target_rotation, + target_position_range=np.array([(-0.04, 0.04), (-0.06, 0.02), (0.0, 0.06)]), + reward_type=reward_type, + **kwargs, + ) + EzPickle.__init__(self, target_position, target_rotation, reward_type, **kwargs) + + +class MujocoHandPenEnv(MujocoManipulateEnv, EzPickle): + def __init__( + self, + target_position="random", + target_rotation="xyz", + reward_type="sparse", + **kwargs, + ): + MujocoManipulateEnv.__init__( + self, + model_path=MANIPULATE_PEN_XML, + target_position=target_position, + target_rotation=target_rotation, + target_position_range=np.array([(-0.04, 0.04), (-0.06, 0.02), (0.0, 0.06)]), + randomize_initial_rotation=False, + reward_type=reward_type, + ignore_z_target_rotation=True, + distance_threshold=0.05, + **kwargs, + ) + EzPickle.__init__(self, target_position, target_rotation, reward_type, **kwargs) + + +class MujocoPyHandPenEnv(MujocoPyManipulateEnv, EzPickle): + def __init__( + self, + target_position="random", + target_rotation="xyz", + reward_type="sparse", + **kwargs, ): - utils.EzPickle.__init__(self, target_position, target_rotation, reward_type) - ManipulateEnv.__init__( + MujocoPyManipulateEnv.__init__( self, model_path=MANIPULATE_PEN_XML, target_position=target_position, @@ -351,4 +610,6 @@ def __init__( reward_type=reward_type, ignore_z_target_rotation=True, distance_threshold=0.05, + **kwargs, ) + EzPickle.__init__(self, target_position, target_rotation, 
reward_type, **kwargs) diff --git a/gym_robotics/envs/hand/manipulate_touch_sensors.py b/gym_robotics/envs/hand/manipulate_touch_sensors.py index 36e8049f..ca280d5a 100644 --- a/gym_robotics/envs/hand/manipulate_touch_sensors.py +++ b/gym_robotics/envs/hand/manipulate_touch_sensors.py @@ -1,8 +1,10 @@ import os + import numpy as np +from gym import spaces +from gym.utils.ezpickle import EzPickle -from gym import utils, error, spaces -from gym_robotics.envs.hand import manipulate +from gym_robotics.envs.hand.manipulate import MujocoManipulateEnv, MujocoPyManipulateEnv # Ensure we get the path separator correct on windows MANIPULATE_BLOCK_XML = os.path.join("hand", "manipulate_block_touch_sensors.xml") @@ -10,10 +12,9 @@ MANIPULATE_PEN_XML = os.path.join("hand", "manipulate_pen_touch_sensors.xml") -class ManipulateTouchSensorsEnv(manipulate.ManipulateEnv): +class MujocoManipulateTouchSensorsEnv(MujocoManipulateEnv): def __init__( self, - model_path, target_position, target_rotation, target_position_range, @@ -28,6 +29,7 @@ def __init__( ignore_z_target_rotation=False, touch_visualisation="on_touch", touch_get_obs="sensordata", + **kwargs, ): """Initializes a new Hand manipulation environment with touch sensors. 
@@ -50,13 +52,11 @@ def __init__( self.touch_color = [1, 0, 0, 0.5] self.notouch_color = [0, 0.5, 0, 0.2] - manipulate.ManipulateEnv.__init__( - self, - model_path, - target_position, - target_rotation, - target_position_range, - reward_type, + super().__init__( + target_position=target_position, + target_rotation=target_rotation, + target_position_range=target_position_range, + reward_type=reward_type, initial_qpos=initial_qpos, randomize_initial_position=randomize_initial_position, randomize_initial_rotation=randomize_initial_rotation, @@ -65,6 +65,138 @@ def __init__( n_substeps=n_substeps, relative_control=relative_control, ignore_z_target_rotation=ignore_z_target_rotation, + **kwargs, + ) + + for ( + k, + v, + ) in ( + self._model_names.sensor_name2id.items() + ): # get touch sensor site names and their ids + if "robot0:TS_" in k: + self._touch_sensor_id_site_id.append( + ( + v, + self._model_names.site_name2id[ + k.replace("robot0:TS_", "robot0:T_") + ], + ) + ) + self._touch_sensor_id.append(v) + + if self.touch_visualisation == "off": # set touch sensors rgba values + for _, site_id in self._touch_sensor_id_site_id: + self.model.site_rgba[site_id][3] = 0.0 + elif self.touch_visualisation == "always": + pass + + obs = self._get_obs() + self.observation_space = spaces.Dict( + dict( + desired_goal=spaces.Box( + -np.inf, np.inf, shape=obs["achieved_goal"].shape, dtype="float64" + ), + achieved_goal=spaces.Box( + -np.inf, np.inf, shape=obs["achieved_goal"].shape, dtype="float64" + ), + observation=spaces.Box( + -np.inf, np.inf, shape=obs["observation"].shape, dtype="float64" + ), + ) + ) + + def _render_callback(self): + super()._render_callback() + if self.touch_visualisation == "on_touch": + for touch_sensor_id, site_id in self._touch_sensor_id_site_id: + if self.data.sensordata[touch_sensor_id] != 0.0: + self.model.site_rgba[site_id] = self.touch_color + else: + self.model.site_rgba[site_id] = self.notouch_color + + def _get_obs(self): + robot_qpos, 
robot_qvel = self._utils.robot_get_obs( + self.model, self.data, self._model_names.joint_names + ) + object_qvel = self._utils.get_joint_qvel(self.model, self.data, "object:joint") + + achieved_goal = ( + self._get_achieved_goal().ravel() + ) # this contains the object position + rotation + touch_values = [] # get touch sensor readings. if there is one, set value to 1 + + if self.touch_get_obs == "sensordata": + touch_values = self.data.sensordata[self._touch_sensor_id] + elif self.touch_get_obs == "boolean": + touch_values = self.data.sensordata[self._touch_sensor_id] > 0.0 + elif self.touch_get_obs == "log": + touch_values = np.log(self.data.sensordata[self._touch_sensor_id] + 1.0) + observation = np.concatenate( + [robot_qpos, robot_qvel, object_qvel, touch_values, achieved_goal] + ) + + return { + "observation": observation.copy(), + "achieved_goal": achieved_goal.copy(), + "desired_goal": self.goal.ravel().copy(), + } + + +class MujocoPyManipulateTouchSensorsEnv(MujocoPyManipulateEnv): + def __init__( + self, + target_position, + target_rotation, + target_position_range, + reward_type, + initial_qpos={}, + randomize_initial_position=True, + randomize_initial_rotation=True, + distance_threshold=0.01, + rotation_threshold=0.1, + n_substeps=20, + relative_control=False, + ignore_z_target_rotation=False, + touch_visualisation="on_touch", + touch_get_obs="sensordata", + **kwargs, + ): + """Initializes a new Hand manipulation environment with touch sensors. 
+ + Args: + touch_visualisation (string): how touch sensor sites are visualised + - "on_touch": shows touch sensor sites only when touch values > 0 + - "always": always shows touch sensor sites + - "off" or else: does not show touch sensor sites + touch_get_obs (string): touch sensor readings + - "boolean": returns 1 if touch sensor reading != 0.0 else 0 + - "sensordata": returns original touch sensor readings from self.sim.data.sensordata[id] + - "log": returns log(x+1) touch sensor readings from self.sim.data.sensordata[id] + - "off" or else: does not add touch sensor readings to the observation + + """ + self.touch_visualisation = touch_visualisation + self.touch_get_obs = touch_get_obs + self._touch_sensor_id_site_id = [] + self._touch_sensor_id = [] + self.touch_color = [1, 0, 0, 0.5] + self.notouch_color = [0, 0.5, 0, 0.2] + + super().__init__( + target_position=target_position, + target_rotation=target_rotation, + target_position_range=target_position_range, + reward_type=reward_type, + initial_qpos=initial_qpos, + randomize_initial_position=randomize_initial_position, + randomize_initial_rotation=randomize_initial_rotation, + distance_threshold=distance_threshold, + rotation_threshold=rotation_threshold, + n_substeps=n_substeps, + relative_control=relative_control, + ignore_z_target_rotation=ignore_z_target_rotation, + **kwargs, ) for ( @@ -94,13 +226,13 @@ def __init__( self.observation_space = spaces.Dict( dict( desired_goal=spaces.Box( - -np.inf, np.inf, shape=obs["achieved_goal"].shape, dtype="float32" + -np.inf, np.inf, shape=obs["achieved_goal"].shape, dtype="float64" ), achieved_goal=spaces.Box( - -np.inf, np.inf, shape=obs["achieved_goal"].shape, dtype="float32" + -np.inf, np.inf, shape=obs["achieved_goal"].shape, dtype="float64" ), observation=spaces.Box( - -np.inf, np.inf, shape=obs["observation"].shape, dtype="float32" + -np.inf, np.inf, shape=obs["observation"].shape, dtype="float64" ), ) ) @@ -115,18 +247,21 @@ def _render_callback(self): 
self.sim.model.site_rgba[site_id] = self.notouch_color def _get_obs(self): - robot_qpos, robot_qvel = manipulate.robot_get_obs(self.sim) + robot_qpos, robot_qvel = self._utils.robot_get_obs(self.sim) object_qvel = self.sim.data.get_joint_qvel("object:joint") + achieved_goal = ( self._get_achieved_goal().ravel() ) # this contains the object position + rotation touch_values = [] # get touch sensor readings. if there is one, set value to 1 + if self.touch_get_obs == "sensordata": touch_values = self.sim.data.sensordata[self._touch_sensor_id] elif self.touch_get_obs == "boolean": touch_values = self.sim.data.sensordata[self._touch_sensor_id] > 0.0 elif self.touch_get_obs == "log": touch_values = np.log(self.sim.data.sensordata[self._touch_sensor_id] + 1.0) + observation = np.concatenate( [robot_qpos, robot_qvel, object_qvel, touch_values, achieved_goal] ) @@ -138,18 +273,16 @@ def _get_obs(self): } -class HandBlockTouchSensorsEnv(ManipulateTouchSensorsEnv, utils.EzPickle): +class MujocoHandBlockTouchSensorsEnv(MujocoManipulateTouchSensorsEnv, EzPickle): def __init__( self, target_position="random", target_rotation="xyz", touch_get_obs="sensordata", reward_type="sparse", + **kwargs, ): - utils.EzPickle.__init__( - self, target_position, target_rotation, touch_get_obs, reward_type - ) - ManipulateTouchSensorsEnv.__init__( + MujocoManipulateTouchSensorsEnv.__init__( self, model_path=MANIPULATE_BLOCK_XML, touch_get_obs=touch_get_obs, @@ -157,21 +290,23 @@ def __init__( target_position=target_position, target_position_range=np.array([(-0.04, 0.04), (-0.06, 0.02), (0.0, 0.06)]), reward_type=reward_type, + **kwargs, + ) + EzPickle.__init__( + self, target_position, target_rotation, touch_get_obs, reward_type, **kwargs ) -class HandEggTouchSensorsEnv(ManipulateTouchSensorsEnv, utils.EzPickle): +class MujocoHandEggTouchSensorsEnv(MujocoManipulateTouchSensorsEnv, EzPickle): def __init__( self, target_position="random", target_rotation="xyz", touch_get_obs="sensordata", 
reward_type="sparse", + **kwargs, ): - utils.EzPickle.__init__( - self, target_position, target_rotation, touch_get_obs, reward_type - ) - ManipulateTouchSensorsEnv.__init__( + MujocoManipulateTouchSensorsEnv.__init__( self, model_path=MANIPULATE_EGG_XML, touch_get_obs=touch_get_obs, @@ -179,21 +314,98 @@ def __init__( target_position=target_position, target_position_range=np.array([(-0.04, 0.04), (-0.06, 0.02), (0.0, 0.06)]), reward_type=reward_type, + **kwargs, + ) + EzPickle.__init__( + self, target_position, target_rotation, touch_get_obs, reward_type, **kwargs + ) + + +class MujocoHandPenTouchSensorsEnv(MujocoManipulateTouchSensorsEnv, EzPickle): + def __init__( + self, + target_position="random", + target_rotation="xyz", + touch_get_obs="sensordata", + reward_type="sparse", + **kwargs, + ): + MujocoManipulateTouchSensorsEnv.__init__( + self, + model_path=MANIPULATE_PEN_XML, + touch_get_obs=touch_get_obs, + target_rotation=target_rotation, + target_position=target_position, + target_position_range=np.array([(-0.04, 0.04), (-0.06, 0.02), (0.0, 0.06)]), + randomize_initial_rotation=False, + reward_type=reward_type, + ignore_z_target_rotation=True, + distance_threshold=0.05, + **kwargs, + ) + EzPickle.__init__( + self, target_position, target_rotation, touch_get_obs, reward_type, **kwargs ) -class HandPenTouchSensorsEnv(ManipulateTouchSensorsEnv, utils.EzPickle): +class MujocoPyHandBlockTouchSensorsEnv(MujocoPyManipulateTouchSensorsEnv, EzPickle): def __init__( self, target_position="random", target_rotation="xyz", touch_get_obs="sensordata", reward_type="sparse", + **kwargs, ): - utils.EzPickle.__init__( - self, target_position, target_rotation, touch_get_obs, reward_type + MujocoPyManipulateTouchSensorsEnv.__init__( + self, + model_path=MANIPULATE_BLOCK_XML, + touch_get_obs=touch_get_obs, + target_rotation=target_rotation, + target_position=target_position, + target_position_range=np.array([(-0.04, 0.04), (-0.06, 0.02), (0.0, 0.06)]), + reward_type=reward_type, 
+ **kwargs, ) - ManipulateTouchSensorsEnv.__init__( + EzPickle.__init__( + self, target_position, target_rotation, touch_get_obs, reward_type, **kwargs + ) + + +class MujocoPyHandEggTouchSensorsEnv(MujocoPyManipulateTouchSensorsEnv, EzPickle): + def __init__( + self, + target_position="random", + target_rotation="xyz", + touch_get_obs="sensordata", + reward_type="sparse", + **kwargs, + ): + MujocoPyManipulateTouchSensorsEnv.__init__( + self, + model_path=MANIPULATE_EGG_XML, + touch_get_obs=touch_get_obs, + target_rotation=target_rotation, + target_position=target_position, + target_position_range=np.array([(-0.04, 0.04), (-0.06, 0.02), (0.0, 0.06)]), + reward_type=reward_type, + **kwargs, + ) + EzPickle.__init__( + self, target_position, target_rotation, touch_get_obs, reward_type, **kwargs + ) + + +class MujocoPyHandPenTouchSensorsEnv(MujocoPyManipulateTouchSensorsEnv, EzPickle): + def __init__( + self, + target_position="random", + target_rotation="xyz", + touch_get_obs="sensordata", + reward_type="sparse", + **kwargs, + ): + MujocoPyManipulateTouchSensorsEnv.__init__( self, model_path=MANIPULATE_PEN_XML, touch_get_obs=touch_get_obs, @@ -204,4 +416,9 @@ def __init__( reward_type=reward_type, ignore_z_target_rotation=True, distance_threshold=0.05, + **kwargs, + ) + + EzPickle.__init__( + self, target_position, target_rotation, touch_get_obs, reward_type, **kwargs ) diff --git a/gym_robotics/envs/hand/reach.py b/gym_robotics/envs/hand/reach.py index 1b7911b2..ffda2322 100644 --- a/gym_robotics/envs/hand/reach.py +++ b/gym_robotics/envs/hand/reach.py @@ -1,10 +1,10 @@ import os -import numpy as np +from typing import Union -from gym import utils -from gym_robotics.envs import hand_env -from gym_robotics.envs.utils import robot_get_obs +import numpy as np +from gym.utils.ezpickle import EzPickle +from gym_robotics.envs.hand_env import MujocoHandEnv, MujocoPyHandEnv FINGERTIP_SITE_NAMES = [ "robot0:S_fftip", @@ -52,40 +52,147 @@ def goal_distance(goal_a, goal_b): 
return np.linalg.norm(goal_a - goal_b, axis=-1) -class HandReachEnv(hand_env.HandEnv, utils.EzPickle): - def __init__( - self, - distance_threshold=0.01, - n_substeps=20, - relative_control=False, - initial_qpos=DEFAULT_INITIAL_QPOS, - reward_type="sparse", - ): - utils.EzPickle.__init__(**locals()) - self.distance_threshold = distance_threshold - self.reward_type = reward_type - - hand_env.HandEnv.__init__( +def get_base_hand_reanch_env(HandEnvClass: Union[MujocoHandEnv, MujocoPyHandEnv]): + class BaseHandReachEnv(HandEnvClass, EzPickle): + def __init__( self, - MODEL_XML_PATH, - n_substeps=n_substeps, - initial_qpos=initial_qpos, - relative_control=relative_control, - ) + distance_threshold=0.01, + n_substeps=20, + relative_control=False, + initial_qpos=DEFAULT_INITIAL_QPOS, + reward_type="sparse", + **kwargs, + ): + + self.distance_threshold = distance_threshold + self.reward_type = reward_type + + HandEnvClass.__init__( + self, + model_path=MODEL_XML_PATH, + n_substeps=n_substeps, + initial_qpos=initial_qpos, + relative_control=relative_control, + **kwargs, + ) + + EzPickle.__init__( + self, + distance_threshold, + n_substeps, + relative_control, + initial_qpos, + reward_type, + **kwargs, + ) + + # GoalEnv methods + # ---------------------------- + + def compute_reward(self, achieved_goal, goal, info): + d = goal_distance(achieved_goal, goal) + if self.reward_type == "sparse": + return -(d > self.distance_threshold).astype(np.float32) + else: + return -d + + def _sample_goal(self): + + thumb_name = "robot0:S_thtip" + finger_names = [name for name in FINGERTIP_SITE_NAMES if name != thumb_name] + finger_name = self.np_random.choice(finger_names) + + thumb_idx = FINGERTIP_SITE_NAMES.index(thumb_name) + finger_idx = FINGERTIP_SITE_NAMES.index(finger_name) + assert thumb_idx != finger_idx + + # Pick a meeting point above the hand. 
+ meeting_pos = self.palm_xpos + np.array([0.0, -0.09, 0.05]) + meeting_pos += self.np_random.normal(scale=0.005, size=meeting_pos.shape) + + # Slightly move meeting goal towards the respective finger to avoid that they + # overlap. + goal = self.initial_goal.copy().reshape(-1, 3) + for idx in [thumb_idx, finger_idx]: + offset_direction = meeting_pos - goal[idx] + offset_direction /= np.linalg.norm(offset_direction) + goal[idx] = meeting_pos - 0.005 * offset_direction + + if self.np_random.uniform() < 0.1: + # With some probability, ask all fingers to move back to the origin. + # This avoids that the thumb constantly stays near the goal position already. + goal = self.initial_goal.copy() + + return goal.flatten() + def _is_success(self, achieved_goal, desired_goal): + d = goal_distance(achieved_goal, desired_goal) + return (d < self.distance_threshold).astype(np.float32) + + def _get_achieved_goal(self): + raise NotImplementedError + + return BaseHandReachEnv + + +class MujocoHandReachEnv(get_base_hand_reanch_env(MujocoHandEnv)): def _get_achieved_goal(self): - goal = [self.sim.data.get_site_xpos(name) for name in FINGERTIP_SITE_NAMES] + goal = [ + self._utils.get_site_xpos(self.model, self.data, name) + for name in FINGERTIP_SITE_NAMES + ] return np.array(goal).flatten() - # GoalEnv methods + # RobotEnv methods # ---------------------------- - def compute_reward(self, achieved_goal, goal, info): - d = goal_distance(achieved_goal, goal) - if self.reward_type == "sparse": - return -(d > self.distance_threshold).astype(np.float32) - else: - return -d + def _env_setup(self, initial_qpos): + for name, value in initial_qpos.items(): + self._utils.set_joint_qpos(self.model, self.data, name, value) + self._mujoco.mj_forward(self.model, self.data) + + self.initial_goal = self._get_achieved_goal().copy() + self.palm_xpos = self.data.xpos[ + self._model_names.body_name2id["robot0:palm"] + ].copy() + + def _get_obs(self): + robot_qpos, robot_qvel = self._utils.robot_get_obs( 
+ self.model, self.data, self._model_names.joint_names + ) + achieved_goal = self._get_achieved_goal().ravel() + observation = np.concatenate([robot_qpos, robot_qvel, achieved_goal]) + return { + "observation": observation.copy(), + "achieved_goal": achieved_goal.copy(), + "desired_goal": self.goal.copy(), + } + + def _render_callback(self): + # Visualize targets. + sites_offset = (self.data.site_xpos - self.model.site_pos).copy() + goal = self.goal.reshape(5, 3) + for finger_idx in range(5): + site_name = f"target{finger_idx}" + site_id = self._model_names.site_name2id[site_name] + self.model.site_pos[site_id] = goal[finger_idx] - sites_offset[site_id] + + # Visualize finger positions. + achieved_goal = self._get_achieved_goal().reshape(5, 3) + for finger_idx in range(5): + site_name = f"finger{finger_idx}" + site_id = self._model_names.site_name2id[site_name] + self.model.site_pos[site_id] = ( + achieved_goal[finger_idx] - sites_offset[site_id] + ) + self._mujoco.mj_forward(self.model, self.data) + + +class MujocoPyHandReachEnv(get_base_hand_reanch_env(MujocoPyHandEnv)): + def _get_achieved_goal(self): + goal = [self.sim.data.get_site_xpos(name) for name in FINGERTIP_SITE_NAMES] + + return np.array(goal).flatten() # RobotEnv methods # ---------------------------- @@ -101,7 +208,8 @@ def _env_setup(self, initial_qpos): ].copy() def _get_obs(self): - robot_qpos, robot_qvel = robot_get_obs(self.sim) + robot_qpos, robot_qvel = self._utils.robot_get_obs(self.sim) + achieved_goal = self._get_achieved_goal().ravel() observation = np.concatenate([robot_qpos, robot_qvel, achieved_goal]) return { @@ -110,37 +218,6 @@ def _get_obs(self): "desired_goal": self.goal.copy(), } - def _sample_goal(self): - thumb_name = "robot0:S_thtip" - finger_names = [name for name in FINGERTIP_SITE_NAMES if name != thumb_name] - finger_name = self.np_random.choice(finger_names) - - thumb_idx = FINGERTIP_SITE_NAMES.index(thumb_name) - finger_idx = FINGERTIP_SITE_NAMES.index(finger_name) - 
assert thumb_idx != finger_idx - - # Pick a meeting point above the hand. - meeting_pos = self.palm_xpos + np.array([0.0, -0.09, 0.05]) - meeting_pos += self.np_random.normal(scale=0.005, size=meeting_pos.shape) - - # Slightly move meeting goal towards the respective finger to avoid that they - # overlap. - goal = self.initial_goal.copy().reshape(-1, 3) - for idx in [thumb_idx, finger_idx]: - offset_direction = meeting_pos - goal[idx] - offset_direction /= np.linalg.norm(offset_direction) - goal[idx] = meeting_pos - 0.005 * offset_direction - - if self.np_random.uniform() < 0.1: - # With some probability, ask all fingers to move back to the origin. - # This avoids that the thumb constantly stays near the goal position already. - goal = self.initial_goal.copy() - return goal.flatten() - - def _is_success(self, achieved_goal, desired_goal): - d = goal_distance(achieved_goal, desired_goal) - return (d < self.distance_threshold).astype(np.float32) - def _render_callback(self): # Visualize targets. sites_offset = (self.sim.data.site_xpos - self.sim.model.site_pos).copy() diff --git a/gym_robotics/envs/hand_env.py b/gym_robotics/envs/hand_env.py index cf35dcf1..6b721295 100644 --- a/gym_robotics/envs/hand_env.py +++ b/gym_robotics/envs/hand_env.py @@ -1,29 +1,72 @@ -import os -import copy +from typing import Union + import numpy as np -import gym -from gym import error, spaces -from gym.utils import seeding -from gym_robotics.envs import robot_env +from gym_robotics.envs.robot_env import MujocoPyRobotEnv, MujocoRobotEnv -class HandEnv(robot_env.RobotEnv): - def __init__(self, model_path, n_substeps, initial_qpos, relative_control): - self.relative_control = relative_control +def get_base_hand_env( + RobotEnvClass: Union[MujocoPyRobotEnv, MujocoRobotEnv] +) -> Union[MujocoPyRobotEnv, MujocoRobotEnv]: + """Factory function that returns a BaseHandEnv class that inherits + from MujocoPyRobotEnv or MujocoRobotEnv depending on the mujoco python bindings. 
+ """ - super().__init__( - model_path=model_path, - n_substeps=n_substeps, - n_actions=20, - initial_qpos=initial_qpos, - ) + class BaseHandEnv(RobotEnvClass): + """Base class for all robotic hand environments.""" + + def __init__(self, relative_control, **kwargs): + self.relative_control = relative_control + super().__init__(n_actions=20, **kwargs) + + # RobotEnv methods + # ---------------------------- + def _get_palm_xpos(self): + raise NotImplementedError + + def _set_action(self, action): + assert action.shape == (20,) + + def _viewer_setup(self): + lookat = self._get_palm_xpos() + for idx, value in enumerate(lookat): + self.viewer.cam.lookat[idx] = value + self.viewer.cam.distance = 0.5 + self.viewer.cam.azimuth = 55.0 + self.viewer.cam.elevation = -25.0 + + return BaseHandEnv + + +class MujocoHandEnv(get_base_hand_env(MujocoRobotEnv)): + def _set_action(self, action): + super()._set_action(action) + ctrlrange = self.model.actuator_ctrlrange + actuation_range = (ctrlrange[:, 1] - ctrlrange[:, 0]) / 2.0 + if self.relative_control: + actuation_center = np.zeros_like(action) + for i in range(self.data.ctrl.shape[0]): + actuation_center[i] = self.data.get_joint_qpos( + self.model.actuator_names[i].replace(":A_", ":") + ) + for joint_name in ["FF", "MF", "RF", "LF"]: + act_idx = self.model.actuator_name2id(f"robot0:A_{joint_name}J1") + actuation_center[act_idx] += self.data.get_joint_qpos( + f"robot0:{joint_name}J0" + ) + else: + actuation_center = (ctrlrange[:, 1] + ctrlrange[:, 0]) / 2.0 + self.data.ctrl[:] = actuation_center + action * actuation_range + self.data.ctrl[:] = np.clip(self.data.ctrl, ctrlrange[:, 0], ctrlrange[:, 1]) + + def _get_palm_xpos(self): + body_id = self._model_names.body_name2id["robot0:palm"] + return self.data.xpos[body_id] - # RobotEnv methods - # ---------------------------- +class MujocoPyHandEnv(get_base_hand_env(MujocoPyRobotEnv)): def _set_action(self, action): - assert action.shape == (20,) + super()._set_action(action) 
ctrlrange = self.sim.model.actuator_ctrlrange actuation_range = (ctrlrange[:, 1] - ctrlrange[:, 0]) / 2.0 @@ -45,14 +88,6 @@ def _set_action(self, action): self.sim.data.ctrl, ctrlrange[:, 0], ctrlrange[:, 1] ) - def _viewer_setup(self): + def _get_palm_xpos(self): body_id = self.sim.model.body_name2id("robot0:palm") - lookat = self.sim.data.body_xpos[body_id] - for idx, value in enumerate(lookat): - self.viewer.cam.lookat[idx] = value - self.viewer.cam.distance = 0.5 - self.viewer.cam.azimuth = 55.0 - self.viewer.cam.elevation = -25.0 - - def render(self, mode="human", width=500, height=500): - return super().render(mode, width, height) + return self.sim.data.body_xpos[body_id] diff --git a/gym_robotics/envs/robot_env.py b/gym_robotics/envs/robot_env.py index c2fd250b..8c24c541 100644 --- a/gym_robotics/envs/robot_env.py +++ b/gym_robotics/envs/robot_env.py @@ -1,72 +1,109 @@ -import os import copy -from typing import Optional - -import numpy as np +import os +from typing import Optional, Union import gym -from gym import error, spaces -from gym.utils import seeding +import numpy as np +from gym import error, logger, spaces from gym_robotics import GoalEnv try: import mujoco_py + + from gym_robotics.utils import mujoco_py_utils except ImportError as e: - raise error.DependencyNotInstalled( - "{}. 
(HINT: you need to install mujoco_py, and also perform the setup instructions here: https://github.com/openai/mujoco-py/.)".format( - e - ) - ) + MUJOCO_PY_IMPORT_ERROR = e +else: + MUJOCO_PY_IMPORT_ERROR = None -DEFAULT_SIZE = 500 +try: + import mujoco + from gym_robotics.utils import mujoco_utils +except ImportError as e: + MUJOCO_IMPORT_ERROR = e +else: + MUJOCO_IMPORT_ERROR = None + +DEFAULT_SIZE = 480 + + +class BaseRobotEnv(GoalEnv): + """Superclass for all MuJoCo robotic environments.""" -class RobotEnv(GoalEnv): - def __init__(self, model_path, initial_qpos, n_actions, n_substeps): + metadata = { + "render_modes": [ + "human", + "rgb_array", + "rgb_array_list", + ], + "render_fps": 25, + } + + def __init__( + self, + model_path: str, + initial_qpos, + n_actions: int, + n_substeps: int, + render_mode: Optional[str] = None, + width: int = DEFAULT_SIZE, + height: int = DEFAULT_SIZE, + ): if model_path.startswith("/"): - fullpath = model_path + self.fullpath = model_path else: - fullpath = os.path.join(os.path.dirname(__file__), "assets", model_path) - if not os.path.exists(fullpath): - raise OSError(f"File {fullpath} does not exist") + self.fullpath = os.path.join( + os.path.dirname(__file__), "assets", model_path + ) + if not os.path.exists(self.fullpath): + raise OSError(f"File {self.fullpath} does not exist") - model = mujoco_py.load_model_from_path(fullpath) - self.sim = mujoco_py.MjSim(model, nsubsteps=n_substeps) - self.viewer = None - self._viewers = {} + self.n_substeps = n_substeps - self.metadata = { - "render.modes": ["human", "rgb_array"], - "video.frames_per_second": int(np.round(1.0 / self.dt)), - } + self.initial_qpos = initial_qpos - self._env_setup(initial_qpos=initial_qpos) - self.initial_state = copy.deepcopy(self.sim.get_state()) + self.width = width + self.height = height + self._initialize_simulation() + + self.viewer = None + self._viewers = {} self.goal = np.zeros(0) obs = self._get_obs() + + assert ( + int(np.round(1.0 / self.dt)) == 
self.metadata["render_fps"] + ), f'Expected value: {int(np.round(1.0 / self.dt))}, Actual value: {self.metadata["render_fps"]}' + self.action_space = spaces.Box(-1.0, 1.0, shape=(n_actions,), dtype="float32") self.observation_space = spaces.Dict( dict( desired_goal=spaces.Box( - -np.inf, np.inf, shape=obs["achieved_goal"].shape, dtype="float32" + -np.inf, np.inf, shape=obs["achieved_goal"].shape, dtype="float64" ), achieved_goal=spaces.Box( - -np.inf, np.inf, shape=obs["achieved_goal"].shape, dtype="float32" + -np.inf, np.inf, shape=obs["achieved_goal"].shape, dtype="float64" ), observation=spaces.Box( - -np.inf, np.inf, shape=obs["observation"].shape, dtype="float32" + -np.inf, np.inf, shape=obs["observation"].shape, dtype="float64" ), ) ) - @property - def dt(self): - return self.sim.model.opt.timestep * self.sim.nsubsteps + self.render_mode = render_mode # Env methods # ---------------------------- + def compute_terminated(self, achieved_goal, desired_goal, info): + """All the available environments are currently continuing tasks and non-time dependent. 
The objective is to reach the goal for an indefinite period of time.""" + return False + + def compute_truncated(self, achievec_goal, desired_goal, info): + """The environments will be truncated only if setting a time limit with max_steps which will automatically wrap the environment in a gym TimeLimit wrapper.""" + return False def step(self, action): if np.array(action).shape != self.action_space.shape: @@ -74,18 +111,32 @@ def step(self, action): action = np.clip(action, self.action_space.low, self.action_space.high) self._set_action(action) - self.sim.step() + + self._mujoco_step(action) + self._step_callback() + + if self.render_mode == "human": + self.render() obs = self._get_obs() - done = False info = { "is_success": self._is_success(obs["achieved_goal"], self.goal), } + + terminated = self.compute_terminated(obs["achieved_goal"], self.goal, info) + truncated = self.compute_truncated(obs["achieved_goal"], self.goal, info) + reward = self.compute_reward(obs["achieved_goal"], self.goal, info) - return obs, reward, done, info - def reset(self, seed: Optional[int] = None): + return obs, reward, terminated, truncated, info + + def reset( + self, + *, + seed: Optional[int] = None, + options: Optional[dict] = None, + ): # Attempt to reset the simulator. Since we randomize initial conditions, it # is possible to get into a state with numerical issues (e.g. due to penetration or # Gimbel lock) or we may not achieve an initial condition (e.g. an object is within the hand). 
@@ -97,38 +148,23 @@ def reset(self, seed: Optional[int] = None): did_reset_sim = self._reset_sim() self.goal = self._sample_goal().copy() obs = self._get_obs() - return obs + if self.render_mode == "human": + self.render() + + return obs, {} def close(self): if self.viewer is not None: - # self.viewer.finish() self.viewer = None self._viewers = {} - def render(self, mode="human", width=DEFAULT_SIZE, height=DEFAULT_SIZE): - self._render_callback() - if mode == "rgb_array": - self._get_viewer(mode).render(width, height) - # window size used for old mujoco-py: - data = self._get_viewer(mode).read_pixels(width, height, depth=False) - # original image is upside-down, so flip it - return data[::-1, :, :] - elif mode == "human": - self._get_viewer(mode).render() - - def _get_viewer(self, mode): - self.viewer = self._viewers.get(mode) - if self.viewer is None: - if mode == "human": - self.viewer = mujoco_py.MjViewer(self.sim) - elif mode == "rgb_array": - self.viewer = mujoco_py.MjRenderContextOffscreen(self.sim, device_id=-1) - self._viewer_setup() - self._viewers[mode] = self.viewer - return self.viewer - # Extension methods # ---------------------------- + def _mujoco_step(self, action): + """Advance the mujoco simulation. Override depending on the python bindings, + either mujoco or mujoco_py + """ + raise NotImplementedError def _reset_sim(self): """Resets a simulation and indicates whether or not it was successful. @@ -136,10 +172,14 @@ def _reset_sim(self): simulation), this method should indicate such a failure by returning False. In such a case, this method will be called again to attempt a the reset again. """ - self.sim.set_state(self.initial_state) - self.sim.forward() return True + def _initialize_simulation(self): + """ + Initialize MuJoCo simulation data structures mjModel and mjData.
+ """ + raise NotImplementedError + def _get_obs(self): """Returns the observation.""" raise NotImplementedError() @@ -179,3 +219,151 @@ def _step_callback(self): to enforce additional constraints on the simulation state. """ pass + + +class MujocoRobotEnv(BaseRobotEnv): + def __init__(self, **kwargs): + if MUJOCO_IMPORT_ERROR is not None: + raise error.DependencyNotInstalled( + f"{MUJOCO_IMPORT_ERROR}. (HINT: you need to install mujoco)" + ) + + self._mujoco = mujoco + self._utils = mujoco_utils + + super().__init__(**kwargs) + + def _initialize_simulation(self): + self.model = self._mujoco.MjModel.from_xml_path(self.fullpath) + self.data = self._mujoco.MjData(self.model) + self._model_names = self._utils.MujocoModelNames(self.model) + + self.model.vis.global_.offwidth = self.width + self.model.vis.global_.offheight = self.height + + self._env_setup(initial_qpos=self.initial_qpos) + self.initial_time = self.data.time + self.initial_qpos = np.copy(self.data.qpos) + self.initial_qvel = np.copy(self.data.qvel) + + def _reset_sim(self): + self.data.time = self.initial_time + self.data.qpos[:] = np.copy(self.initial_qpos) + self.data.qvel[:] = np.copy(self.initial_qvel) + if self.model.na != 0: + self.data.act[:] = None + + mujoco.mj_forward(self.model, self.data) + return super()._reset_sim() + + def render(self): + self._render_callback() + if self.render_mode == "rgb_array": + self._get_viewer(self.render_mode).render() + data = self._get_viewer(self.render_mode).read_pixels(depth=False) + # original image is upside-down, so flip it + return data[::-1, :, :] + elif self.render_mode == "human": + self._get_viewer(self.render_mode).render() + + def _get_viewer( + self, mode + ) -> Union["gym.envs.mujoco.Viewer", "gym.envs.mujoco.RenderContextOffscreen"]: + self.viewer = self._viewers.get(mode) + if self.viewer is None: + if mode == "human": + from gym.envs.mujoco.mujoco_rendering import Viewer + + self.viewer = Viewer(self.model, self.data) + elif mode in { + 
"rgb_array", + "rgb_array_list", + }: + from gym.envs.mujoco.mujoco_rendering import RenderContextOffscreen + + self.viewer = RenderContextOffscreen(model=self.model, data=self.data) + self._viewer_setup() + self._viewers[mode] = self.viewer + return self.viewer + + @property + def dt(self): + return self.model.opt.timestep * self.n_substeps + + def _mujoco_step(self, action): + self._mujoco.mj_step(self.model, self.data, nstep=self.n_substeps) + + +class MujocoPyRobotEnv(BaseRobotEnv): + def __init__(self, **kwargs): + if MUJOCO_PY_IMPORT_ERROR is not None: + raise error.DependencyNotInstalled( + f"{MUJOCO_PY_IMPORT_ERROR}. (HINT: you need to install mujoco_py, and also perform the setup instructions here: https://github.com/openai/mujoco-py/.)" + ) + self._mujoco_py = mujoco_py + self._utils = mujoco_py_utils + + logger.warn( + "This version of the mujoco environments depends " + "on the mujoco-py bindings, which are no longer maintained " + "and may stop working. Please upgrade to the v4 versions of " + "the environments (which depend on the mujoco python bindings instead), unless " + "you are trying to precisely replicate previous works)." 
+ ) + + super().__init__(**kwargs) + + def _initialize_simulation(self): + self.model = self._mujoco_py.load_model_from_path(self.fullpath) + self.sim = self._mujoco_py.MjSim(self.model, nsubsteps=self.n_substeps) + self.data = self.sim.data + + self._env_setup(initial_qpos=self.initial_qpos) + self.initial_state = copy.deepcopy(self.sim.get_state()) + + def _reset_sim(self): + self.sim.set_state(self.initial_state) + self.sim.forward() + return super()._reset_sim() + + def render(self): + width, height = self.width, self.height + assert self.render_mode in self.metadata["render_modes"] + self._render_callback() + if self.render_mode in { + "rgb_array", + "rgb_array_list", + }: + self._get_viewer(self.render_mode).render(width, height) + # window size used for old mujoco-py: + data = self._get_viewer(self.render_mode).read_pixels( + width, height, depth=False + ) + # original image is upside-down, so flip it + return data[::-1, :, :] + elif self.render_mode == "human": + self._get_viewer(self.render_mode).render() + + def _get_viewer( + self, mode + ) -> Union["mujoco_py.MjViewer", "mujoco_py.MjRenderContextOffscreen"]: + self.viewer = self._viewers.get(mode) + if self.viewer is None: + if mode == "human": + self.viewer = self._mujoco_py.MjViewer(self.sim) + + elif mode in { + "rgb_array", + "rgb_array_list", + }: + self.viewer = self._mujoco_py.MjRenderContextOffscreen(self.sim, -1) + self._viewer_setup() + self._viewers[mode] = self.viewer + return self.viewer + + @property + def dt(self): + return self.sim.model.opt.timestep * self.sim.nsubsteps + + def _mujoco_step(self, action): + self.sim.step() diff --git a/gym_robotics/utils/__init__.py b/gym_robotics/utils/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/gym_robotics/envs/utils.py b/gym_robotics/utils/mujoco_py_utils.py similarity index 99% rename from gym_robotics/envs/utils.py rename to gym_robotics/utils/mujoco_py_utils.py index baa76720..2d7b8299 100644 --- 
a/gym_robotics/envs/utils.py +++ b/gym_robotics/utils/mujoco_py_utils.py @@ -1,5 +1,4 @@ import numpy as np - from gym import error try: diff --git a/gym_robotics/utils/mujoco_utils.py b/gym_robotics/utils/mujoco_utils.py new file mode 100644 index 00000000..92a809f9 --- /dev/null +++ b/gym_robotics/utils/mujoco_utils.py @@ -0,0 +1,458 @@ +from typing import Dict, Tuple, Union + +import numpy as np +from gym import error + +try: + import mujoco + from mujoco import MjData, MjModel, mjtObj +except ImportError as e: + raise error.DependencyNotInstalled(f"{e}. (HINT: you need to install mujoco)") + +MJ_OBJ_TYPES = [ + "mjOBJ_BODY", + "mjOBJ_JOINT", + "mjOBJ_GEOM", + "mjOBJ_SITE", + "mjOBJ_CAMERA", + "mjOBJ_ACTUATOR", + "mjOBJ_SENSOR", +] + + +def robot_get_obs(model, data, joint_names): + """Returns all joint positions and velocities associated with + a robot. + """ + if data.qpos is not None and joint_names: + names = [n for n in joint_names if n.startswith("robot")] + return ( + np.squeeze(np.array([get_joint_qpos(model, data, name) for name in names])), + np.squeeze(np.array([get_joint_qvel(model, data, name) for name in names])), + ) + return np.zeros(0), np.zeros(0) + + +def ctrl_set_action(model, data, action): + """For torque actuators it copies the action into mujoco ctrl field. + For position actuators it sets the target relative to the current qpos. + """ + if model.nmocap > 0: + _, action = np.split(action, (model.nmocap * 7,)) + + if len(data.ctrl) > 0: + for i in range(action.shape[0]): + if model.actuator_biastype[i] == 0: + data.ctrl[i] = action[i] + else: + idx = model.jnt_qposadr[model.actuator_trnid[i, 0]] + data.ctrl[i] = data.qpos[idx] + action[i] + + +def mocap_set_action(model, data, action): + """The action controls the robot using mocaps. Specifically, bodies + on the robot (for example the gripper wrist) is controlled with + mocap bodies.
In this case the action is the desired difference + in position and orientation (quaternion), in world coordinates, + of the target body. The mocap is positioned relative to + the target body according to the delta, and the MuJoCo equality + constraint optimizer tries to center the welded body on the mocap. + """ + if model.nmocap > 0: + action, _ = np.split(action, (model.nmocap * 7,)) + action = action.reshape(model.nmocap, 7) + + pos_delta = action[:, :3] + quat_delta = action[:, 3:] + + reset_mocap2body_xpos(model, data) + data.mocap_pos[:] = data.mocap_pos + pos_delta + data.mocap_quat[:] = data.mocap_quat + quat_delta + + +def reset_mocap_welds(model, data): + """Resets the mocap welds that we use for actuation.""" + if model.nmocap > 0 and model.eq_data is not None: + for i in range(model.eq_data.shape[0]): + if model.eq_type[i] == mujoco.mjtEq.mjEQ_WELD: + model.eq_data[i, :7] = np.array([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0]) + mujoco.mj_forward(model, data) + + +def reset_mocap2body_xpos(model, data): + """Resets the position and orientation of the mocap bodies to the same + values as the bodies they're welded to. + """ + + if model.eq_type is None or model.eq_obj1id is None or model.eq_obj2id is None: + return + for eq_type, obj1_id, obj2_id in zip( + model.eq_type, model.eq_obj1id, model.eq_obj2id + ): + if eq_type != mujoco.mjtEq.mjEQ_WELD: + continue + + mocap_id = model.body_mocapid[obj1_id] + if mocap_id != -1: + # obj1 is the mocap, obj2 is the welded body + body_idx = obj2_id + else: + # obj2 is the mocap, obj1 is the welded body + mocap_id = model.body_mocapid[obj2_id] + body_idx = obj1_id + + assert mocap_id != -1 + data.mocap_pos[mocap_id][:] = data.xpos[body_idx] + data.mocap_quat[mocap_id][:] = data.xquat[body_idx] + + +def get_site_jacp(model, data, site_id): + """Return the Jacobian' translational component of the end-effector of + the corresponding site id. 
+ """ + jacp = np.zeros((3, model.nv)) + mujoco.mj_jacSite(model, data, jacp, None, site_id) + + return jacp + + +def get_site_jacr(model, data, site_id): + """Return the Jacobian' rotational component of the end-effector of + the corresponding site id. + """ + jacr = np.zeros((3, model.nv)) + mujoco.mj_jacSite(model, data, None, jacr, site_id) + + return jacr + + +def set_joint_qpos(model, data, name, value): + """Set the joint positions (qpos) of the model.""" + joint_id = mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_JOINT, name) + joint_type = model.jnt_type[joint_id] + joint_addr = model.jnt_qposadr[joint_id] + + if joint_type == mujoco.mjtJoint.mjJNT_FREE: + ndim = 7 + elif joint_type == mujoco.mjtJoint.mjJNT_BALL: + ndim = 4 + else: + assert joint_type in (mujoco.mjtJoint.mjJNT_HINGE, mujoco.mjtJoint.mjJNT_SLIDE) + ndim = 1 + + start_idx = joint_addr + end_idx = joint_addr + ndim + value = np.array(value) + if ndim > 1: + assert value.shape == ( + end_idx - start_idx + ), f"Value has incorrect shape {name}: {value}" + data.qpos[start_idx:end_idx] = value + + +def set_joint_qvel(model, data, name, value): + """Set the joints linear and angular (qvel) of the model.""" + joint_id = mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_JOINT, name) + joint_type = model.jnt_type[joint_id] + joint_addr = model.jnt_dofadr[joint_id] + + if joint_type == mujoco.mjtJoint.mjJNT_FREE: + ndim = 6 + elif joint_type == mujoco.mjtJoint.mjJNT_BALL: + ndim = 3 + else: + assert joint_type in (mujoco.mjtJoint.mjJNT_HINGE, mujoco.mjtJoint.mjJNT_SLIDE) + ndim = 1 + + start_idx = joint_addr + end_idx = joint_addr + ndim + value = np.array(value) + if ndim > 1: + assert value.shape == ( + end_idx - start_idx + ), f"Value has incorrect shape {name}: {value}" + data.qvel[start_idx:end_idx] = value + + +def get_joint_qpos(model, data, name): + """Return the joints position and orientation (qpos) of the model.""" + joint_id = mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_JOINT, name) + joint_type 
= model.jnt_type[joint_id] + joint_addr = model.jnt_qposadr[joint_id] + + if joint_type == mujoco.mjtJoint.mjJNT_FREE: + ndim = 7 + elif joint_type == mujoco.mjtJoint.mjJNT_BALL: + ndim = 4 + else: + assert joint_type in (mujoco.mjtJoint.mjJNT_HINGE, mujoco.mjtJoint.mjJNT_SLIDE) + ndim = 1 + + start_idx = joint_addr + end_idx = joint_addr + ndim + + return data.qpos[start_idx:end_idx] + + +def get_joint_qvel(model, data, name): + """Return the joints linear and angular velocities (qvel) of the model.""" + joint_id = mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_JOINT, name) + joint_type = model.jnt_type[joint_id] + joint_addr = model.jnt_dofadr[joint_id] + + if joint_type == mujoco.mjtJoint.mjJNT_FREE: + ndim = 6 + elif joint_type == mujoco.mjtJoint.mjJNT_BALL: + ndim = 3 + else: + assert joint_type in (mujoco.mjtJoint.mjJNT_HINGE, mujoco.mjtJoint.mjJNT_SLIDE) + ndim = 1 + + start_idx = joint_addr + end_idx = joint_addr + ndim + + return data.qvel[start_idx:end_idx] + + +def get_site_xpos(model, data, name): + site_id = mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_SITE, name) + return data.site_xpos[site_id] + + +def get_site_xvelp(model, data, name): + site_id = mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_SITE, name) + jacp = get_site_jacp(model, data, site_id) + xvelp = jacp @ data.qvel + return xvelp + + +def get_site_xvelr(model, data, name): + site_id = mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_SITE, name) + jacr = get_site_jacr(model, data, site_id) + xvelr = jacr @ data.qvel + return xvelr + + +def set_mocap_pos(model, data, name, value): + body_id = mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_BODY, name) + mocap_id = model.body_mocapid[body_id] + data.mocap_pos[mocap_id] = value + + +def set_mocap_quat(model: MjModel, data: MjData, name: str, value): + body_id = mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_BODY, name) + mocap_id = model.body_mocapid[body_id] + data.mocap_quat[mocap_id] = value + + +def get_site_xmat(model: MjModel, data: MjData, name:
str): + site_id = mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_SITE, name) + return data.site_xmat[site_id].reshape(3, 3) + + +def extract_mj_names( + model: MjModel, obj_type: mjtObj +) -> Tuple[Union[Tuple[str, ...], Tuple[()]], Dict[str, int], Dict[int, str]]: + + if obj_type == mujoco.mjtObj.mjOBJ_BODY: + name_addr = model.name_bodyadr + n_obj = model.nbody + + elif obj_type == mujoco.mjtObj.mjOBJ_JOINT: + name_addr = model.name_jntadr + n_obj = model.njnt + + elif obj_type == mujoco.mjtObj.mjOBJ_GEOM: + name_addr = model.name_geomadr + n_obj = model.ngeom + + elif obj_type == mujoco.mjtObj.mjOBJ_SITE: + name_addr = model.name_siteadr + n_obj = model.nsite + + elif obj_type == mujoco.mjtObj.mjOBJ_LIGHT: + name_addr = model.name_lightadr + n_obj = model.nlight + + elif obj_type == mujoco.mjtObj.mjOBJ_CAMERA: + name_addr = model.name_camadr + n_obj = model.ncam + + elif obj_type == mujoco.mjtObj.mjOBJ_ACTUATOR: + name_addr = model.name_actuatoradr + n_obj = model.nu + + elif obj_type == mujoco.mjtObj.mjOBJ_SENSOR: + name_addr = model.name_sensoradr + n_obj = model.nsensor + + elif obj_type == mujoco.mjtObj.mjOBJ_TENDON: + name_addr = model.name_tendonadr + n_obj = model.ntendon + + elif obj_type == mujoco.mjtObj.mjOBJ_MESH: + name_addr = model.name_meshadr + n_obj = model.nmesh + else: + raise ValueError( + "`{}` was passed as the MuJoCo model object type. The MuJoCo model object type can only be of the following mjtObj enum types: {}.".format( + obj_type, MJ_OBJ_TYPES + ) + ) + + id2name = {i: None for i in range(n_obj)} + name2id = {} + for addr in name_addr: + name = model.names[addr:].split(b"\x00")[0].decode() + if name: + obj_id = mujoco.mj_name2id(model, obj_type, name) + assert 0 <= obj_id < n_obj and id2name[obj_id] is None + name2id[name] = obj_id + id2name[obj_id] = name + + return tuple(id2name[id] for id in sorted(name2id.values())), name2id, id2name + + +class MujocoModelNames: + """Access mjtObj object names and ids of the current MuJoCo model. 
+ + This class supports access to the names and ids of the following mjObj types: + mjOBJ_BODY + mjOBJ_JOINT + mjOBJ_GEOM + mjOBJ_SITE + mjOBJ_CAMERA + mjOBJ_ACTUATOR + mjOBJ_SENSOR + + The properties provided for each ``mjObj`` are: + ``mjObj``_names: list of the mjObj names in the model of type mjOBJ_FOO. + ``mjObj``_name2id: dictionary with name of the mjObj as keys and id of the mjObj as values. + ``mjObj``_id2name: dictionary with id of the mjObj as keys and name of the mjObj as values. + """ + + def __init__(self, model: MjModel): + """Access mjtObj object names and ids of the current MuJoCo model. + + Args: + model: mjModel of the MuJoCo environment. + """ + ( + self._body_names, + self._body_name2id, + self._body_id2name, + ) = extract_mj_names(model, mujoco.mjtObj.mjOBJ_BODY) + ( + self._joint_names, + self._joint_name2id, + self._joint_id2name, + ) = extract_mj_names(model, mujoco.mjtObj.mjOBJ_JOINT) + ( + self._geom_names, + self._geom_name2id, + self._geom_id2name, + ) = extract_mj_names(model, mujoco.mjtObj.mjOBJ_GEOM) + ( + self._site_names, + self._site_name2id, + self._site_id2name, + ) = extract_mj_names(model, mujoco.mjtObj.mjOBJ_SITE) + ( + self._camera_names, + self._camera_name2id, + self._camera_id2name, + ) = extract_mj_names(model, mujoco.mjtObj.mjOBJ_CAMERA) + ( + self._actuator_names, + self._actuator_name2id, + self._actuator_id2name, + ) = extract_mj_names(model, mujoco.mjtObj.mjOBJ_ACTUATOR) + ( + self._sensor_names, + self._sensor_name2id, + self._sensor_id2name, + ) = extract_mj_names(model, mujoco.mjtObj.mjOBJ_SENSOR) + + @property + def body_names(self): + return self._body_names + + @property + def body_name2id(self): + return self._body_name2id + + @property + def body_id2name(self): + return self._body_id2name + + @property + def joint_names(self): + return self._joint_names + + @property + def joint_name2id(self): + return self._joint_name2id + + @property + def joint_id2name(self): + return self._joint_id2name + + @property + 
def geom_names(self): + return self._geom_names + + @property + def geom_name2id(self): + return self._geom_name2id + + @property + def geom_id2name(self): + return self._geom_id2name + + @property + def site_names(self): + return self._site_names + + @property + def site_name2id(self): + return self._site_name2id + + @property + def site_id2name(self): + return self._site_id2name + + @property + def camera_names(self): + return self._camera_names + + @property + def camera_name2id(self): + return self._camera_name2id + + @property + def camera_id2name(self): + return self._camera_id2name + + @property + def actuator_names(self): + return self._actuator_names + + @property + def actuator_name2id(self): + return self._actuator_name2id + + @property + def actuator_id2name(self): + return self._actuator_id2name + + @property + def sensor_names(self): + return self._sensor_names + + @property + def sensor_name2id(self): + return self._sensor_name2id + + @property + def sensor_id2name(self): + return self._sensor_id2name diff --git a/gym_robotics/envs/rotations.py b/gym_robotics/utils/rotations.py similarity index 99% rename from gym_robotics/envs/rotations.py rename to gym_robotics/utils/rotations.py index 0d4ce82d..203b1532 100644 --- a/gym_robotics/envs/rotations.py +++ b/gym_robotics/utils/rotations.py @@ -28,9 +28,10 @@ # https://github.com/matthew-brett/transforms3d # They have mostly been modified to support batched operations. 
-import numpy as np import itertools +import numpy as np + """ Rotations ========= diff --git a/py.Dockerfile b/py.Dockerfile index 19d90eaa..f37a6a01 100644 --- a/py.Dockerfile +++ b/py.Dockerfile @@ -1,21 +1,21 @@ # A Dockerfile that sets up a full gym-robotics install with test dependencies ARG PYTHON_VERSION FROM python:$PYTHON_VERSION + +SHELL ["/bin/bash", "-o", "pipefail", "-c"] + RUN apt-get -y update && apt-get install -y unzip libglu1-mesa-dev libgl1-mesa-dev libosmesa6-dev xvfb patchelf ffmpeg cmake swig # Download mujoco -RUN mkdir /root/.mujoco && \ - cd /root/.mujoco && \ - curl -O https://www.roboti.us/download/mjpro150_linux.zip && \ - unzip mjpro150_linux.zip && \ - curl -o /root/.mujoco/mjkey.txt https://roboti.us/file/mjkey.txt +RUN mkdir /root/.mujoco \ + && cd /root/.mujoco \ + && wget -qO- 'https://github.com/deepmind/mujoco/releases/download/2.1.0/mujoco210-linux-x86_64.tar.gz' | tar -xzvf - -ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/root/.mujoco/mjpro150/bin +ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/root/.mujoco/mujoco210/bin" -COPY . /usr/local/gym/ -WORKDIR /usr/local/gym/ +COPY . /usr/local/gym-robotics/ +WORKDIR /usr/local/gym-robotics/ -RUN pip install "mujoco_py>=1.50, <2.0" -RUN pip install . && pip install -r test_requirements.txt +RUN pip install .[testing] --no-cache-dir -ENTRYPOINT ["/usr/local/gym/bin/docker_entrypoint"] +ENTRYPOINT ["/usr/local/gym-robotics/bin/docker_entrypoint"] diff --git a/pyproject.toml b/pyproject.toml index 41a5e491..a090a32d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,12 +1,33 @@ [tool.pyright] +include = [ + "gym_robotics/**", + "tests/**" +] -include = ["gym/version.py", "gym/logger.py"] exclude = [ "**/node_modules", "**/__pycache__", + "**/_version.py" +] + + +strict = [ + ] -strict = ["gym/version.py", "gym/logger.py"] -reportMissingImports = true -reportMissingTypeStubs = false -verboseOutput = true +# This is required as the CI pre-commit does not download the module (i.e. 
numpy) +# Therefore, we have to ignore missing imports +reportMissingImports = "none" +typeCheckingMode = "basic" +pythonVersion = "3.7" +typeshedPath = "typeshed" +enableTypeIgnoreComments = true + + +reportGeneralTypeIssues = "none" # -> commented out raises 489 errors +# reportUntypedFunctionDecorator = "none" # -> pytest.mark.parameterize issues +reportOptionalMemberAccess = "none" # -> raise _version.py error + +[build-system] +requires = ["setuptools", "versioneer==0.24"] +build-backend = "setuptools.build_meta" \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index a7094ce6..e8292c47 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ cloudpickle>=1.2.0 -mujoco_py>=1.50, <2.0 +mujoco_py<2.2,>=2.1 numpy>=1.18.0 gym>=0.22 diff --git a/setup.py b/setup.py index bc5c1311..fb58e886 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,3 @@ -import os.path -import sys - from setuptools import find_packages, setup import versioneer @@ -16,11 +13,21 @@ else: break +# pytest is pinned to 7.0.1 as this is last version for python 3.6 +extras = { + "testing": [ + "pytest==7.0.1", + "mujoco_py<2.2,>=2.1", + ], + "mujoco_py": ["mujoco_py<2.2,>=2.1"], +} + setup( name="gym-robotics", version=versioneer.get_version(), cmdclass=versioneer.get_cmdclass(), description="Legacy robotics environments from Gym repo", + extras_require=extras, long_description=long_description, long_description_content_type="text/markdown", url="https://github.com/Farama-Foundation/gym-robotics", @@ -32,10 +39,9 @@ ], zip_safe=False, install_requires=[ + "mujoco==2.2.2", "numpy>=1.18.0", - "cloudpickle>=1.2.0", - "importlib_metadata>=4.8.1; python_version < '3.8'", - "gym>=0.22", + "gym>=0.26", ], package_data={ "gym_robotics": [ @@ -47,8 +53,7 @@ "envs/assets/textures/*.png", ] }, - entry_points={"gym.envs": ["__root__=gym_robotics:register_robotics_envs"]}, - tests_require=["pytest", "mock"], + entry_points={"gym.envs": ["__root__ = 
gym_robotics:register_robotics_envs"]}, python_requires=">=3.7", classifiers=[ "Programming Language :: Python :: 3", @@ -57,4 +62,5 @@ "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", ], + tests_require=extras["testing"], ) diff --git a/test_requirements.txt b/test_requirements.txt index 6f0ee06a..e83a1724 100644 --- a/test_requirements.txt +++ b/test_requirements.txt @@ -1,2 +1,3 @@ -pytest~=6.2 -pytest-forked~=1.3 +mujoco==2.2.0 +mujoco_py<2.2,>=2.1 +pytest==7.0.1 \ No newline at end of file diff --git a/tests/envs/hand/test_manipulate.py b/tests/envs/hand/test_manipulate.py index 3aa19ce3..c8114d22 100644 --- a/tests/envs/hand/test_manipulate.py +++ b/tests/envs/hand/test_manipulate.py @@ -1,11 +1,8 @@ import pickle +import gym import pytest -from gym import envs -import gym_robotics - - ENVIRONMENT_IDS = ( "HandManipulateEgg-v0", "HandManipulatePen-v0", @@ -15,7 +12,7 @@ @pytest.mark.parametrize("environment_id", ENVIRONMENT_IDS) def test_serialize_deserialize(environment_id): - env1 = envs.make(environment_id, target_position="fixed") + env1 = gym.make(environment_id, target_position="fixed") env1.reset() env2 = pickle.loads(pickle.dumps(env1)) diff --git a/tests/envs/hand/test_manipulate_touch_sensors.py b/tests/envs/hand/test_manipulate_touch_sensors.py index dd2329eb..f48940aa 100644 --- a/tests/envs/hand/test_manipulate_touch_sensors.py +++ b/tests/envs/hand/test_manipulate_touch_sensors.py @@ -1,21 +1,18 @@ import pickle +import gym import pytest -from gym import envs -import gym_robotics - - ENVIRONMENT_IDS = ( - "HandManipulateEggTouchSensors-v1", - "HandManipulatePenTouchSensors-v0", - "HandManipulateBlockTouchSensors-v0", + "HandManipulateEgg_ContinuousTouchSensors-v1", + "HandManipulatePen_BooleanTouchSensors-v1", + "HandManipulateBlock_BooleanTouchSensors-v1", ) @pytest.mark.parametrize("environment_id", ENVIRONMENT_IDS) def test_serialize_deserialize(environment_id): - env1 = envs.make(environment_id, 
target_position="fixed") + env1 = gym.make(environment_id, target_position="fixed") env1.reset() env2 = pickle.loads(pickle.dumps(env1)) diff --git a/tests/envs/hand/test_reach.py b/tests/envs/hand/test_reach.py index 1cb4c2af..a759ed3f 100644 --- a/tests/envs/hand/test_reach.py +++ b/tests/envs/hand/test_reach.py @@ -1,13 +1,10 @@ import pickle -import pytest - -from gym import envs -import gym_robotics +import gym def test_serialize_deserialize(): - env1 = envs.make("HandReach-v0", distance_threshold=1e-6) + env1 = gym.make("HandReach-v0", distance_threshold=1e-6) env1.reset() env2 = pickle.loads(pickle.dumps(env1)) diff --git a/tests/test_envs.py b/tests/test_envs.py new file mode 100644 index 00000000..6ccaf941 --- /dev/null +++ b/tests/test_envs.py @@ -0,0 +1,119 @@ +import warnings + +import pytest +from gym.envs.registration import EnvSpec +from gym.error import Error +from gym.utils.env_checker import check_env + +from tests.utils import all_testing_env_specs, assert_equals + +CHECK_ENV_IGNORE_WARNINGS = [ + f"\x1b[33mWARN: {message}\x1b[0m" + for message in [ + "This version of the mujoco environments depends on the mujoco-py bindings, which are no longer maintained and may stop working. Please upgrade to the v4 versions of the environments (which depend on the mujoco python bindings instead), unless you are trying to precisely replicate previous works).", + "A Box observation space minimum value is -infinity. This is probably too low.", + "A Box observation space maximum value is -infinity. This is probably too high.", + "For Box action spaces, we recommend using a symmetric and normalized space (range=[-1, 1] or [0, 1]). See https://stable-baselines3.readthedocs.io/en/master/guide/rl_tips.html for more information.", + ] +] + +# Exclude mujoco_py environments in test_render_modes test due to OpenGL error. 
+non_mujoco_py_env_specs = [ + spec for spec in all_testing_env_specs if "MujocoPy" not in spec.entry_point +] + + +@pytest.mark.parametrize( + "spec", all_testing_env_specs, ids=[spec.id for spec in all_testing_env_specs] +) +def test_env(spec): + # Capture warnings + env = spec.make(disable_env_checker=True).unwrapped + warnings.simplefilter("always") + # Test if env adheres to Gym API + with warnings.catch_warnings(record=True) as w: + check_env(env) + for warning in w: + if warning.message.args[0] not in CHECK_ENV_IGNORE_WARNINGS: + raise Error(f"Unexpected warning: {warning.message}") + + +# Note that this precludes running this test in multiple threads. +# However, we probably already can't do multithreading due to some environments. +SEED = 0 +NUM_STEPS = 50 + + +@pytest.mark.parametrize( + "env_spec", all_testing_env_specs, ids=[env.id for env in all_testing_env_specs] +) +def test_env_determinism_rollout(env_spec: EnvSpec): + """Run a rollout with two environments and assert equality. + + This test run a rollout of NUM_STEPS steps with two environments + initialized with the same seed and assert that: + + - observation after first reset are the same + - same actions are sampled by the two envs + - observations are contained in the observation space + - obs, rew, terminated, truncated and info are equals between the two envs + """ + # Don't check rollout equality if it's a nondeterministic environment. 
+ if env_spec.nondeterministic is True: + return + + env_1 = env_spec.make(disable_env_checker=True) + env_2 = env_spec.make(disable_env_checker=True) + + initial_obs_1 = env_1.reset(seed=SEED) + initial_obs_2 = env_2.reset(seed=SEED) + assert_equals(initial_obs_1, initial_obs_2) + + env_1.action_space.seed(SEED) + + for time_step in range(NUM_STEPS): + # We don't evaluate the determinism of actions + action = env_1.action_space.sample() + + obs_1, rew_1, terminated_1, truncated_1, info_1 = env_1.step(action) + obs_2, rew_2, terminated_2, truncated_2, info_2 = env_2.step(action) + + assert_equals(obs_1, obs_2, f"[{time_step}] ") + assert env_1.observation_space.contains( + obs_1 + ) # obs_2 verified by previous assertion + + assert rew_1 == rew_2, f"[{time_step}] reward 1={rew_1}, reward 2={rew_2}" + assert ( + terminated_1 == terminated_2 + ), f"[{time_step}] terminated 1={terminated_1}, terminated 2={terminated_2}" + assert ( + truncated_1 == truncated_2 + ), f"[{time_step}] truncated 1={truncated_1}, truncated 2={truncated_2}" + assert_equals(info_1, info_2, f"[{time_step}] ") + + if ( + terminated_1 or truncated_1 + ): # terminated_2 and truncated_2 verified by previous assertion + env_1.reset(seed=SEED) + env_2.reset(seed=SEED) + + env_1.close() + env_2.close() + + +@pytest.mark.parametrize( + "spec", non_mujoco_py_env_specs, ids=[spec.id for spec in non_mujoco_py_env_specs] +) +def test_render_modes(spec): + env = spec.make() + + for mode in env.metadata.get("render_modes", []): + if mode != "human": + new_env = spec.make(render_mode=mode) + + new_env.reset() + new_env.step(new_env.action_space.sample()) + new_env.render() + + new_env.close() diff --git a/tests/utils.py b/tests/utils.py new file mode 100644 index 00000000..70af23bf --- /dev/null +++ b/tests/utils.py @@ -0,0 +1,34 @@ +"""Finds all the specs that we can test with""" +import gym +import numpy as np + +all_testing_env_specs = [ + env_spec + for env_spec in gym.envs.registry.values() + if
env_spec.entry_point.startswith("gym_robotics.envs") +] + + +def assert_equals(a, b, prefix=None): + """Assert equality of data structures `a` and `b`. + + Args: + a: first data structure + b: second data structure + prefix: prefix for failed assertion message for types and dicts + """ + assert type(a) == type(b), f"{prefix}Differing types: {a} and {b}" + if isinstance(a, dict): + assert list(a.keys()) == list(b.keys()), f"{prefix}Key sets differ: {a} and {b}" + + for k in a.keys(): + v_a = a[k] + v_b = b[k] + assert_equals(v_a, v_b) + elif isinstance(a, np.ndarray): + np.testing.assert_array_equal(a, b) + elif isinstance(a, tuple): + for elem_from_a, elem_from_b in zip(a, b): + assert_equals(elem_from_a, elem_from_b) + else: + assert a == b diff --git a/versioneer.py b/versioneer.py index d70f31b1..8e4d4f57 100644 --- a/versioneer.py +++ b/versioneer.py @@ -1,4 +1,4 @@ -# Version: 0.21 +# Version: 0.26 """The Versioneer - like a rocketeer, but for versions. @@ -8,12 +8,12 @@ * like a rocketeer, but for versions! * https://github.com/python-versioneer/python-versioneer * Brian Warner -* License: Public Domain -* Compatible with: Python 3.6, 3.7, 3.8, 3.9 and pypy3 +* License: Public Domain (Unlicense) +* Compatible with: Python 3.7, 3.8, 3.9, 3.10 and pypy3 * [![Latest Version][pypi-image]][pypi-url] * [![Build Status][travis-image]][travis-url] -This is a tool for managing a recorded version number in distutils-based +This is a tool for managing a recorded version number in setuptools-based python projects. The goal is to remove the tedious and error-prone "update the embedded version string" step from your release process. Making a new release should be as easy as recording a new tag in your version-control @@ -22,10 +22,32 @@ ## Quick Install +Versioneer provides two installation modes. The "classic" vendored mode installs +a copy of versioneer into your repository. 
The experimental build-time dependency mode
+is intended to allow you to skip this step and simplify the process of upgrading.
+
+### Vendored mode
+
+* `pip install versioneer` to somewhere in your $PATH
+* add a `[tool.versioneer]` section to your `pyproject.toml` or a
+  `[versioneer]` section to your `setup.cfg` (see [Install](INSTALL.md))
+* run `versioneer install --vendor` in your source tree, commit the results
+* verify version information with `python setup.py version`
+
+### Build-time dependency mode
+
 * `pip install versioneer` to somewhere in your $PATH
-* add a `[versioneer]` section to your setup.cfg (see [Install](INSTALL.md))
-* run `versioneer install` in your source tree, commit the results
-* Verify version information with `python setup.py version`
+* add a `[tool.versioneer]` section to your `pyproject.toml` or a
+  `[versioneer]` section to your `setup.cfg` (see [Install](INSTALL.md))
+* add `versioneer` to the `requires` key of the `build-system` table in
+  `pyproject.toml`:
+  ```toml
+  [build-system]
+  requires = ["setuptools", "versioneer"]
+  build-backend = "setuptools.build_meta"
+  ```
+* run `versioneer install --no-vendor` in your source tree, commit the results
+* verify version information with `python setup.py version`
 
 ## Version Identifiers
 
@@ -230,9 +252,10 @@
 
 To upgrade your project to a new release of Versioneer, do the following:
 
 * install the new Versioneer (`pip install -U versioneer` or equivalent)
-* edit `setup.cfg`, if necessary, to include any new configuration settings
-  indicated by the release notes. See [UPGRADING](./UPGRADING.md) for details.
-* re-run `versioneer install` in your source tree, to replace
+* edit `setup.cfg` and `pyproject.toml`, if necessary,
+  to include any new configuration settings indicated by the release notes.
+  See [UPGRADING](./UPGRADING.md) for details. 
+* re-run `versioneer install --[no-]vendor` in your source tree, to replace `SRC/_version.py` * commit any changed files @@ -281,13 +304,22 @@ import configparser import errno +import functools import json import os import re import subprocess import sys +from pathlib import Path from typing import Callable, Dict +try: + import tomli + + have_tomli = True +except ImportError: + have_tomli = False + class VersioneerConfig: """Container for Versioneer configuration parameters.""" @@ -326,7 +358,7 @@ def get_root(): my_path = os.path.realpath(os.path.abspath(__file__)) me_dir = os.path.normcase(os.path.splitext(my_path)[0]) vsr_dir = os.path.normcase(os.path.splitext(versioneer_py)[0]) - if me_dir != vsr_dir: + if me_dir != vsr_dir and "VERSIONEER_PEP518" not in globals(): print( "Warning: build in %s is using versioneer.py from %s" % (os.path.dirname(my_path), versioneer_py) @@ -342,22 +374,32 @@ def get_config_from_root(root): # configparser.NoSectionError (if it lacks a [versioneer] section), or # configparser.NoOptionError (if it lacks "VCS="). See the docstring at # the top of versioneer.py for instructions on writing your setup.cfg . 
- setup_cfg = os.path.join(root, "setup.cfg") - parser = configparser.ConfigParser() - with open(setup_cfg, "r") as cfg_file: - parser.read_file(cfg_file) - VCS = parser.get("versioneer", "VCS") # mandatory + root = Path(root) + pyproject_toml = root / "pyproject.toml" + setup_cfg = root / "setup.cfg" + section = None + if pyproject_toml.exists() and have_tomli: + try: + with open(pyproject_toml, "rb") as fobj: + pp = tomli.load(fobj) + section = pp["tool"]["versioneer"] + except (tomli.TOMLDecodeError, KeyError): + pass + if not section: + parser = configparser.ConfigParser() + with open(setup_cfg) as cfg_file: + parser.read_file(cfg_file) + parser.get("versioneer", "VCS") # raise error if missing - # Dict-like interface for non-mandatory entries - section = parser["versioneer"] + section = parser["versioneer"] cfg = VersioneerConfig() - cfg.VCS = VCS + cfg.VCS = section["VCS"] cfg.style = section.get("style", "") cfg.versionfile_source = section.get("versionfile_source") cfg.versionfile_build = section.get("versionfile_build") cfg.tag_prefix = section.get("tag_prefix") - if cfg.tag_prefix in ("''", '""'): + if cfg.tag_prefix in ("''", '""', None): cfg.tag_prefix = "" cfg.parentdir_prefix = section.get("parentdir_prefix") cfg.verbose = section.get("verbose") @@ -388,6 +430,14 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, env= """Call the given command(s).""" assert isinstance(commands, list) process = None + + popen_kwargs = {} + if sys.platform == "win32": + # This hides the console window if pythonw.exe is used + startupinfo = subprocess.STARTUPINFO() + startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW + popen_kwargs["startupinfo"] = startupinfo + for command in commands: try: dispcmd = str([command] + args) @@ -398,6 +448,7 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, env= env=env, stdout=subprocess.PIPE, stderr=(subprocess.PIPE if hide_stderr else None), + **popen_kwargs, ) break except 
OSError: @@ -410,7 +461,7 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, env= return None, None else: if verbose: - print("unable to find command, tried %s" % (commands,)) + print(f"unable to find command, tried {commands}") return None, None stdout = process.communicate()[0].strip().decode() if process.returncode != 0: @@ -430,8 +481,9 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, env= # directories (produced by setup.py build) will contain a much shorter file # that just contains the computed version number. -# This file is released into the public domain. Generated by -# versioneer-0.21 (https://github.com/python-versioneer/python-versioneer) +# This file is released into the public domain. +# Generated by versioneer-0.26 +# https://github.com/python-versioneer/python-versioneer """Git implementation of _version.py.""" @@ -441,6 +493,7 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, env= import subprocess import sys from typing import Callable, Dict +import functools def get_keywords(): @@ -498,6 +551,14 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, """Call the given command(s).""" assert isinstance(commands, list) process = None + + popen_kwargs = {} + if sys.platform == "win32": + # This hides the console window if pythonw.exe is used + startupinfo = subprocess.STARTUPINFO() + startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW + popen_kwargs["startupinfo"] = startupinfo + for command in commands: try: dispcmd = str([command] + args) @@ -505,7 +566,7 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, process = subprocess.Popen([command] + args, cwd=cwd, env=env, stdout=subprocess.PIPE, stderr=(subprocess.PIPE if hide_stderr - else None)) + else None), **popen_kwargs) break except OSError: e = sys.exc_info()[1] @@ -653,13 +714,18 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): 
version string, meaning we're inside a checked out source tree. """ GITS = ["git"] - TAG_PREFIX_REGEX = "*" if sys.platform == "win32": GITS = ["git.cmd", "git.exe"] - TAG_PREFIX_REGEX = r"\*" + + # GIT_DIR can interfere with correct operation of Versioneer. + # It may be intended to be passed to the Versioneer-versioned project, + # but that should not change where we get our version from. + env = os.environ.copy() + env.pop("GIT_DIR", None) + runner = functools.partial(runner, env=env) _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, - hide_stderr=True) + hide_stderr=not verbose) if rc != 0: if verbose: print("Directory %%s not under git control" %% root) @@ -667,11 +733,10 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] # if there isn't one, this yields HEX[-dirty] (no NUM) - describe_out, rc = runner(GITS, ["describe", "--tags", "--dirty", - "--always", "--long", - "--match", - "%%s%%s" %% (tag_prefix, TAG_PREFIX_REGEX)], - cwd=root) + describe_out, rc = runner(GITS, [ + "describe", "--tags", "--dirty", "--always", "--long", + "--match", f"{tag_prefix}[[:digit:]]*" + ], cwd=root) # --long was added in git-1.5.5 if describe_out is None: raise NotThisMethod("'git describe' failed") @@ -760,8 +825,8 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): else: # HEX: no tags pieces["closest-tag"] = None - count_out, rc = runner(GITS, ["rev-list", "HEAD", "--count"], cwd=root) - pieces["distance"] = int(count_out) # total number of commits + out, rc = runner(GITS, ["rev-list", "HEAD", "--left-right"], cwd=root) + pieces["distance"] = len(out.split()) # total number of commits # commit date: see ISO-8601 comment in git_versions_from_keywords() date = runner(GITS, ["show", "-s", "--format=%%ci", "HEAD"], cwd=root)[0].strip() @@ -857,7 +922,7 @@ def render_pep440_pre(pieces): tag_version, post_version = 
pep440_split_post(pieces["closest-tag"]) rendered = tag_version if post_version is not None: - rendered += ".post%%d.dev%%d" %% (post_version+1, pieces["distance"]) + rendered += ".post%%d.dev%%d" %% (post_version + 1, pieces["distance"]) else: rendered += ".post0.dev%%d" %% (pieces["distance"]) else: @@ -1079,7 +1144,7 @@ def git_get_keywords(versionfile_abs): # _version.py. keywords = {} try: - with open(versionfile_abs, "r") as fobj: + with open(versionfile_abs) as fobj: for line in fobj: if line.strip().startswith("git_refnames ="): mo = re.search(r'=\s*"(.*)"', line) @@ -1178,12 +1243,17 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): version string, meaning we're inside a checked out source tree. """ GITS = ["git"] - TAG_PREFIX_REGEX = "*" if sys.platform == "win32": GITS = ["git.cmd", "git.exe"] - TAG_PREFIX_REGEX = r"\*" - _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=True) + # GIT_DIR can interfere with correct operation of Versioneer. + # It may be intended to be passed to the Versioneer-versioned project, + # but that should not change where we get our version from. 
+ env = os.environ.copy() + env.pop("GIT_DIR", None) + runner = functools.partial(runner, env=env) + + _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=not verbose) if rc != 0: if verbose: print("Directory %s not under git control" % root) @@ -1200,7 +1270,7 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): "--always", "--long", "--match", - "%s%s" % (tag_prefix, TAG_PREFIX_REGEX), + f"{tag_prefix}[[:digit:]]*", ], cwd=root, ) @@ -1276,7 +1346,7 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): if verbose: fmt = "tag '%s' doesn't start with prefix '%s'" print(fmt % (full_tag, tag_prefix)) - pieces["error"] = "tag '%s' doesn't start with prefix '%s'" % ( + pieces["error"] = "tag '{}' doesn't start with prefix '{}'".format( full_tag, tag_prefix, ) @@ -1292,8 +1362,8 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): else: # HEX: no tags pieces["closest-tag"] = None - count_out, rc = runner(GITS, ["rev-list", "HEAD", "--count"], cwd=root) - pieces["distance"] = int(count_out) # total number of commits + out, rc = runner(GITS, ["rev-list", "HEAD", "--left-right"], cwd=root) + pieces["distance"] = len(out.split()) # total number of commits # commit date: see ISO-8601 comment in git_versions_from_keywords() date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip() @@ -1305,7 +1375,7 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): return pieces -def do_vcs_install(manifest_in, versionfile_source, ipy): +def do_vcs_install(versionfile_source, ipy): """Git-specific installation logic for Versioneer. 
For Git, this means creating/changing .gitattributes to mark _version.py @@ -1314,20 +1384,21 @@ def do_vcs_install(manifest_in, versionfile_source, ipy): GITS = ["git"] if sys.platform == "win32": GITS = ["git.cmd", "git.exe"] - files = [manifest_in, versionfile_source] + files = [versionfile_source] if ipy: files.append(ipy) - try: - my_path = __file__ - if my_path.endswith(".pyc") or my_path.endswith(".pyo"): - my_path = os.path.splitext(my_path)[0] + ".py" - versioneer_file = os.path.relpath(my_path) - except NameError: - versioneer_file = "versioneer.py" - files.append(versioneer_file) + if "VERSIONEER_PEP518" not in globals(): + try: + my_path = __file__ + if my_path.endswith(".pyc") or my_path.endswith(".pyo"): + my_path = os.path.splitext(my_path)[0] + ".py" + versioneer_file = os.path.relpath(my_path) + except NameError: + versioneer_file = "versioneer.py" + files.append(versioneer_file) present = False try: - with open(".gitattributes", "r") as fobj: + with open(".gitattributes") as fobj: for line in fobj: if line.strip().startswith(versionfile_source): if "export-subst" in line.strip().split()[1:]: @@ -1373,7 +1444,7 @@ def versions_from_parentdir(parentdir_prefix, root, verbose): SHORT_VERSION_PY = """ -# This file was generated by 'versioneer.py' (0.21) from +# This file was generated by 'versioneer.py' (0.26) from # revision-control system data, or from the parent directory name of an # unpacked source archive. Distribution tarballs contain a pre-generated copy # of this file. 
@@ -1416,7 +1487,7 @@ def write_to_version_file(filename, versions): with open(filename, "w") as f: f.write(SHORT_VERSION_PY % contents) - print("set %s to '%s'" % (filename, versions["version"])) + print("set {} to '{}'".format(filename, versions["version"])) def plus_or_dot(pieces): @@ -1721,7 +1792,7 @@ def get_versions(verbose=False): try: ver = versions_from_file(versionfile_abs) if verbose: - print("got version from file %s %s" % (versionfile_abs, ver)) + print(f"got version from file {versionfile_abs} {ver}") return ver except NotThisMethod: pass @@ -1764,7 +1835,7 @@ def get_version(): def get_cmdclass(cmdclass=None): - """Get the custom setuptools/distutils subclasses used by Versioneer. + """Get the custom setuptools subclasses used by Versioneer. If the package uses a different cmdclass (e.g. one from numpy), it should be provide as an argument. @@ -1786,8 +1857,8 @@ def get_cmdclass(cmdclass=None): cmds = {} if cmdclass is None else cmdclass.copy() - # we add "version" to both distutils and setuptools - from distutils.core import Command + # we add "version" to setuptools + from setuptools import Command class cmd_version(Command): description = "report generated version string" @@ -1811,7 +1882,7 @@ def run(self): cmds["version"] = cmd_version - # we override "build_py" in both distutils and setuptools + # we override "build_py" in setuptools # # most invocation pathways end up running build_py: # distutils/build -> build_py @@ -1826,13 +1897,14 @@ def run(self): # then does setup.py bdist_wheel, or sometimes setup.py install # setup.py egg_info -> ? + # pip install -e . and setuptool/editable_wheel will invoke build_py + # but the build_py command is not expected to copy any files. 
+ # we override different "build_py" commands for both environments if "build_py" in cmds: _build_py = cmds["build_py"] - elif "setuptools" in sys.modules: - from setuptools.command.build_py import build_py as _build_py else: - from distutils.command.build_py import build_py as _build_py + from setuptools.command.build_py import build_py as _build_py class cmd_build_py(_build_py): def run(self): @@ -1840,6 +1912,10 @@ def run(self): cfg = get_config_from_root(root) versions = get_versions() _build_py.run(self) + if getattr(self, "editable_mode", False): + # During editable installs `.py` and data files are + # not copied to build_lib + return # now locate _version.py in the new build/ directory and replace # it with an updated value if cfg.versionfile_build: @@ -1851,10 +1927,8 @@ def run(self): if "build_ext" in cmds: _build_ext = cmds["build_ext"] - elif "setuptools" in sys.modules: - from setuptools.command.build_ext import build_ext as _build_ext else: - from distutils.command.build_ext import build_ext as _build_ext + from setuptools.command.build_ext import build_ext as _build_ext class cmd_build_ext(_build_ext): def run(self): @@ -1871,6 +1945,13 @@ def run(self): # now locate _version.py in the new build/ directory and replace # it with an updated value target_versionfile = os.path.join(self.build_lib, cfg.versionfile_build) + if not os.path.exists(target_versionfile): + print( + f"Warning: {target_versionfile} does not exist, skipping " + "version update. This can happen if you are running build_ext " + "without first running build_py." + ) + return print("UPDATING %s" % target_versionfile) write_to_version_file(target_versionfile, versions) @@ -1914,7 +1995,10 @@ def run(self): del cmds["build_py"] if "py2exe" in sys.modules: # py2exe enabled? 
-        from py2exe.distutils_buildexe import py2exe as _py2exe
+        try:
+            from py2exe.setuptools_buildexe import py2exe as _py2exe
+        except ImportError:
+            from py2exe.distutils_buildexe import py2exe as _py2exe
 
         class cmd_py2exe(_py2exe):
             def run(self):
@@ -1942,13 +2026,51 @@ def run(self):
 
         cmds["py2exe"] = cmd_py2exe
 
+    # sdist farms its file list building out to egg_info
+    if "egg_info" in cmds:
+        _egg_info = cmds["egg_info"]
+    else:
+        from setuptools.command.egg_info import egg_info as _egg_info
+
+    class cmd_egg_info(_egg_info):
+        def find_sources(self):
+            # egg_info.find_sources builds the manifest list and writes it
+            # in one shot
+            super().find_sources()
+
+            # Modify the filelist and normalize it
+            root = get_root()
+            cfg = get_config_from_root(root)
+            self.filelist.append("versioneer.py")
+            if cfg.versionfile_source:
+                # There are rare cases where versionfile_source might not be
+                # included by default, so we must be explicit
+                self.filelist.append(cfg.versionfile_source)
+            self.filelist.sort()
+            self.filelist.remove_duplicates()
+
+            # The write method is hidden in the manifest_maker instance that
+            # generated the filelist and was thrown away
+            # We will instead replicate their final normalization (to unicode,
+            # and POSIX-style paths)
+            from setuptools import unicode_utils
+
+            normalized = [
+                unicode_utils.filesys_decode(f).replace(os.sep, "/")
+                for f in self.filelist.files
+            ]
+
+            manifest_filename = os.path.join(self.egg_info, "SOURCES.txt")
+            with open(manifest_filename, "w") as fobj:
+                fobj.write("\n".join(normalized))
+
+    cmds["egg_info"] = cmd_egg_info
+
     # we override different "sdist" commands for both environments
     if "sdist" in cmds:
         _sdist = cmds["sdist"]
-    elif "setuptools" in sys.modules:
-        from setuptools.command.sdist import sdist as _sdist
     else:
-        from distutils.command.sdist import sdist as _sdist
+        from setuptools.command.sdist import sdist as _sdist
 
     class cmd_sdist(_sdist):
         def run(self):
@@ -2056,7 +2178,7 @@ def do_setup():
         ipy = 
os.path.join(os.path.dirname(cfg.versionfile_source), "__init__.py") if os.path.exists(ipy): try: - with open(ipy, "r") as f: + with open(ipy) as f: old = f.read() except OSError: old = "" @@ -2076,44 +2198,10 @@ def do_setup(): print(" %s doesn't exist, ok" % ipy) ipy = None - # Make sure both the top-level "versioneer.py" and versionfile_source - # (PKG/_version.py, used by runtime code) are in MANIFEST.in, so - # they'll be copied into source distributions. Pip won't be able to - # install the package without this. - manifest_in = os.path.join(root, "MANIFEST.in") - simple_includes = set() - try: - with open(manifest_in, "r") as f: - for line in f: - if line.startswith("include "): - for include in line.split()[1:]: - simple_includes.add(include) - except OSError: - pass - # That doesn't cover everything MANIFEST.in can do - # (http://docs.python.org/2/distutils/sourcedist.html#commands), so - # it might give some false negatives. Appending redundant 'include' - # lines is safe, though. - if "versioneer.py" not in simple_includes: - print(" appending 'versioneer.py' to MANIFEST.in") - with open(manifest_in, "a") as f: - f.write("include versioneer.py\n") - else: - print(" 'versioneer.py' already in MANIFEST.in") - if cfg.versionfile_source not in simple_includes: - print( - " appending versionfile_source ('%s') to MANIFEST.in" - % cfg.versionfile_source - ) - with open(manifest_in, "a") as f: - f.write("include %s\n" % cfg.versionfile_source) - else: - print(" versionfile_source already in MANIFEST.in") - # Make VCS-specific changes. For git, this means creating/changing # .gitattributes to mark _version.py for export-subst keyword # substitution. 
- do_vcs_install(manifest_in, cfg.versionfile_source, ipy) + do_vcs_install(cfg.versionfile_source, ipy) return 0 @@ -2122,7 +2210,7 @@ def scan_setup_py(): found = set() setters = False errors = 0 - with open("setup.py", "r") as f: + with open("setup.py") as f: for line in f.readlines(): if "import versioneer" in line: found.add("import") @@ -2154,10 +2242,14 @@ def scan_setup_py(): return errors +def setup_command(): + """Set up Versioneer and exit with appropriate error code.""" + errors = do_setup() + errors += scan_setup_py() + sys.exit(1 if errors else 0) + + if __name__ == "__main__": cmd = sys.argv[1] if cmd == "setup": - errors = do_setup() - errors += scan_setup_py() - if errors: - sys.exit(1) + setup_command()