[BugFix] Make sure ParallelEnv does not overflow mem when policy requires grad #1909
Dr.CI classification results
{"FAILED":[],"FLAKY":[{"workflowId":7917745407,"id":21614537008,"runnerName":"i-0f290a2cba8ad7005","authorEmail":"[email protected]","name":"Habitat Tests on Linux / tests (3.9, 11.6) / linux-job","jobName":"tests (3.9, 11.6) / linux-job","conclusion":"failure","completed_at":"2024-02-15T15:01:56Z","html_url":"https://github.com/pytorch/rl/actions/runs/7917745407/job/21614537008","head_branch":"fix-penv","pr_number":1909,"head_sha":"a6dc5ee8db2660ad0690fd281dc96dff6cdb0616","failure_captures":["RuntimeError: Command docker exec -t af562d456fa1705ee7f6e6b1c583f0893c500b3556edecfb3bb66eec0fadc187 /exec failed with exit code 139"],"failure_lines":["RuntimeError: Command docker exec -t af562d456fa1705ee7f6e6b1c583f0893c500b3556edecfb3bb66eec0fadc187 /exec failed with exit code 139"],"failure_context":["+ bash /pytorch/rl/.github/unittest/linux_libs/scripts_habitat/run_test.sh","+ bash /pytorch/rl/.github/unittest/linux_libs/scripts_habitat/install.sh","+ bash /pytorch/rl/.github/unittest/linux_libs/scripts_habitat/setup_env.sh","+ cp /pytorch/rl/.github/unittest/linux_libs/scripts_habitat/10_nvidia.json /usr/share/glvnd/egl_vendor.d/10_nvidia.json","+ this_dir=/pytorch/rl/.github/unittest/linux_libs/scripts_habitat","++ pwd","++ cd .github/unittest/linux_libs/scripts_habitat","+++ dirname .github/unittest/linux_libs/scripts_habitat/run_all.sh","+ apt-get dist-upgrade -y","+ apt-get install -y g++ gcc","+ apt-get install -y libglvnd0 libgl1 libglx0 libegl1 libgles2","+ apt-get install -y libglfw3 libgl1-mesa-glx libosmesa6 libglew-dev"],"time":"2024-02-15T15:03:07.398331Z"},{"workflowId":7917745409,"id":21614540471,"runnerName":"i-017e8633b29fd5a54","authorEmail":"[email protected]","name":"Unit-tests on Linux / tests-gpu (3.8, 12.1) / linux-job","jobName":"tests-gpu (3.8, 12.1) / linux-job","conclusion":"failure","completed_at":"2024-02-15T15:35:15Z","html_url":"https://github.com/pytorch/rl/actions/runs/7917745409/job/21614540471","head_branch":"fix-penv","pr_number":1909,"head_sha":"a6dc5ee8db2660ad0690fd281dc96dff6cdb0616","failure_captures":["test/test_env.py::TestLibThreading::test_auto_num_threads"],"failure_lines":["FAILED test/test_env.py::TestLibThreading::test_auto_num_threads - assert 5 == 3"],"failure_context":["+ python .github/unittest/helpers/coverage_run_parallel.py -m pytest test --instafail --durations 200 -vv --capture no --ignore test/test_rlhf.py --timeout=120","+ '[' cu121 '!=' cpu ']'","+ pytest test/smoke_test_deps.py -v --durations 200 -k 'test_gym or test_dm_control_pixels or test_dm_control or test_tb'","+ pytest test/smoke_test.py -v --durations 200","+ BATCHED_PIPE_TIMEOUT=60","+ export BATCHED_PIPE_TIMEOUT=60","+ MAX_IDLE_COUNT=100","+ export MAX_IDLE_COUNT=100","+ CKPT_BACKEND=torch","+ export CKPT_BACKEND=torch","+ MKL_THREADING_LAYER=GNU","+ export MKL_THREADING_LAYER=GNU"],"time":"2024-02-15T15:35:19.005148Z"}],"BROKEN_TRUNK":[{"workflowId":7917745410,"id":21614535882,"runnerName":"i-015b936ccad84d33e","authorEmail":"[email protected]","name":"Unit-tests on Windows / unittests-cpu / windows-job","jobName":"unittests-cpu / windows-job","conclusion":"failure","completed_at":"2024-02-15T15:41:36Z","html_url":"https://github.com/pytorch/rl/actions/runs/7917745410/job/21614535882","head_branch":"fix-penv","pr_number":1909,"head_sha":"a6dc5ee8db2660ad0690fd281dc96dff6cdb0616","failure_captures":["##[error]The operation was canceled."],"failure_lines":["##[error]The operation was canceled."],"failure_context":["+ pytest --junitxml=test-results/junit.xml -v --durations 200 --ignore test/test_distributed.py --ignore test/test_rlhf.py","+ python -m torch.utils.collect_env","+ LAZY_LEGACY_OP=False","+ export LAZY_LEGACY_OP=False","+ BATCHED_PIPE_TIMEOUT=60","+ export BATCHED_PIPE_TIMEOUT=60","+ MAX_IDLE_COUNT=60","+ export MAX_IDLE_COUNT=60","+ CKPT_BACKEND=torch","+ export CKPT_BACKEND=torch","++ [[ cpu != \c\p\u ]]","++ [[ 3 -eq 5 ]]"],"time":"2024-02-15T15:41:41.333109Z"},{"workflowId":7917745410,"id":21614536507,"runnerName":"i-0ddd6d6259287a1bf","authorEmail":"[email protected]","name":"Unit-tests on Windows / unittests-gpu / windows-job","jobName":"unittests-gpu / windows-job","conclusion":"failure","completed_at":"2024-02-15T15:27:56Z","html_url":"https://github.com/pytorch/rl/actions/runs/7917745410/job/21614536507","head_branch":"fix-penv","pr_number":1909,"head_sha":"a6dc5ee8db2660ad0690fd281dc96dff6cdb0616","failure_captures":["##[error]The operation was canceled."],"failure_lines":["##[error]The operation was canceled."],"failure_context":["+ python -m pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cu118","+ True","+ printf 'Installing PyTorch with %s\n' pytorch-cuda=11.6","+ git submodule update --init --recursive","+ git submodule sync","+ cudatoolkit=pytorch-cuda=11.6","+ version=11.6","++ python -c 'print('\''.'\''.join("11.6".split('\''.'\'')[:2]))'","+ echo 'Using CUDA 11.6 as determined by CU_VERSION'","+ cuda_toolkit_pckg=pytorch-cuda","+ [[ 11.6 == 11.6 ]]","+ cuda_toolkit_pckg=cudatoolkit"],"time":"2024-02-15T15:28:00.450511Z"}],"UNSTABLE":[]}