From 97c37298df9e997b86ca9efed824e27024f3bd60 Mon Sep 17 00:00:00 2001 From: Cuong Nguyen <128072568+can-anyscale@users.noreply.github.com> Date: Mon, 17 Jun 2024 20:36:29 -0700 Subject: [PATCH] [ci] deflake rllib release tests (#45901) (#46114) Some of the RLlib release tests are still flaky. To deflake the other tests, first narrow the impact down to the dreamerv3 test only. Also mark dreamerv3 as non-release-blocking; @sven1977, I would love your confirmation here. Thanks! Test: - release tests Signed-off-by: can --- release/ray_release/byod/byod_rllib_dreamerv3_test.sh | 9 +++++++++ release/ray_release/byod/byod_rllib_test.sh | 4 ---- release/release_tests.yaml | 4 ++-- 3 files changed, 11 insertions(+), 6 deletions(-) create mode 100755 release/ray_release/byod/byod_rllib_dreamerv3_test.sh diff --git a/release/ray_release/byod/byod_rllib_dreamerv3_test.sh b/release/ray_release/byod/byod_rllib_dreamerv3_test.sh new file mode 100755 index 000000000000..838d014b1288 --- /dev/null +++ b/release/ray_release/byod/byod_rllib_dreamerv3_test.sh @@ -0,0 +1,9 @@ +#!/bin/bash +# This script is used to build an extra layer on top of the base anyscale/ray image +# to run RLlib dreamerv3 release tests. + +set -exo pipefail + +# Only DreamerV3 still uses tf on the new API stack. But requires tf==2.11.1 to run. +pip uninstall -y tensorflow tensorflow_probability +pip install tensorflow==2.11.1 tensorflow_probability==0.19.0 diff --git a/release/ray_release/byod/byod_rllib_test.sh b/release/ray_release/byod/byod_rllib_test.sh index 296b3ffaea66..d0e73d4d516d 100755 --- a/release/ray_release/byod/byod_rllib_test.sh +++ b/release/ray_release/byod/byod_rllib_test.sh @@ -14,7 +14,3 @@ pip install werkzeug==2.3.8 # not strictly necessary, but makes debugging easier git clone https://github.com/ray-project/ray.git - -# Only DreamerV3 still uses tf on the new API stack. But requires tf==2.11.1 to run. 
-pip uninstall -y tensorflow tensorflow_probability -pip install tensorflow==2.11.1 tensorflow_probability==0.19.0 diff --git a/release/release_tests.yaml b/release/release_tests.yaml index ad67364a360d..3e21a8b772b2 100644 --- a/release/release_tests.yaml +++ b/release/release_tests.yaml @@ -2747,14 +2747,14 @@ group: RLlib tests working_dir: rllib_tests - stable: true + stable: false frequency: weekly team: rllib cluster: byod: type: gpu - post_build_script: byod_rllib_test.sh + post_build_script: byod_rllib_dreamerv3_test.sh runtime_env: - RLLIB_TEST_NO_JAX_IMPORT=1 - LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/ray/.mujoco/mujoco210/bin