diff --git a/tensorflow/tools/ci_build/linux/rocm/run_cpu.sh b/tensorflow/tools/ci_build/linux/rocm/run_cpu.sh index 49543c92a91bc5..cb9d003a349798 100755 --- a/tensorflow/tools/ci_build/linux/rocm/run_cpu.sh +++ b/tensorflow/tools/ci_build/linux/rocm/run_cpu.sh @@ -28,55 +28,17 @@ export PYTHON_BIN_PATH=`which python3` PYTHON_VERSION=`python3 -c "import sys;print(f'{sys.version_info.major}.{sys.version_info.minor}')"` export TF_PYTHON_VERSION=$PYTHON_VERSION -export TF_NEED_ROCM=0 +# Use the bazelrc files in /usertools if available +if [ ! -d /tf ];then + # The bazelrc files in /usertools expect /tf to exist + mkdir /tf +fi -if [ -f /usertools/cpu.bazelrc ]; then - # Use the bazelrc files in /usertools if available - if [ ! -d /tf ];then - # The bazelrc files in /usertools expect /tf to exist - mkdir /tf - fi - bazel \ - --bazelrc=/usertools/cpu.bazelrc \ - test \ +bazel --bazelrc=tensorflow/tools/tf_sig_build_dockerfiles/devel.usertools/cpu.bazelrc test \ --config=sigbuild_local_cache \ - --verbose_failures \ + --verbose_failures \ --config=pycpp \ - --test_env=HIP_VISIBLE_DEVICES=\"\" \ - --repo_env=USE_PYWRAP_RULES=${usePywrapRules} \ --action_env=TF_NEED_ROCM=0 \ --action_env=TF_PYTHON_VERSION=$PYTHON_VERSION \ --local_test_jobs=${N_BUILD_JOBS} \ - --test_timeout 920,2400,7200,9600 \ --jobs=${N_BUILD_JOBS} -else - yes "" | $PYTHON_BIN_PATH configure.py - - - # Run bazel test command. Double test timeouts to avoid flakes. - # xla/mlir_hlo/tests/Dialect/gml_st tests disabled in 09/08/22 sync - bazel test \ - -k \ - --verbose_failures \ - --test_tag_filters=-no_oss,-oss_excluded,-oss_serial,-gpu,-multi_gpu,-multi_and_single_gpu,-tpu,-cuda-only,-benchmark-test,-v1only \ - --test_lang_filters=cc,py \ - --jobs=30 \ - --local_ram_resources=60000 \ - --local_cpu_resources=15 \ - --local_test_jobs=${N_BUILD_JOBS} \ - --test_timeout 920,2400,7200,9600 \ - --build_tests_only \ - --test_output=errors \ - --test_sharding_strategy=disabled \ - --test_size_filters=small,medium \ - --test_env=TF_PYTHON_VERSION=$PYTHON_VERSION \ - --test_env=HIP_VISIBLE_DEVICES=\"\" \ - --repo_env=USE_PYWRAP_RULES=${usePywrapRules} \ - --action_env=TF_NEED_ROCM=0 \ - -- \ - //tensorflow/... \ - -//tensorflow/compiler/tf2tensorrt/... \ - -//tensorflow/core/tpu/... \ - -//tensorflow/lite/... \ - -//tensorflow/tools/toolchains/... -fi diff --git a/tensorflow/tools/ci_build/linux/rocm/run_gpu_multi.sh b/tensorflow/tools/ci_build/linux/rocm/run_gpu_multi.sh index 30750047780a04..0088f2cc78faa8 100755 --- a/tensorflow/tools/ci_build/linux/rocm/run_gpu_multi.sh +++ b/tensorflow/tools/ci_build/linux/rocm/run_gpu_multi.sh @@ -43,67 +43,23 @@ export TF_PYTHON_VERSION=$PYTHON_VERSION export TF_NEED_ROCM=1 export ROCM_PATH=$ROCM_INSTALL_DIR -if [ -f /usertools/rocm.bazelrc ]; then - # Use the bazelrc files in /usertools if available - if [ ! -d /tf ];then - # The bazelrc files in /usertools expect /tf to exist - mkdir /tf - fi - bazel \ - --bazelrc=/usertools/rocm.bazelrc \ - test \ - --local_test_jobs=${N_TEST_JOBS} \ - --jobs=30 \ - --local_ram_resources=60000 \ - --local_cpu_resources=15 \ - --config=sigbuild_local_cache \ - --config=rocm \ - --config=nonpip_multi_gpu \ - --action_env=TF_PYTHON_VERSION=$PYTHON_VERSION -else - # Legacy style: run configure then build - yes "" | $PYTHON_BIN_PATH configure.py - - # Run bazel test command. Double test timeouts to avoid flakes. - bazel test \ - --config=rocm \ - -k \ - --test_tag_filters=-no_gpu,-cuda-only \ - --jobs=30 \ - --local_ram_resources=60000 \ - --local_cpu_resources=15 \ - --local_test_jobs=${N_TEST_JOBS} \ - --test_timeout 920,2400,7200,9600 \ - --build_tests_only \ - --test_output=errors \ - --test_sharding_strategy=disabled \ - --test_size_filters=small,medium,large \ - --cache_test_results=no \ - --test_env=TF_PER_DEVICE_MEMORY_LIMIT_MB=2048 \ - --test_env=TF_PYTHON_VERSION=$PYTHON_VERSION \ - -- \ - //tensorflow/core/nccl:nccl_manager_test_2gpu \ - //tensorflow/python/distribute/integration_test:mwms_peer_failure_test_2gpu \ - //tensorflow/python/distribute:checkpoint_utils_test_2gpu \ - //tensorflow/python/distribute:checkpointing_test_2gpu \ - //tensorflow/python/distribute:collective_all_reduce_strategy_test_xla_2gpu \ - //tensorflow/python/distribute:custom_training_loop_gradient_test_2gpu \ - //tensorflow/python/distribute:custom_training_loop_input_test_2gpu \ - //tensorflow/python/distribute:distribute_utils_test_2gpu \ - //tensorflow/python/distribute:input_lib_test_2gpu \ - //tensorflow/python/distribute:input_lib_type_spec_test_2gpu \ - //tensorflow/python/distribute:metrics_v1_test_2gpu \ - //tensorflow/python/distribute:mirrored_variable_test_2gpu \ - //tensorflow/python/distribute:parameter_server_strategy_test_2gpu \ - //tensorflow/python/distribute:ps_values_test_2gpu \ - //tensorflow/python/distribute:random_generator_test_2gpu \ - //tensorflow/python/distribute:test_util_test_2gpu \ - //tensorflow/python/distribute:tf_function_test_2gpu \ - //tensorflow/python/distribute:vars_test_2gpu \ - //tensorflow/python/distribute:warm_starting_util_test_2gpu \ - //tensorflow/python/training:saver_test_2gpu +if [ ! -d /tf ];then + # The bazelrc files in /usertools expect /tf to exist + mkdir /tf fi +bazel --bazelrc=tensorflow/tools/tf_sig_build_dockerfiles/devel.usertools/rocm.bazelrc test \ + --local_test_jobs=${N_TEST_JOBS} \ + --jobs=30 \ + --local_ram_resources=60000 \ + --local_cpu_resources=15 \ + --verbose_failures \ + --config=rocm \ + --config=nonpip_multi_gpu \ + --config=sigbuild_local_cache \ + --action_env=TF_PYTHON_VERSION=$PYTHON_VERSION \ + --repo_env="ROCM_PATH=$ROCM_PATH" \ + # Started failing with 210906 sync # FAILED : //tensorflow/core/kernels:collective_nccl_test_2gpu \ diff --git a/tensorflow/tools/ci_build/linux/rocm/run_gpu_single.sh b/tensorflow/tools/ci_build/linux/rocm/run_gpu_single.sh index c329ebce35c7cc..572d2c7dcbf154 100755 --- a/tensorflow/tools/ci_build/linux/rocm/run_gpu_single.sh +++ b/tensorflow/tools/ci_build/linux/rocm/run_gpu_single.sh @@ -59,25 +59,26 @@ if [ -z "$TARGET_ARCHS" ]; then exit 1 fi +if [ ! -d /tf ];then + # The bazelrc files in /usertools expect /tf to exist + mkdir /tf +fi + # Run bazel test command. Double test timeouts to avoid flakes. -bazel test \ +bazel --bazelrc=tensorflow/tools/tf_sig_build_dockerfiles/devel.usertools/rocm.bazelrc test \ --config=rocm \ + --config=sigbuild_local_cache \ + --config=pycpp \ -k \ - --test_tag_filters=gpu,-no_oss,-oss_excluded,-oss_serial,-no_gpu,-cuda-only,-benchmark-test,-rocm_multi_gpu,-tpu,-v1only \ --jobs=${N_BUILD_JOBS} \ --local_test_jobs=${N_TEST_JOBS} \ --test_env=TF_GPU_COUNT=$TF_GPU_COUNT \ --test_env=TF_TESTS_PER_GPU=$TF_TESTS_PER_GPU \ --test_env=MIOPEN_DEBUG_CONV_WINOGRAD=0 \ - --test_timeout 600,900,2400,7200 \ --repo_env="TF_ROCM_AMDGPU_TARGETS=$TARGET_ARCHS" \ + --repo_env="ROCM_PATH=$ROCM_PATH" \ --build_tests_only \ --test_output=errors \ + --verbose_failures \ --test_sharding_strategy=disabled \ - --test_size_filters=small,medium,large \ - --run_under=//tensorflow/tools/ci_build/gpu_build:parallel_gpu_execute \ - -- \ - //tensorflow/... \ - -//tensorflow/core/tpu/... \ - -//tensorflow/lite/... \ - -//tensorflow/compiler/tf2tensorrt/... \ + --run_under=//tensorflow/tools/ci_build/gpu_build:parallel_gpu_execute