Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 7 additions & 45 deletions tensorflow/tools/ci_build/linux/rocm/run_cpu.sh
Original file line number Diff line number Diff line change
Expand Up @@ -28,55 +28,17 @@ export PYTHON_BIN_PATH=`which python3`
# Ask python3 for its "major.minor" version string and publish it to child
# processes as TF_PYTHON_VERSION.
PYTHON_VERSION="$(python3 -c "import sys;print(f'{sys.version_info.major}.{sys.version_info.minor}')")"
TF_PYTHON_VERSION="${PYTHON_VERSION}"
export TF_PYTHON_VERSION

# Export TF_NEED_ROCM=0 for this run.
TF_NEED_ROCM=0
export TF_NEED_ROCM
# Use the bazelrc files in /usertools if available
# The bazelrc files in /usertools expect /tf to exist. `mkdir -p` creates the
# directory only when it is missing and succeeds when it is already there, so
# no separate existence test is needed.
mkdir -p /tf

# Run the CPU test suite. When /usertools/cpu.bazelrc exists, bazel is driven
# by a bazelrc file and the pycpp config; otherwise configure.py is run first
# and bazel gets an explicit flag list and target patterns.
# NOTE(review): the first branch shows two `bazel ... test` prefixes
# (/usertools/cpu.bazelrc and the in-repo devel.usertools/cpu.bazelrc) and
# --verbose_failures twice; this looks like old and new diff lines rendered
# together — confirm against the checked-in script before relying on it.
if [ -f /usertools/cpu.bazelrc ]; then
# Use the bazelrc files in /usertools if available
if [ ! -d /tf ];then
# The bazelrc files in /usertools expect /tf to exist
mkdir /tf
fi
bazel \
--bazelrc=/usertools/cpu.bazelrc \
test \
bazel --bazelrc=tensorflow/tools/tf_sig_build_dockerfiles/devel.usertools/cpu.bazelrc test \
--config=sigbuild_local_cache \
--verbose_failures \
--verbose_failures \
--config=pycpp \
--test_env=HIP_VISIBLE_DEVICES=\"\" \
--repo_env=USE_PYWRAP_RULES=${usePywrapRules} \
--action_env=TF_NEED_ROCM=0 \
--action_env=TF_PYTHON_VERSION=$PYTHON_VERSION \
--local_test_jobs=${N_BUILD_JOBS} \
--test_timeout 920,2400,7200,9600 \
--jobs=${N_BUILD_JOBS}
else
# Legacy path: answer every configure.py prompt with an empty line.
yes "" | $PYTHON_BIN_PATH configure.py


# Run bazel test command. Double test timeouts to avoid flakes.
# xla/mlir_hlo/tests/Dialect/gml_st tests disabled in 09/08/22 sync
bazel test \
-k \
--verbose_failures \
--test_tag_filters=-no_oss,-oss_excluded,-oss_serial,-gpu,-multi_gpu,-multi_and_single_gpu,-tpu,-cuda-only,-benchmark-test,-v1only \
--test_lang_filters=cc,py \
--jobs=30 \
--local_ram_resources=60000 \
--local_cpu_resources=15 \
--local_test_jobs=${N_BUILD_JOBS} \
--test_timeout 920,2400,7200,9600 \
--build_tests_only \
--test_output=errors \
--test_sharding_strategy=disabled \
--test_size_filters=small,medium \
--test_env=TF_PYTHON_VERSION=$PYTHON_VERSION \
--test_env=HIP_VISIBLE_DEVICES=\"\" \
--repo_env=USE_PYWRAP_RULES=${usePywrapRules} \
--action_env=TF_NEED_ROCM=0 \
-- \
//tensorflow/... \
-//tensorflow/compiler/tf2tensorrt/... \
-//tensorflow/core/tpu/... \
-//tensorflow/lite/... \
-//tensorflow/tools/toolchains/...
fi
74 changes: 15 additions & 59 deletions tensorflow/tools/ci_build/linux/rocm/run_gpu_multi.sh
Original file line number Diff line number Diff line change
Expand Up @@ -43,67 +43,23 @@ export TF_PYTHON_VERSION=$PYTHON_VERSION
export TF_NEED_ROCM=1
export ROCM_PATH=$ROCM_INSTALL_DIR

# Run the multi-GPU ROCm tests. With /usertools/rocm.bazelrc present, bazel
# uses that bazelrc plus the sigbuild_local_cache, rocm and nonpip_multi_gpu
# configs; otherwise the script runs configure.py and then tests an explicit
# list of *_2gpu targets.
# NOTE(review): an unconditional bazel invocation using the in-repo
# devel.usertools/rocm.bazelrc follows this block; presumably this if/else is
# the pre-change version shown by the diff — confirm against the checked-in
# script.
if [ -f /usertools/rocm.bazelrc ]; then
# Use the bazelrc files in /usertools if available
if [ ! -d /tf ];then
# The bazelrc files in /usertools expect /tf to exist
mkdir /tf
fi
bazel \
--bazelrc=/usertools/rocm.bazelrc \
test \
--local_test_jobs=${N_TEST_JOBS} \
--jobs=30 \
--local_ram_resources=60000 \
--local_cpu_resources=15 \
--config=sigbuild_local_cache \
--config=rocm \
--config=nonpip_multi_gpu \
--action_env=TF_PYTHON_VERSION=$PYTHON_VERSION
else
# Legacy style: run configure then build
yes "" | $PYTHON_BIN_PATH configure.py

# Run bazel test command. Double test timeouts to avoid flakes.
bazel test \
--config=rocm \
-k \
--test_tag_filters=-no_gpu,-cuda-only \
--jobs=30 \
--local_ram_resources=60000 \
--local_cpu_resources=15 \
--local_test_jobs=${N_TEST_JOBS} \
--test_timeout 920,2400,7200,9600 \
--build_tests_only \
--test_output=errors \
--test_sharding_strategy=disabled \
--test_size_filters=small,medium,large \
--cache_test_results=no \
--test_env=TF_PER_DEVICE_MEMORY_LIMIT_MB=2048 \
--test_env=TF_PYTHON_VERSION=$PYTHON_VERSION \
-- \
//tensorflow/core/nccl:nccl_manager_test_2gpu \
//tensorflow/python/distribute/integration_test:mwms_peer_failure_test_2gpu \
//tensorflow/python/distribute:checkpoint_utils_test_2gpu \
//tensorflow/python/distribute:checkpointing_test_2gpu \
//tensorflow/python/distribute:collective_all_reduce_strategy_test_xla_2gpu \
//tensorflow/python/distribute:custom_training_loop_gradient_test_2gpu \
//tensorflow/python/distribute:custom_training_loop_input_test_2gpu \
//tensorflow/python/distribute:distribute_utils_test_2gpu \
//tensorflow/python/distribute:input_lib_test_2gpu \
//tensorflow/python/distribute:input_lib_type_spec_test_2gpu \
//tensorflow/python/distribute:metrics_v1_test_2gpu \
//tensorflow/python/distribute:mirrored_variable_test_2gpu \
//tensorflow/python/distribute:parameter_server_strategy_test_2gpu \
//tensorflow/python/distribute:ps_values_test_2gpu \
//tensorflow/python/distribute:random_generator_test_2gpu \
//tensorflow/python/distribute:test_util_test_2gpu \
//tensorflow/python/distribute:tf_function_test_2gpu \
//tensorflow/python/distribute:vars_test_2gpu \
//tensorflow/python/distribute:warm_starting_util_test_2gpu \
//tensorflow/python/training:saver_test_2gpu
# The bazelrc files in /usertools expect /tf to exist. `mkdir -p` creates the
# directory only when it is missing and succeeds when it is already there, so
# no separate existence test is needed.
mkdir -p /tf

# Run bazel test for the multi-GPU ROCm job using the in-repo rocm.bazelrc.
# Reads N_TEST_JOBS, PYTHON_VERSION and ROCM_PATH from the environment.
# No target patterns are passed on the command line here.
# NOTE(review): presumably --config=nonpip_multi_gpu supplies the targets —
# confirm in rocm.bazelrc.
# The final flag deliberately has no trailing backslash, so a line added
# directly below cannot be absorbed into this command.
bazel --bazelrc=tensorflow/tools/tf_sig_build_dockerfiles/devel.usertools/rocm.bazelrc test \
    --local_test_jobs="${N_TEST_JOBS}" \
    --jobs=30 \
    --local_ram_resources=60000 \
    --local_cpu_resources=15 \
    --verbose_failures \
    --config=rocm \
    --config=nonpip_multi_gpu \
    --config=sigbuild_local_cache \
    --action_env=TF_PYTHON_VERSION="${PYTHON_VERSION}" \
    --repo_env="ROCM_PATH=${ROCM_PATH}"


# Started failing with 210906 sync
# FAILED : //tensorflow/core/kernels:collective_nccl_test_2gpu \
Expand Down
21 changes: 11 additions & 10 deletions tensorflow/tools/ci_build/linux/rocm/run_gpu_single.sh
Original file line number Diff line number Diff line change
Expand Up @@ -59,25 +59,26 @@ if [ -z "$TARGET_ARCHS" ]; then
exit 1
fi

# The bazelrc files in /usertools expect /tf to exist. `mkdir -p` creates the
# directory only when it is missing and succeeds when it is already there, so
# no separate existence test is needed.
mkdir -p /tf

# Run bazel test command. Double test timeouts to avoid flakes.
# Tests cover //tensorflow/... minus the tpu, lite and tf2tensorrt subtrees and
# are launched through parallel_gpu_execute via --run_under.
# NOTE(review): `bazel test \` is immediately followed by a second
# `bazel --bazelrc=... test \` line, and --run_under appears both before the
# `--` separator and after the target patterns; this looks like old and new
# diff lines rendered together — confirm against the checked-in script.
bazel test \
bazel --bazelrc=tensorflow/tools/tf_sig_build_dockerfiles/devel.usertools/rocm.bazelrc test \
--config=rocm \
--config=sigbuild_local_cache \
--config=pycpp \
-k \
--test_tag_filters=gpu,-no_oss,-oss_excluded,-oss_serial,-no_gpu,-cuda-only,-benchmark-test,-rocm_multi_gpu,-tpu,-v1only \
--jobs=${N_BUILD_JOBS} \
--local_test_jobs=${N_TEST_JOBS} \
--test_env=TF_GPU_COUNT=$TF_GPU_COUNT \
--test_env=TF_TESTS_PER_GPU=$TF_TESTS_PER_GPU \
--test_env=MIOPEN_DEBUG_CONV_WINOGRAD=0 \
--test_timeout 600,900,2400,7200 \
--repo_env="TF_ROCM_AMDGPU_TARGETS=$TARGET_ARCHS" \
--repo_env="ROCM_PATH=$ROCM_PATH" \
--build_tests_only \
--test_output=errors \
--verbose_failures \
--test_sharding_strategy=disabled \
--test_size_filters=small,medium,large \
--run_under=//tensorflow/tools/ci_build/gpu_build:parallel_gpu_execute \
-- \
//tensorflow/... \
-//tensorflow/core/tpu/... \
-//tensorflow/lite/... \
-//tensorflow/compiler/tf2tensorrt/... \
--run_under=//tensorflow/tools/ci_build/gpu_build:parallel_gpu_execute