diff --git a/run_tensordot_test.sh b/run_tensordot_test.sh
new file mode 100755
index 00000000000000..d8b56f854dbe6e
--- /dev/null
+++ b/run_tensordot_test.sh
@@ -0,0 +1,99 @@
+#!/usr/bin/env bash
+# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# ==============================================================================
+set -e
+set -x
+
+N_BUILD_JOBS=$(grep -c ^processor /proc/cpuinfo)
+#TF_GPU_COUNT=$(lspci|grep 'controller'|grep 'AMD/ATI'|wc -l)
+TF_GPU_COUNT=1
+TF_TESTS_PER_GPU=1
+N_TEST_JOBS=$(expr ${TF_GPU_COUNT} \* ${TF_TESTS_PER_GPU})
+
+echo ""
+echo "Bazel will use ${N_BUILD_JOBS} concurrent build job(s) and ${N_TEST_JOBS} concurrent test job(s)."
+echo ""
+
+# First positional argument (if any) specifies the ROCM_INSTALL_DIR
+ROCM_INSTALL_DIR=/opt/rocm-5.6.0
+if [[ -n $1 ]]; then
+    ROCM_INSTALL_DIR=$1
+fi
+
+# Run configure.
+export PYTHON_BIN_PATH=`which python3`
+
+PYTHON_VERSION=`python3 -c "import sys;print(f'{sys.version_info.major}.{sys.version_info.minor}')"`
+export TF_PYTHON_VERSION=$PYTHON_VERSION
+export TF_NEED_ROCM=1
+export TF_NEED_CLANG=0
+export ROCM_PATH=$ROCM_INSTALL_DIR
+
+yes "" | $PYTHON_BIN_PATH configure.py
+
+  #--test_env=MIOPEN_DEBUG_CONV_WINOGRAD=0 \
+  #--test_env=MIOPEN_DEBUG_CONV_FFT=0 \
+  #--test_env=MIOPEN_DEBUG_CONV_GEMM=0 \
+  #--test_env=MIOPEN_DEBUG_CONV_IMPLICIT_GEMM=0 \
+  #--test_env=MIOPEN_DEBUG_CONV_DIRECT=1 \
+# Run bazel test command. Double test timeouts to avoid flakes.
+bazel test \
+      --config=rocm \
+      -k \
+      --test_tag_filters=gpu,-no_oss,-oss_excluded,-oss_serial,-no_gpu,-no_rocm,-benchmark-test,-rocm_multi_gpu,-tpu,-v1only \
+      --jobs=${N_BUILD_JOBS} \
+      --local_test_jobs=${N_TEST_JOBS} \
+      --test_env=TF_GPU_COUNT=$TF_GPU_COUNT \
+      --test_env=TF_TESTS_PER_GPU=$TF_TESTS_PER_GPU \
+      --test_env=HSA_TOOLS_LIB=libroctracer64.so \
+      --test_env=AMD_SERIALIZE_KERNEL=3 \
+      --test_env=AMD_SERIALIZE_COPY=3 \
+      --test_env=ROCBLAS_LAYER=2 \
+      --test_env=TENSILE_DB=0x8000 \
+      --test_env=MIOPEN_ENABLE_LOGGING_CMD=1 \
+      --test_env=MIOPEN_ENABLE_LOGGING=1 \
+      --test_env=MIOPEN_LOG_LEVEL=7 \
+      --test_env=TF_PYTHON_VERSION=$PYTHON_VERSION \
+      --test_timeout 920,2400,7200,9600 \
+      --build_tests_only \
+      --test_output=errors \
+      --test_sharding_strategy=disabled \
+      --test_size_filters=small,medium,large \
+      --run_under=//tensorflow/tools/ci_build/gpu_build:parallel_gpu_execute \
+      -- \
+//tensorflow/python/kernel_tests/math_ops:tensordot_op_test_gpu \
+#//tensorflow/compiler/tests:qr_op_test_gpu \
+#//tensorflow/python/kernel_tests/linalg:matrix_solve_ls_op_test_gpu \
+#//tensorflow/python/kernel_tests/array_ops:init_ops_test_gpu \
+#//tensorflow/compiler/xla/tests:convolution_test_cudnn_frontend_disabled_gpu \
+#//tensorflow/compiler/xla/tests:convolution_test_gpu \
+#//tensorflow/compiler/xla/tests:convolution_test_gpu_alternative_layout_gpu \
+#//tensorflow/compiler/xla/service/gpu/tests:gpu_kernel_tiling_test_gpu \
+#//tensorflow/compiler/xla/tests:conv_depthwise_test_gpu \
+#//tensorflow/core/grappler/optimizers:remapper_test_gpu \
+#//tensorflow/dtensor/python/tests:multi_client_test_2gpus \
+#//tensorflow/python/distribute/experimental:multi_worker_mirrored_strategy_test_2gpus \
+#//tensorflow/python/grappler:auto_mixed_precision_test_gpu \
+#//tensorflow/python/grappler:remapper_test_gpu \
+
+
+
+  #//tensorflow/... \
+  #-//tensorflow/python/integration_testing/... \
+  #-//tensorflow/core/tpu/... \
+  #-//tensorflow/lite/... \
+  #-//tensorflow/compiler/tf2tensorrt/... \
+  #-//tensorflow/dtensor/python/tests:multi_client_test_nccl_2gpus
diff --git a/tensorflow/compiler/tests/qr_op_test.py b/tensorflow/compiler/tests/qr_op_test.py
index 4eac4a970bf199..e35c3cdc2496a8 100644
--- a/tensorflow/compiler/tests/qr_op_test.py
+++ b/tensorflow/compiler/tests/qr_op_test.py
@@ -114,6 +114,7 @@ def _test(self, x_np, full_matrices, full_rank=True):
   DTYPES = [np.float32, np.complex64]
   PARAMS = itertools.product(SIZES, SIZES, DTYPES)
 
+  '''
   @parameterized.parameters(*PARAMS)
   def testQR(self, rows, cols, dtype):
     for full_matrices in [True, False]:
@@ -121,11 +122,13 @@ def testQR(self, rows, cols, dtype):
       for batch_dims in [(), (3,)] + [(3, 2)] * (max(rows, cols) < 10):
         x_np = self._random_matrix(dtype, batch_dims + (rows, cols))
         self._test(x_np, full_matrices)
+  '''
 
   def testLarge2000x2000(self):
     x_np = self._random_matrix(np.float32, (2000, 2000))
     self._test(x_np, full_matrices=True)
 
+  '''
   @unittest.skip("Test times out on CI")
   def testLarge17500x128(self):
     x_np = self._random_matrix(np.float32, (17500, 128))
@@ -142,6 +145,7 @@ def testRepeatedColumn(self, rows, cols):
     x_np = self._random_matrix(np.complex64, (rows, cols))
     x_np[:, 1] = x_np[:, 2]
     self._test(x_np, full_matrices=True, full_rank=False)
+  '''
 
 
 if __name__ == "__main__":
diff --git a/tensorflow/python/kernel_tests/math_ops/tensordot_op_test.py b/tensorflow/python/kernel_tests/math_ops/tensordot_op_test.py
index fa9034a08fd794..172d65435b2314 100644
--- a/tensorflow/python/kernel_tests/math_ops/tensordot_op_test.py
+++ b/tensorflow/python/kernel_tests/math_ops/tensordot_op_test.py
@@ -37,6 +37,7 @@ def _add_test(test, test_name, fn):
 
 class TensordotTest(test_lib.TestCase):
 
+  '''
   @test_util.run_in_graph_and_eager_modes(use_gpu=True)
   def test_invalid_shape(self):
     a = [[1, 2], [3, 4]]
@@ -134,6 +135,7 @@ def test_partial_shape_inference(self):
       output_shape = output_shape.as_list()
       self.assertEqual(output_shape[0], 2)
       self.assertEqual(output_shape[1], None)
+  '''
 
 
 def _get_tensordot_tests(dtype_, rank_a_, rank_b_, num_dims_, dynamic_shape_):
@@ -149,9 +151,21 @@ def _generate_random_tensors_and_dims():
     shared_shape = np.random.random_integers(1, _MAXDIM, num_dims_)
     a_dims = _random_subset(num_dims_, rank_a_)
     b_dims = _random_subset(num_dims_, rank_b_)
+    '''
+    print('a_shape=', a_shape)
+    print('b_shape=', b_shape)
+    print('shared_shape=', shared_shape)
+    print('a_dims=', a_dims)
+    print('b_dims=', b_dims)
+    '''
     for i in range(num_dims_):
       a_shape[a_dims[i]] = shared_shape[i]
       b_shape[b_dims[i]] = shared_shape[i]
+    '''
+    print('Rearranging')
+    print('a_shape=', a_shape)
+    print('b_shape=', b_shape)
+    '''
     a = np.random.uniform(
         low=-1.0, high=1.0,
         size=np.prod(a_shape)).reshape(a_shape).astype(dtype_)
     b = np.random.uniform(
         low=-1.0, high=1.0,
@@ -160,6 +174,7 @@ def _generate_random_tensors_and_dims():
         size=np.prod(b_shape)).reshape(b_shape).astype(dtype_)
     return a, b, a_dims, b_dims
 
+  '''
   @test_util.run_in_graph_and_eager_modes(use_gpu=True)
   @test_util.run_without_tensor_float_32("Tests tensordot, which calls matmul")
   def test_tensordot(self):
@@ -191,8 +206,9 @@ def test_tensordot(self):
         tf_ans = math_ops.tensordot(a_np, b_np, (a_dims_np, b_dims_np))
       self.assertAllClose(tf_ans, np_ans, rtol=tol, atol=tol)
       self.assertAllEqual(tf_ans.shape, np_ans.shape)
+  '''
 
-  @test_util.run_in_graph_and_eager_modes(use_gpu=True)
+  #@test_util.run_in_graph_and_eager_modes(use_gpu=True)
   @test_util.run_without_tensor_float_32("Tests tensordot, which calls matmul")
   def test_tensordot_scalar_axes(self):
     if dynamic_shape_ and context.executing_eagerly():
@@ -210,11 +226,21 @@ def test_tensordot_scalar_axes(self):
         low=-1.0, high=1.0, size=np.prod(shape)).reshape(shape).astype(dtype_)
     b_np = np.random.uniform(
         low=-1.0, high=1.0, size=np.prod(shape)).reshape(shape).astype(dtype_)
-    all_axes = [0, 1]
+    #all_axes = [0, 1]
+    all_axes = [ 1]
     if a_np.ndim > 2:
       all_axes.append(a_np.ndim - 1)
+    with open('/tmp/a.bin', 'wb') as fo:
+      a_np.tofile(fo)
+    with open('/tmp/b.bin', 'wb') as fo:
+      b_np.tofile(fo)
+    print('a_np.shape=', a_np.shape)
+    print('b_np.shape=', b_np.shape)
     for axes in all_axes:
+      print('axes=', axes)
       np_ans = np.tensordot(a_np, b_np, axes=axes)
+      with open('/tmp/np_out.bin', 'wb') as fo:
+        np_ans.tofile(fo)
       with self.cached_session() as sess:
         if dynamic_shape_:
           a = array_ops.placeholder(dtype_)
@@ -223,10 +249,13 @@ def test_tensordot_scalar_axes(self):
           tf_ans = sess.run(c, feed_dict={a: a_np, b: b_np})
         else:
           tf_ans = math_ops.tensordot(a_np, b_np, axes=axes)
+          with open('/tmp/tf_out.bin', 'wb') as fo:
+            tf_ans.numpy().tofile(fo)
       self.assertAllClose(tf_ans, np_ans, rtol=tol, atol=tol)
       self.assertAllEqual(tf_ans.shape, np_ans.shape)
 
-  return [test_tensordot, test_tensordot_scalar_axes]
+  #return [test_tensordot, test_tensordot_scalar_axes]
+  return [test_tensordot_scalar_axes]
 
 
 if __name__ == "__main__":
@@ -244,5 +273,6 @@ def test_tensordot_scalar_axes(self):
          name = "%s_%s_%s_%s_%s_%s" % (testcase.__name__, dtype.__name__,
                                        rank_a, rank_b, num_dims,
                                        dynamic_shape)
-          _add_test(TensordotTest, name, testcase)
+          if dtype == np.float64 and rank_a == 5 and rank_b == 5 and num_dims == 5 and dynamic_shape == False:
+            _add_test(TensordotTest, name, testcase)
   test_lib.main()
diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py
index a5230f5e299e35..b85601fe98c150 100644
--- a/tensorflow/python/ops/math_ops.py
+++ b/tensorflow/python/ops/math_ops.py
@@ -5238,8 +5238,15 @@ def _tensordot_axes(a, axes):
     a_reshape, a_free_dims, a_free_dims_static = _tensordot_reshape(a, a_axes)
     b_reshape, b_free_dims, b_free_dims_static = _tensordot_reshape(
         b, b_axes, True)
+    print('a_reshape.shape=', a_reshape.shape)
+    print('a_free_dims=', a_free_dims)
+    print('a_free_dims_static=', a_free_dims_static)
+    print('b_reshape.shape=', b_reshape.shape)
+    print('b_free_dims=', b_free_dims)
+    print('b_free_dims_static=', b_free_dims_static)
     ab_matmul = matmul(a_reshape, b_reshape)
     if isinstance(a_free_dims, list) and isinstance(b_free_dims, list):
+      print('If path')
       if (ab_matmul.get_shape().is_fully_defined() and
           ab_matmul.get_shape().as_list() == a_free_dims + b_free_dims):
         return ab_matmul
@@ -5247,6 +5254,7 @@ def _tensordot_axes(a, axes):
       return array_ops.reshape(
           ab_matmul, a_free_dims + b_free_dims, name=name)
     else:
+      print('Else path')
      a_free_dims = ops.convert_to_tensor(a_free_dims, dtype=dtypes.int32)
      b_free_dims = ops.convert_to_tensor(b_free_dims, dtype=dtypes.int32)
      product = array_ops.reshape(
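
Note (not part of the patch): the instrumented test above writes a_np, b_np, the NumPy reference result, and the TF result as raw buffers to /tmp/a.bin, /tmp/b.bin, /tmp/np_out.bin, and /tmp/tf_out.bin via ndarray.tofile(). A minimal standalone sketch for comparing those dumps is below; the float64 dtype is an assumption tied to the np.float64-only test selection in this patch, and the buffers are compared flat because the .bin files carry no shape header.

# compare_tensordot_dumps.py -- debugging helper sketch, not part of the patch.
import numpy as np

# Assumption: dumps were produced by the float64-only test variant registered above.
np_out = np.fromfile('/tmp/np_out.bin', dtype=np.float64)
tf_out = np.fromfile('/tmp/tf_out.bin', dtype=np.float64)

print('np_out size:', np_out.size, 'tf_out size:', tf_out.size)
if np_out.size == tf_out.size:
    # Raw buffers have no shape information, so compare element-wise in flat order.
    diff = np.abs(np_out - tf_out)
    print('max abs diff:', diff.max())
    print('elements over 1e-6:', int((diff > 1e-6).sum()), 'of', np_out.size)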