Debug tensordot test.

wenchenvincent · Oct 9, 2023 · 0b98bd9 · 0b98bd9
1 parent 5b0eb43
commit 0b98bd9
Show file tree

Hide file tree

Showing 4 changed files with 145 additions and 4 deletions.
diff --git a/run_tensordot_test.sh b/run_tensordot_test.sh
@@ -0,0 +1,99 @@
+#!/usr/bin/env bash
+# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# ==============================================================================
+# Debug helper: runs configure.py for a ROCm build, then runs the tensordot GPU
+# test under bazel with extra logging-related variables in the test environment.
+# Usage: run_tensordot_test.sh [ROCM_INSTALL_DIR]   (default: /opt/rocm-5.6.0)
+set -e
+set -x
+
+N_BUILD_JOBS=$(grep -c ^processor /proc/cpuinfo)
+#TF_GPU_COUNT=$(lspci|grep 'controller'|grep 'AMD/ATI'|wc -l)
+TF_GPU_COUNT=1
+TF_TESTS_PER_GPU=1
+N_TEST_JOBS=$(( TF_GPU_COUNT * TF_TESTS_PER_GPU ))  # not `expr`: it exits 1 on a zero result
+
+echo ""
+echo "Bazel will use ${N_BUILD_JOBS} concurrent build job(s) and ${N_TEST_JOBS} concurrent test job(s)."
+echo ""
+
+# First positional argument (if any) specifies the ROCM_INSTALL_DIR
+ROCM_INSTALL_DIR=${1:-/opt/rocm-5.6.0}
+
+# Run configure.
+PYTHON_BIN_PATH=$(command -v python3)  # assigned separately so a failure is not masked by export
+export PYTHON_BIN_PATH
+PYTHON_VERSION=$(python3 -c "import sys;print(f'{sys.version_info.major}.{sys.version_info.minor}')")
+export TF_PYTHON_VERSION=$PYTHON_VERSION
+export TF_NEED_ROCM=1
+export TF_NEED_CLANG=0
+export ROCM_PATH=$ROCM_INSTALL_DIR
+
+yes "" | "$PYTHON_BIN_PATH" configure.py
+
+# Targets to test; commented-out entries are alternatives to re-enable as needed.
+TEST_TARGETS=(
+  //tensorflow/python/kernel_tests/math_ops:tensordot_op_test_gpu
+  #//tensorflow/compiler/tests:qr_op_test_gpu
+  #//tensorflow/python/kernel_tests/linalg:matrix_solve_ls_op_test_gpu
+  #//tensorflow/python/kernel_tests/array_ops:init_ops_test_gpu
+  #//tensorflow/compiler/xla/tests:convolution_test_cudnn_frontend_disabled_gpu
+  #//tensorflow/compiler/xla/tests:convolution_test_gpu
+  #//tensorflow/compiler/xla/tests:convolution_test_gpu_alternative_layout_gpu
+  #//tensorflow/compiler/xla/service/gpu/tests:gpu_kernel_tiling_test_gpu
+  #//tensorflow/compiler/xla/tests:conv_depthwise_test_gpu
+  #//tensorflow/core/grappler/optimizers:remapper_test_gpu
+  #//tensorflow/dtensor/python/tests:multi_client_test_2gpus
+  #//tensorflow/python/distribute/experimental:multi_worker_mirrored_strategy_test_2gpus
+  #//tensorflow/python/grappler:auto_mixed_precision_test_gpu
+  #//tensorflow/python/grappler:remapper_test_gpu
+  # Full-tree run with exclusions:
+  #//tensorflow/...
+  #-//tensorflow/python/integration_testing/...
+  #-//tensorflow/core/tpu/...
+  #-//tensorflow/lite/...
+  #-//tensorflow/compiler/tf2tensorrt/...
+  #-//tensorflow/dtensor/python/tests:multi_client_test_nccl_2gpus
+)
+
+# Disabled extra flags (MIOPEN_DEBUG_CONV_* toggles); add to the command below to use:
+#   --test_env=MIOPEN_DEBUG_CONV_WINOGRAD=0 --test_env=MIOPEN_DEBUG_CONV_FFT=0 --test_env=MIOPEN_DEBUG_CONV_GEMM=0
+#   --test_env=MIOPEN_DEBUG_CONV_IMPLICIT_GEMM=0 --test_env=MIOPEN_DEBUG_CONV_DIRECT=1
+# Run bazel test command. Double test timeouts to avoid flakes.
+bazel test \
+      --config=rocm \
+      -k \
+      --test_tag_filters=gpu,-no_oss,-oss_excluded,-oss_serial,-no_gpu,-no_rocm,-benchmark-test,-rocm_multi_gpu,-tpu,-v1only \
+      --jobs=${N_BUILD_JOBS} \
+      --local_test_jobs=${N_TEST_JOBS} \
+      --test_env=TF_GPU_COUNT=$TF_GPU_COUNT \
+      --test_env=TF_TESTS_PER_GPU=$TF_TESTS_PER_GPU \
+      --test_env=HSA_TOOLS_LIB=libroctracer64.so \
+      --test_env=AMD_SERIALIZE_KERNEL=3 \
+      --test_env=AMD_SERIALIZE_COPY=3 \
+      --test_env=ROCBLAS_LAYER=2 \
+      --test_env=TENSILE_DB=0x8000 \
+      --test_env=MIOPEN_ENABLE_LOGGING_CMD=1 \
+      --test_env=MIOPEN_ENABLE_LOGGING=1 \
+      --test_env=MIOPEN_LOG_LEVEL=7 \
+      --test_env=TF_PYTHON_VERSION=$PYTHON_VERSION \
+      --test_timeout 920,2400,7200,9600 \
+      --build_tests_only \
+      --test_output=errors \
+      --test_sharding_strategy=disabled \
+      --test_size_filters=small,medium,large \
+      --run_under=//tensorflow/tools/ci_build/gpu_build:parallel_gpu_execute \
+      -- "${TEST_TARGETS[@]}"
diff --git a/tensorflow/compiler/tests/qr_op_test.py b/tensorflow/compiler/tests/qr_op_test.py
@@ -114,18 +114,21 @@ def _test(self, x_np, full_matrices, full_rank=True):
   DTYPES = [np.float32, np.complex64]
   PARAMS = itertools.product(SIZES, SIZES, DTYPES)
 
+  '''
   @parameterized.parameters(*PARAMS)
   def testQR(self, rows, cols, dtype):
     for full_matrices in [True, False]:
       # Only tests the (3, 2) case for small numbers of rows/columns.
       for batch_dims in [(), (3,)] + [(3, 2)] * (max(rows, cols) < 10):
         x_np = self._random_matrix(dtype, batch_dims + (rows, cols))
         self._test(x_np, full_matrices)
+  '''
 
   def testLarge2000x2000(self):
     """Run `self._test` with full_matrices=True on a 2000x2000 float32 matrix from `self._random_matrix`."""
     x_np = self._random_matrix(np.float32, (2000, 2000))
     self._test(x_np, full_matrices=True)
 
+  '''
   @unittest.skip("Test times out on CI")
   def testLarge17500x128(self):
     x_np = self._random_matrix(np.float32, (17500, 128))
@@ -142,6 +145,7 @@ def testRepeatedColumn(self, rows, cols):
     x_np = self._random_matrix(np.complex64, (rows, cols))
     x_np[:, 1] = x_np[:, 2]
     self._test(x_np, full_matrices=True, full_rank=False)
+  '''
 
 
 if __name__ == "__main__":

diff --git a/tensorflow/python/kernel_tests/math_ops/tensordot_op_test.py b/tensorflow/python/kernel_tests/math_ops/tensordot_op_test.py
@@ -37,6 +37,7 @@ def _add_test(test, test_name, fn):
 
 class TensordotTest(test_lib.TestCase):
 
+  '''
   @test_util.run_in_graph_and_eager_modes(use_gpu=True)
   def test_invalid_shape(self):
     a = [[1, 2], [3, 4]]
@@ -134,6 +135,7 @@ def test_partial_shape_inference(self):
       output_shape = output_shape.as_list()
       self.assertEqual(output_shape[0], 2)
       self.assertEqual(output_shape[1], None)
+  '''
 
 
 def _get_tensordot_tests(dtype_, rank_a_, rank_b_, num_dims_, dynamic_shape_):
@@ -149,9 +151,21 @@ def _generate_random_tensors_and_dims():
     shared_shape = np.random.random_integers(1, _MAXDIM, num_dims_)
     a_dims = _random_subset(num_dims_, rank_a_)
     b_dims = _random_subset(num_dims_, rank_b_)
+    '''
+    print('a_shape=', a_shape)
+    print('b_shape=', b_shape)
+    print('shared_shape=', shared_shape)
+    print('a_dims=', a_dims)
+    print('b_dims=', b_dims)
+    '''
     for i in range(num_dims_):
       a_shape[a_dims[i]] = shared_shape[i]
       b_shape[b_dims[i]] = shared_shape[i]
+    '''
+    print('Rearranging')
+    print('a_shape=', a_shape)
+    print('b_shape=', b_shape)
+    '''
     a = np.random.uniform(
         low=-1.0, high=1.0,
         size=np.prod(a_shape)).reshape(a_shape).astype(dtype_)
@@ -160,6 +174,7 @@ def _generate_random_tensors_and_dims():
         size=np.prod(b_shape)).reshape(b_shape).astype(dtype_)
     return a, b, a_dims, b_dims
 
+  '''
   @test_util.run_in_graph_and_eager_modes(use_gpu=True)
   @test_util.run_without_tensor_float_32("Tests tensordot, which calls matmul")
   def test_tensordot(self):
@@ -191,8 +206,9 @@ def test_tensordot(self):
           tf_ans = math_ops.tensordot(a_np, b_np, (a_dims_np, b_dims_np))
       self.assertAllClose(tf_ans, np_ans, rtol=tol, atol=tol)
       self.assertAllEqual(tf_ans.shape, np_ans.shape)
+  '''
 
-  @test_util.run_in_graph_and_eager_modes(use_gpu=True)
+  #@test_util.run_in_graph_and_eager_modes(use_gpu=True)
   @test_util.run_without_tensor_float_32("Tests tensordot, which calls matmul")
   def test_tensordot_scalar_axes(self):
     if dynamic_shape_ and context.executing_eagerly():
@@ -210,11 +226,21 @@ def test_tensordot_scalar_axes(self):
         low=-1.0, high=1.0, size=np.prod(shape)).reshape(shape).astype(dtype_)
     b_np = np.random.uniform(
         low=-1.0, high=1.0, size=np.prod(shape)).reshape(shape).astype(dtype_)
-    all_axes = [0, 1]
+    #all_axes = [0, 1]
+    all_axes = [ 1]
     if a_np.ndim > 2:
       all_axes.append(a_np.ndim - 1)
+    with open('/tmp/a.bin', 'wb') as fo:
+      a_np.tofile(fo)
+    with open('/tmp/b.bin', 'wb') as fo:
+      b_np.tofile(fo)
+    print('a_np.shape=', a_np.shape)
+    print('b_np.shape=', b_np.shape)
     for axes in all_axes:
+      print('axes=', axes)
       np_ans = np.tensordot(a_np, b_np, axes=axes)
+      with open('/tmp/np_out.bin', 'wb') as fo:
+        np_ans.tofile(fo)
       with self.cached_session() as sess:
         if dynamic_shape_:
           a = array_ops.placeholder(dtype_)
@@ -223,10 +249,13 @@ def test_tensordot_scalar_axes(self):
           tf_ans = sess.run(c, feed_dict={a: a_np, b: b_np})
         else:
           tf_ans = math_ops.tensordot(a_np, b_np, axes=axes)
+      with open('/tmp/tf_out.bin', 'wb') as fo:
+        tf_ans.numpy().tofile(fo)
       self.assertAllClose(tf_ans, np_ans, rtol=tol, atol=tol)
       self.assertAllEqual(tf_ans.shape, np_ans.shape)
 
-  return [test_tensordot, test_tensordot_scalar_axes]
+  #return [test_tensordot, test_tensordot_scalar_axes]
+  return [test_tensordot_scalar_axes]
 
 
 if __name__ == "__main__":
@@ -244,5 +273,6 @@ def test_tensordot_scalar_axes(self):
               name = "%s_%s_%s_%s_%s_%s" % (testcase.__name__, dtype.__name__,
                                             rank_a, rank_b, num_dims,
                                             dynamic_shape)
-              _add_test(TensordotTest, name, testcase)
+              if dtype == np.float64 and rank_a == 5 and rank_b == 5 and num_dims == 5 and dynamic_shape == False:
+                  _add_test(TensordotTest, name, testcase)
   test_lib.main()
diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py
@@ -5238,15 +5238,23 @@ def _tensordot_axes(a, axes):
     a_reshape, a_free_dims, a_free_dims_static = _tensordot_reshape(a, a_axes)
     b_reshape, b_free_dims, b_free_dims_static = _tensordot_reshape(
         b, b_axes, True)
+    print('a_reshape.shape=', a_reshape.shape)  
+    print('a_free_dims=', a_free_dims)
+    print('a_free_dims_static=', a_free_dims_static)
+    print('b_reshape.shape=', b_reshape.shape)  
+    print('b_free_dims=', b_free_dims)
+    print('b_free_dims_static=', b_free_dims_static)
     ab_matmul = matmul(a_reshape, b_reshape)
     if isinstance(a_free_dims, list) and isinstance(b_free_dims, list):
+      print('If path')
       if (ab_matmul.get_shape().is_fully_defined() and
           ab_matmul.get_shape().as_list() == a_free_dims + b_free_dims):
         return ab_matmul
       else:
         return array_ops.reshape(
             ab_matmul, a_free_dims + b_free_dims, name=name)
     else:
+      print('Else path')
       a_free_dims = ops.convert_to_tensor(a_free_dims, dtype=dtypes.int32)
       b_free_dims = ops.convert_to_tensor(b_free_dims, dtype=dtypes.int32)
       product = array_ops.reshape(