Skip to content

Commit

Permalink
Debug tensordot test.
Browse files Browse the repository at this point in the history
  • Loading branch information
wenchenvincent committed Oct 9, 2023
1 parent 5b0eb43 commit 0b98bd9
Show file tree
Hide file tree
Showing 4 changed files with 145 additions and 4 deletions.
99 changes: 99 additions & 0 deletions run_tensordot_test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
#!/usr/bin/env bash
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# ==============================================================================
# Builds and runs the tensordot GPU kernel test on ROCm with verbose
# rocBLAS / MIOpen / HIP logging enabled, for debugging purposes.
#
# Usage: run_tensordot_test.sh [ROCM_INSTALL_DIR]
#   ROCM_INSTALL_DIR  Optional ROCm installation path
#                     (default: /opt/rocm-5.6.0).
#
# NOTE: `set -o pipefail` is intentionally NOT enabled: the
# `yes "" | configure.py` pipeline below relies on `yes` being terminated by
# SIGPIPE once configure.py exits, which would count as a pipeline failure.
set -e
set -x

N_BUILD_JOBS=$(grep -c ^processor /proc/cpuinfo)
#TF_GPU_COUNT=$(lspci|grep 'controller'|grep 'AMD/ATI'|wc -l)
TF_GPU_COUNT=1
TF_TESTS_PER_GPU=1
# Arithmetic expansion instead of `expr`: `expr` exits with status 1 when the
# result is 0, which would abort the script under `set -e`.
N_TEST_JOBS=$((TF_GPU_COUNT * TF_TESTS_PER_GPU))

echo ""
echo "Bazel will use ${N_BUILD_JOBS} concurrent build job(s) and ${N_TEST_JOBS} concurrent test job(s)."
echo ""

# First positional argument (if any and non-empty) specifies the
# ROCM_INSTALL_DIR.
ROCM_INSTALL_DIR="${1:-/opt/rocm-5.6.0}"

# Run configure.
# Assign and export separately so that a failing lookup is caught by `set -e`
# (`export VAR=$(cmd)` always returns the exit status of `export`).
PYTHON_BIN_PATH="$(command -v python3)"
export PYTHON_BIN_PATH

PYTHON_VERSION="$("${PYTHON_BIN_PATH}" -c "import sys;print(f'{sys.version_info.major}.{sys.version_info.minor}')")"
export TF_PYTHON_VERSION="${PYTHON_VERSION}"
export TF_NEED_ROCM=1
export TF_NEED_CLANG=0
export ROCM_PATH="${ROCM_INSTALL_DIR}"

# Accept the default answer for every configure prompt.
yes "" | "${PYTHON_BIN_PATH}" configure.py

# Bazel flags. Kept in an array so individual entries can be commented in or
# out without breaking backslash line continuations.
# Double test timeouts to avoid flakes.
BAZEL_FLAGS=(
    --config=rocm
    -k
    --test_tag_filters=gpu,-no_oss,-oss_excluded,-oss_serial,-no_gpu,-no_rocm,-benchmark-test,-rocm_multi_gpu,-tpu,-v1only
    "--jobs=${N_BUILD_JOBS}"
    "--local_test_jobs=${N_TEST_JOBS}"
    "--test_env=TF_GPU_COUNT=${TF_GPU_COUNT}"
    "--test_env=TF_TESTS_PER_GPU=${TF_TESTS_PER_GPU}"
    --test_env=HSA_TOOLS_LIB=libroctracer64.so
    --test_env=AMD_SERIALIZE_KERNEL=3
    --test_env=AMD_SERIALIZE_COPY=3
    --test_env=ROCBLAS_LAYER=2
    --test_env=TENSILE_DB=0x8000
    --test_env=MIOPEN_ENABLE_LOGGING_CMD=1
    --test_env=MIOPEN_ENABLE_LOGGING=1
    --test_env=MIOPEN_LOG_LEVEL=7
    #--test_env=MIOPEN_DEBUG_CONV_WINOGRAD=0
    #--test_env=MIOPEN_DEBUG_CONV_FFT=0
    #--test_env=MIOPEN_DEBUG_CONV_GEMM=0
    #--test_env=MIOPEN_DEBUG_CONV_IMPLICIT_GEMM=0
    #--test_env=MIOPEN_DEBUG_CONV_DIRECT=1
    "--test_env=TF_PYTHON_VERSION=${PYTHON_VERSION}"
    --test_timeout=920,2400,7200,9600
    --build_tests_only
    --test_output=errors
    --test_sharding_strategy=disabled
    --test_size_filters=small,medium,large
    --run_under=//tensorflow/tools/ci_build/gpu_build:parallel_gpu_execute
)

# Test targets. Remove the leading '#' to re-enable an entry.
TEST_TARGETS=(
    //tensorflow/python/kernel_tests/math_ops:tensordot_op_test_gpu
    #//tensorflow/compiler/tests:qr_op_test_gpu
    #//tensorflow/python/kernel_tests/linalg:matrix_solve_ls_op_test_gpu
    #//tensorflow/python/kernel_tests/array_ops:init_ops_test_gpu
    #//tensorflow/compiler/xla/tests:convolution_test_cudnn_frontend_disabled_gpu
    #//tensorflow/compiler/xla/tests:convolution_test_gpu
    #//tensorflow/compiler/xla/tests:convolution_test_gpu_alternative_layout_gpu
    #//tensorflow/compiler/xla/service/gpu/tests:gpu_kernel_tiling_test_gpu
    #//tensorflow/compiler/xla/tests:conv_depthwise_test_gpu
    #//tensorflow/core/grappler/optimizers:remapper_test_gpu
    #//tensorflow/dtensor/python/tests:multi_client_test_2gpus
    #//tensorflow/python/distribute/experimental:multi_worker_mirrored_strategy_test_2gpus
    #//tensorflow/python/grappler:auto_mixed_precision_test_gpu
    #//tensorflow/python/grappler:remapper_test_gpu

    # Whole-tree run with exclusions:
    #//tensorflow/...
    #-//tensorflow/python/integration_testing/...
    #-//tensorflow/core/tpu/...
    #-//tensorflow/lite/...
    #-//tensorflow/compiler/tf2tensorrt/...
    #-//tensorflow/dtensor/python/tests:multi_client_test_nccl_2gpus
)

# Run bazel test command.
bazel test "${BAZEL_FLAGS[@]}" -- "${TEST_TARGETS[@]}"
4 changes: 4 additions & 0 deletions tensorflow/compiler/tests/qr_op_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,18 +114,21 @@ def _test(self, x_np, full_matrices, full_rank=True):
DTYPES = [np.float32, np.complex64]
PARAMS = itertools.product(SIZES, SIZES, DTYPES)

'''
@parameterized.parameters(*PARAMS)
def testQR(self, rows, cols, dtype):
for full_matrices in [True, False]:
# Only tests the (3, 2) case for small numbers of rows/columns.
for batch_dims in [(), (3,)] + [(3, 2)] * (max(rows, cols) < 10):
x_np = self._random_matrix(dtype, batch_dims + (rows, cols))
self._test(x_np, full_matrices)
'''

def testLarge2000x2000(self):
  """Runs `_test` on one 2000x2000 float32 matrix from `_random_matrix`."""
  shape = (2000, 2000)
  matrix = self._random_matrix(np.float32, shape)
  self._test(matrix, full_matrices=True)

'''
@unittest.skip("Test times out on CI")
def testLarge17500x128(self):
x_np = self._random_matrix(np.float32, (17500, 128))
Expand All @@ -142,6 +145,7 @@ def testRepeatedColumn(self, rows, cols):
x_np = self._random_matrix(np.complex64, (rows, cols))
x_np[:, 1] = x_np[:, 2]
self._test(x_np, full_matrices=True, full_rank=False)
'''


if __name__ == "__main__":
Expand Down
38 changes: 34 additions & 4 deletions tensorflow/python/kernel_tests/math_ops/tensordot_op_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ def _add_test(test, test_name, fn):

class TensordotTest(test_lib.TestCase):

'''
@test_util.run_in_graph_and_eager_modes(use_gpu=True)
def test_invalid_shape(self):
a = [[1, 2], [3, 4]]
Expand Down Expand Up @@ -134,6 +135,7 @@ def test_partial_shape_inference(self):
output_shape = output_shape.as_list()
self.assertEqual(output_shape[0], 2)
self.assertEqual(output_shape[1], None)
'''


def _get_tensordot_tests(dtype_, rank_a_, rank_b_, num_dims_, dynamic_shape_):
Expand All @@ -149,9 +151,21 @@ def _generate_random_tensors_and_dims():
shared_shape = np.random.random_integers(1, _MAXDIM, num_dims_)
a_dims = _random_subset(num_dims_, rank_a_)
b_dims = _random_subset(num_dims_, rank_b_)
'''
print('a_shape=', a_shape)
print('b_shape=', b_shape)
print('shared_shape=', shared_shape)
print('a_dims=', a_dims)
print('b_dims=', b_dims)
'''
for i in range(num_dims_):
a_shape[a_dims[i]] = shared_shape[i]
b_shape[b_dims[i]] = shared_shape[i]
'''
print('Rearranging')
print('a_shape=', a_shape)
print('b_shape=', b_shape)
'''
a = np.random.uniform(
low=-1.0, high=1.0,
size=np.prod(a_shape)).reshape(a_shape).astype(dtype_)
Expand All @@ -160,6 +174,7 @@ def _generate_random_tensors_and_dims():
size=np.prod(b_shape)).reshape(b_shape).astype(dtype_)
return a, b, a_dims, b_dims

'''
@test_util.run_in_graph_and_eager_modes(use_gpu=True)
@test_util.run_without_tensor_float_32("Tests tensordot, which calls matmul")
def test_tensordot(self):
Expand Down Expand Up @@ -191,8 +206,9 @@ def test_tensordot(self):
tf_ans = math_ops.tensordot(a_np, b_np, (a_dims_np, b_dims_np))
self.assertAllClose(tf_ans, np_ans, rtol=tol, atol=tol)
self.assertAllEqual(tf_ans.shape, np_ans.shape)
'''

@test_util.run_in_graph_and_eager_modes(use_gpu=True)
#@test_util.run_in_graph_and_eager_modes(use_gpu=True)
@test_util.run_without_tensor_float_32("Tests tensordot, which calls matmul")
def test_tensordot_scalar_axes(self):
if dynamic_shape_ and context.executing_eagerly():
Expand All @@ -210,11 +226,21 @@ def test_tensordot_scalar_axes(self):
low=-1.0, high=1.0, size=np.prod(shape)).reshape(shape).astype(dtype_)
b_np = np.random.uniform(
low=-1.0, high=1.0, size=np.prod(shape)).reshape(shape).astype(dtype_)
all_axes = [0, 1]
#all_axes = [0, 1]
all_axes = [ 1]
if a_np.ndim > 2:
all_axes.append(a_np.ndim - 1)
with open('/tmp/a.bin', 'wb') as fo:
a_np.tofile(fo)
with open('/tmp/b.bin', 'wb') as fo:
b_np.tofile(fo)
print('a_np.shape=', a_np.shape)
print('b_np.shape=', b_np.shape)
for axes in all_axes:
print('axes=', axes)
np_ans = np.tensordot(a_np, b_np, axes=axes)
with open('/tmp/np_out.bin', 'wb') as fo:
np_ans.tofile(fo)
with self.cached_session() as sess:
if dynamic_shape_:
a = array_ops.placeholder(dtype_)
Expand All @@ -223,10 +249,13 @@ def test_tensordot_scalar_axes(self):
tf_ans = sess.run(c, feed_dict={a: a_np, b: b_np})
else:
tf_ans = math_ops.tensordot(a_np, b_np, axes=axes)
with open('/tmp/tf_out.bin', 'wb') as fo:
tf_ans.numpy().tofile(fo)
self.assertAllClose(tf_ans, np_ans, rtol=tol, atol=tol)
self.assertAllEqual(tf_ans.shape, np_ans.shape)

return [test_tensordot, test_tensordot_scalar_axes]
#return [test_tensordot, test_tensordot_scalar_axes]
return [test_tensordot_scalar_axes]


if __name__ == "__main__":
Expand All @@ -244,5 +273,6 @@ def test_tensordot_scalar_axes(self):
name = "%s_%s_%s_%s_%s_%s" % (testcase.__name__, dtype.__name__,
rank_a, rank_b, num_dims,
dynamic_shape)
_add_test(TensordotTest, name, testcase)
if dtype == np.float64 and rank_a == 5 and rank_b == 5 and num_dims == 5 and dynamic_shape == False:
_add_test(TensordotTest, name, testcase)
test_lib.main()
8 changes: 8 additions & 0 deletions tensorflow/python/ops/math_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -5238,15 +5238,23 @@ def _tensordot_axes(a, axes):
a_reshape, a_free_dims, a_free_dims_static = _tensordot_reshape(a, a_axes)
b_reshape, b_free_dims, b_free_dims_static = _tensordot_reshape(
b, b_axes, True)
print('a_reshape.shape=', a_reshape.shape)
print('a_free_dims=', a_free_dims)
print('a_free_dims_static=', a_free_dims_static)
print('b_reshape.shape=', b_reshape.shape)
print('b_free_dims=', b_free_dims)
print('b_free_dims_static=', b_free_dims_static)
ab_matmul = matmul(a_reshape, b_reshape)
if isinstance(a_free_dims, list) and isinstance(b_free_dims, list):
print('If path')
if (ab_matmul.get_shape().is_fully_defined() and
ab_matmul.get_shape().as_list() == a_free_dims + b_free_dims):
return ab_matmul
else:
return array_ops.reshape(
ab_matmul, a_free_dims + b_free_dims, name=name)
else:
print('Else path')
a_free_dims = ops.convert_to_tensor(a_free_dims, dtype=dtypes.int32)
b_free_dims = ops.convert_to_tensor(b_free_dims, dtype=dtypes.int32)
product = array_ops.reshape(
Expand Down

0 comments on commit 0b98bd9

Please sign in to comment.