diff --git a/README.md b/README.md
index 2c42c48..41798c1 100644
--- a/README.md
+++ b/README.md
@@ -7,6 +7,16 @@ optimised TensorFlow wheel with TensorRT support.
 
 Each release page also has the checksums of the attached files.
 
+## `manylinux_2_28_x86_64` wheels
+The following wheels were compiled in the `manylinux_2_28_x86_64`
+container described below. These are required for newer CUDA versions,
+since Nvidia stopped releasing CUDA updates for CentOS 7-based
+distributions.
+
+| TF | Python | GPU | CUDA | cuDNN | AVX2 | MKL/oneDNN | TensorRT | Links |
+|-|-|-|-|-|-|-|-|-|
+| 2.16.2 | 3.9-3.12 | 5.2-9.0 | 12.6 | 9.4 | :heavy_check_mark: | :heavy_check_mark: | 10.4 | [Release](https://github.com/agkphysics/tensorflow-wheels/releases/tag/tf_gpu_cuda12.6_cudnn9.4_avx2_mkl_trt10.4) |
+
 ## `manylinux2014_x86_64` wheels
 The following wheels were compiled in the `manylinux2014_x86_64`
 container described below. These should have better glibc
@@ -39,14 +49,13 @@ The following wheels were compiled on an Ubuntu 20.04 system
 | 1.15.0 | 3.8 | 7.x | 10.2 | 7 | :heavy_check_mark: | :heavy_check_mark: | 6 | [Release](https://github.com/agkphysics/tensorflow-wheels/releases/tag/tf_1.15.0_gpu_cm7x_cuda102_cudnn7_avx2_mkl_trt6) |
 | 1.14.1 | 3.8 | 7.x | 10.2 | 7 | :heavy_check_mark: | :heavy_check_mark: | 6 | [Release](https://github.com/agkphysics/tensorflow-wheels/releases/tag/tf_1.14.1_gpu_cm7x_cuda102_cudnn7_avx2_mkl_trt6) |
 
-## `manylinux2014_x86_64` Docker container
-The [Dockerfile](./docker/Dockerfile) is based on `manylinux2014_x86_64`
-and can be built with the following command, from within the `docker/`
-directory:
+## Docker containers
+The Dockerfiles under `docker/` are based on `manylinux*` and can be
+built with the following command, from within the `docker/` directory:
 ```
-docker build -t tf_build .
+docker build -t tf_build -f Dockerfile.<platform> .
 ```
-The container can be run like so:
+The container can then be run like so:
 ```
 docker run --gpus all -it --rm --tmpfs /tmp:exec -v /path/to/tensorflow:/build -u $(id -u):$(id -g) -e USER=$(id -u) tf_build
 ```
@@ -54,7 +63,7 @@ docker run --gpus all -it --rm --tmpfs /tmp:exec -v /path/to/tensorflow:/build -
 Then, you can run the build script:
 ```
 cd /path/to/tensorflow
-bash build-tf2-gpu-avx2-mkl.sh -p 11
+bash build-tf2-gpu-avx2-mkl.sh -p 12
 ```
 
 The other scripts assume a directory structure as follows:
@@ -62,8 +71,10 @@ The other scripts assume a directory structure as follows:
 .../
     tensorflow/
         build-tf2-gpu-avx2-mkl.sh
-    keras/
-        build-keras.sh
+    text/
+        build-text.sh
+    io/
+        build-io.sh
     wheels/
         tensorflow/
             *.whl
diff --git a/build-io.sh b/build-io.sh
new file mode 100755
index 0000000..799d2d9
--- /dev/null
+++ b/build-io.sh
@@ -0,0 +1,60 @@
+#!/bin/sh
+
+set -e
+
+usage() {
+    echo "Usage: $0 -p <python version> -t <tensorflow version>"
+    echo "    -p    Python version to use (6-12)"
+    echo "    -t    TensorFlow version to use (2.3.0, 2.4.0, etc.)"
+}
+
+if [ $# -lt 4 ]; then
+    usage
+    exit
+fi
+
+while getopts "ht:p:" opt; do
+    case $opt in
+        p)
+            py3_ver=$OPTARG
+            ;;
+        t)
+            tf_ver=$OPTARG
+            ;;
+        h)
+            usage
+            exit
+            ;;
+        \?)
+            echo "Invalid option: -$OPTARG" >&2
+            usage
+            exit
+            ;;
+    esac
+done
+if [ "$py3_ver" -lt 6 ] || [ "$py3_ver" -gt 12 ]; then
+    echo "Python version must be between 6 and 12"
+    usage
+    exit
+fi
+
+rm -rf venvs
+python3.$py3_ver -m venv venvs/py3$py3_ver
+. venvs/py3${py3_ver}/bin/activate
+
+# Get most recent wheel
+tf_wheel=$(ls ../wheels/tensorflow/tensorflow-${tf_ver}-cp3${py3_ver}-*-linux_x86_64.whl)
+echo "Installing TensorFlow wheel $tf_wheel"
+pip install -q "$tf_wheel"
+pip uninstall -q -y tensorflow-io tensorflow-io-gcs-filesystem
+
+bazel clean --expunge
+python tools/build/configure.py
+bazel build --config=linux --config=optimization --copt="-mavx" --copt="-mavx2" --copt="-Wno-error=dangling-pointer=" --copt="-Wno-error=array-bounds=" --copt="-Wno-error=array-parameter=" --copt="-I/usr/include/tirpc" //tensorflow_io/... //tensorflow_io_gcs_filesystem/...
+python setup.py bdist_wheel --data bazel-bin --project tensorflow-io
+python setup.py bdist_wheel --data bazel-bin --project tensorflow-io-gcs-filesystem
+mv dist/tensorflow_io*.whl ../wheels/io
+bazel clean --expunge
+
+deactivate
+rm -rf venvs
diff --git a/build-keras.sh b/build-keras.sh
deleted file mode 100755
index 7e847e0..0000000
--- a/build-keras.sh
+++ /dev/null
@@ -1,57 +0,0 @@
-#!/bin/sh
-
-set -e
-
-usage() {
-    echo "Usage: $0 -p <python version> -t <tensorflow version>"
-    echo "    -p    Python version to use (6-11)"
-    echo "    -t    TensorFlow version to use (2.3.0, 2.4.0, etc.)"
-}
-
-if [ $# -lt 4 ]; then
-    usage
-    exit
-fi
-
-while getopts "ht:p:" opt; do
-    case $opt in
-        p)
-            py3_ver=$OPTARG
-            ;;
-        t)
-            tf_ver=$OPTARG
-            ;;
-        h)
-            usage
-            exit
-            ;;
-        \?)
-            echo "Invalid option: -$OPTARG" >&2
-            usage
-            exit
-            ;;
-    esac
-done
-if [ "$py3_ver" -lt 6 ] || [ "$py3_ver" -gt 11 ]; then
-    echo "Python version must be between 6 and 11"
-    usage
-    exit
-fi
-
-rm -rf venvs
-python3.$py3_ver -m venv venvs/py3$py3_ver
-. venvs/py3${py3_ver}/bin/activate
-
-# Get most recent wheel
-tf_wheel=$(ls ../wheels/tensorflow/tensorflow-${tf_ver}-cp3${py3_ver}-*-linux_x86_64.whl)
-echo "Installing TensorFlow wheel $tf_wheel"
-pip install -q "$tf_wheel"
-pip uninstall -q -y keras
-
-bazel clean --expunge
-bazel build //keras/tools/pip_package:build_pip_package
-./bazel-bin/keras/tools/pip_package/build_pip_package ../wheels/keras
-bazel clean --expunge
-
-deactivate
-rm -rf venvs
diff --git a/build-text.sh b/build-text.sh
new file mode 100755
index 0000000..8f339e3
--- /dev/null
+++ b/build-text.sh
@@ -0,0 +1,57 @@
+#!/bin/sh
+
+set -e
+
+usage() {
+    echo "Usage: $0 -p <python version> -t <tensorflow version>"
+    echo "    -p    Python version to use (6-12)"
+    echo "    -t    TensorFlow version to use (2.3.0, 2.4.0, etc.)"
+}
+
+if [ $# -lt 4 ]; then
+    usage
+    exit
+fi
+
+while getopts "ht:p:" opt; do
+    case $opt in
+        p)
+            py3_ver=$OPTARG
+            ;;
+        t)
+            tf_ver=$OPTARG
+            ;;
+        h)
+            usage
+            exit
+            ;;
+        \?)
+            echo "Invalid option: -$OPTARG" >&2
+            usage
+            exit
+            ;;
+    esac
+done
+if [ "$py3_ver" -lt 6 ] || [ "$py3_ver" -gt 12 ]; then
+    echo "Python version must be between 6 and 12"
+    usage
+    exit
+fi
+
+rm -rf venvs
+python3.$py3_ver -m venv venvs/py3$py3_ver
+. venvs/py3${py3_ver}/bin/activate
+
+# Get most recent wheel
+tf_wheel=$(ls ../wheels/tensorflow/tensorflow-${tf_ver}-cp3${py3_ver}-*-linux_x86_64.whl)
+echo "Installing TensorFlow wheel $tf_wheel"
+pip install -q --ignore-requires-python "$tf_wheel"
+pip uninstall -q -y tensorflow-text
+
+bazel clean --expunge
+./oss_scripts/run_build.sh
+mv tensorflow_text-*.whl ../wheels/text
+bazel clean --expunge
+
+deactivate
+rm -rf venvs
diff --git a/build-tf2-gpu-avx2-mkl.sh b/build-tf2-gpu-avx2-mkl.sh
index 4a97a9f..7d10981 100755
--- a/build-tf2-gpu-avx2-mkl.sh
+++ b/build-tf2-gpu-avx2-mkl.sh
@@ -1,11 +1,12 @@
-#!/bin/sh
+#!/bin/bash
 
 set -e
 
 usage() {
     echo "Usage: $0 -p <python version> [-b]"
-    echo "    -p    Python version to use (6-11)"
-    echo "    -b    Warm build (don't clean)"
+    echo "    -p    Python version to use (8-12)"
+    echo "    -b    Warm build (don't clean)"
+    echo "    -d    Debug mode"
 }
 
 if [ $# -lt 2 ]; then
@@ -13,7 +14,7 @@ if [ $# -lt 2 ]; then
     exit
 fi
 
-while getopts "hbp:" opt; do
+while getopts "hbdp:" opt; do
     case $opt in
         p)
             py3_ver=$OPTARG
@@ -21,6 +22,9 @@ while getopts "hbp:" opt; do
         b)
             warm_build=1
             ;;
+        d)
+            debug_mode=1
+            ;;
         h)
             usage
             exit
@@ -32,12 +36,13 @@ while getopts "hbp:" opt; do
             ;;
     esac
 done
-if [ "$py3_ver" -lt 6 ] || [ "$py3_ver" -gt 11 ]; then
-    echo "Python version must be between 6 and 11"
+if [ "$py3_ver" -lt 8 ] || [ "$py3_ver" -gt 12 ]; then
+    echo "Python version must be between 8 and 12"
     usage
     exit
 fi
 
+echo "Using Python 3.$py3_ver environment"
 if [ "$warm_build" = 1 ]; then
     source venvs/py3${py3_ver}/bin/activate
 else
@@ -45,18 +50,10 @@ else
     python3.${py3_ver} -m venv venvs/py3${py3_ver}
     source venvs/py3${py3_ver}/bin/activate
     pip install -q -r py_build_reqs.txt
-    _tag=$(git describe --tags)
-    tf_ver=$(echo "$_tag" | sed -n -E -e 's/^v2\.([0-9]+).*/\1/p')
-    if [ "$tf_ver" = "7" ]; then
-        pip install -q keras-preprocessing
-    fi
 fi
 
-PYTHON_BIN_PATH=$(which python)
-export PYTHON_BIN_PATH
-export USE_DEFAULT_PYTHON_LIB_PATH=1
 export TF_NEED_JEMALLOC=1
-export TF_NEED_KAFKA=0
+export TF_NEED_KAFKA=1
 export TF_NEED_OPENCL_SYCL=0
 export TF_NEED_OPENCL=0
 export TF_NEED_AWS=1
@@ -68,15 +65,19 @@ export TF_NEED_GDR=0
 export TF_NEED_VERBS=0
 export TF_NEED_MPI=0
 export TF_NEED_TENSORRT=1
+_tensorrt_maj=$(sed -n -E -e 's/^#define NV_TENSORRT_MAJOR\s+([0-9]+).*/\1/p' /usr/include/NvInferVersion.h)
+_tensorrt_min=$(sed -n -E -e 's/^#define NV_TENSORRT_MINOR\s+([0-9]+).*/\1/p' /usr/include/NvInferVersion.h)
+export TF_TENSORRT_VERSION=${_tensorrt_maj}.${_tensorrt_min}
 export TF_NEED_NGRAPH=0
 export TF_NEED_IGNITE=0
 export TF_NEED_ROCM=0
+export TF_NEED_CLANG=1
+export CLANG_COMPILER_PATH=/usr/bin/clang
 export TF_SET_ANDROID_WORKSPACE=0
 export TF_DOWNLOAD_CLANG=0
-_nccl_maj=$(sed -n -E -e 's/^#define NCCL_MAJOR\s*(.*).*/\1/p' /usr/include/nccl.h)
-_nccl_min=$(sed -n -E -e 's/^#define NCCL_MINOR\s*(.*).*/\1/p' /usr/include/nccl.h)
+_nccl_maj=$(sed -n -E -e 's/^#define NCCL_MAJOR\s+([0-9]+).*/\1/p' /usr/include/nccl.h)
+_nccl_min=$(sed -n -E -e 's/^#define NCCL_MINOR\s+([0-9]+).*/\1/p' /usr/include/nccl.h)
 export TF_NCCL_VERSION="${_nccl_maj}.${_nccl_min}"
-export TF_IGNORE_MAX_BAZEL_VERSION=1
 export NCCL_INSTALL_PATH=/usr
 GCC_HOST_COMPILER_PATH=$(which gcc)
 export GCC_HOST_COMPILER_PATH
@@ -93,13 +94,24 @@ export TF_CUDA_VERSION
 TF_CUDNN_VERSION=$(sed -n -E -e 's/^#define CUDNN_MAJOR\s*(.*).*/\1/p' /usr/include/cudnn_version.h)
 export TF_CUDNN_VERSION
 export TF_CUDA_COMPUTE_CAPABILITIES=sm_52,sm_53,sm_60,sm_61,sm_62,sm_70,sm_72,sm_75,sm_80,sm_86,sm_87,sm_89,sm_90,compute_90
-export CC_OPT_FLAGS="-march=haswell
-O3" +export TF_PYTHON_VERSION=3.${py3_ver} +echo "TF_PYTHON_VERSION=$TF_PYTHON_VERSION" +PYTHON_BIN_PATH=$(which python) +export PYTHON_BIN_PATH +echo "PYTHON_BIN_PATH=$PYTHON_BIN_PATH" +export USE_DEFAULT_PYTHON_LIB_PATH=1 +export CC_OPT_FLAGS="-march=haswell -mavx2 -O3" + +echo $(python --version) if [ "$warm_build" != 1 ]; then bazel clean --expunge fi +if [ "$debug_mode" = 1 ]; then + bazel_opts=(-s) +fi ./configure -bazel build --config=mkl --config=avx2_linux -c opt //tensorflow/tools/pip_package:build_pip_package +bazel build "${bazel_opts[@]}" --verbose_failures --config=avx_linux -c opt //tensorflow/tools/pip_package:build_pip_package bazel-bin/tensorflow/tools/pip_package/build_pip_package ../wheels/tensorflow if [ "$warm_build" != 1 ]; then diff --git a/docker/Dockerfile b/docker/Dockerfile.manylinux2014_x86_64 similarity index 79% rename from docker/Dockerfile rename to docker/Dockerfile.manylinux2014_x86_64 index 727ce23..75b031c 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile.manylinux2014_x86_64 @@ -2,9 +2,9 @@ FROM quay.io/pypa/manylinux2014_x86_64 RUN yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/cuda-rhel7.repo \ && yum clean all \ - && yum -y install cuda-12-2 libcudnn8-devel tensorrt-devel libnccl-devel + && yum -y install cuda-12-4 libcudnn9-devel tensorrt-devel libnccl-devel ENV PATH=/usr/local/cuda/bin:$PATH -RUN curl -L "https://github.com/bazelbuild/bazelisk/releases/download/v1.18.0/bazelisk-linux-amd64" -o /usr/local/bin/bazel \ +RUN curl -L "https://github.com/bazelbuild/bazelisk/releases/download/v1.20.0/bazelisk-linux-amd64" -o /usr/local/bin/bazel \ && chmod +x /usr/local/bin/bazel \ && echo "startup --output_user_root=/tmp/bazel" > /etc/bazel.bazelrc ENV BAZELISK_HOME=/tmp/bazelisk USER=build diff --git a/docker/Dockerfile.manylinux_2_28_x86_64 b/docker/Dockerfile.manylinux_2_28_x86_64 new file mode 100644 index 0000000..7e5e336 --- /dev/null +++ b/docker/Dockerfile.manylinux_2_28_x86_64 @@ -0,0 +1,12 @@ +FROM quay.io/pypa/manylinux_2_28_x86_64 + +RUN dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo \ + && dnf clean all \ + && dnf -y install cuda-toolkit-12-6 libcudnn9-devel tensorrt-devel libnccl-devel clang libtirpc-devel +RUN curl -L "https://github.com/bazelbuild/bazelisk/releases/download/v1.21.0/bazelisk-linux-amd64" -o /usr/local/bin/bazel \ + && chmod +x /usr/local/bin/bazel \ + && echo "startup --output_user_root=/tmp/bazel" > /etc/bazel.bazelrc +ENV BAZELISK_HOME=/tmp/bazelisk USER=build + +WORKDIR /build +CMD ["/bin/bash"] diff --git a/patches/full/tf2.16.2_py3.11.patch b/patches/full/tf2.16.2_py3.11.patch new file mode 100644 index 0000000..803ec74 --- /dev/null +++ b/patches/full/tf2.16.2_py3.11.patch @@ -0,0 +1,2970 @@ +diff --git a/tensorflow/api_template.__init__.py b/tensorflow/api_template.__init__.py +index a0f88926b5e..9c0bb098feb 100644 +--- a/tensorflow/api_template.__init__.py ++++ b/tensorflow/api_template.__init__.py +@@ -27,7 +27,6 @@ this file with a file generated from [`api_template.__init__.py`](https://www.gi + """ + # pylint: disable=g-bad-import-order,protected-access,g-import-not-at-top + +-import distutils as _distutils + import importlib + import inspect as _inspect + import os as _os +@@ -95,10 +94,10 @@ _site_packages_dirs = [] + if _site.ENABLE_USER_SITE and _site.USER_SITE is not None: + _site_packages_dirs += [_site.USER_SITE] + _site_packages_dirs += [p for p in _sys.path if "site-packages" 
in p] +-if "getsitepackages" in dir(_site): ++try: + _site_packages_dirs += _site.getsitepackages() +- +-if "sysconfig" in dir(_distutils): ++except AttributeError: ++ import distutils as _distutils + _site_packages_dirs += [_distutils.sysconfig.get_python_lib()] + + _site_packages_dirs = list(set(_site_packages_dirs)) +diff --git a/tensorflow/api_template_v1.__init__.py b/tensorflow/api_template_v1.__init__.py +index 6a4ab4e655f..d6f8f2e0441 100644 +--- a/tensorflow/api_template_v1.__init__.py ++++ b/tensorflow/api_template_v1.__init__.py +@@ -14,7 +14,6 @@ + # ============================================================================== + """Bring in all of the public TensorFlow interface into this module.""" + +-import distutils as _distutils + import importlib + import inspect as _inspect + import os as _os +@@ -144,10 +143,10 @@ from tensorflow.python.lib.io import file_io as _fi + _site_packages_dirs = [] + _site_packages_dirs += [] if _site.USER_SITE is None else [_site.USER_SITE] + _site_packages_dirs += [p for p in _sys.path if "site-packages" in p] +-if "getsitepackages" in dir(_site): ++try: + _site_packages_dirs += _site.getsitepackages() +- +-if "sysconfig" in dir(_distutils): ++except AttributeError: ++ import distutils as _distutils + _site_packages_dirs += [_distutils.sysconfig.get_python_lib()] + + _site_packages_dirs = list(set(_site_packages_dirs)) +diff --git a/tensorflow/compiler/tf2tensorrt/BUILD b/tensorflow/compiler/tf2tensorrt/BUILD +index 91ef722b52d..a0d8e9736ad 100644 +--- a/tensorflow/compiler/tf2tensorrt/BUILD ++++ b/tensorflow/compiler/tf2tensorrt/BUILD +@@ -531,6 +531,7 @@ tf_cuda_library( + hdrs = ["utils/trt_allocator.h"], + features = ["-layering_check"], + deps = [ ++ ":common_utils", + "//tensorflow/core:framework_headers_lib", + "//tensorflow/core:framework_lite", + "//tensorflow/core:lib_proto_parsing", +diff --git a/tensorflow/compiler/tf2tensorrt/common/utils.cc b/tensorflow/compiler/tf2tensorrt/common/utils.cc +index 26ac37b237b..bca157b43bc 100644 +--- a/tensorflow/compiler/tf2tensorrt/common/utils.cc ++++ b/tensorflow/compiler/tf2tensorrt/common/utils.cc +@@ -41,11 +41,19 @@ std::tuple GetLinkedTensorRTVersion() { + + std::tuple GetLoadedTensorRTVersion() { + #if GOOGLE_CUDA && GOOGLE_TENSORRT ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + int ver = getInferLibVersion(); + int major = ver / 1000; + ver = ver - major * 1000; + int minor = ver / 100; + int patch = ver - minor * 100; ++#else // IS_TRT_VERSION_GE(10, 0, 0, 0) ++ // Note: The above logic using getInferLibVersion() produces the wrong version ++ // numbers since TensorRT 10.0, so these new functions must be used instead. 
++ int major = getInferLibMajorVersion(); ++ int minor = getInferLibMinorVersion(); ++ int patch = getInferLibPatchVersion(); ++#endif // IS_TRT_VERSION_GE(10, 0, 0, 0) + return std::tuple{major, minor, patch}; + #else + return std::tuple{0, 0, 0}; +@@ -59,6 +67,7 @@ std::tuple GetLoadedTensorRTVersion() { + namespace tensorflow { + namespace tensorrt { + ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + Status GetTrtBindingIndex(const char* tensor_name, int profile_index, + const nvinfer1::ICudaEngine* cuda_engine, + int* binding_index) { +@@ -93,6 +102,11 @@ Status GetTrtBindingIndex(int network_input_index, int profile_index, + return GetTrtBindingIndex(input_name.c_str(), profile_index, cuda_engine, + binding_index); + } ++#else // IS_TRT_VERSION_GE(10, 0, 0, 0) ++string GetTrtInputName(int network_input_index) { ++ return absl::StrCat(IONamePrefixes::kInputPHName, network_input_index); ++} ++#endif // IS_TRT_VERSION_GE(10, 0, 0, 0) + + namespace { + +@@ -234,6 +248,19 @@ std::ostream& operator<<(std::ostream& os, const nvinfer1::DataType& v) { + os << "kUINT8"; + break; + #endif ++ ++#if IS_TRT_VERSION_GE(10, 0, 0, 0) ++ case nvinfer1::DataType::kBF16: ++ os << "kBF16"; ++ break; ++ case nvinfer1::DataType::kINT64: ++ os << "kINT64"; ++ break; ++ case nvinfer1::DataType::kINT4: ++ os << "kINT4"; ++ break; ++#endif ++ + } + return os; + } +diff --git a/tensorflow/compiler/tf2tensorrt/common/utils.h b/tensorflow/compiler/tf2tensorrt/common/utils.h +index 0bc63ecd5c2..5c5f298051d 100644 +--- a/tensorflow/compiler/tf2tensorrt/common/utils.h ++++ b/tensorflow/compiler/tf2tensorrt/common/utils.h +@@ -102,6 +102,7 @@ class IONamePrefixes { + static constexpr const char* const kOutputPHName = "TensorRTOutputPH_"; + }; + ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + // Gets the binding index of a tensor in an engine. + // + // The binding index is looked up using the tensor's name and the profile index. 
+@@ -116,6 +117,9 @@ Status GetTrtBindingIndex(const char* tensor_name, int profile_index, + Status GetTrtBindingIndex(int network_input_idx, int profile_index, + const nvinfer1::ICudaEngine* cuda_engine, + int* binding_index); ++#else // IS_TRT_VERSION_GE(10, 0, 0, 0) ++string GetTrtInputName(int network_input_index); ++#endif // IS_TRT_VERSION_GE(10, 0, 0, 0) + } // namespace tensorrt + } // namespace tensorflow + +diff --git a/tensorflow/compiler/tf2tensorrt/convert/algorithm_selector.cc b/tensorflow/compiler/tf2tensorrt/convert/algorithm_selector.cc +index 5421e7bb46b..10e99b68412 100644 +--- a/tensorflow/compiler/tf2tensorrt/convert/algorithm_selector.cc ++++ b/tensorflow/compiler/tf2tensorrt/convert/algorithm_selector.cc +@@ -53,9 +53,16 @@ std::ostream& operator<<(std::ostream& os, const nvinfer1::IAlgorithm& alg) { + + std::ostream& operator<<(std::ostream& os, + const nvinfer1::IAlgorithmIOInfo& info) { ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + os << "IOTensor(format=" << info.getTensorFormat() + << ",dtype=" << info.getDataType() << ",strides=" << info.getStrides() + << ")"; ++#else // IS_TRT_VERSION_GE(10, 0, 0, 0) ++ os << "IOTensor(dtype=" << info.getDataType() ++ << ",strides=" << info.getStrides() ++ << ",vectorized_dim=" << info.getVectorizedDim() ++ << ",vectorized_components=" << info.getComponentsPerElement() << ")"; ++#endif // IS_TRT_VERSION_GE(10, 0, 0, 0) + return os; + } + } // namespace nvinfer1 +@@ -200,11 +207,13 @@ bool TftrtAlgorithmSelector::AlgorithmPolicy( + return false; + } + ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + if (selector_.IsShuffleLayer(variant.getImplementation())) { + return selector_.AllowShuffleAlgorithm( + tactic_id, alg.getAlgorithmIOInfo(0).getDataType(), + alg.getAlgorithmIOInfo(0).getTensorFormat()); + } ++#endif // !IS_TRT_VERSION_GE(10, 0, 0, 0) + return true; + } + +diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_graph.cc b/tensorflow/compiler/tf2tensorrt/convert/convert_graph.cc +index e809152c1e7..9f26e73ecbd 100644 +--- a/tensorflow/compiler/tf2tensorrt/convert/convert_graph.cc ++++ b/tensorflow/compiler/tf2tensorrt/convert/convert_graph.cc +@@ -713,13 +713,16 @@ Status CreateStaticEngine(const TRTOptimizationPass::ConversionParams& params, + (info.precision_mode == TrtPrecisionMode::INT8 && info.use_calibration); + + // Create static engines with precision_mode fp32/fp16. ++ TrtUniquePtrType infer( ++ nvinfer1::createInferRuntime(*trt_logger)); + TrtUniquePtrType engine; + TF_RETURN_IF_ERROR(ConvertGraphDefToEngine( + info.segment_graph_def, nullptr, + calibrate_int8 ? 
TrtPrecisionMode::FP32 : info.precision_mode, + max_batch_size, info.max_workspace_size_bytes, input_shapes, trt_logger, +- trt_allocator.get(), /*calibrator=*/nullptr, &engine, +- info.use_calibration, params.use_implicit_batch, ++ trt_allocator.get(), infer.get(), ++ /*calibrator=*/nullptr, &engine, info.use_calibration, ++ params.use_implicit_batch, + /*convert_successfully=*/nullptr, profile, info.engine_name, + /*use_explicit_precision=*/params.use_explicit_precision, cluster)); + TrtUniquePtrType engine_data(engine->serialize()); +diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc +index 1c3a1903477..31d78e4db0e 100644 +--- a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc ++++ b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc +@@ -108,7 +108,9 @@ namespace { + const char* LayerTypeToString(nvinfer1::LayerType layer_type) { + switch (layer_type) { + ADD_LAYER(CONVOLUTION) ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + ADD_LAYER(FULLY_CONNECTED) ++#endif + ADD_LAYER(ACTIVATION) + ADD_LAYER(POOLING) + ADD_LAYER(LRN) +@@ -130,7 +132,9 @@ const char* LayerTypeToString(nvinfer1::LayerType layer_type) { + ADD_LAYER(MATRIX_MULTIPLY) + ADD_LAYER(RAGGED_SOFTMAX) + ADD_LAYER(CONSTANT) ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + ADD_LAYER(RNN_V2) ++#endif + ADD_LAYER(IDENTITY) + ADD_LAYER(PLUGIN_V2) + ADD_LAYER(SLICE) +@@ -1082,9 +1086,13 @@ Status Converter::Init(nvinfer1::ILogger* trt_logger) { + : (1U << static_cast( + nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH)); + if (use_explicit_precision_) { ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + flags |= + (1U << static_cast( + nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_PRECISION)); ++#else ++ return errors::Internal("Explicit precision is not supported since TensorRT 10"); ++#endif + } + trt_network_.reset(trt_builder_->createNetworkV2(flags)); + if (!trt_network_) { +@@ -1252,7 +1260,8 @@ bool AbortCudaEngineBuild() { + Status Converter::BuildCudaEngine( + TrtUniquePtrType* engine, int max_batch_size, + size_t max_workspace_size_bytes, nvinfer1::IGpuAllocator* allocator, +- TRTInt8Calibrator* calibrator, TrtShapeOptimizationProfile* profiles) { ++ nvinfer1::IRuntime* runtime, TRTInt8Calibrator* calibrator, ++ TrtShapeOptimizationProfile* profiles) { + tensorflow::profiler::AnnotatedTraceMe activity( + [&]() { + return tensorflow::profiler::TraceMeOpOverride("TRTEngineOp", +@@ -1266,13 +1275,20 @@ Status Converter::BuildCudaEngine( + } + + VLOG(1) << "Configuring TensorRT builder"; ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + trt_builder_->setMaxBatchSize(max_batch_size); ++#endif + trt_builder_->setGpuAllocator(allocator); + + // Create a network configuration and use it to build a TRT engine. + TrtUniquePtrType builder_config( + trt_builder_->createBuilderConfig()); ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + builder_config->setMaxWorkspaceSize(max_workspace_size_bytes); ++#else ++ builder_config->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kWORKSPACE, ++ max_workspace_size_bytes); ++#endif + + // Create the algorithm selector. For TensorRT 7.x, the algorithm selector + // cannot be used when building with INT8 calibration. 
+@@ -1429,23 +1445,45 @@ Status Converter::BuildCudaEngine( + } + } + } ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + engine->reset( + trt_builder_->buildEngineWithConfig(*network(), *builder_config)); ++#else // IS_TRT_VERSION_GE(10, 0, 0, 0) ++ TrtUniquePtrType serialized( ++ trt_builder_->buildSerializedNetwork(*network(), *builder_config)); ++ if (!serialized) return errors::Internal("Failed to build TensorRT serialized network"); ++ engine->reset( ++ runtime->deserializeCudaEngine(serialized->data(), serialized->size())); ++#endif // IS_TRT_VERSION_GE(10, 0, 0, 0) + if (engine->get() == nullptr) { + return errors::Internal("Failed to build TensorRT engine"); + } + if (VLOG_IS_ON(2)) { + VLOG(2) << "TRT engine created"; ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + int nbBindings = (*engine)->getNbBindings(); ++#else ++ int nbBindings = (*engine)->getNbIOTensors(); ++#endif + VLOG(2) << "Number of engine bindings: " << nbBindings; + for (int i = 0; i < nbBindings; i++) { + auto get_location_string = [&engine](int i) { ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + if ((*engine)->getLocation(i) == nvinfer1::TensorLocation::kDEVICE) ++#else ++ if ((*engine)->getTensorLocation((*engine)->getIOTensorName(i)) == ++ nvinfer1::TensorLocation::kDEVICE) ++#endif + return " on device"; + else + return " on host"; + }; +- VLOG(2) << "Binding " << i << " name: " << (*engine)->getBindingName(i) ++ VLOG(2) << "Binding " << i << " name: " ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ << (*engine)->getBindingName(i) ++#else ++ << (*engine)->getIOTensorName(i) ++#endif + << get_location_string(i); + } + } +@@ -2060,11 +2098,19 @@ Status ConvertConv2DHelper(const OpConverterParams* params, int group, + nvinfer1::ILayer* conv_layer = nullptr; + if (is_conv2d_backprop_input) { + nvinfer1::IDeconvolutionLayer* layer = ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + params->converter->network()->addDeconvolution( ++#else ++ params->converter->network()->addDeconvolutionNd( ++#endif + *tensor->trt_tensor(), noutput, kernel_size, + weights->GetTrtWeights(), biases->GetTrtWeights()); + TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name()); ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + layer->setStride(stride); ++#else ++ layer->setStrideNd(stride); ++#endif + // VALID padding is the default TRT behavior. + if (padding_type == "SAME") { + // SAME_UPPER means that post padding is preferred. +@@ -2076,18 +2122,30 @@ Status ConvertConv2DHelper(const OpConverterParams* params, int group, + const nvinfer1::Weights empty_weights{nvinfer1::DataType::kFLOAT, nullptr, + 0}; + nvinfer1::IConvolutionLayer* layer = ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + params->converter->network()->addConvolution( ++#else ++ params->converter->network()->addConvolutionNd( ++#endif + *tensor->trt_tensor(), noutput, kernel_size, + params->use_explicit_precision ? 
empty_weights + : weights->GetTrtWeights(), + empty_weights); + TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name()); ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + layer->setStride(stride); ++#else ++ layer->setStrideNd(stride); ++#endif + if (padding_type == "SAME") { + layer->setPaddingMode(nvinfer1::PaddingMode::kSAME_UPPER); + } + layer->setNbGroups(num_groups); ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + layer->setDilation(dilation); ++#else ++ layer->setDilationNd(dilation); ++#endif + conv_layer = layer; + } + +@@ -2136,8 +2194,12 @@ Status ConvertConv2DHelper(const OpConverterParams* params, int group, + nvinfer1::DimsHW pre_padding(0, 0); + nvinfer1::DimsHW post_padding(height_diff, width_diff); + nvinfer1::IPaddingLayer* padding_layer = +- params->converter->network()->addPadding(*output_tensor->trt_tensor(), +- pre_padding, post_padding); ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ params->converter->network()->addPadding( ++#else ++ params->converter->network()->addPaddingNd( ++#endif ++ *output_tensor->trt_tensor(), pre_padding, post_padding); + output_tensor = padding_layer->getOutput(0); + params->converter->SetLayerName(padding_layer, node_def, "pad"); + } +@@ -2212,6 +2274,11 @@ Status ConvertTranspose(const OpConverterParams* params) { + + Status ConvertShape(const OpConverterParams* params) { + const auto& inputs = params->inputs; ++ const auto& node_def = params->node_def; ++ DataType out_type; ++ TF_RETURN_IF_ERROR(GetNodeAttr(AttrSlice(node_def), "out_type", &out_type)); ++ nvinfer1::DataType trt_out_type; ++ TF_RETURN_IF_ERROR(TfTypeToTrtType(out_type, &trt_out_type)); + TF_RETURN_IF_ERROR( + CheckInputsWeights(*params, {{"input", TrtInputArg::kBoth}})); + if (params->use_implicit_batch) { +@@ -2224,20 +2291,27 @@ Status ConvertShape(const OpConverterParams* params) { + StatusOr builder = TRTNetworkBuilder::Create( + params->converter->network(), params->weight_store); + TRT_ENSURE_OK(builder); ++ nvinfer1::ITensor* out_tensor; + if (input_dims.IsStatic()) { + // Create a const node with the value of the shape. + StatusOr const_layer = + builder->ConstantShape(input_dims); + TRT_ENSURE_PTR_OK(const_layer); +- params->outputs->push_back( +- TRT_TensorOrWeights((*const_layer)->getOutput(0))); +- return OkStatus(); +- } +- StatusOr shape_layer = +- builder->Shape(inputs.at(0).tensor()->trt_tensor()); +- TRT_ENSURE_PTR_OK(shape_layer); +- params->converter->SetLayerName(*shape_layer, params->node_def, "shape"); +- params->outputs->push_back(TRT_TensorOrWeights((*shape_layer)->getOutput(0))); ++ out_tensor = (*const_layer)->getOutput(0); ++ } else { ++ StatusOr shape_layer = ++ builder->Shape(inputs.at(0).tensor()->trt_tensor()); ++ TRT_ENSURE_PTR_OK(shape_layer); ++ params->converter->SetLayerName(*shape_layer, params->node_def, "shape"); ++ out_tensor = (*shape_layer)->getOutput(0); ++ } ++ if (out_tensor->getType() != trt_out_type) { ++ nvinfer1::ICastLayer* cast_layer = ++ params->converter->network()->addCast(*out_tensor, trt_out_type); ++ TRT_ENSURE(cast_layer); ++ out_tensor = cast_layer->getOutput(0); ++ } ++ params->outputs->push_back(TRT_TensorOrWeights(out_tensor)); + return OkStatus(); + } + +@@ -2430,6 +2504,14 @@ Status Converter::DynamicReshape(ITensorProxyPtr input, + } + ITensorProxyPtr shape = + network()->addShape(*input->trt_tensor())->getOutput(0); ++#if IS_TRT_VERSION_GE(10, 0, 0, 0) ++ // TODO(benbarsdell): Casting to int32 makes this match the pre-TRT10 ++ // behavior, but it would be better to instead cast all the other int32 ++ // tensors below to int64. 
++ shape = network() ++ ->addCast(*shape->trt_tensor(), nvinfer1::DataType::kINT32) ++ ->getOutput(0); ++#endif + // Build new shape = shape[:trt_axis] + [1] + shape[trt_axis:] + std::vector concat_inputs; + int max_num_slices = std::max(slices.size(), size_for_added_dims.size()); +@@ -3266,7 +3348,11 @@ Status ConvertFusedConv2DBiasActivation(const OpConverterParams* params) { + nvinfer1::IConvolutionLayer* conv_layer = nullptr; + if (filter_format == "OIHW") { + // Weights are already in the right order. ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + conv_layer = params->converter->network()->addConvolution( ++#else ++ conv_layer = params->converter->network()->addConvolutionNd( ++#endif + *tensor->trt_tensor(), weights.Shape().dim(0), kernel_size, + weights.GetTrtWeights(), biases.GetTrtWeights()); + } else { +@@ -3276,18 +3362,30 @@ Status ConvertFusedConv2DBiasActivation(const OpConverterParams* params) { + params->weight_store->GetTempWeights(weights); + TRT_ENSURE_OK(weights_kcrs); + ReorderRSCKToKCRS(weights, &*weights_kcrs, 1); ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + conv_layer = params->converter->network()->addConvolution( ++#else ++ conv_layer = params->converter->network()->addConvolutionNd( ++#endif + *tensor->trt_tensor(), weights.Shape().dim(3), kernel_size, + weights_kcrs->GetTrtWeights(), biases.GetTrtWeights()); + } + TFTRT_RETURN_ERROR_IF_NULLPTR(conv_layer, node_def.name()); ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + conv_layer->setStride(stride); ++#else ++ conv_layer->setStrideNd(stride); ++#endif + if (padding_type == "SAME") { + conv_layer->setPaddingMode(nvinfer1::PaddingMode::kSAME_UPPER); + } + params->converter->SetLayerName(conv_layer, node_def, "conv"); + conv_layer->setNbGroups(1); ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + conv_layer->setDilation(dilation); ++#else ++ conv_layer->setDilationNd(dilation); ++#endif + ITensorProxyPtr output_tensor = conv_layer->getOutput(0); + + // Add activation if there is one. +@@ -3359,11 +3457,19 @@ Status ConvertPool(const OpConverterParams* params) { + tensor, {0, 3, 1, 2}, &tensor, node_def, "to_NCHW")); + } + ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + nvinfer1::IPoolingLayer* layer = params->converter->network()->addPooling( ++#else ++ nvinfer1::IPoolingLayer* layer = params->converter->network()->addPoolingNd( ++#endif + *tensor->trt_tensor(), type, ksize); + TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name()); + ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + layer->setStride(stride); ++#else ++ layer->setStrideNd(stride); ++#endif + // VALID padding is the default TRT behavior. + if (padding_type == "SAME") { + // SAME_UPPER means that post padding is preferred. 
+@@ -4000,7 +4106,11 @@ Status ConvertPad(const OpConverterParams* params) { + tensor, transpose_idx, &tensor, node_def, "to_pad")); + } + ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + nvinfer1::IPaddingLayer* layer = params->converter->network()->addPadding( ++#else ++ nvinfer1::IPaddingLayer* layer = params->converter->network()->addPaddingNd( ++#endif + *tensor->trt_tensor(), pre_padding, post_padding); + TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name()); + params->converter->SetLayerName(layer, node_def); +@@ -4684,10 +4794,27 @@ StatusOr ConvertFullyConnectedImpl( + << ", n_output=" << noutput + << " weights shape: " << weights.Shape().DebugString() + << " to convert " << node_def.op(); ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + nvinfer1::IFullyConnectedLayer* layer = + params->converter->network()->addFullyConnected( + *tensor_a->trt_tensor(), noutput, weights.GetTrtWeights(), + biases.GetTrtWeights()); ++#else // IS_TRT_VERSION_GE(10, 0, 0, 0) ++ nvinfer1::IConstantLayer* weights_layer = ++ params->converter->network()->addConstant(weights.Shape().AsTrtDims(), ++ weights.GetTrtWeights()); ++ nvinfer1::IConstantLayer* bias_layer = ++ params->converter->network()->addConstant(biases.Shape().AsTrtDims(), ++ biases.GetTrtWeights()); ++ nvinfer1::IMatrixMultiplyLayer* matmul_layer = ++ params->converter->network()->addMatrixMultiply( ++ *tensor_a->trt_tensor(), nvinfer1::MatrixOperation::kNONE, ++ *weights_layer->getOutput(0), nvinfer1::MatrixOperation::kNONE); ++ nvinfer1::IElementWiseLayer* layer = ++ params->converter->network()->addElementWise( ++ *matmul_layer->getOutput(0), *bias_layer->getOutput(0), ++ nvinfer1::ElementWiseOperation::kSUM); ++#endif // !IS_TRT_VERSION_GE(10, 0, 0, 0) + + TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name()); + params->converter->SetLayerName(layer, node_def); +@@ -4703,7 +4830,13 @@ StatusOr ConvertFullyConnectedImpl( + TF_RETURN_IF_ERROR(PrepareTensorForShape( + params->converter, TRT_TensorOrWeights(output_tensor), output_dim, + /*validation_only=*/false, &output_tensor, node_def, +- /*op_instance=*/1, /*origin_node_name=*/"FULLY_CONNECTED")); ++ /*op_instance=*/1, ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ /*origin_node_name=*/"FULLY_CONNECTED") ++#else ++ /*origin_node_name=*/"MATRIX_MULTIPLY") ++#endif ++ ); + return output_tensor; + } + +@@ -5007,6 +5140,14 @@ CalcDepthSpaceDynamicShape(const OpConverterParams* params, int block_size, + ITensorProxyPtr shape = params->converter->network() + ->addShape(*inputs.at(0).tensor()->trt_tensor()) + ->getOutput(0); ++#if IS_TRT_VERSION_GE(10, 0, 0, 0) ++ // TODO(benbarsdell): Casting to int32 makes this match the pre-TRT10 ++ // behavior, but it would be better to instead cast all the other int32 ++ // tensors below to int64. ++ shape = params->converter->network() ++ ->addCast(*shape->trt_tensor(), nvinfer1::DataType::kINT32) ++ ->getOutput(0); ++#endif + ITensorProxyPtr batch_size = + params->converter->network() + ->addSlice(*shape->trt_tensor(), {1, {0}}, {1, {1}}, {1, {1}}) +@@ -5597,7 +5738,11 @@ Status ConvertResize(const OpConverterParams* params) { + AllowDataTypes(*params, {DataType::DT_FLOAT, DataType::DT_HALF})); + + // Verify resize mode. Initialize resize mode if supported. 
++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + nvinfer1::ResizeMode resize_mode; ++#else ++ nvinfer1::InterpolationMode resize_mode; ++#endif + if (node_def.op() == "ResizeBilinear") { + #if IS_TRT_VERSION_GE(7, 1, 0, 0) + if (!align_corners) { +@@ -5605,9 +5750,17 @@ Status ConvertResize(const OpConverterParams* params) { + "Cannot Convert Bilinear Resize when align_corners=False"); + } + #endif ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + resize_mode = nvinfer1::ResizeMode::kLINEAR; ++#else ++ resize_mode = nvinfer1::InterpolationMode::kLINEAR; ++#endif + } else if (node_def.op() == "ResizeNearestNeighbor") { ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + resize_mode = nvinfer1::ResizeMode::kNEAREST; ++#else ++ resize_mode = nvinfer1::InterpolationMode::kNEAREST; ++#endif + } else { + return errors::Unimplemented(node_def.op(), " is not yet implemented"); + } +@@ -5643,6 +5796,14 @@ Status ConvertResize(const OpConverterParams* params) { + ITensorProxyPtr shape = params->converter->network() + ->addShape(*inputs_tensor->trt_tensor()) + ->getOutput(0); ++#if IS_TRT_VERSION_GE(10, 0, 0, 0) ++ // TODO(benbarsdell): Casting to int32 makes this match the pre-TRT10 ++ // behavior, but it would be better to instead cast all the other int32 ++ // tensors below to int64. ++ shape = params->converter->network() ++ ->addCast(*shape->trt_tensor(), nvinfer1::DataType::kINT32) ++ ->getOutput(0); ++#endif + ITensorProxyPtr batch_size = + params->converter->network() + ->addSlice(*shape->trt_tensor(), {1, {0}}, {1, {1}}, {1, {1}}) +@@ -5686,7 +5847,14 @@ Status ConvertResize(const OpConverterParams* params) { + + // Set layer parameters. + layer->setResizeMode(resize_mode); ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + layer->setAlignCorners(align_corners); ++#else ++ if (align_corners) { ++ layer->setCoordinateTransformation( ++ nvinfer1::ResizeCoordinateTransformation::kALIGN_CORNERS); ++ } ++#endif + + // Set output shape. + if (static_output_shape) { +@@ -5833,7 +6001,7 @@ Status ConvertGraphDefToEngine( + int max_batch_size, size_t max_workspace_size_bytes, + const std::vector& input_shapes, + nvinfer1::ILogger* trt_logger, nvinfer1::IGpuAllocator* allocator, +- TRTInt8Calibrator* calibrator, ++ nvinfer1::IRuntime* runtime, TRTInt8Calibrator* calibrator, + TrtUniquePtrType* engine, bool use_calibration, + const bool use_implicit_batch, bool* convert_successfully, + TrtShapeOptimizationProfile* profiles, absl::string_view engine_name, +@@ -6026,8 +6194,8 @@ Status ConvertGraphDefToEngine( + + // Build the engine. 
+ TF_RETURN_IF_ERROR(converter->BuildCudaEngine( +- engine, max_batch_size, max_workspace_size_bytes, allocator, calibrator, +- profiles)); ++ engine, max_batch_size, max_workspace_size_bytes, allocator, runtime, ++ calibrator, profiles)); + + VLOG(1) << "Finished conversion"; + return OkStatus(); +diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.h b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.h +index e9afd320be9..241de56c3ea 100644 +--- a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.h ++++ b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.h +@@ -156,7 +156,7 @@ Status ConvertGraphDefToEngine( + int max_batch_size, size_t max_workspace_size_bytes, + const std::vector& input_shapes, + nvinfer1::ILogger* logger, nvinfer1::IGpuAllocator* allocator, +- TRTInt8Calibrator* calibrator, ++ nvinfer1::IRuntime* runtime, TRTInt8Calibrator* calibrator, + TrtUniquePtrType* engine, bool use_calibration, + const bool use_implicit_batch, bool* convert_successfully, + TrtShapeOptimizationProfile* profiles, absl::string_view engine_name, +@@ -280,6 +280,7 @@ class Converter { + Status BuildCudaEngine(TrtUniquePtrType* engine, + int max_batch_size, size_t max_workspace_size_bytes, + nvinfer1::IGpuAllocator* allocator, ++ nvinfer1::IRuntime* runtime, + TRTInt8Calibrator* calibrator, + TrtShapeOptimizationProfile* profiles); + +diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc +index 332be3f50bf..90e8f207dcc 100644 +--- a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc ++++ b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc +@@ -135,9 +135,16 @@ using ::testing::PrintToString; + using ::tensorflow::testing::IsOk; + using ::tensorflow::testing::StatusIs; + ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + constexpr std::array ValidTrtModes = { +- TrtTestMode::kImplicitBatch, TrtTestMode::kExplicitBatch, ++ TrtTestMode::kImplicitBatch, ++ TrtTestMode::kExplicitBatch, + TrtTestMode::kDynamicShape}; ++#else ++constexpr std::array ValidTrtModes = { ++ TrtTestMode::kExplicitBatch, ++ TrtTestMode::kDynamicShape}; ++#endif + + bool TrtShapedWeightsEquals(const TRT_ShapedWeights& lhs, + const TRT_ShapedWeights& rhs) { +@@ -299,7 +306,11 @@ class ValidatorTest : public ::testing::Test { + + TrtNodeValidator validator(graph_properties, TrtPrecisionMode::FP32, + /*use_calibration=*/false, ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + /*use_implicit_batch=*/true, ++#else ++ /*use_implicit_batch=*/false, ++#endif + /*use_explicit_precision=*/false); + return validator.ConvertToTensorOrWeights(node->def(), output_port, + tensor_or_weights); +@@ -336,8 +347,15 @@ TEST_F(ValidatorTest, ConvertToTensorOrWeights) { + convert_to_tensor_or_weights( + std::vector(nvinfer1::Dims::MAX_DIMS + 2, 1), &output), + StatusIs(absl::StatusCode::kOutOfRange, +- HasSubstr("Input tensor rank is greater than 9"))); ++ HasSubstr("Input tensor rank is greater than " ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ "9" ++#else ++ "8" ++#endif ++ ))); + } ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + // Convert non-Const with #dims < 1. 
+ { + TRT_TensorOrWeights output; +@@ -360,6 +378,7 @@ TEST_F(ValidatorTest, ConvertToTensorOrWeights) { + EXPECT_NE(nullptr, output.tensor()->simple_tensor()); + EXPECT_THAT(output.GetTrtDims(), DimsAreArray({non_batch_dim})); + } ++#endif // !IS_TRT_VERSION_GE(10, 0, 0, 0) + } + + TEST_F(ValidatorTest, IsTensorRTCandidate_Basics) { +@@ -375,7 +394,11 @@ TEST_F(ValidatorTest, IsTensorRTCandidate_Basics) { + TF_EXPECT_OK(graph_properties.InferStatically(true)); + TrtNodeValidator validator(graph_properties, TrtPrecisionMode::FP32, + /*use_calibration=*/false, ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + /*use_implicit_batch=*/true, ++#else ++ /*use_implicit_batch=*/false, ++#endif + /*use_explicit_precision=*/false); + + // Override the Add converter. +@@ -462,15 +485,21 @@ TEST(TrtNodeValidator, IsTensorRTCandidate) { + {TrtPrecisionMode::FP32, TrtPrecisionMode::INT8}) { + TrtNodeValidator validator(graph_properties, precision_mode, + /*use_calibration=*/false, ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + /*use_implicit_batch=*/true, ++#else ++ /*use_implicit_batch=*/false, ++#endif + /*use_explicit_precision=*/false); + TF_EXPECT_OK(validator.IsTensorRTCandidate(matmul.operation.node())); ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + EXPECT_THAT( + validator.IsTensorRTCandidate(incompatible_matmul.operation.node()), + StatusIs(absl::StatusCode::kInvalidArgument, + HasSubstr("MatMul with 2D tensors requires explicit batch " + "mode, or that tensor A " + "is not transposed and B is a constant tensor."))); ++#endif + EXPECT_THAT(validator.IsTensorRTCandidate(unsupported_op.operation.node()), + StatusIs(absl::StatusCode::kUnimplemented, + HasSubstr("Op type Erfc is not supported"))); +@@ -503,7 +532,11 @@ class ConverterTest : public ::testing::Test { + converter_ = + std::move(Converter::Create(TrtPrecisionMode::FP32, + /*use_calibration=*/false, &logger_, ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + /*use_implicit_batch=*/true, ++#else ++ /*use_implicit_batch=*/false, ++#endif + /*engine_name=*/"TRTEngineOp_000_000", + /*use_explicit_precision=*/false) + .value()); +@@ -692,15 +725,23 @@ TEST_F(ConverterTest, TransposeTensor) { + "with that of the input"))); + + // Transpose at batch dimension. ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + EXPECT_THAT( + converter_->TransposeTensor(input_tensor, {1, 0, 2, 3}, &output_tensor, + dummy_node_def, "sub2"), + StatusIs(absl::StatusCode::kUnimplemented, + HasSubstr("Transpose at batch dimension is not supported."))); ++#endif + + // OK. + TF_EXPECT_OK(converter_->TransposeTensor( +- input_tensor, {0, 3, 1, 2}, &output_tensor, dummy_node_def, "sub3")); ++ input_tensor, ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ {0, 3, 1, 2}, ++#else ++ {2, 0, 1}, ++#endif ++ &output_tensor, dummy_node_def, "sub3")); + EXPECT_THAT(output_tensor->getDimensions(), DimsAreArray({5, 2, 3})); + EXPECT_THAT( + converter_->network(), +@@ -815,14 +856,18 @@ TEST_F(ConverterTest, AddAndGetTensorOrWeights) { + // Add a tensor. + ITensorProxyPtr simple_tensor; + TRT_TensorOrWeights tensor(simple_tensor); ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + EXPECT_EQ(-1, tensor.batch_size()); ++#endif + TF_EXPECT_OK(MaybeUpdateBatchSize(123)); + TF_EXPECT_OK(AddTensorOrWeights("my_tensor", tensor)); + + // Get the added tensor. + TRT_TensorOrWeights added_tensor; + TF_EXPECT_OK(GetTensorOrWeights("my_tensor", &added_tensor)); ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + EXPECT_EQ(123, added_tensor.batch_size()); ++#endif + + // Add the same tensor again. 
+ EXPECT_THAT(AddTensorOrWeights("my_tensor", tensor), +@@ -875,7 +920,11 @@ TEST_F(ConverterTest, MaybeApplyQuantizationRanges) { + Logger& logger = *Logger::GetLogger(); + auto int8_converter = Converter::Create(TrtPrecisionMode::INT8, + /*use_calibration=*/true, &logger, ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + /*use_implicit_batch=*/true, ++#else ++ /*use_implicit_batch=*/false, ++#endif + /*engine_name=*/"") + .value(); + int8_converter->ProvideQuantizationRange(&input, -5.0f, 5.0f); +@@ -1016,6 +1065,10 @@ TEST_F(ConverterTest, CreateConstantLayer) { + + class ConvertGraphDefToEngineTest : public ::testing::Test { + public: ++ ConvertGraphDefToEngineTest() { ++ runtime_.reset(nvinfer1::createInferRuntime(logger_)); ++ } ++ + Status RunConvertGraphDefToEngine(Scope* s) { + GraphDef gdef; + TF_EXPECT_OK(s->ToGraphDef(&gdef)); +@@ -1040,13 +1093,20 @@ class ConvertGraphDefToEngineTest : public ::testing::Test { + return ConvertGraphDefToEngine( + gdef, /*ctx=*/nullptr, TrtPrecisionMode::FP32, /*max_batch_size=*/1, + /*max_workspace_size_bytes=*/64 << 20, input_shapes, &logger_, +- /*allocator=*/nullptr, /*calibrator=*/nullptr, &engine_, +- /*use_calibration=*/false, /*use_implicit_batch=*/true, ++ /*allocator=*/nullptr, runtime_.get(), ++ /*calibrator=*/nullptr, &engine_, ++ /*use_calibration=*/false, ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ /*use_implicit_batch=*/true, ++#else ++ /*use_implicit_batch=*/false, ++#endif + /*convert_successfully=*/nullptr, /*profiles=*/nullptr, + "TRTEngineOp_000_000", /*use_explicit_precision=*/false); + } + + protected: ++ TrtUniquePtrType runtime_; + TrtUniquePtrType engine_; + + private: +@@ -1127,11 +1187,17 @@ class OpConverterTest : public ::testing::Test { + } + + void Reset(TrtPrecisionMode precision_mode_to_test = TrtPrecisionMode::FP32, +- TrtTestMode trt_mode = TrtTestMode::kImplicitBatch, ++ TrtTestMode trt_mode = ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ TrtTestMode::kImplicitBatch, ++#else ++ TrtTestMode::kDynamicShape, ++#endif + OpKernelContext* ctx = nullptr) { + // Destroy existing TRT objects in a proper order. + converter_.reset(nullptr); + engine_.reset(nullptr); ++ runtime_.reset(nullptr); + + // Re-create them in proper order. + converter_ = +@@ -1145,6 +1211,8 @@ class OpConverterTest : public ::testing::Test { + + // Reset other related artifacts. + scope_ = Scope::NewRootScope(); ++ ++ runtime_.reset(nvinfer1::createInferRuntime(logger_)); + } + + // Constructs a flat tensor with 'vals' in Unified Memory. 
+@@ -1230,18 +1298,32 @@ class OpConverterTest : public ::testing::Test { + + void CheckDataTypeMatches(const DataVec& datas) { + if (VLOG_IS_ON(2)) { ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + int nbBindings = engine_->getNbBindings(); ++#else ++ int nbBindings = engine_->getNbIOTensors(); ++#endif + VLOG(2) << "Number of engine bindings: " << nbBindings; + for (int i = 0; i < nbBindings; i++) { +- VLOG(2) << "Binding " << i << " name: " << engine_->getBindingName(i); ++ VLOG(2) << "Binding " << i << " name: " << ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ engine_->getBindingName(i); ++#else ++ engine_->getIOTensorName(i); ++#endif + } + } + for (const auto& data : datas) { + VLOG(2) << "Checking if data type matches for tensor " << data.name; ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + const int input_index = engine_->getBindingIndex(data.name.c_str()); + ASSERT_NE(-1, input_index); + const nvinfer1::DataType trt_dtype = + engine_->getBindingDataType(input_index); ++#else ++ const nvinfer1::DataType trt_dtype = ++ engine_->getTensorDataType(data.name.c_str()); ++#endif + DataType tf_type; + TF_ASSERT_OK(TrtTypeToTfType(trt_dtype, &tf_type)); + ASSERT_EQ(data.tensor.dtype(), tf_type) +@@ -1287,7 +1369,7 @@ class OpConverterTest : public ::testing::Test { + converter_->BuildCudaEngine(&engine_, + /*max_batch_size=*/batch_size, + /*max_workspace_size_bytes=*/1 << 26, +- /*allocator=*/nullptr, ++ /*allocator=*/nullptr, runtime_.get(), + /*calibrator=*/nullptr, + /*profiles=*/&profiles)); + CHECK_NOTNULL(engine_.get()); +@@ -1297,7 +1379,12 @@ class OpConverterTest : public ::testing::Test { + const int num_bindings = input_data.size() + output_data->size(); + std::vector buffers(num_bindings); + +- if (engine_->getNbBindings() != num_bindings) { ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ const int actual_num_bindings = engine_->getNbBindings(); ++#else ++ const int actual_num_bindings = engine_->getNbIOTensors(); ++#endif ++ if (actual_num_bindings != num_bindings) { + return errors::Internal("Number of bindings do not match"); + } + // Since we have only 1 optimization profile (which is enabled by default) +@@ -1308,16 +1395,25 @@ class OpConverterTest : public ::testing::Test { + + // Prepare input bindings. + TF_RETURN_IF_ERROR( +- SetTrtEngineInputs(engine_.get(), execution_context.get(), 0, buffers, ++ SetTrtEngineInputs(engine_.get(), execution_context.get(), 0, ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ buffers, ++#endif + converter_->use_implicit_batch(), batch_size, + profiles, nullptr, &input_data)); + // Prepare output bindings. + TF_RETURN_IF_ERROR(SetTrtEngineOutputs( +- engine_.get(), execution_context.get(), 0, buffers, ++ engine_.get(), execution_context.get(), 0, ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ buffers, ++#endif + converter_->use_implicit_batch(), batch_size, nullptr, output_data)); + // Execute the TRT engine. 
+- TF_RETURN_IF_ERROR(TrtEnqueue(execution_context.get(), buffers, stream_, +- converter_->use_implicit_batch(), ++ TF_RETURN_IF_ERROR(TrtEnqueue(execution_context.get(), ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ buffers, ++#endif ++ stream_, converter_->use_implicit_batch(), + batch_size)); + cudaStreamSynchronize(stream_); + return OkStatus(); +@@ -1372,9 +1468,11 @@ class OpConverterTest : public ::testing::Test { + std::vector dims_vec; + TF_CHECK_OK(adap.Prepend(batch_size).Vector(&dims_vec)); + AddTestTensorWithTFDims(name, dims_vec, trt_dtype); ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + if (adap.IsStatic()) { + ASSERT_EQ(batch_size, converter_->batch_size_); + } ++#endif + } + + // Adds weights for both validation and conversion. The type of the weight is +@@ -1570,6 +1668,7 @@ class OpConverterTest : public ::testing::Test { + Logger& logger_ = *Logger::GetLogger(); + + private: ++ TrtUniquePtrType runtime_; + TrtUniquePtrType engine_; + cudaStream_t stream_; + std::unique_ptr tensor_buffer_allocator_; +@@ -1592,7 +1691,13 @@ class OpConverterTest : public ::testing::Test { + class VariableOpConverterTest : public OpConverterTest { + public: + void Reset(TrtPrecisionMode precision_mode_to_test = TrtPrecisionMode::FP32, +- TrtTestMode trt_mode = TrtTestMode::kImplicitBatch) { ++ TrtTestMode trt_mode = ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ TrtTestMode::kImplicitBatch ++#else ++ TrtTestMode::kDynamicShape ++#endif ++ ) { + OpConverterTest::Reset(precision_mode_to_test, trt_mode, context_.get()); + } + +@@ -8046,6 +8151,9 @@ void TestConvertSplit(OpConverterTest* test) { + } + } + ++// TODO(benbarsdell): This test needs to be fixed in many places to support ++// non-implicit-batch for TRT10. ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + TEST_F(OpConverterTest, ConvertSplit) { + { + // Axis is a tensor, should fail. +@@ -8122,6 +8230,7 @@ TEST_F(OpConverterTest, ConvertSplit) { + TestConvertSplit(this); + TestConvertSplit(this); + } ++#endif + + // Get the NodeDef for Unpack (Unstack in TF API). + auto get_unpack_nodedef = [](DataType dtype, int num, int axis) -> NodeDef { +diff --git a/tensorflow/compiler/tf2tensorrt/convert/ops/einsum.cc b/tensorflow/compiler/tf2tensorrt/convert/ops/einsum.cc +index dea0eca7326..8f9d6cc13eb 100755 +--- a/tensorflow/compiler/tf2tensorrt/convert/ops/einsum.cc ++++ b/tensorflow/compiler/tf2tensorrt/convert/ops/einsum.cc +@@ -311,6 +311,14 @@ class EinsumDescriptor { + builder->Shape(operand.tensor()->trt_tensor()); + TRT_ENSURE_PTR_OK(shape_layer); + nvinfer1::ITensor* shape = (*shape_layer)->getOutput(0); ++#if IS_TRT_VERSION_GE(10, 0, 0, 0) ++ // TODO(benbarsdell): Casting to int32 makes this match the pre-TRT10 ++ // behavior, but it would be better to instead cast the other int32 ++ // tensors to int64. ++ shape = builder->Network() ++ ->addCast(*shape, nvinfer1::DataType::kINT32) ++ ->getOutput(0); ++#endif + for (int i = 0; i < operand.GetTrtDims().nbDims; i++) { + int idx = permute.empty() ? 
i : permute.at(i); + StatusOr slice_layer = +diff --git a/tensorflow/compiler/tf2tensorrt/convert/ops/layer_utils.h b/tensorflow/compiler/tf2tensorrt/convert/ops/layer_utils.h +index e3aadc279d9..3c656bff4ac 100644 +--- a/tensorflow/compiler/tf2tensorrt/convert/ops/layer_utils.h ++++ b/tensorflow/compiler/tf2tensorrt/convert/ops/layer_utils.h +@@ -425,6 +425,14 @@ class TRTNetworkBuilder { + StatusOr shape_layer = this->Shape(input); + TRT_ENSURE_PTR_OK(shape_layer); + nvinfer1::ITensor* runtime_shape = (*shape_layer)->getOutput(0); ++#if IS_TRT_VERSION_GE(10, 0, 0, 0) ++ // TODO(benbarsdell): Casting to int32 makes this match the pre-TRT10 ++ // behavior, but it would be better to instead cast the other int32 ++ // tensors to int64. ++ runtime_shape = ++ network_->addCast(*runtime_shape, nvinfer1::DataType::kINT32) ++ ->getOutput(0); ++#endif + + if (sub_one) { + StatusOr ones = this->Constant(1, 1); +@@ -601,9 +609,9 @@ class TRTNetworkBuilder { + nvinfer1::ITensor* input, float quantize_scale, float dequantize_scale, + const std::string& name) { + TRT_ENSURE(input); +- if (!IS_TRT_VERSION_GE(8, 0, 0, 0)) { +- TRT_ENSURE(network_->hasExplicitPrecision()); +- } ++#if !IS_TRT_VERSION_GE(8, 0, 0, 0) ++ TRT_ENSURE(network_->hasExplicitPrecision()); ++#endif + TRT_ENSURE(IS_TRT_VERSION_GE(7, 1, 0, 0)); + + static int count = 0; +diff --git a/tensorflow/compiler/tf2tensorrt/convert/ops/like_ops.cc b/tensorflow/compiler/tf2tensorrt/convert/ops/like_ops.cc +index dd9dd0f2304..646950c00a3 100644 +--- a/tensorflow/compiler/tf2tensorrt/convert/ops/like_ops.cc ++++ b/tensorflow/compiler/tf2tensorrt/convert/ops/like_ops.cc +@@ -64,6 +64,15 @@ class ConvertLikeOps : public OpConverterBase> { + builder->Shape(input.tensor()->trt_tensor()); + TF_RETURN_IF_ERROR(shape_layer.status()); + dims_input_tensor = (*shape_layer)->getOutput(0); ++#if IS_TRT_VERSION_GE(10, 0, 0, 0) ++ // TODO(benbarsdell): Casting to int32 makes this match the pre-TRT10 ++ // behavior, but it would be better to instead cast the other int32 ++ // tensors to int64. ++ dims_input_tensor = network ++ ->addCast(*dims_input_tensor->trt_tensor(), ++ nvinfer1::DataType::kINT32) ++ ->getOutput(0); ++#endif + dims.nbDims = 0; + } + +diff --git a/tensorflow/compiler/tf2tensorrt/convert/ops/tile.cc b/tensorflow/compiler/tf2tensorrt/convert/ops/tile.cc +index 85c9c6a0292..f71dc70344c 100644 +--- a/tensorflow/compiler/tf2tensorrt/convert/ops/tile.cc ++++ b/tensorflow/compiler/tf2tensorrt/convert/ops/tile.cc +@@ -169,6 +169,13 @@ class ConvertTile : public OpConverterBase { + + nvinfer1::ITensor *shape = + network->addShape(input_trt_tensor)->getOutput(0); ++#if IS_TRT_VERSION_GE(10, 0, 0, 0) ++ // TODO(benbarsdell): Casting to int32 makes this match the pre-TRT10 ++ // behavior, but it would be better to instead cast all the other int32 ++ // tensors to int64. 
++ shape = ++ network->addCast(*shape, nvinfer1::DataType::kINT32)->getOutput(0); ++#endif + target_shape = network + ->addElementWise(*shape, *mult, + nvinfer1::ElementWiseOperation::kPROD) +@@ -179,7 +186,11 @@ class ConvertTile : public OpConverterBase { + DimsAdapter stride(std::vector(nb_dims, 1)); + auto layer = network->addSlice(input_trt_tensor, start, output_size, + stride.AsTrtDims()); ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + layer->setMode(nvinfer1::SliceMode::kWRAP); ++#else ++ layer->setMode(nvinfer1::SampleMode::kWRAP); ++#endif + if (target_shape) layer->setInput(2, *target_shape); + + converter->SetLayerName(layer, params.node_def.name(), "to_tile"); +diff --git a/tensorflow/compiler/tf2tensorrt/convert/timing_cache.cc b/tensorflow/compiler/tf2tensorrt/convert/timing_cache.cc +index d9b4a9dc5e8..536d09d2eb5 100644 +--- a/tensorflow/compiler/tf2tensorrt/convert/timing_cache.cc ++++ b/tensorflow/compiler/tf2tensorrt/convert/timing_cache.cc +@@ -70,7 +70,11 @@ void TimingCacheRegistry::Upsert(const string& name, TimingCache* cache) { + std::copy_n(static_cast(memory->data()), memory->size(), + mem.begin()); + } ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + memory->destroy(); ++#else // IS_TRT_VERSION_GE(10, 0, 0, 0) ++ delete memory; ++#endif // IS_TRT_VERSION_GE(10, 0, 0, 0) + #endif // IS_TRT_VERSION_GE(8, 0, 0, 0) + } + +diff --git a/tensorflow/compiler/tf2tensorrt/convert/trt_optimization_pass.cc b/tensorflow/compiler/tf2tensorrt/convert/trt_optimization_pass.cc +index 5c49346940a..49faef71b16 100644 +--- a/tensorflow/compiler/tf2tensorrt/convert/trt_optimization_pass.cc ++++ b/tensorflow/compiler/tf2tensorrt/convert/trt_optimization_pass.cc +@@ -47,7 +47,7 @@ using absl::StrCat; + namespace { + + bool ShouldUseExplicitPrecision(const GraphDef& gdef) { +- if (!IS_TRT_VERSION_GE(8, 0, 0, 0)) { ++ if (!IS_TRT_VERSION_GE(8, 0, 0, 0) || IS_TRT_VERSION_GE(10, 0, 0, 0)) { + return false; + } + return absl::c_any_of(gdef.node(), [](const auto& node) { +diff --git a/tensorflow/compiler/tf2tensorrt/convert/utils.cc b/tensorflow/compiler/tf2tensorrt/convert/utils.cc +index f2cc8be2fd0..bfc4f5dacaf 100644 +--- a/tensorflow/compiler/tf2tensorrt/convert/utils.cc ++++ b/tensorflow/compiler/tf2tensorrt/convert/utils.cc +@@ -221,10 +221,21 @@ Status TrtTypeToTfType(nvinfer1::DataType trt_type, DataType* tf_type) { + } + + int GetNumberOfEngineInputs(const nvinfer1::ICudaEngine* engine) { ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + int n_bindings = engine->getNbBindings(); ++#else ++ int n_bindings = engine->getNbIOTensors(); ++#endif + int n_input = 0; + for (int i = 0; i < n_bindings; i++) { ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + if (engine->bindingIsInput(i)) n_input++; ++#else ++ if (engine->getTensorIOMode(engine->getIOTensorName(i)) == ++ nvinfer1::TensorIOMode::kINPUT) { ++ n_input++; ++ } ++#endif + } + // According to TensorRT 7 doc: "If the engine has been built for K profiles, + // the first getNbBindings() / K bindings are used by profile number 0, the +@@ -232,7 +243,11 @@ int GetNumberOfEngineInputs(const nvinfer1::ICudaEngine* engine) { + // Therefore, to get the number of input tensors, we need to divide by the + // the number of profiles. 
+ int n_profiles = engine->getNbOptimizationProfiles(); ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + return n_input / n_profiles; ++#else ++ return n_input; ++#endif + } + + absl::string_view GetDeviceName(const Node* node) { +diff --git a/tensorflow/compiler/tf2tensorrt/convert/utils.h b/tensorflow/compiler/tf2tensorrt/convert/utils.h +index 9a03d2f9093..75f9a5218fa 100644 +--- a/tensorflow/compiler/tf2tensorrt/convert/utils.h ++++ b/tensorflow/compiler/tf2tensorrt/convert/utils.h +@@ -228,7 +228,11 @@ class DimsAdapter { + // in via the result pointer. + void TrtDims(nvinfer1::Dims* result) const { + result->nbDims = num_dims_; ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + absl::c_copy(storage_, static_cast(result->d)); ++#else ++ absl::c_copy(storage_, static_cast(result->d)); ++#endif + } + + // Converts to an nvinfer1::Dims and return by value. +diff --git a/tensorflow/compiler/tf2tensorrt/convert/weights.cc b/tensorflow/compiler/tf2tensorrt/convert/weights.cc +index da2157096b5..5b76fee995f 100644 +--- a/tensorflow/compiler/tf2tensorrt/convert/weights.cc ++++ b/tensorflow/compiler/tf2tensorrt/convert/weights.cc +@@ -59,11 +59,19 @@ Status TRT_ShapedWeights::SetShape(DimsAdapter dims) { + size_t TRT_ShapedWeights::size_bytes() const { + size_t data_type_size = -1; + switch (type_) { ++#if IS_TRT_VERSION_GE(10, 0, 0, 0) ++ case nvinfer1::DataType::kINT64: ++ data_type_size = 8; ++ break; ++#endif + case nvinfer1::DataType::kFLOAT: + case nvinfer1::DataType::kINT32: + data_type_size = 4; + break; + case nvinfer1::DataType::kHALF: ++#if IS_TRT_VERSION_GE(10, 0, 0, 0) ++ case nvinfer1::DataType::kBF16: ++#endif + data_type_size = 2; + break; + #if IS_TRT_VERSION_GE(8, 5, 0, 0) +@@ -76,6 +84,10 @@ size_t TRT_ShapedWeights::size_bytes() const { + case nvinfer1::DataType::kBOOL: + data_type_size = 1; + break; ++#if IS_TRT_VERSION_GE(10, 0, 0, 0) ++ case nvinfer1::DataType::kINT4: // Not supported ++ return 0; ++#endif + } + return volume_ * data_type_size; + } +diff --git a/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_op.cc b/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_op.cc +index 7a74a43d88a..cfa92bbdaee 100644 +--- a/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_op.cc ++++ b/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_op.cc +@@ -77,7 +77,11 @@ class ContextDeviceMemory { + + ~ContextDeviceMemory() { + if (device_memory_) { ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + device_memory_allocator_->free(device_memory_); ++#else ++ device_memory_allocator_->deallocate(device_memory_); ++#endif + } + } + +@@ -194,7 +198,8 @@ class TRTEngineOp : public AsyncOpKernel { + StatusOr> BuildEngine( + const std::vector& input_concrete_shapes, int batch_size, + bool use_calibration, TRTInt8Calibrator* calibrator, +- TRTEngineCacheResource* cache_resource, OpKernelContext* ctx); ++ TRTEngineCacheResource* cache_resource, OpKernelContext* ctx, ++ nvinfer1::IRuntime* runtime); + + // Verify that the input shapes are consistent and can be handled by this op. + Status VerifyInputShapes(const std::vector& shapes); +@@ -222,6 +227,7 @@ class TRTEngineOp : public AsyncOpKernel { + bool calibration_mode_; + + // Whether to use implicit batch dimension for TensorRT. ++ // Note that this is no longer supported since TensorRT 10.0. 
+ bool use_implicit_batch_; + + // Whether to collect optimization profiles for TensorRT, only used when +@@ -498,6 +504,12 @@ TRTEngineOp::TRTEngineOp(OpKernelConstruction* context) + << ", thus setting _use_implicit_batch=true"; + use_implicit_batch_ = true; + } ++#if IS_TRT_VERSION_GE(10, 0, 0, 0) ++ OP_REQUIRES( ++ context, !use_implicit_batch_, ++ errors::InvalidArgument( ++ "_use_implicit_batch must be false when using TensorRT >= 10.0")); ++#endif + + status = + context->GetAttr("_profile_generation_mode", &profile_generation_mode_); +@@ -1003,18 +1015,35 @@ Status TRTEngineOp::ExecuteTrtEngine( + VLOG(2) << " Workspace size: " << cuda_engine->getWorkspaceSize() + << " bytes"; + #endif // #if !IS_TRT_VERSION_GE(8, 0, 0, 0) ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + VLOG(2) << " Datatype of " << cuda_engine->getNbBindings() ++#else ++ VLOG(2) << " Datatype of " << cuda_engine->getNbIOTensors() ++#endif + << " inputs/outputs"; + string binding_types = ""; ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + for (int i = 0; i < cuda_engine->getNbBindings(); i++) { + binding_types += " " + string(cuda_engine->getBindingName(i)) + ": " + + DebugString(cuda_engine->getBindingDataType(i)) + "\n"; + } ++#else // IS_TRT_VERSION_GE(10, 0, 0, 0) ++ for (int i = 0; i < cuda_engine->getNbIOTensors(); i++) { ++ binding_types += " " + string(cuda_engine->getIOTensorName(i)) + ": " + ++ DebugString(cuda_engine->getTensorDataType( ++ cuda_engine->getIOTensorName(i))) + ++ "\n"; ++ } ++#endif // IS_TRT_VERSION_GE(10, 0, 0, 0) + VLOG(2) << binding_types; + } + ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + const int num_binding = cuda_engine->getNbBindings(); + std::vector buffers(num_binding); ++#else ++ const int num_binding = cuda_engine->getNbIOTensors(); ++#endif + + // nvinfer1::IExecutionContext::enqueue is not thread safe and we need a mutex + // for it. +@@ -1031,11 +1060,17 @@ Status TRTEngineOp::ExecuteTrtEngine( + use_implicit_batch_ ? ctx->input(0).shape().dim_size(0) : 0; + + TF_RETURN_IF_ERROR(SetTrtEngineInputs( +- cuda_engine, execution_context, trt_context_idx, buffers, ++ cuda_engine, execution_context, trt_context_idx, ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ buffers, ++#endif + use_implicit_batch_, num_batch, profiles, ctx)); + + TF_RETURN_IF_ERROR(SetTrtEngineOutputs(cuda_engine, execution_context, +- trt_context_idx, buffers, ++ trt_context_idx, ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ buffers, ++#endif + use_implicit_batch_, num_batch, ctx)); + + // Copied from gpu_kernel_helper.h as the header can only be used in *.cu.cc +@@ -1054,8 +1089,11 @@ Status TRTEngineOp::ExecuteTrtEngine( + execution_context, allocator, engine_context->GetDeviceMemorySize())); + } + // Enqueue the TensorRT engine for execution. 
+- return TrtEnqueue(execution_context, buffers, stream, use_implicit_batch_, +- num_batch); ++ return TrtEnqueue(execution_context, ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ buffers, ++#endif ++ stream, use_implicit_batch_, num_batch); + } + + Status TRTEngineOp::GetEngineCacheResource(OpKernelContext* ctx, +@@ -1087,7 +1125,8 @@ Status TRTEngineOp::GetEngineCacheResource(OpKernelContext* ctx, + StatusOr> TRTEngineOp::BuildEngine( + const std::vector& input_concrete_shapes, int batch_size, + bool use_calibration, TRTInt8Calibrator* calibrator, +- TRTEngineCacheResource* cache_resource, OpKernelContext* ctx) { ++ TRTEngineCacheResource* cache_resource, OpKernelContext* ctx, ++ nvinfer1::IRuntime* runtime) { + tensorflow::profiler::TraceMe activity( + "TRTEngineOp::BuildEngine", tensorflow::profiler::TraceMeLevel::kInfo); + TRT_ENSURE(cache_resource); +@@ -1116,9 +1155,9 @@ StatusOr> TRTEngineOp::BuildEngine( + auto status = convert::ConvertGraphDefToEngine( + segment_graph_def_, ctx, precision_mode_, batch_size, workspace_size_, + conversion_input_shapes, &logger, cache_resource->allocator_.get(), +- calibrator, &engine, use_calibration, use_implicit_batch_, nullptr, +- &cache_resource->profiles_, name(), use_explicit_precision_, &cluster, +- ctx->device()->name()); ++ runtime, calibrator, &engine, use_calibration, use_implicit_batch_, ++ nullptr, &cache_resource->profiles_, name(), use_explicit_precision_, ++ &cluster, ctx->device()->name()); + if (!status.ok()) { + LOG_FIRST_FEW_WARNING_WITH_PREFIX + << "Engine creation for " << name() << " failed. " +@@ -1152,6 +1191,9 @@ StatusOr> TRTEngineOp::GetEngine( + return std::pair(&empty_context, 0); + } + ++ TrtUniquePtrType infer(nvinfer1::createInferRuntime(logger)); ++ infer->setGpuAllocator(allocator); ++ + // Handle the static engine case. For static engines, the cache will have a + // single element containing the only engine. + if (static_engine_) { +@@ -1172,14 +1214,17 @@ StatusOr> TRTEngineOp::GetEngine( + return std::pair(&empty_context, 0); + } + +- TrtUniquePtrType infer(nvinfer1::createInferRuntime(logger)); +- infer->setGpuAllocator(allocator); + // Need to initialize plugins in order to deserialize engines that contain + // plugins. + MaybeInitializeTrtPlugins(&logger); + TrtUniquePtrType static_engine( + infer->deserializeCudaEngine(serialized_segment_.c_str(), +- serialized_segment_.size(), nullptr)); ++ serialized_segment_.size() ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ , ++ nullptr ++#endif ++ )); + int profile_id = 0; + if (static_engine && !use_implicit_batch_) { + // load profiles +@@ -1189,7 +1234,8 @@ StatusOr> TRTEngineOp::GetEngine( + TF_RETURN_IF_ERROR(cache_res->profiles_.CreateExecutionContexts( + static_engine.get(), &exec_contexts)); + cache.emplace(input_concrete_shapes, +- std::make_unique(std::move(static_engine), ++ std::make_unique(std::move(infer), ++ std::move(static_engine), + std::move(exec_contexts))); + VLOG(1) << "Added new engine to cache of " << name() + << ". 
Cache size: " << cache.size(); +@@ -1218,9 +1264,10 @@ StatusOr> TRTEngineOp::GetEngine( + << "Reason: " << status; + } + } +- auto result = BuildEngine(input_concrete_shapes, batch_size, +- /*use_calibration=*/false, +- /*calibrator=*/nullptr, cache_res, ctx); ++ auto result = ++ BuildEngine(input_concrete_shapes, batch_size, ++ /*use_calibration=*/false, ++ /*calibrator=*/nullptr, cache_res, ctx, infer.get()); + if (!result.ok()) { + return std::pair(&empty_context, 0); + } +@@ -1232,20 +1279,27 @@ StatusOr> TRTEngineOp::GetEngine( + + int max_batch_size = 1; + if (use_implicit_batch_) { ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + max_batch_size = raw_static_engine->getMaxBatchSize(); + // Static engine will have max_batch_size for batch size so that all + // inputs will map to this single engine. + for (int i = 0; i < engine_input_shapes.size(); i++) { + engine_input_shapes[i].set_dim(0, max_batch_size); + } ++#else ++ return errors::Internal( ++ "Implicit batch is not supported since TensorRT 10.0. Pass " ++ "use_dynamic_shape=True to TrtGraphConverterV2 to avoid this error."); ++#endif + } + + ExecutionContext context = ExecutionContext::Create(raw_static_engine); + // TODO(laigd): here we assume engine_input_shapes matches the actual input + // shapes of the engine, we should verify that. +- cache.emplace(engine_input_shapes, +- std::make_unique(std::move(static_engine), +- std::move(context))); ++ cache.emplace( ++ engine_input_shapes, ++ std::make_unique( ++ std::move(infer), std::move(static_engine), std::move(context))); + // Runtime is safe to delete after engine creation + VLOG(1) << "Size of serialized TRT engine: " + << serialized_segment_.capacity(); +@@ -1294,7 +1348,7 @@ StatusOr> TRTEngineOp::GetEngine( + // means calibration_mode_ is true and this path won't get executed. + auto result = + BuildEngine(input_concrete_shapes, batch_size, use_calibration_, +- calibrator_.get(), cache_res, ctx); ++ calibrator_.get(), cache_res, ctx, infer.get()); + if (!result.ok()) { + return std::pair(&empty_context, 0); + } +@@ -1302,9 +1356,10 @@ StatusOr> TRTEngineOp::GetEngine( + std::vector exec_contexts; + TF_RETURN_IF_ERROR(cache_res->profiles_.CreateExecutionContexts( + engine.get(), &exec_contexts)); +- cache.emplace(input_concrete_shapes, +- std::make_unique(std::move(engine), +- std::move(exec_contexts))); ++ cache.emplace( ++ input_concrete_shapes, ++ std::make_unique(std::move(infer), std::move(engine), ++ std::move(exec_contexts))); + VLOG(1) << "Added new engine to cache of " << name() + << ". Cache size: " << cache.size(); + engine_contexts = cache.at(input_concrete_shapes).get(); +@@ -1390,6 +1445,9 @@ Status TRTEngineOp::AllocateCalibrationResources( + grappler::GetDeviceInfo(full_parsed_name)); + tensorflow::grappler::VirtualCluster cluster(device_map); + ++ TrtUniquePtrType infer(nvinfer1::createInferRuntime(logger)); ++ infer->setGpuAllocator(cache_res->allocator_.get()); ++ + // ConvertGraphDefToEngine() will try to build the engine. 
This thread + // will loop inside buildCudaEngine() consuming the calibration data + // that is set by the TF op, and drive the builder until calibrator +@@ -1402,7 +1460,8 @@ Status TRTEngineOp::AllocateCalibrationResources( + this->segment_graph_def_, ctx, TrtPrecisionMode::INT8, + cres->calibrator_->getBatchSize(), this->workspace_size_, + conversion_input_shapes, &cache_res->GetLogger(), +- cache_res->allocator_.get(), cres->calibrator_.get(), &cres->engine_, ++ cache_res->allocator_.get(), infer.get(), ++ cres->calibrator_.get(), &cres->engine_, + /*use_calibration=*/true, this->use_implicit_batch_, + /*convert_successfully=*/nullptr, + /*profiles=*/&cache_res->profiles_, name(), +@@ -1423,13 +1482,15 @@ Status TRTEngineOp::AllocateCalibrationResources( + auto calib_result = cache_res->profiles_.CreateExecutionContexts( + cres->engine_.get(), &exec_contexts); + cache_res->cache_.emplace( +- shapes, std::make_unique(std::move(cres->engine_), ++ shapes, std::make_unique(std::move(infer), ++ std::move(cres->engine_), + std::move(exec_contexts))); + } else { + ExecutionContext context = + ExecutionContext::Create(cres->engine_.get()); + cache_res->cache_.emplace( +- shapes, std::make_unique(std::move(cres->engine_), ++ shapes, std::make_unique(std::move(infer), ++ std::move(cres->engine_), + std::move(context))); + } + } +diff --git a/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_op_test.cc b/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_op_test.cc +index 317f3a54357..3368d3d4754 100644 +--- a/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_op_test.cc ++++ b/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_op_test.cc +@@ -66,7 +66,12 @@ class TRTEngineOpTestBase : public OpsTestBase { + public: + void AddSimpleTrtOp(DataType dtype, int max_cached_engines_count = 1, + PartialTensorShape shape = PartialTensorShape({-1, -1}), +- bool use_implicit_batch = true, ++ bool use_implicit_batch ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ = true, ++#else ++ = false, ++#endif + bool allow_build_at_runtime = true, + bool static_engine = false) { + // Create the GPU device. 
+@@ -207,6 +212,7 @@ constexpr std::array TestParameters{TestParam{false}, + INSTANTIATE_TEST_CASE_P(TRTEngineOpTestInstantiation, TRTEngineOpTestWithParam, + ::testing::ValuesIn(TestParameters)); + ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + TEST_F(TRTEngineOpTestBase, DynamicEngines) { + // Test dynamic engine creation during inference time + TRTEngineOpTestBase::AddSimpleTrtOp(DT_FLOAT, /*max_cached_engines_count=*/4); +@@ -256,11 +262,16 @@ TEST_F(TRTEngineOpTestBase, DynamicEngines) { + EXPECT_EQ(1, cache->count({TensorShape({3, 2})})); + EXPECT_EQ(1, cache->count({TensorShape({10, 10})})); + } ++#endif // !IS_TRT_VERSION_GE(10, 0, 0, 0) + + TEST_F(TRTEngineOpTestBase, AllowBuildAtRuntime) { + TRTEngineOpTestBase::AddSimpleTrtOp(DT_FLOAT, /*max_cached_engines_count=*/1, + PartialTensorShape({-1, -1}), ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + /*use_implicit_batch=*/true, ++#else ++ /*use_implicit_batch=*/false, ++#endif + /*allow_build_at_runtime=*/false); + + // Execute the op +diff --git a/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_resource_ops.cc b/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_resource_ops.cc +index 234330e328a..23ca2fc5b53 100644 +--- a/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_resource_ops.cc ++++ b/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_resource_ops.cc +@@ -147,7 +147,12 @@ class InitializeTRTResource : public OpKernel { + TrtUniquePtrType engine( + infer->deserializeCudaEngine( + engine_instance.serialized_engine().c_str(), +- engine_instance.serialized_engine().size(), nullptr)); ++ engine_instance.serialized_engine().size() ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ , ++ nullptr ++#endif ++ )); + auto raw_engine = engine.get(); + std::vector ctx_vec; + if (num_loaded_engine == 0) { +@@ -163,9 +168,10 @@ class InitializeTRTResource : public OpKernel { + // we have only a single execution context. 
+ ctx_vec.push_back(ExecutionContext::Create(raw_engine)); + } +- resource->cache_.emplace(engine_input_shapes, +- std::make_unique( +- std::move(engine), std::move(ctx_vec))); ++ resource->cache_.emplace( ++ engine_input_shapes, ++ std::make_unique(std::move(infer), std::move(engine), ++ std::move(ctx_vec))); + ++num_loaded_engine; + } while (1); + VLOG(1) << "Loaded " << num_loaded_engine << " TRT engines for op " +diff --git a/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_resource_ops_test.cc b/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_resource_ops_test.cc +index 987b01eebcb..28debd542fd 100644 +--- a/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_resource_ops_test.cc ++++ b/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_resource_ops_test.cc +@@ -119,7 +119,11 @@ class TRTEngineResourceOpsTest + return layer->getOutput(0); + } + +- TrtUniquePtrType CreateTRTEngine() { ++ std::pair, ++ TrtUniquePtrType> ++ CreateTRTEngine() { ++ TrtUniquePtrType runtime( ++ nvinfer1::createInferRuntime(logger_)); + TrtUniquePtrType builder( + nvinfer1::createInferBuilder(logger_)); + TrtUniquePtrType network; +@@ -155,8 +159,13 @@ class TRTEngineResourceOpsTest + // Build the engine + TrtUniquePtrType builder_config( + builder->createBuilderConfig()); ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + builder_config->setMaxWorkspaceSize(1 << 10); + builder->setMaxBatchSize(1); ++#else // IS_TRT_VERSION_GE(10, 0, 0, 0) ++ builder_config->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kWORKSPACE, ++ 1 << 10); ++#endif // IS_TRT_VERSION_GE(10, 0, 0, 0) + + if (this->param_.dynamic_shape) { + TrtShapeOptimizationProfile profile; +@@ -205,11 +214,18 @@ class TRTEngineResourceOpsTest + network.get())); + } + VLOG(2) << "ConfigureBuilder Finished"; ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + TrtUniquePtrType engine( + builder->buildEngineWithConfig(*network, *builder_config)); ++#else ++ TrtUniquePtrType serialized( ++ builder->buildSerializedNetwork(*network, *builder_config)); ++ TrtUniquePtrType engine( ++ runtime->deserializeCudaEngine(serialized->data(), serialized->size())); ++#endif + VLOG(2) << "Engine constructed"; + EXPECT_NE(nullptr, engine); +- return engine; ++ return {std::move(engine), std::move(runtime)}; + } + Logger& logger_ = *Logger::GetLogger(); + TestParam param_; +@@ -278,7 +294,11 @@ TEST_P(TRTEngineResourceOpsTest, Basic) { + EXPECT_EQ(0, resource->cache_.size()); + + // Create an engine and add it to the cache of the resource. +- TrtUniquePtrType engine = CreateTRTEngine(); ++ auto engine_and_runtime = CreateTRTEngine(); ++ TrtUniquePtrType engine = ++ std::move(engine_and_runtime.first); ++ TrtUniquePtrType runtime = ++ std::move(engine_and_runtime.second); + ExecutionContext context = ExecutionContext::Create(engine.get()); + + std::vector engine_input_shape(1); +@@ -288,7 +308,8 @@ TEST_P(TRTEngineResourceOpsTest, Basic) { + } + resource->cache_.emplace( + engine_input_shape, +- std::make_unique(std::move(engine), std::move(context))); ++ std::make_unique(std::move(runtime), std::move(engine), ++ std::move(context))); + // Check that the resource has multiple references before it is unregistered + // from the resource manager. 
+ EXPECT_FALSE(resource->RefCountIsOne()); +diff --git a/tensorflow/compiler/tf2tensorrt/stub/nvinfer_plugin_stub.cc b/tensorflow/compiler/tf2tensorrt/stub/nvinfer_plugin_stub.cc +index 8fc3c6e478f..c083e9f00a7 100644 +--- a/tensorflow/compiler/tf2tensorrt/stub/nvinfer_plugin_stub.cc ++++ b/tensorflow/compiler/tf2tensorrt/stub/nvinfer_plugin_stub.cc +@@ -52,7 +52,7 @@ void LogFatalSymbolNotFound(const char* symbol_name) { + + #if NV_TENSORRT_MAJOR < 7 + #error TensorRT version earlier than 7 is not supported. +-#elif NV_TENSORRT_MAJOR == 7 || NV_TENSORRT_MAJOR == 8 ++#elif NV_TENSORRT_MAJOR == 7 || NV_TENSORRT_MAJOR == 8 || NV_TENSORRT_MAJOR == 10 + #include "tensorflow/compiler/tf2tensorrt/stub/NvInferPlugin_7_0.inc" + #else + #error This version of TensorRT is not supported. +diff --git a/tensorflow/compiler/tf2tensorrt/stub/nvinfer_stub.cc b/tensorflow/compiler/tf2tensorrt/stub/nvinfer_stub.cc +index 1a4964032ba..a80e338b13e 100644 +--- a/tensorflow/compiler/tf2tensorrt/stub/nvinfer_stub.cc ++++ b/tensorflow/compiler/tf2tensorrt/stub/nvinfer_stub.cc +@@ -56,6 +56,8 @@ void LogFatalSymbolNotFound(const char* symbol_name) { + #include "tensorflow/compiler/tf2tensorrt/stub/NvInfer_7_0.inc" + #elif NV_TENSORRT_MAJOR == 8 + #include "tensorflow/compiler/tf2tensorrt/stub/NvInfer_8_0.inc" ++#elif NV_TENSORRT_MAJOR == 10 ++#include "tensorflow/compiler/tf2tensorrt/stub/NvInfer_10_0.inc" + #else + #error This version of TensorRT is not supported. + #endif +diff --git a/tensorflow/compiler/tf2tensorrt/tensorrt_test.cc b/tensorflow/compiler/tf2tensorrt/tensorrt_test.cc +index 28da5e81da7..732e758bc57 100644 +--- a/tensorflow/compiler/tf2tensorrt/tensorrt_test.cc ++++ b/tensorflow/compiler/tf2tensorrt/tensorrt_test.cc +@@ -157,27 +157,50 @@ TrtUniquePtrType CreateSerializedEngine() { + #endif + + // Build the engine. ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + builder->setMaxBatchSize(1); ++#endif + TrtUniquePtrType builderConfig( + builder->createBuilderConfig()); ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + builderConfig->setMaxWorkspaceSize(1 << 20); + TrtUniquePtrType engine( + builder->buildEngineWithConfig(*network, *builderConfig)); + EXPECT_NE(engine, nullptr); + // Serialize the engine to create a model, then close everything. + TrtUniquePtrType model(engine->serialize()); ++#else // IS_TRT_VERSION_GE(10, 0, 0, 0) ++ builderConfig->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kWORKSPACE, ++ 1 << 20); ++ TrtUniquePtrType model( ++ builder->buildSerializedNetwork(*network, *builderConfig)); ++#endif // IS_TRT_VERSION_GE(10, 0, 0, 0) + return model; + } + + template +-unsigned GetBindingSizeBytes(const nvinfer1::ICudaEngine& engine, int index, ++unsigned GetBindingSizeBytes(const nvinfer1::ICudaEngine& engine, ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ int index, ++#else ++ const char* name, ++#endif + unsigned batch_size) { + unsigned vol = batch_size; ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + auto dims = engine.getBindingDimensions(index); +- int vecDim = engine.getBindingVectorizedDim(index); ++ int vecDim = engine.getBindingVectorizedDim(name); ++#else ++ auto dims = engine.getTensorShape(name); ++ int vecDim = engine.getTensorVectorizedDim(name); ++#endif + if (-1 != vecDim) // i.e., 0 != lgScalarsPerVector + { ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + int scalarsPerVec = engine.getBindingComponentsPerElement(index); ++#else ++ int scalarsPerVec = engine.getTensorComponentsPerElement(name); ++#endif + // Divide round up. 
+ dims.d[vecDim] = (dims.d[vecDim] + scalarsPerVec - 1 / scalarsPerVec); + vol *= scalarsPerVec; +@@ -192,17 +215,32 @@ void Execute(nvinfer1::IExecutionContext* context, const float* input1, + const nvinfer1::ICudaEngine& engine = context->getEngine(); + + // We have two bindings: input and output. +- ASSERT_EQ(engine.getNbBindings(), 4); ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ int num_bindings = engine.getNbBindings(); + const int input_index1 = engine.getBindingIndex(kInputTensor1); + const int input_index2 = engine.getBindingIndex(kInputTensor2); + const int output_index1 = engine.getBindingIndex(kOutputTensor1); + const int output_index2 = engine.getBindingIndex(kOutputTensor2); ++#else ++ int num_bindings = engine.getNbIOTensors(); ++#endif ++ ASSERT_EQ(num_bindings, 4); + + // Create GPU buffers and a stream +- std::vector buffers(engine.getNbBindings()); ++ std::vector buffers(num_bindings); + for (int i = 0; i < buffers.size(); i++) { +- ASSERT_EQ( +- 0, cudaMalloc(&buffers[i], GetBindingSizeBytes(engine, i, 1))); ++ ASSERT_EQ(0, cudaMalloc(&buffers[i], GetBindingSizeBytes(engine, ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ i, ++#else ++ engine ++ .getIOTensorName( ++ i), ++#endif ++ 1))); ++#if IS_TRT_VERSION_GE(10, 0, 0, 0) ++ context->setTensorAddress(engine.getIOTensorName(i), buffers[i]); ++#endif + } + + cudaStream_t stream; +@@ -213,17 +251,26 @@ void Execute(nvinfer1::IExecutionContext* context, const float* input1, + // Note that since the host buffer was not created as pinned memory, these + // async copies are turned into sync copies. So the following synchronization + // could be removed. +- ASSERT_EQ(0, cudaMemcpyAsync(buffers[input_index1], input1, sizeof(float), ++ ASSERT_EQ(0, cudaMemcpyAsync(buffers[0], input1, sizeof(float), + cudaMemcpyHostToDevice, stream)); +- ASSERT_EQ(0, cudaMemcpyAsync(buffers[input_index2], input2, sizeof(float), ++ ASSERT_EQ(0, cudaMemcpyAsync(buffers[1], input2, sizeof(float), + cudaMemcpyHostToDevice, stream)); ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + context->enqueueV2(buffers.data(), stream, nullptr); +- ASSERT_EQ(0, cudaMemcpyAsync(output1, buffers[output_index1], sizeof(float), ++#else ++ context->enqueueV3(stream); ++#endif ++ ASSERT_EQ(0, cudaMemcpyAsync(output1, buffers[2], sizeof(float), ++ cudaMemcpyDeviceToHost, stream)); ++ ASSERT_EQ(0, cudaMemcpyAsync(output2, buffers[3], ++ GetBindingSizeBytes(engine, ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ output_index2, ++#else ++ kOutputTensor2, ++#endif ++ 1), + cudaMemcpyDeviceToHost, stream)); +- ASSERT_EQ( +- 0, cudaMemcpyAsync(output2, buffers[output_index2], +- GetBindingSizeBytes(engine, output_index2, 1), +- cudaMemcpyDeviceToHost, stream)); + cudaStreamSynchronize(stream); + + // Release the stream and the buffers +@@ -253,8 +300,13 @@ TEST(TensorrtTest, BasicFunctions) { + Logger& logger = *Logger::GetLogger(); + TrtUniquePtrType runtime( + nvinfer1::createInferRuntime(logger)); +- TrtUniquePtrType engine( +- runtime->deserializeCudaEngine(model->data(), model->size(), nullptr)); ++ TrtUniquePtrType engine(runtime->deserializeCudaEngine( ++ model->data(), model->size() ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ , ++ nullptr ++#endif ++ )); + TrtUniquePtrType context( + engine->createExecutionContext()); + +@@ -262,11 +314,25 @@ TEST(TensorrtTest, BasicFunctions) { + float input1 = 1234; + float input2 = 567; + +- std::vector output1( +- GetBindingSizeBytes(*engine, 2, 1) / sizeof(float), 0.0f); ++ std::vector output1(GetBindingSizeBytes(*engine, ++#if !IS_TRT_VERSION_GE(10, 0, 0, 
0) ++ 2, ++#else ++ kOutputTensor1, ++#endif ++ 1) / ++ sizeof(float), ++ 0.0f); + +- std::vector output2( +- GetBindingSizeBytes(*engine, 3, 1) / sizeof(int32), 0.0f); ++ std::vector output2(GetBindingSizeBytes(*engine, ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ 3, ++#else ++ kOutputTensor2, ++#endif ++ 1) / ++ sizeof(int32), ++ 0.0f); + + ASSERT_EQ(output1.size(), 1); + ASSERT_EQ(output2.size(), 1); +diff --git a/tensorflow/compiler/tf2tensorrt/trt_convert_api_test.cc b/tensorflow/compiler/tf2tensorrt/trt_convert_api_test.cc +index 74415d85686..10264da8e98 100644 +--- a/tensorflow/compiler/tf2tensorrt/trt_convert_api_test.cc ++++ b/tensorflow/compiler/tf2tensorrt/trt_convert_api_test.cc +@@ -297,6 +297,7 @@ INSTANTIATE_TEST_CASE_P( + true // convert_to_static_engine + }, + {{1, 2}, {4, 2}}}, ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + // Implicit batch mode test with conver_to_static_engine=true. + TestParam{TfTrtConversionParams{ + 1 << 20, // max workspace size +@@ -310,6 +311,7 @@ INSTANTIATE_TEST_CASE_P( + true // convert_to_static_engine + }, + {{1, 2}}}, ++#endif // !IS_TRT_VERSION_GE(10, 0, 0, 0) + // Dynamic shape mode test convert_to_static_engine=false: we cannot + // save the engines, therefore we do not generate profiles. A single + // engine will be built during runtime, with profile that matches +@@ -326,7 +328,9 @@ INSTANTIATE_TEST_CASE_P( + true, // allow_build_at_runtime + false // convert_to_static_engine + }, +- {{1, 2}, {4, 2}}}, ++ {{1, 2}, {4, 2}}} ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ , + // Implicit batch mode test with convert_to_static_engine=false. + // We will have two engines in the cache to handle the two shapes. + TestParam{TfTrtConversionParams{ +@@ -340,7 +344,9 @@ INSTANTIATE_TEST_CASE_P( + true, // allow_build_at_runtime + false // convert_to_static_engine + }, +- {{1, 2}, {4, 2}}}), ++ {{1, 2}, {4, 2}}} ++#endif // !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ ), + ::testing::Values(false, true), // use_variables + ::testing::Values(false, true))); // use_function + +diff --git a/tensorflow/compiler/tf2tensorrt/utils/trt_allocator.cc b/tensorflow/compiler/tf2tensorrt/utils/trt_allocator.cc +index 832154940f3..ad3da4fbe3e 100644 +--- a/tensorflow/compiler/tf2tensorrt/utils/trt_allocator.cc ++++ b/tensorflow/compiler/tf2tensorrt/utils/trt_allocator.cc +@@ -95,7 +95,11 @@ TRTDeviceAllocator::TRTDeviceAllocator(Allocator* allocator) + VLOG(1) << "Using " << allocator->Name() << " allocator from TensorFlow"; + } + ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + void TRTDeviceAllocator::free(void* memory) noexcept { ++#else ++bool TRTDeviceAllocator::deallocate(void* memory) noexcept { ++#endif + mutex_lock lock(mu_); + VLOG(2) << "Deallocating @ " << memory; + // allocated memory adjusted for alignment, restore the original pointer +@@ -107,6 +111,9 @@ void TRTDeviceAllocator::free(void* memory) noexcept { + } + allocator_->DeallocateRaw(memory); + } ++#if IS_TRT_VERSION_GE(10, 0, 0, 0) ++ return true; ++#endif + } + + } // namespace tensorrt +diff --git a/tensorflow/compiler/tf2tensorrt/utils/trt_allocator.h b/tensorflow/compiler/tf2tensorrt/utils/trt_allocator.h +index 2812aa06457..3beaf368e68 100644 +--- a/tensorflow/compiler/tf2tensorrt/utils/trt_allocator.h ++++ b/tensorflow/compiler/tf2tensorrt/utils/trt_allocator.h +@@ -18,6 +18,7 @@ limitations under the License. 
+ + #include + ++#include "tensorflow/compiler/tf2tensorrt/common/utils.h" + #include "tensorflow/core/framework/allocator.h" + #include "tensorflow/core/platform/mutex.h" + +@@ -56,7 +57,11 @@ class TRTDeviceAllocator : public TRTBaseAllocator { + } + void* allocate(uint64_t size, uint64_t alignment, + uint32_t flags) noexcept override; ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + void free(void* memory) noexcept override; ++#else ++ bool deallocate(void* memory) noexcept override; ++#endif + + private: + mutex mu_; +diff --git a/tensorflow/compiler/tf2tensorrt/utils/trt_engine_utils.cc b/tensorflow/compiler/tf2tensorrt/utils/trt_engine_utils.cc +index 798ebd8bd0c..38ea076fe5b 100755 +--- a/tensorflow/compiler/tf2tensorrt/utils/trt_engine_utils.cc ++++ b/tensorflow/compiler/tf2tensorrt/utils/trt_engine_utils.cc +@@ -38,9 +38,16 @@ using absl::StrCat; + + ExecutionContext ExecutionContext::Create(nvinfer1::ICudaEngine* cuda_engine) { + bool has_int32_output = false; ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + for (int i = 0; i < cuda_engine->getNbBindings(); i++) { + if (!cuda_engine->bindingIsInput(i) && + cuda_engine->getBindingDataType(i) == nvinfer1::DataType::kINT32) { ++#else ++ for (int i = 0; i < cuda_engine->getNbIOTensors(); i++) { ++ const char* tensor_name = cuda_engine->getIOTensorName(i); ++ if (cuda_engine->getTensorIOMode(tensor_name) == nvinfer1::TensorIOMode::kOUTPUT && ++ cuda_engine->getTensorDataType(tensor_name) == nvinfer1::DataType::kINT32) { ++#endif + has_int32_output = true; + break; + } +@@ -59,14 +66,24 @@ ExecutionContext ExecutionContext::Create(nvinfer1::ICudaEngine* cuda_engine) { + + Status GetTrtBindingShape(const nvinfer1::ICudaEngine* cuda_engine, + const nvinfer1::IExecutionContext* execution_context, +- int binding_index, bool use_implicit_batch, +- int batch_size, TensorShape& shape) { ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ int binding_index, ++#else ++ const char* tensor_name, ++#endif ++ bool use_implicit_batch, int batch_size, ++ TensorShape& shape) { + tensorflow::profiler::TraceMe activity( + "getBindingDimensions", tensorflow::profiler::TraceMeLevel::kInfo); + nvinfer1::Dims dims = + use_implicit_batch ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + ? cuda_engine->getBindingDimensions(binding_index) + : execution_context->getBindingDimensions(binding_index); ++#else ++ ? 
cuda_engine->getTensorShape(tensor_name) ++ : execution_context->getTensorShape(tensor_name); ++#endif + if (!use_implicit_batch) { + if (dims.nbDims == -1) { + return errors::Internal( +@@ -80,39 +97,83 @@ Status GetTrtBindingShape(const nvinfer1::ICudaEngine* cuda_engine, + return OkStatus(); + } + ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + Status SetupBindings(nvinfer1::ICudaEngine* cuda_engine, const Tensor& tensor, + std::vector& buffers, int binding_index) { ++#else ++Status SetupBindings(nvinfer1::ICudaEngine* cuda_engine, ++ nvinfer1::IExecutionContext* execution_context, ++ const Tensor& tensor, const char* tensor_name) { ++#endif + tensorflow::profiler::TraceMe activity( + "SetBindingPointers", tensorflow::profiler::TraceMeLevel::kInfo); ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + const auto dtype = cuda_engine->getBindingDataType(binding_index); ++#else ++ const auto dtype = cuda_engine->getTensorDataType(tensor_name); ++#endif + VLOG(2) << "<<<<<<<<< SetupBindings with dtype = " << (int)dtype; + switch (dtype) { + case nvinfer1::DataType::kFLOAT: ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + buffers[binding_index] = const_cast(tensor.flat().data()); ++#else ++ execution_context->setTensorAddress( ++ tensor_name, const_cast(tensor.flat().data())); ++#endif + break; + case nvinfer1::DataType::kHALF: ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + buffers[binding_index] = + const_cast(tensor.flat().data()); ++#else ++ execution_context->setTensorAddress( ++ tensor_name, ++ const_cast(tensor.flat().data())); ++#endif + break; + case nvinfer1::DataType::kINT8: + return errors::Internal("INT8 inputs are not supported yet!"); + case nvinfer1::DataType::kINT32: ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + buffers[binding_index] = const_cast(tensor.flat().data()); ++#else ++ execution_context->setTensorAddress( ++ tensor_name, const_cast(tensor.flat().data())); ++#endif + break; + #if IS_TRT_VERSION_GE(8, 2, 0, 0) + case nvinfer1::DataType::kBOOL: ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + buffers[binding_index] = const_cast(tensor.flat().data()); ++#else ++ execution_context->setTensorAddress( ++ tensor_name, const_cast(tensor.flat().data())); ++#endif + break; + #endif + #if IS_TRT_VERSION_GE(8, 5, 0, 0) + case nvinfer1::DataType::kUINT8: ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + buffers[binding_index] = const_cast(tensor.flat().data()); ++#else // IS_TRT_VERSION_GE(10, 0, 0, 0) ++ execution_context->setTensorAddress( ++ tensor_name, const_cast(tensor.flat().data())); ++#endif // !IS_TRT_VERSION_GE(10, 0, 0, 0) + break; + #endif + #if IS_TRT_VERSION_GE(8, 6, 0, 0) + case nvinfer1::DataType::kFP8: + return errors::Internal("FP8 inputs are not supported yet!"); + #endif ++#if IS_TRT_VERSION_GE(10, 0, 0, 0) ++ case nvinfer1::DataType::kBF16: ++ return errors::Internal("BF16 inputs are not supported yet!"); ++ case nvinfer1::DataType::kINT64: ++ return errors::Internal("INT64 inputs are not supported yet!"); ++ case nvinfer1::DataType::kINT4: ++ return errors::Internal("INT4 inputs are not supported yet!"); ++#endif // IS_TRT_VERSION_GE(10, 0, 0, 0) + default: + return errors::Internal("Unknown TRT data type: ", + static_cast(dtype)); +@@ -124,8 +185,10 @@ Status SetupBindings(nvinfer1::ICudaEngine* cuda_engine, const Tensor& tensor, + Status SetTrtEngineInputs(nvinfer1::ICudaEngine* cuda_engine, + nvinfer1::IExecutionContext* execution_context, + const int trt_profile_idx, +- std::vector& buffers, bool use_implicit_batch, +- int num_batch, ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ std::vector& buffers, ++#endif ++ bool 
use_implicit_batch, int num_batch, + const TrtShapeOptimizationProfile& profiles, + OpKernelContext* ctx, const DataVec* input_vec) { + tensorflow::profiler::TraceMe activity( +@@ -143,6 +206,7 @@ Status SetTrtEngineInputs(nvinfer1::ICudaEngine* cuda_engine, + + const string input_name = + ctx ? StrCat(IONamePrefixes::kInputPHName, i) : input_vec->at(i).name; ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + int binding_index; + Status status = GetTrtBindingIndex(input_name.c_str(), trt_profile_idx, + cuda_engine, &binding_index); +@@ -155,6 +219,7 @@ Status SetTrtEngineInputs(nvinfer1::ICudaEngine* cuda_engine, + VLOG(2) << "Skipping pruned input " << input_name; + continue; + } ++#endif // !IS_TRT_VERSION_GE(10, 0, 0, 0) + + if (use_implicit_batch && ctx) { + // Ensure all inputs have the same batch size +@@ -168,16 +233,28 @@ Status SetTrtEngineInputs(nvinfer1::ICudaEngine* cuda_engine, + // Set known input dimensions. This is necessary because TRT network + // could be made with dynamic dimensions. + if (!use_implicit_batch) { +- TF_RETURN_IF_ERROR(profiles.SetInputShapeBinding( +- i, binding_index, cuda_engine, execution_context)); ++ TF_RETURN_IF_ERROR(profiles.SetInputShapeBinding(i, ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ binding_index, ++#else ++ input_name.c_str(), ++#endif ++ cuda_engine, ++ execution_context)); + +- if (cuda_engine->isExecutionBinding(binding_index)) { ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ if (cuda_engine->isExecutionBinding(binding_index)) ++#else ++ if (true) ++#endif ++ { + tensorflow::profiler::TraceMe activity( + "SetTrtEngineInputs::setBindingDimensions", + tensorflow::profiler::TraceMeLevel::kInfo); + auto adap = DimsAdapter::Create(input_shape); + TRT_ENSURE_OK(adap); + nvinfer1::Dims trt_dims = adap->AsTrtDims(); ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + if (execution_context->getBindingDimensions(binding_index) != + trt_dims) { + VLOG(2) << "Setting binding dimensions for idx " << binding_index; +@@ -190,11 +267,30 @@ Status SetTrtEngineInputs(nvinfer1::ICudaEngine* cuda_engine, + "Binding dimension does not fit selected profile."); + } + } ++#else // IS_TRT_VERSION_GE(10, 0, 0, 0) ++ if (execution_context->getTensorShape(input_name.c_str()) != trt_dims) { ++ VLOG(2) << "Setting binding dimensions for input " << input_name; ++ bool ret = ++ execution_context->setInputShape(input_name.c_str(), trt_dims); ++ if (!ret) { ++ VLOG(2) << "Error setting engine input " << input_name << " " ++ << DebugString(trt_dims); ++ return errors::Internal( ++ "Binding dimension does not fit selected profile."); ++ } ++ } ++#endif // IS_TRT_VERSION_GE(10, 0, 0, 0) + } + } + // Setup input bindings. 
+ TF_RETURN_IF_ERROR( +- SetupBindings(cuda_engine, input_tensor, buffers, binding_index)); ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ SetupBindings(cuda_engine, input_tensor, buffers, binding_index) ++#else ++ SetupBindings(cuda_engine, execution_context, input_tensor, ++ input_name.c_str()) ++#endif ++ ); + } + + // Ensure all network dynamic dimensions (if any) are set in execution +@@ -212,7 +308,10 @@ Status SetTrtEngineInputs(nvinfer1::ICudaEngine* cuda_engine, + + Status SetTrtEngineOutputs(nvinfer1::ICudaEngine* cuda_engine, + nvinfer1::IExecutionContext* execution_context, +- int trt_profile_idx, std::vector& buffers, ++ int trt_profile_idx, ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ std::vector& buffers, ++#endif + bool use_implicit_batch, int batch_size, + OpKernelContext* ctx, DataVec* outputs) { + tensorflow::profiler::TraceMe activity( +@@ -222,15 +321,22 @@ Status SetTrtEngineOutputs(nvinfer1::ICudaEngine* cuda_engine, + for (int i = 0; i < n_outputs; i++) { + const string output_name = + ctx ? StrCat(IONamePrefixes::kOutputPHName, i) : outputs->at(i).name; ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + int binding_index; + TF_RETURN_IF_ERROR(GetTrtBindingIndex(output_name.c_str(), trt_profile_idx, + cuda_engine, &binding_index)); ++#endif + + // Get TRT output shapes for allocating output memory. + TensorShape output_shape; + TF_RETURN_IF_ERROR(GetTrtBindingShape(cuda_engine, execution_context, +- binding_index, use_implicit_batch, +- batch_size, output_shape)); ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ binding_index, ++#else ++ output_name.c_str(), ++#endif ++ use_implicit_batch, batch_size, ++ output_shape)); + + // Allocate output tensor of TRTEngineOp. + Tensor* output_tensor = nullptr; +@@ -255,23 +361,40 @@ Status SetTrtEngineOutputs(nvinfer1::ICudaEngine* cuda_engine, + + // Set up output bindings. 
+ TF_RETURN_IF_ERROR( ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + SetupBindings(cuda_engine, *output_tensor, buffers, binding_index)); ++#else ++ SetupBindings(cuda_engine, execution_context, *output_tensor, ++ output_name.c_str())); ++#endif + } + return OkStatus(); + } + + Status TrtEnqueue(nvinfer1::IExecutionContext* execution_context, +- std::vector& buffers, cudaStream_t stream, ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ std::vector& buffers, ++#endif ++ cudaStream_t stream, + bool use_implicit_batch, int batch_size) { + tensorflow::profiler::TraceMe activity( + "TrtEnqueue", tensorflow::profiler::TraceMeLevel::kInfo); + bool ret = false; + if (use_implicit_batch) { ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + ret = execution_context->enqueue(batch_size, &buffers[0], stream, nullptr); + VLOG(1) << "Called IExecutionContext::enqueue"; ++#else ++ return errors::Internal("Implicit batch is not supported with TensorRT >=10"); ++#endif + } else { ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + ret = execution_context->enqueueV2(&buffers[0], stream, nullptr); + VLOG(1) << "Called IExecutionContext::enqueueV2"; ++#else ++ ret = execution_context->enqueueV3(stream); ++ VLOG(1) << "Called IExecutionContext::enqueueV3"; ++#endif + } + if (!ret) { + return errors::Internal("Failed to enqueue batch for TRT engine"); +diff --git a/tensorflow/compiler/tf2tensorrt/utils/trt_engine_utils.h b/tensorflow/compiler/tf2tensorrt/utils/trt_engine_utils.h +index b0935afb5b2..0b0293f02fe 100644 +--- a/tensorflow/compiler/tf2tensorrt/utils/trt_engine_utils.h ++++ b/tensorflow/compiler/tf2tensorrt/utils/trt_engine_utils.h +@@ -42,7 +42,10 @@ ExecutionContext CreateExecutionContext(nvinfer1::ICudaEngine* cuda_engine); + Status SetTrtEngineInputs(nvinfer1::ICudaEngine* cuda_engine, + nvinfer1::IExecutionContext* execution_context, + const int trt_profile_idx, +- std::vector& buffers, bool use_implicit_batch, ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ std::vector& buffers, ++#endif ++ bool use_implicit_batch, + int num_batch, + const TrtShapeOptimizationProfile& profiles, + OpKernelContext* ctx = nullptr, +@@ -63,7 +66,10 @@ Status GetTrtBindingShape(const nvinfer1::ICudaEngine* cuda_engine, + // the Tensors in outputs are already allocated. + Status SetTrtEngineOutputs(nvinfer1::ICudaEngine* cuda_engine, + nvinfer1::IExecutionContext* execution_context, +- int trt_profile_idx, std::vector& buffers, ++ int trt_profile_idx, ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ std::vector& buffers, ++#endif + bool use_implicit_batch, int batch_size = 0, + OpKernelContext* ctx = nullptr, + DataVec* outputs = nullptr); +@@ -71,7 +77,10 @@ Status SetTrtEngineOutputs(nvinfer1::ICudaEngine* cuda_engine, + // Enqueues TensorRT inference job. The batch_size argument is only relevant in + // implicit batch mode. + Status TrtEnqueue(nvinfer1::IExecutionContext* execution_context, +- std::vector& buffers, cudaStream_t stream, ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ std::vector& buffers, ++#endif ++ cudaStream_t stream, + bool use_implicit_batch, int batch_size = 1); + + } // namespace tensorrt +diff --git a/tensorflow/compiler/tf2tensorrt/utils/trt_lru_cache.h b/tensorflow/compiler/tf2tensorrt/utils/trt_lru_cache.h +index 31c3b9c9a90..b4269d24070 100644 +--- a/tensorflow/compiler/tf2tensorrt/utils/trt_lru_cache.h ++++ b/tensorflow/compiler/tf2tensorrt/utils/trt_lru_cache.h +@@ -120,16 +120,19 @@ class LRUCache { + + struct EngineContext { + EngineContext() {} // Creates an empty context. 
+- EngineContext(TrtUniquePtrType&& cuda_engine, ++ EngineContext(TrtUniquePtrType runtime, ++ TrtUniquePtrType&& cuda_engine, + ExecutionContext&& execution_context) +- : cuda_engine_(std::move(cuda_engine)) { ++ : runtime_(std::move(runtime)), cuda_engine_(std::move(cuda_engine)) { + execution_contexts.push_back(std::move(execution_context)); + device_memory_size_ = + cuda_engine_ ? cuda_engine_->getDeviceMemorySize() : 0; + } +- EngineContext(TrtUniquePtrType&& cuda_engine, ++ EngineContext(TrtUniquePtrType runtime, ++ TrtUniquePtrType&& cuda_engine, + std::vector&& execution_contexts) +- : cuda_engine_(std::move(cuda_engine)), ++ : runtime_(std::move(runtime)), ++ cuda_engine_(std::move(cuda_engine)), + execution_contexts(std::move(execution_contexts)) { + device_memory_size_ = + cuda_engine_ ? cuda_engine_->getDeviceMemorySize() : 0; +@@ -137,6 +140,8 @@ struct EngineContext { + + mutex mu; + ++ nvinfer1::IRuntime* GetRuntime() { return runtime_.get(); } ++ + nvinfer1::ICudaEngine* GetCudaEngine() { return cuda_engine_.get(); } + + Status GetExecutionContext(int idx, nvinfer1::IExecutionContext** exec_ctx, +@@ -160,6 +165,8 @@ struct EngineContext { + size_t GetDeviceMemorySize() { return device_memory_size_; } + + private: ++ // Note: Must out-live the engine object. ++ TrtUniquePtrType runtime_; + // Note: declaration has to come before execution_contexts, to ensure proper + // order of destruction. + TrtUniquePtrType cuda_engine_; +diff --git a/tensorflow/compiler/tf2tensorrt/utils/trt_shape_optimization_profiles.cc b/tensorflow/compiler/tf2tensorrt/utils/trt_shape_optimization_profiles.cc +index 57b222826b1..73b38c7032d 100644 +--- a/tensorflow/compiler/tf2tensorrt/utils/trt_shape_optimization_profiles.cc ++++ b/tensorflow/compiler/tf2tensorrt/utils/trt_shape_optimization_profiles.cc +@@ -431,12 +431,16 @@ void TrtShapeOptimizationProfile::SetShapeTensorMask( + const nvinfer1::ICudaEngine* engine, int n_inputs) { + is_shape_tensor_.resize(n_inputs, false); + for (int i = 0; i < n_inputs; i++) { ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + int binding_index; + Status status = GetTrtBindingIndex(i, 0, engine, &binding_index); + if (!status.ok()) { + continue; + } + is_shape_tensor_[i] = engine->isShapeBinding(binding_index); ++#else // IS_TRT_VERSION_GE(10, 0, 0, 0) ++ is_shape_tensor_[i] = engine->isShapeInferenceIO(GetTrtInputName(i).c_str()); ++#endif // IS_TRT_VERSION_GE(10, 0, 0, 0) + if (is_shape_tensor_[i]) { + VLOG(2) << "Found shape tensor at " << i; + } +@@ -516,7 +520,11 @@ Status TrtShapeOptimizationProfile::CreateExecutionContexts( + // set optimizationprofiles. + // - The 0th profile is set implicitly for the first execution context + // therefore we do not need to set. 
++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + if (!context->setOptimizationProfile(i)) { ++#else ++ if (!context->setOptimizationProfileAsync(i, /*stream=*/0)) { ++#endif + return errors::Internal("Could not set TRT optimization profile."); + } + } +@@ -528,24 +536,47 @@ Status TrtShapeOptimizationProfile::CreateExecutionContexts( + } + + Status TrtShapeOptimizationProfile::SetInputShapeBinding( +- int input_index, int binding_index, nvinfer1::ICudaEngine* cuda_engine, ++ int input_index, ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ int binding_index, ++#else ++ const char* tensor_name, ++#endif ++ nvinfer1::ICudaEngine* cuda_engine, + nvinfer1::IExecutionContext* exec_context) const { + tensorflow::profiler::TraceMe activity( + "TrtShapeOptimizationProfile::SetInputShapeBinding", + tensorflow::profiler::TraceMeLevel::kInfo); ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + if (cuda_engine->isShapeBinding(binding_index)) { ++#else ++ if (cuda_engine->isShapeInferenceIO(tensor_name)) { ++#endif + // Input shape binding data has to be in host memory. That is the reason + // we can't use input_tensor.flat().data(). which contains the same + // values in device memory. Instead, we use data that was copied to host + // by CollectShapeValues. ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + VLOG(2) << "Setting input shape binding for idx " << binding_index ++#else ++ VLOG(2) << "Setting input shape binding for IO tensor " << tensor_name ++#endif + << ", with values " + << DebugString(actual_shape_values_.at(input_index)); ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + bool ret = exec_context->setInputShapeBinding( + binding_index, actual_shape_values_.at(input_index).d); ++#else ++ bool ret = exec_context->setInputTensorAddress( ++ tensor_name, actual_shape_values_.at(input_index).d); ++#endif + if (!ret) { +- return errors::Internal("Could not set input shape binding for idx ", +- binding_index); ++ return errors::Internal( ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ "Could not set input shape binding for idx ", binding_index); ++#else ++ "Could not set input shape binding for tensor ", tensor_name); ++#endif + } + } + return OkStatus(); +@@ -553,16 +584,37 @@ Status TrtShapeOptimizationProfile::SetInputShapeBinding( + + // If binding_idx is a shape tensor, then returns the associated min/max/opt + // shape values from prof_idx. +-nvinfer1::Dims GetDimsFromShapeVal(int prof_idx, int binding_idx, ++nvinfer1::Dims GetDimsFromShapeVal(int prof_idx, ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ int binding_idx, ++#else ++ const char* tensor_name, ++#endif + nvinfer1::OptProfileSelector selector, + const nvinfer1::ICudaEngine* engine) { ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + if (engine->isShapeBinding(binding_idx)) { ++#else ++ if (engine->isShapeInferenceIO(tensor_name)) { ++#endif + const int32* shape_val_ptr = ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + engine->getProfileShapeValues(binding_idx, prof_idx, selector); ++#else ++ engine->getProfileTensorValues(tensor_name, prof_idx, selector); ++#endif + if (shape_val_ptr) { + VLOG(2) << "Found shape value in prof " << prof_idx << ", binding " ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + << binding_idx; ++#else ++ << tensor_name; ++#endif ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + nvinfer1::Dims dims = engine->getBindingDimensions(binding_idx); ++#else ++ nvinfer1::Dims dims = engine->getTensorShape(tensor_name); ++#endif + // nbDims == 0 represent scalar, -1 represents invalid dim + int n_values = (dims.nbDims == 0) ? 
1 : dims.d[0]; + if (n_values > 0) { +@@ -580,6 +632,7 @@ Status TrtShapeOptimizationProfile::SetPrunedMask( + is_pruned_input_.resize(n_network_inputs); + absl::c_fill(is_pruned_input_, false); + for (int j = 0; j < n_network_inputs; j++) { ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + int binding_idx; + Status status = GetTrtBindingIndex(j, 0, engine, &binding_idx); + if (!status.ok()) { +@@ -590,6 +643,13 @@ Status TrtShapeOptimizationProfile::SetPrunedMask( + VLOG(2) << "Skipping pruned input " << j; + continue; + } ++#else // IS_TRT_VERSION_GE(10, 0, 0, 0) ++ if (engine->getTensorIOMode(GetTrtInputName(j).c_str()) == ++ nvinfer1::TensorIOMode::kNONE) { ++ is_pruned_input_[j] = true; ++ VLOG(2) << "Skipping pruned input " << j; ++ } ++#endif // IS_TRT_VERSION_GE(10, 0, 0, 0) + } + return OkStatus(); + } +@@ -601,10 +661,12 @@ Status TrtShapeOptimizationProfile::RestoreProfiles( + // We do not need to restore profiles for an empty engine. + return OkStatus(); + } ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + if (engine->hasImplicitBatchDimension()) { + // Nothing to do, we cannot have profiles in implicit batch mode. + return OkStatus(); + } ++#endif + int n_profiles = engine->getNbOptimizationProfiles(); + need_profiles_ = n_profiles > 0; + int n_inputs = GetNumberOfEngineInputs(engine); +@@ -626,6 +688,7 @@ Status TrtShapeOptimizationProfile::RestoreProfiles( + // restore shape values + for (int j = 0; j < n_network_inputs; j++) { + if (is_pruned_input_[j]) continue; ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + int binding_idx; + TF_RETURN_IF_ERROR(GetTrtBindingIndex(j, 0, engine, &binding_idx)); + +@@ -635,16 +698,36 @@ Status TrtShapeOptimizationProfile::RestoreProfiles( + binding_idx, prof_idx, nvinfer1::OptProfileSelector::kMAX); + nvinfer1::Dims opt = engine->getProfileDimensions( + binding_idx, prof_idx, nvinfer1::OptProfileSelector::kOPT); ++#else // IS_TRT_VERSION_GE(10, 0, 0, 0) ++ string tensor_name = GetTrtInputName(j); ++ ++ nvinfer1::Dims min = engine->getProfileShape( ++ tensor_name.c_str(), prof_idx, nvinfer1::OptProfileSelector::kMIN); ++ nvinfer1::Dims max = engine->getProfileShape( ++ tensor_name.c_str(), prof_idx, nvinfer1::OptProfileSelector::kMAX); ++ nvinfer1::Dims opt = engine->getProfileShape( ++ tensor_name.c_str(), prof_idx, nvinfer1::OptProfileSelector::kOPT); ++#endif // IS_TRT_VERSION_GE(10, 0, 0, 0) ++ + cfg.min[j] = min; + cfg.max[j] = max; + cfg.opt[j] = opt; + ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + cfg.min[j + n_inputs] = GetDimsFromShapeVal( + prof_idx, binding_idx, nvinfer1::OptProfileSelector::kMIN, engine); + cfg.max[j + n_inputs] = GetDimsFromShapeVal( + prof_idx, binding_idx, nvinfer1::OptProfileSelector::kMAX, engine); + cfg.opt[j + n_inputs] = GetDimsFromShapeVal( + prof_idx, binding_idx, nvinfer1::OptProfileSelector::kOPT, engine); ++#else // IS_TRT_VERSION_GE(10, 0, 0, 0) ++ cfg.min[j + n_inputs] = GetDimsFromShapeVal( ++ prof_idx, tensor_name.c_str(), nvinfer1::OptProfileSelector::kMIN, engine); ++ cfg.max[j + n_inputs] = GetDimsFromShapeVal( ++ prof_idx, tensor_name.c_str(), nvinfer1::OptProfileSelector::kMAX, engine); ++ cfg.opt[j + n_inputs] = GetDimsFromShapeVal( ++ prof_idx, tensor_name.c_str(), nvinfer1::OptProfileSelector::kOPT, engine); ++#endif // IS_TRT_VERSION_GE(10, 0, 0, 0) + } + VLOG(2) << "Restored profile " << cfg.DebugString(); + profiles_.push_back(std::move(cfg)); +diff --git a/tensorflow/compiler/tf2tensorrt/utils/trt_shape_optimization_profiles.h b/tensorflow/compiler/tf2tensorrt/utils/trt_shape_optimization_profiles.h +index 
e2d8fdb655b..7d556c34d2e 100644 +--- a/tensorflow/compiler/tf2tensorrt/utils/trt_shape_optimization_profiles.h ++++ b/tensorflow/compiler/tf2tensorrt/utils/trt_shape_optimization_profiles.h +@@ -111,12 +111,48 @@ struct OptimizationProfileConfig { + int idx = i + n_inputs_tf; + VLOG(2) << "Setting shape values for " << name << ", " + << ::tensorflow::tensorrt::DebugString(opt[idx]); ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + profile->setShapeValues(name, nvinfer1::OptProfileSelector::kMIN, + min[idx].d, min[idx].nbDims); + profile->setShapeValues(name, nvinfer1::OptProfileSelector::kOPT, + opt[idx].d, opt[idx].nbDims); + profile->setShapeValues(name, nvinfer1::OptProfileSelector::kMAX, + max[idx].d, max[idx].nbDims); ++#else ++ std::vector vals32; ++ vals32.resize(min[idx].nbDims); ++ for (int dim = 0; dim < min[idx].nbDims; ++dim) { ++ vals32[dim] = static_cast(min[idx].d[dim]); ++ if (vals32[dim] != min[idx].d[dim]) { ++ return errors::Internal("min value does not fit in int32: ", ++ min[idx].d[dim]); ++ } ++ } ++ profile->setShapeValues(name, nvinfer1::OptProfileSelector::kMIN, ++ vals32.data(), min[idx].nbDims); ++ ++ vals32.resize(opt[idx].nbDims); ++ for (int dim = 0; dim < opt[idx].nbDims; ++dim) { ++ vals32[dim] = static_cast(opt[idx].d[dim]); ++ if (vals32[dim] != opt[idx].d[dim]) { ++ return errors::Internal("opt value does not fit in int32: ", ++ opt[idx].d[dim]); ++ } ++ } ++ profile->setShapeValues(name, nvinfer1::OptProfileSelector::kOPT, ++ vals32.data(), opt[idx].nbDims); ++ ++ vals32.resize(max[idx].nbDims); ++ for (int dim = 0; dim < max[idx].nbDims; ++dim) { ++ vals32[dim] = static_cast(max[idx].d[dim]); ++ if (vals32[dim] != max[idx].d[dim]) { ++ return errors::Internal("max value does not fit in int32: ", ++ max[idx].d[dim]); ++ } ++ } ++ profile->setShapeValues(name, nvinfer1::OptProfileSelector::kMAX, ++ vals32.data(), max[idx].nbDims); ++#endif + } + VLOG(2) << "Setting input dimensions for " << name << ", " + << ::tensorflow::tensorrt::DebugString(opt[i]); +@@ -241,7 +277,12 @@ class TrtShapeOptimizationProfile { + Status CreateExecutionContexts(nvinfer1::ICudaEngine* engine, + std::vector* exec_contexts); + +- Status SetInputShapeBinding(int input_index, int binding_index, ++ Status SetInputShapeBinding(int input_index, ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ int binding_index, ++#else ++ const char* tensor_name, ++#endif + nvinfer1::ICudaEngine* cuda_engine, + nvinfer1::IExecutionContext* exec_context) const; + +diff --git a/tensorflow/compiler/tf2tensorrt/utils/trt_shape_optimization_profiles_test.cc b/tensorflow/compiler/tf2tensorrt/utils/trt_shape_optimization_profiles_test.cc +index 87e17a9fc3f..a4d53b683e4 100644 +--- a/tensorflow/compiler/tf2tensorrt/utils/trt_shape_optimization_profiles_test.cc ++++ b/tensorflow/compiler/tf2tensorrt/utils/trt_shape_optimization_profiles_test.cc +@@ -77,13 +77,21 @@ class TrtShapeOptimizationProfileTest + protected: + TrtShapeOptimizationProfileTest() { + strategy_ = GetParam(); ++#if IS_TRT_VERSION_GE(10, 0, 0, 0) ++ runtime_.reset(nvinfer1::createInferRuntime(logger_)); ++#endif + builder_ = TrtUniquePtrType( + nvinfer1::createInferBuilder(logger_)); + network_ = TrtUniquePtrType( + builder_->createNetworkV2(flags_)); + builder_config_ = TrtUniquePtrType( + builder_->createBuilderConfig()); ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + builder_config_->setMaxWorkspaceSize(1 << 10); ++#else // IS_TRT_VERSION_GE(10, 0, 0, 0) ++ builder_config_->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kWORKSPACE, ++ 1 << 10); ++#endif // 
IS_TRT_VERSION_GE(10, 0, 0, 0) + } + + // Defines a simple network: output = input1 + input2. +@@ -117,12 +125,24 @@ class TrtShapeOptimizationProfileTest + int prof_idx = exec_contexts_[idx]->getOptimizationProfile(); + ASSERT_GE(prof_idx, 0); + for (int j = 0; j < dimvec.size(); j++) { ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + nvinfer1::Dims min = engine->getProfileDimensions( + j, prof_idx, nvinfer1::OptProfileSelector::kMIN); + nvinfer1::Dims max = engine->getProfileDimensions( + j, prof_idx, nvinfer1::OptProfileSelector::kMAX); + nvinfer1::Dims opt = engine->getProfileDimensions( + j, prof_idx, nvinfer1::OptProfileSelector::kOPT); ++#else // IS_TRT_VERSION_GE(10, 0, 0, 0) ++ nvinfer1::Dims min = ++ engine->getProfileShape(engine->getIOTensorName(j), prof_idx, ++ nvinfer1::OptProfileSelector::kMIN); ++ nvinfer1::Dims max = ++ engine->getProfileShape(engine->getIOTensorName(j), prof_idx, ++ nvinfer1::OptProfileSelector::kMAX); ++ nvinfer1::Dims opt = ++ engine->getProfileShape(engine->getIOTensorName(j), prof_idx, ++ nvinfer1::OptProfileSelector::kOPT); ++#endif // IS_TRT_VERSION_GE(10, 0, 0, 0) + + // This should always hold. + EXPECT_TRUE(DimsContained(dimvec[j], min, max)); +@@ -135,6 +155,7 @@ class TrtShapeOptimizationProfileTest + } + + Logger& logger_ = *Logger::GetLogger(); ++ TrtUniquePtrType runtime_; + TrtUniquePtrType builder_; + TrtUniquePtrType network_; + TrtUniquePtrType builder_config_; +@@ -168,8 +189,16 @@ TEST_P(TrtShapeOptimizationProfileTest, Static) { + TF_CHECK_OK(profile.ConfigureBuilder(builder_.get(), builder_config_.get(), + network_.get())); + ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + engine = TrtUniquePtrType( + builder_->buildEngineWithConfig(*network_, *builder_config_)); ++#else // IS_TRT_VERSION_GE(10, 0, 0, 0) ++ TrtUniquePtrType serialized( ++ builder_->buildSerializedNetwork(*network_, *builder_config_)); ++ engine.reset( ++ runtime_->deserializeCudaEngine(serialized->data(), serialized->size())); ++#endif // IS_TRT_VERSION_GE(10, 0, 0, 0) ++ + EXPECT_NE(nullptr, engine); + TF_CHECK_OK(profile.CreateExecutionContexts(engine.get(), &exec_contexts_)); + // A single execution context should be created for a graph with static input. +@@ -213,8 +242,16 @@ TEST_P(TrtShapeOptimizationProfileTest, Dynamic) { + // Configure and build engine. + TF_CHECK_OK(profile.ConfigureBuilder(builder_.get(), builder_config_.get(), + network_.get())); ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + engine = TrtUniquePtrType( + builder_->buildEngineWithConfig(*network_.get(), *builder_config_.get())); ++#else // IS_TRT_VERSION_GE(10, 0, 0, 0) ++ TrtUniquePtrType serialized( ++ builder_->buildSerializedNetwork(*network_.get(), ++ *builder_config_.get())); ++ engine.reset( ++ runtime_->deserializeCudaEngine(serialized->data(), serialized->size())); ++#endif // IS_TRT_VERSION_GE(10, 0, 0, 0) + ASSERT_NE(nullptr, engine); + + TF_CHECK_OK(profile.CreateExecutionContexts(engine.get(), &exec_contexts_)); +diff --git a/tensorflow/compiler/tf2tensorrt/utils/trt_testutils.h b/tensorflow/compiler/tf2tensorrt/utils/trt_testutils.h +index e0b9a0366a5..bbbe9512a5c 100644 +--- a/tensorflow/compiler/tf2tensorrt/utils/trt_testutils.h ++++ b/tensorflow/compiler/tf2tensorrt/utils/trt_testutils.h +@@ -83,7 +83,7 @@ nvinfer1::Dims CreateDims(const std::vector& d); + // matches nvinfer1::Dims to initializer list or vector of ints + // Example: EXPECT_THAT(my_dims, DimsAreArray({1, 2, 3})) + MATCHER_P(DimsAreArrayHelper, array_value, +- absl::StrFormat("%s [%s]", negation ? 
"are" : "are not", ++ absl::StrFormat("%s [%s]", negation ? "are not" : "are", + ::testing::PrintToString(array_value))) { + if (arg.nbDims != array_value.size()) return false; + for (int i = 0; i < arg.nbDims; ++i) { +@@ -100,7 +100,7 @@ using DimsAreArray = DimsAreArrayHelperMatcherP>; + // Checks that layer names are equal to initializer list or vector of strings. + // Example: EXPECT_THAT(my_network, LayerNamesAreArray({"conv1", "conv2"})) + MATCHER_P(LayerNamesAreArrayHelper, array_value, +- absl::StrFormat("layer names %s [%s]", negation ? "are" : "are not", ++ absl::StrFormat("layer names %s [%s]", negation ? "are not" : "are", + ::testing::PrintToString(array_value))) { + if (array_value.size() != arg->getNbLayers()) return false; + for (int i = 0; i < arg->getNbLayers(); ++i) { +diff --git a/tensorflow/compiler/tf2tensorrt/utils/trt_testutils_test.cc b/tensorflow/compiler/tf2tensorrt/utils/trt_testutils_test.cc +index d5d9fcf99f5..8a442478349 100644 +--- a/tensorflow/compiler/tf2tensorrt/utils/trt_testutils_test.cc ++++ b/tensorflow/compiler/tf2tensorrt/utils/trt_testutils_test.cc +@@ -77,7 +77,14 @@ TEST(INetworkDefinitionMatchers, CorrectlyMatch) { + ASSERT_NE(input, nullptr); + + const char* fc_layer_name = "my-fc-layer"; ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + auto layer = network->addFullyConnected(*input, 1, weights, weights); ++#else // IS_TRT_VERSION_GE(10, 0, 0, 0) ++ auto layer = ++ network->addMatrixMultiply(*input, nvinfer1::MatrixOperation::kNONE, ++ *input, nvinfer1::MatrixOperation::kNONE); ++ (void)weights; // Not used ++#endif // IS_TRT_VERSION_GE(10, 0, 0, 0) + ASSERT_NE(layer, nullptr); + layer->setName(fc_layer_name); + +@@ -86,7 +93,12 @@ TEST(INetworkDefinitionMatchers, CorrectlyMatch) { + AllOf(LayerNamesNonEmpty(), LayerNamesAreArray({fc_layer_name}))); + + // Add layer with default name and check layer name. ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + layer = network->addFullyConnected(*input, 1, weights, weights); ++#else // IS_TRT_VERSION_GE(10, 0, 0, 0) ++ layer = network->addMatrixMultiply(*input, nvinfer1::MatrixOperation::kNONE, ++ *input, nvinfer1::MatrixOperation::kNONE); ++#endif // IS_TRT_VERSION_GE(10, 0, 0, 0) + EXPECT_THAT(network.get(), AllOf(LayerNamesNonEmpty(), + Not(LayerNamesAreArray({fc_layer_name})))); + } +diff --git a/tensorflow/lite/python/convert.py b/tensorflow/lite/python/convert.py +index cfaff27a849..a4eeb02fd6a 100644 +--- a/tensorflow/lite/python/convert.py ++++ b/tensorflow/lite/python/convert.py +@@ -14,7 +14,6 @@ + # ============================================================================== + """Converts a frozen graph into a TFLite FlatBuffer.""" + +-import distutils.spawn + import enum + import hashlib + import os as _os +@@ -45,6 +44,10 @@ from tensorflow.python.platform import resource_loader as _resource_loader + from tensorflow.python.util import deprecation + from tensorflow.python.util.tf_export import tf_export as _tf_export + ++try: ++ from shutil import which ++except ImportError: ++ from distutils.spawn import find_executable as which + + def _is_quantized_input_stats_required( + conversion_flags: _conversion_flags_pb2.TocoFlags, +@@ -399,7 +402,7 @@ def _run_deprecated_conversion_binary( + RuntimeError: When conversion fails, an exception is raised with the error + message embedded. 
+ """ +- if distutils.spawn.find_executable(_deprecated_conversion_binary) is None: ++ if which(_deprecated_conversion_binary) is None: + raise ConverterError("""Could not find `toco_from_protos` binary, make sure + your virtualenv bin directory or pip local bin directory is in your path. + In particular, if you have installed TensorFlow with --user, make sure you +diff --git a/tensorflow/python/compiler/tensorrt/test/tf_trt_integration_test_base.py b/tensorflow/python/compiler/tensorrt/test/tf_trt_integration_test_base.py +index 06784c09106..8290ec796ef 100644 +--- a/tensorflow/python/compiler/tensorrt/test/tf_trt_integration_test_base.py ++++ b/tensorflow/python/compiler/tensorrt/test/tf_trt_integration_test_base.py +@@ -1117,7 +1117,7 @@ def _GetTestConfigsV1(): + convert_online, convert_offline = True, False + dynamic_engine, static_engine = True, False + use_calibration, no_calibration = True, False +- implicit_batch = False ++ dynamic_shape = False + + # Add all possible test cases and let the derived test class to decide + # whether to run specific ones with ShouldRunTest(). +@@ -1126,11 +1126,11 @@ def _GetTestConfigsV1(): + opts = list( + itertools.product([FP32, FP16, INT8], [convert_online, convert_offline], + [dynamic_engine, static_engine], [no_calibration], +- [implicit_batch])) ++ [dynamic_shape])) + # We always run calibration with offline tool. + # TODO(aaroey): static calibration engine is not supported yet. + opts.append( +- (INT8, convert_offline, dynamic_engine, use_calibration, implicit_batch)) ++ (INT8, convert_offline, dynamic_engine, use_calibration, dynamic_shape)) + return opts + + +@@ -1142,6 +1142,11 @@ def _GetTestConfigsV2(): + # TODO(laigd): add support for calibration. + no_calibration = False + use_calibration = True ++ dynamic_shape_opts = [False, True] ++ ++ if trt_utils.is_loaded_tensorrt_version_greater_equal(10, 0, 0): ++ # Implicit batch mode is not supported since TensorRT 10.0. ++ dynamic_shape_opts = [True] + + # Add all possible test cases and let the derived test class to decide + # whether to run specific ones with ShouldRunTest(). +@@ -1154,10 +1159,11 @@ def _GetTestConfigsV2(): + # - INT8 without calibration behaves like FP32/FP16. + opts = list( + itertools.product([FP32, FP16], [convert_offline], [dynamic_engine], +- [no_calibration], [False, True])) ++ [no_calibration], dynamic_shape_opts)) + # We always run calibration with offline tool. +- opts.append((INT8, convert_offline, dynamic_engine, use_calibration, False)) +- opts.append((INT8, convert_offline, dynamic_engine, use_calibration, True)) ++ for dynamic_shape in dynamic_shape_opts: ++ opts.append(( ++ INT8, convert_offline, dynamic_engine, use_calibration, dynamic_shape)) + return opts + + +diff --git a/tensorflow/python/compiler/tensorrt/trt_convert.py b/tensorflow/python/compiler/tensorrt/trt_convert.py +index 746f910e407..5fab7b0273b 100644 +--- a/tensorflow/python/compiler/tensorrt/trt_convert.py ++++ b/tensorflow/python/compiler/tensorrt/trt_convert.py +@@ -1214,7 +1214,7 @@ class TrtGraphConverterV2(object): + input_saved_model_signature_key: the key of the signature to optimize the + graph for. + use_dynamic_shape: whether to enable dynamic shape support. None is +- equivalent to False in the current implementation. ++ equivalent to True in the current implementation. + dynamic_shape_profile_strategy: one of the strings in + supported_profile_strategies(). None is equivalent to Range in the + current implementation. 
+@@ -1284,7 +1284,7 @@ class TrtGraphConverterV2(object): + self._calibrated = False + + if use_dynamic_shape is None: +- self._use_dynamic_shape = False ++ self._use_dynamic_shape = True + else: + self._use_dynamic_shape = use_dynamic_shape + +diff --git a/third_party/tensorrt/tensorrt_configure.bzl b/third_party/tensorrt/tensorrt_configure.bzl +index 3d127795638..28c222ab8ad 100644 +--- a/third_party/tensorrt/tensorrt_configure.bzl ++++ b/third_party/tensorrt/tensorrt_configure.bzl +@@ -26,7 +26,7 @@ _TF_TENSORRT_VERSION = "TF_TENSORRT_VERSION" + _TF_NEED_TENSORRT = "TF_NEED_TENSORRT" + + _TF_TENSORRT_LIBS = ["nvinfer", "nvinfer_plugin"] +-_TF_TENSORRT_HEADERS = ["NvInfer.h", "NvUtils.h", "NvInferPlugin.h"] ++_TF_TENSORRT_HEADERS = ["NvInfer.h", "NvInferPlugin.h"] + _TF_TENSORRT_HEADERS_V6 = [ + "NvInfer.h", + "NvUtils.h", +@@ -63,6 +63,21 @@ _TF_TENSORRT_HEADERS_V8_6 = [ + "NvInferVersion.h", + "NvUtils.h", + ] ++_TF_TENSORRT_HEADERS_V10 = [ ++ "NvInfer.h", ++ "NvInferConsistency.h", ++ "NvInferConsistencyImpl.h", ++ "NvInferImpl.h", ++ "NvInferLegacyDims.h", ++ "NvInferPlugin.h", ++ "NvInferPluginUtils.h", ++ "NvInferRuntime.h", ++ "NvInferRuntimeBase.h", ++ "NvInferRuntimeCommon.h", ++ "NvInferRuntimePlugin.h", ++ "NvInferSafeRuntime.h", ++ "NvInferVersion.h", ++] + + _DEFINE_TENSORRT_SONAME_MAJOR = "#define NV_TENSORRT_SONAME_MAJOR" + _DEFINE_TENSORRT_SONAME_MINOR = "#define NV_TENSORRT_SONAME_MINOR" +@@ -89,6 +104,8 @@ def _at_least_version(actual_version, required_version): + return actual >= required + + def _get_tensorrt_headers(tensorrt_version): ++ if _at_least_version(tensorrt_version, "10"): ++ return _TF_TENSORRT_HEADERS_V10 + if _at_least_version(tensorrt_version, "8.6"): + return _TF_TENSORRT_HEADERS_V8_6 + if _at_least_version(tensorrt_version, "8"): +diff --git a/third_party/xla/third_party/tsl/third_party/tensorrt/tensorrt_configure.bzl b/third_party/xla/third_party/tsl/third_party/tensorrt/tensorrt_configure.bzl +index 91b214fd990..d63828fd29b 100644 +--- a/third_party/xla/third_party/tsl/third_party/tensorrt/tensorrt_configure.bzl ++++ b/third_party/xla/third_party/tsl/third_party/tensorrt/tensorrt_configure.bzl +@@ -26,7 +26,7 @@ _TF_TENSORRT_VERSION = "TF_TENSORRT_VERSION" + _TF_NEED_TENSORRT = "TF_NEED_TENSORRT" + + _TF_TENSORRT_LIBS = ["nvinfer", "nvinfer_plugin"] +-_TF_TENSORRT_HEADERS = ["NvInfer.h", "NvUtils.h", "NvInferPlugin.h"] ++_TF_TENSORRT_HEADERS = ["NvInfer.h", "NvInferPlugin.h"] + _TF_TENSORRT_HEADERS_V6 = [ + "NvInfer.h", + "NvUtils.h", diff --git a/patches/full/tf2.16.2_py3.12.patch b/patches/full/tf2.16.2_py3.12.patch new file mode 100644 index 0000000..803ec74 --- /dev/null +++ b/patches/full/tf2.16.2_py3.12.patch @@ -0,0 +1,2970 @@ +diff --git a/tensorflow/api_template.__init__.py b/tensorflow/api_template.__init__.py +index a0f88926b5e..9c0bb098feb 100644 +--- a/tensorflow/api_template.__init__.py ++++ b/tensorflow/api_template.__init__.py +@@ -27,7 +27,6 @@ this file with a file generated from [`api_template.__init__.py`](https://www.gi + """ + # pylint: disable=g-bad-import-order,protected-access,g-import-not-at-top + +-import distutils as _distutils + import importlib + import inspect as _inspect + import os as _os +@@ -95,10 +94,10 @@ _site_packages_dirs = [] + if _site.ENABLE_USER_SITE and _site.USER_SITE is not None: + _site_packages_dirs += [_site.USER_SITE] + _site_packages_dirs += [p for p in _sys.path if "site-packages" in p] +-if "getsitepackages" in dir(_site): ++try: + _site_packages_dirs += _site.getsitepackages() +- +-if 
"sysconfig" in dir(_distutils): ++except AttributeError: ++ import distutils as _distutils + _site_packages_dirs += [_distutils.sysconfig.get_python_lib()] + + _site_packages_dirs = list(set(_site_packages_dirs)) +diff --git a/tensorflow/api_template_v1.__init__.py b/tensorflow/api_template_v1.__init__.py +index 6a4ab4e655f..d6f8f2e0441 100644 +--- a/tensorflow/api_template_v1.__init__.py ++++ b/tensorflow/api_template_v1.__init__.py +@@ -14,7 +14,6 @@ + # ============================================================================== + """Bring in all of the public TensorFlow interface into this module.""" + +-import distutils as _distutils + import importlib + import inspect as _inspect + import os as _os +@@ -144,10 +143,10 @@ from tensorflow.python.lib.io import file_io as _fi + _site_packages_dirs = [] + _site_packages_dirs += [] if _site.USER_SITE is None else [_site.USER_SITE] + _site_packages_dirs += [p for p in _sys.path if "site-packages" in p] +-if "getsitepackages" in dir(_site): ++try: + _site_packages_dirs += _site.getsitepackages() +- +-if "sysconfig" in dir(_distutils): ++except AttributeError: ++ import distutils as _distutils + _site_packages_dirs += [_distutils.sysconfig.get_python_lib()] + + _site_packages_dirs = list(set(_site_packages_dirs)) +diff --git a/tensorflow/compiler/tf2tensorrt/BUILD b/tensorflow/compiler/tf2tensorrt/BUILD +index 91ef722b52d..a0d8e9736ad 100644 +--- a/tensorflow/compiler/tf2tensorrt/BUILD ++++ b/tensorflow/compiler/tf2tensorrt/BUILD +@@ -531,6 +531,7 @@ tf_cuda_library( + hdrs = ["utils/trt_allocator.h"], + features = ["-layering_check"], + deps = [ ++ ":common_utils", + "//tensorflow/core:framework_headers_lib", + "//tensorflow/core:framework_lite", + "//tensorflow/core:lib_proto_parsing", +diff --git a/tensorflow/compiler/tf2tensorrt/common/utils.cc b/tensorflow/compiler/tf2tensorrt/common/utils.cc +index 26ac37b237b..bca157b43bc 100644 +--- a/tensorflow/compiler/tf2tensorrt/common/utils.cc ++++ b/tensorflow/compiler/tf2tensorrt/common/utils.cc +@@ -41,11 +41,19 @@ std::tuple GetLinkedTensorRTVersion() { + + std::tuple GetLoadedTensorRTVersion() { + #if GOOGLE_CUDA && GOOGLE_TENSORRT ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + int ver = getInferLibVersion(); + int major = ver / 1000; + ver = ver - major * 1000; + int minor = ver / 100; + int patch = ver - minor * 100; ++#else // IS_TRT_VERSION_GE(10, 0, 0, 0) ++ // Note: The above logic using getInferLibVersion() produces the wrong version ++ // numbers since TensorRT 10.0, so these new functions must be used instead. 
++ int major = getInferLibMajorVersion(); ++ int minor = getInferLibMinorVersion(); ++ int patch = getInferLibPatchVersion(); ++#endif // IS_TRT_VERSION_GE(10, 0, 0, 0) + return std::tuple{major, minor, patch}; + #else + return std::tuple{0, 0, 0}; +@@ -59,6 +67,7 @@ std::tuple GetLoadedTensorRTVersion() { + namespace tensorflow { + namespace tensorrt { + ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + Status GetTrtBindingIndex(const char* tensor_name, int profile_index, + const nvinfer1::ICudaEngine* cuda_engine, + int* binding_index) { +@@ -93,6 +102,11 @@ Status GetTrtBindingIndex(int network_input_index, int profile_index, + return GetTrtBindingIndex(input_name.c_str(), profile_index, cuda_engine, + binding_index); + } ++#else // IS_TRT_VERSION_GE(10, 0, 0, 0) ++string GetTrtInputName(int network_input_index) { ++ return absl::StrCat(IONamePrefixes::kInputPHName, network_input_index); ++} ++#endif // IS_TRT_VERSION_GE(10, 0, 0, 0) + + namespace { + +@@ -234,6 +248,19 @@ std::ostream& operator<<(std::ostream& os, const nvinfer1::DataType& v) { + os << "kUINT8"; + break; + #endif ++ ++#if IS_TRT_VERSION_GE(10, 0, 0, 0) ++ case nvinfer1::DataType::kBF16: ++ os << "kBF16"; ++ break; ++ case nvinfer1::DataType::kINT64: ++ os << "kINT64"; ++ break; ++ case nvinfer1::DataType::kINT4: ++ os << "kINT4"; ++ break; ++#endif ++ + } + return os; + } +diff --git a/tensorflow/compiler/tf2tensorrt/common/utils.h b/tensorflow/compiler/tf2tensorrt/common/utils.h +index 0bc63ecd5c2..5c5f298051d 100644 +--- a/tensorflow/compiler/tf2tensorrt/common/utils.h ++++ b/tensorflow/compiler/tf2tensorrt/common/utils.h +@@ -102,6 +102,7 @@ class IONamePrefixes { + static constexpr const char* const kOutputPHName = "TensorRTOutputPH_"; + }; + ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + // Gets the binding index of a tensor in an engine. + // + // The binding index is looked up using the tensor's name and the profile index. 
+@@ -116,6 +117,9 @@ Status GetTrtBindingIndex(const char* tensor_name, int profile_index, + Status GetTrtBindingIndex(int network_input_idx, int profile_index, + const nvinfer1::ICudaEngine* cuda_engine, + int* binding_index); ++#else // IS_TRT_VERSION_GE(10, 0, 0, 0) ++string GetTrtInputName(int network_input_index); ++#endif // IS_TRT_VERSION_GE(10, 0, 0, 0) + } // namespace tensorrt + } // namespace tensorflow + +diff --git a/tensorflow/compiler/tf2tensorrt/convert/algorithm_selector.cc b/tensorflow/compiler/tf2tensorrt/convert/algorithm_selector.cc +index 5421e7bb46b..10e99b68412 100644 +--- a/tensorflow/compiler/tf2tensorrt/convert/algorithm_selector.cc ++++ b/tensorflow/compiler/tf2tensorrt/convert/algorithm_selector.cc +@@ -53,9 +53,16 @@ std::ostream& operator<<(std::ostream& os, const nvinfer1::IAlgorithm& alg) { + + std::ostream& operator<<(std::ostream& os, + const nvinfer1::IAlgorithmIOInfo& info) { ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + os << "IOTensor(format=" << info.getTensorFormat() + << ",dtype=" << info.getDataType() << ",strides=" << info.getStrides() + << ")"; ++#else // IS_TRT_VERSION_GE(10, 0, 0, 0) ++ os << "IOTensor(dtype=" << info.getDataType() ++ << ",strides=" << info.getStrides() ++ << ",vectorized_dim=" << info.getVectorizedDim() ++ << ",vectorized_components=" << info.getComponentsPerElement() << ")"; ++#endif // IS_TRT_VERSION_GE(10, 0, 0, 0) + return os; + } + } // namespace nvinfer1 +@@ -200,11 +207,13 @@ bool TftrtAlgorithmSelector::AlgorithmPolicy( + return false; + } + ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + if (selector_.IsShuffleLayer(variant.getImplementation())) { + return selector_.AllowShuffleAlgorithm( + tactic_id, alg.getAlgorithmIOInfo(0).getDataType(), + alg.getAlgorithmIOInfo(0).getTensorFormat()); + } ++#endif // !IS_TRT_VERSION_GE(10, 0, 0, 0) + return true; + } + +diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_graph.cc b/tensorflow/compiler/tf2tensorrt/convert/convert_graph.cc +index e809152c1e7..9f26e73ecbd 100644 +--- a/tensorflow/compiler/tf2tensorrt/convert/convert_graph.cc ++++ b/tensorflow/compiler/tf2tensorrt/convert/convert_graph.cc +@@ -713,13 +713,16 @@ Status CreateStaticEngine(const TRTOptimizationPass::ConversionParams& params, + (info.precision_mode == TrtPrecisionMode::INT8 && info.use_calibration); + + // Create static engines with precision_mode fp32/fp16. ++ TrtUniquePtrType infer( ++ nvinfer1::createInferRuntime(*trt_logger)); + TrtUniquePtrType engine; + TF_RETURN_IF_ERROR(ConvertGraphDefToEngine( + info.segment_graph_def, nullptr, + calibrate_int8 ? 
TrtPrecisionMode::FP32 : info.precision_mode, + max_batch_size, info.max_workspace_size_bytes, input_shapes, trt_logger, +- trt_allocator.get(), /*calibrator=*/nullptr, &engine, +- info.use_calibration, params.use_implicit_batch, ++ trt_allocator.get(), infer.get(), ++ /*calibrator=*/nullptr, &engine, info.use_calibration, ++ params.use_implicit_batch, + /*convert_successfully=*/nullptr, profile, info.engine_name, + /*use_explicit_precision=*/params.use_explicit_precision, cluster)); + TrtUniquePtrType engine_data(engine->serialize()); +diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc +index 1c3a1903477..31d78e4db0e 100644 +--- a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc ++++ b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc +@@ -108,7 +108,9 @@ namespace { + const char* LayerTypeToString(nvinfer1::LayerType layer_type) { + switch (layer_type) { + ADD_LAYER(CONVOLUTION) ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + ADD_LAYER(FULLY_CONNECTED) ++#endif + ADD_LAYER(ACTIVATION) + ADD_LAYER(POOLING) + ADD_LAYER(LRN) +@@ -130,7 +132,9 @@ const char* LayerTypeToString(nvinfer1::LayerType layer_type) { + ADD_LAYER(MATRIX_MULTIPLY) + ADD_LAYER(RAGGED_SOFTMAX) + ADD_LAYER(CONSTANT) ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + ADD_LAYER(RNN_V2) ++#endif + ADD_LAYER(IDENTITY) + ADD_LAYER(PLUGIN_V2) + ADD_LAYER(SLICE) +@@ -1082,9 +1086,13 @@ Status Converter::Init(nvinfer1::ILogger* trt_logger) { + : (1U << static_cast( + nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH)); + if (use_explicit_precision_) { ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + flags |= + (1U << static_cast( + nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_PRECISION)); ++#else ++ return errors::Internal("Explicit precision is not supported since TensorRT 10"); ++#endif + } + trt_network_.reset(trt_builder_->createNetworkV2(flags)); + if (!trt_network_) { +@@ -1252,7 +1260,8 @@ bool AbortCudaEngineBuild() { + Status Converter::BuildCudaEngine( + TrtUniquePtrType* engine, int max_batch_size, + size_t max_workspace_size_bytes, nvinfer1::IGpuAllocator* allocator, +- TRTInt8Calibrator* calibrator, TrtShapeOptimizationProfile* profiles) { ++ nvinfer1::IRuntime* runtime, TRTInt8Calibrator* calibrator, ++ TrtShapeOptimizationProfile* profiles) { + tensorflow::profiler::AnnotatedTraceMe activity( + [&]() { + return tensorflow::profiler::TraceMeOpOverride("TRTEngineOp", +@@ -1266,13 +1275,20 @@ Status Converter::BuildCudaEngine( + } + + VLOG(1) << "Configuring TensorRT builder"; ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + trt_builder_->setMaxBatchSize(max_batch_size); ++#endif + trt_builder_->setGpuAllocator(allocator); + + // Create a network configuration and use it to build a TRT engine. + TrtUniquePtrType builder_config( + trt_builder_->createBuilderConfig()); ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + builder_config->setMaxWorkspaceSize(max_workspace_size_bytes); ++#else ++ builder_config->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kWORKSPACE, ++ max_workspace_size_bytes); ++#endif + + // Create the algorithm selector. For TensorRT 7.x, the algorithm selector + // cannot be used when building with INT8 calibration. 
+@@ -1429,23 +1445,45 @@ Status Converter::BuildCudaEngine( + } + } + } ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + engine->reset( + trt_builder_->buildEngineWithConfig(*network(), *builder_config)); ++#else // IS_TRT_VERSION_GE(10, 0, 0, 0) ++ TrtUniquePtrType serialized( ++ trt_builder_->buildSerializedNetwork(*network(), *builder_config)); ++ if (!serialized) return errors::Internal("Failed to build TensorRT serialized network"); ++ engine->reset( ++ runtime->deserializeCudaEngine(serialized->data(), serialized->size())); ++#endif // IS_TRT_VERSION_GE(10, 0, 0, 0) + if (engine->get() == nullptr) { + return errors::Internal("Failed to build TensorRT engine"); + } + if (VLOG_IS_ON(2)) { + VLOG(2) << "TRT engine created"; ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + int nbBindings = (*engine)->getNbBindings(); ++#else ++ int nbBindings = (*engine)->getNbIOTensors(); ++#endif + VLOG(2) << "Number of engine bindings: " << nbBindings; + for (int i = 0; i < nbBindings; i++) { + auto get_location_string = [&engine](int i) { ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + if ((*engine)->getLocation(i) == nvinfer1::TensorLocation::kDEVICE) ++#else ++ if ((*engine)->getTensorLocation((*engine)->getIOTensorName(i)) == ++ nvinfer1::TensorLocation::kDEVICE) ++#endif + return " on device"; + else + return " on host"; + }; +- VLOG(2) << "Binding " << i << " name: " << (*engine)->getBindingName(i) ++ VLOG(2) << "Binding " << i << " name: " ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ << (*engine)->getBindingName(i) ++#else ++ << (*engine)->getIOTensorName(i) ++#endif + << get_location_string(i); + } + } +@@ -2060,11 +2098,19 @@ Status ConvertConv2DHelper(const OpConverterParams* params, int group, + nvinfer1::ILayer* conv_layer = nullptr; + if (is_conv2d_backprop_input) { + nvinfer1::IDeconvolutionLayer* layer = ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + params->converter->network()->addDeconvolution( ++#else ++ params->converter->network()->addDeconvolutionNd( ++#endif + *tensor->trt_tensor(), noutput, kernel_size, + weights->GetTrtWeights(), biases->GetTrtWeights()); + TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name()); ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + layer->setStride(stride); ++#else ++ layer->setStrideNd(stride); ++#endif + // VALID padding is the default TRT behavior. + if (padding_type == "SAME") { + // SAME_UPPER means that post padding is preferred. +@@ -2076,18 +2122,30 @@ Status ConvertConv2DHelper(const OpConverterParams* params, int group, + const nvinfer1::Weights empty_weights{nvinfer1::DataType::kFLOAT, nullptr, + 0}; + nvinfer1::IConvolutionLayer* layer = ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + params->converter->network()->addConvolution( ++#else ++ params->converter->network()->addConvolutionNd( ++#endif + *tensor->trt_tensor(), noutput, kernel_size, + params->use_explicit_precision ? 
empty_weights + : weights->GetTrtWeights(), + empty_weights); + TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name()); ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + layer->setStride(stride); ++#else ++ layer->setStrideNd(stride); ++#endif + if (padding_type == "SAME") { + layer->setPaddingMode(nvinfer1::PaddingMode::kSAME_UPPER); + } + layer->setNbGroups(num_groups); ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + layer->setDilation(dilation); ++#else ++ layer->setDilationNd(dilation); ++#endif + conv_layer = layer; + } + +@@ -2136,8 +2194,12 @@ Status ConvertConv2DHelper(const OpConverterParams* params, int group, + nvinfer1::DimsHW pre_padding(0, 0); + nvinfer1::DimsHW post_padding(height_diff, width_diff); + nvinfer1::IPaddingLayer* padding_layer = +- params->converter->network()->addPadding(*output_tensor->trt_tensor(), +- pre_padding, post_padding); ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ params->converter->network()->addPadding( ++#else ++ params->converter->network()->addPaddingNd( ++#endif ++ *output_tensor->trt_tensor(), pre_padding, post_padding); + output_tensor = padding_layer->getOutput(0); + params->converter->SetLayerName(padding_layer, node_def, "pad"); + } +@@ -2212,6 +2274,11 @@ Status ConvertTranspose(const OpConverterParams* params) { + + Status ConvertShape(const OpConverterParams* params) { + const auto& inputs = params->inputs; ++ const auto& node_def = params->node_def; ++ DataType out_type; ++ TF_RETURN_IF_ERROR(GetNodeAttr(AttrSlice(node_def), "out_type", &out_type)); ++ nvinfer1::DataType trt_out_type; ++ TF_RETURN_IF_ERROR(TfTypeToTrtType(out_type, &trt_out_type)); + TF_RETURN_IF_ERROR( + CheckInputsWeights(*params, {{"input", TrtInputArg::kBoth}})); + if (params->use_implicit_batch) { +@@ -2224,20 +2291,27 @@ Status ConvertShape(const OpConverterParams* params) { + StatusOr builder = TRTNetworkBuilder::Create( + params->converter->network(), params->weight_store); + TRT_ENSURE_OK(builder); ++ nvinfer1::ITensor* out_tensor; + if (input_dims.IsStatic()) { + // Create a const node with the value of the shape. + StatusOr const_layer = + builder->ConstantShape(input_dims); + TRT_ENSURE_PTR_OK(const_layer); +- params->outputs->push_back( +- TRT_TensorOrWeights((*const_layer)->getOutput(0))); +- return OkStatus(); +- } +- StatusOr shape_layer = +- builder->Shape(inputs.at(0).tensor()->trt_tensor()); +- TRT_ENSURE_PTR_OK(shape_layer); +- params->converter->SetLayerName(*shape_layer, params->node_def, "shape"); +- params->outputs->push_back(TRT_TensorOrWeights((*shape_layer)->getOutput(0))); ++ out_tensor = (*const_layer)->getOutput(0); ++ } else { ++ StatusOr shape_layer = ++ builder->Shape(inputs.at(0).tensor()->trt_tensor()); ++ TRT_ENSURE_PTR_OK(shape_layer); ++ params->converter->SetLayerName(*shape_layer, params->node_def, "shape"); ++ out_tensor = (*shape_layer)->getOutput(0); ++ } ++ if (out_tensor->getType() != trt_out_type) { ++ nvinfer1::ICastLayer* cast_layer = ++ params->converter->network()->addCast(*out_tensor, trt_out_type); ++ TRT_ENSURE(cast_layer); ++ out_tensor = cast_layer->getOutput(0); ++ } ++ params->outputs->push_back(TRT_TensorOrWeights(out_tensor)); + return OkStatus(); + } + +@@ -2430,6 +2504,14 @@ Status Converter::DynamicReshape(ITensorProxyPtr input, + } + ITensorProxyPtr shape = + network()->addShape(*input->trt_tensor())->getOutput(0); ++#if IS_TRT_VERSION_GE(10, 0, 0, 0) ++ // TODO(benbarsdell): Casting to int32 makes this match the pre-TRT10 ++ // behavior, but it would be better to instead cast all the other int32 ++ // tensors below to int64. 
++ shape = network() ++ ->addCast(*shape->trt_tensor(), nvinfer1::DataType::kINT32) ++ ->getOutput(0); ++#endif + // Build new shape = shape[:trt_axis] + [1] + shape[trt_axis:] + std::vector concat_inputs; + int max_num_slices = std::max(slices.size(), size_for_added_dims.size()); +@@ -3266,7 +3348,11 @@ Status ConvertFusedConv2DBiasActivation(const OpConverterParams* params) { + nvinfer1::IConvolutionLayer* conv_layer = nullptr; + if (filter_format == "OIHW") { + // Weights are already in the right order. ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + conv_layer = params->converter->network()->addConvolution( ++#else ++ conv_layer = params->converter->network()->addConvolutionNd( ++#endif + *tensor->trt_tensor(), weights.Shape().dim(0), kernel_size, + weights.GetTrtWeights(), biases.GetTrtWeights()); + } else { +@@ -3276,18 +3362,30 @@ Status ConvertFusedConv2DBiasActivation(const OpConverterParams* params) { + params->weight_store->GetTempWeights(weights); + TRT_ENSURE_OK(weights_kcrs); + ReorderRSCKToKCRS(weights, &*weights_kcrs, 1); ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + conv_layer = params->converter->network()->addConvolution( ++#else ++ conv_layer = params->converter->network()->addConvolutionNd( ++#endif + *tensor->trt_tensor(), weights.Shape().dim(3), kernel_size, + weights_kcrs->GetTrtWeights(), biases.GetTrtWeights()); + } + TFTRT_RETURN_ERROR_IF_NULLPTR(conv_layer, node_def.name()); ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + conv_layer->setStride(stride); ++#else ++ conv_layer->setStrideNd(stride); ++#endif + if (padding_type == "SAME") { + conv_layer->setPaddingMode(nvinfer1::PaddingMode::kSAME_UPPER); + } + params->converter->SetLayerName(conv_layer, node_def, "conv"); + conv_layer->setNbGroups(1); ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + conv_layer->setDilation(dilation); ++#else ++ conv_layer->setDilationNd(dilation); ++#endif + ITensorProxyPtr output_tensor = conv_layer->getOutput(0); + + // Add activation if there is one. +@@ -3359,11 +3457,19 @@ Status ConvertPool(const OpConverterParams* params) { + tensor, {0, 3, 1, 2}, &tensor, node_def, "to_NCHW")); + } + ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + nvinfer1::IPoolingLayer* layer = params->converter->network()->addPooling( ++#else ++ nvinfer1::IPoolingLayer* layer = params->converter->network()->addPoolingNd( ++#endif + *tensor->trt_tensor(), type, ksize); + TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name()); + ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + layer->setStride(stride); ++#else ++ layer->setStrideNd(stride); ++#endif + // VALID padding is the default TRT behavior. + if (padding_type == "SAME") { + // SAME_UPPER means that post padding is preferred. 
+@@ -4000,7 +4106,11 @@ Status ConvertPad(const OpConverterParams* params) { + tensor, transpose_idx, &tensor, node_def, "to_pad")); + } + ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + nvinfer1::IPaddingLayer* layer = params->converter->network()->addPadding( ++#else ++ nvinfer1::IPaddingLayer* layer = params->converter->network()->addPaddingNd( ++#endif + *tensor->trt_tensor(), pre_padding, post_padding); + TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name()); + params->converter->SetLayerName(layer, node_def); +@@ -4684,10 +4794,27 @@ StatusOr ConvertFullyConnectedImpl( + << ", n_output=" << noutput + << " weights shape: " << weights.Shape().DebugString() + << " to convert " << node_def.op(); ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + nvinfer1::IFullyConnectedLayer* layer = + params->converter->network()->addFullyConnected( + *tensor_a->trt_tensor(), noutput, weights.GetTrtWeights(), + biases.GetTrtWeights()); ++#else // IS_TRT_VERSION_GE(10, 0, 0, 0) ++ nvinfer1::IConstantLayer* weights_layer = ++ params->converter->network()->addConstant(weights.Shape().AsTrtDims(), ++ weights.GetTrtWeights()); ++ nvinfer1::IConstantLayer* bias_layer = ++ params->converter->network()->addConstant(biases.Shape().AsTrtDims(), ++ biases.GetTrtWeights()); ++ nvinfer1::IMatrixMultiplyLayer* matmul_layer = ++ params->converter->network()->addMatrixMultiply( ++ *tensor_a->trt_tensor(), nvinfer1::MatrixOperation::kNONE, ++ *weights_layer->getOutput(0), nvinfer1::MatrixOperation::kNONE); ++ nvinfer1::IElementWiseLayer* layer = ++ params->converter->network()->addElementWise( ++ *matmul_layer->getOutput(0), *bias_layer->getOutput(0), ++ nvinfer1::ElementWiseOperation::kSUM); ++#endif // !IS_TRT_VERSION_GE(10, 0, 0, 0) + + TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name()); + params->converter->SetLayerName(layer, node_def); +@@ -4703,7 +4830,13 @@ StatusOr ConvertFullyConnectedImpl( + TF_RETURN_IF_ERROR(PrepareTensorForShape( + params->converter, TRT_TensorOrWeights(output_tensor), output_dim, + /*validation_only=*/false, &output_tensor, node_def, +- /*op_instance=*/1, /*origin_node_name=*/"FULLY_CONNECTED")); ++ /*op_instance=*/1, ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ /*origin_node_name=*/"FULLY_CONNECTED") ++#else ++ /*origin_node_name=*/"MATRIX_MULTIPLY") ++#endif ++ ); + return output_tensor; + } + +@@ -5007,6 +5140,14 @@ CalcDepthSpaceDynamicShape(const OpConverterParams* params, int block_size, + ITensorProxyPtr shape = params->converter->network() + ->addShape(*inputs.at(0).tensor()->trt_tensor()) + ->getOutput(0); ++#if IS_TRT_VERSION_GE(10, 0, 0, 0) ++ // TODO(benbarsdell): Casting to int32 makes this match the pre-TRT10 ++ // behavior, but it would be better to instead cast all the other int32 ++ // tensors below to int64. ++ shape = params->converter->network() ++ ->addCast(*shape->trt_tensor(), nvinfer1::DataType::kINT32) ++ ->getOutput(0); ++#endif + ITensorProxyPtr batch_size = + params->converter->network() + ->addSlice(*shape->trt_tensor(), {1, {0}}, {1, {1}}, {1, {1}}) +@@ -5597,7 +5738,11 @@ Status ConvertResize(const OpConverterParams* params) { + AllowDataTypes(*params, {DataType::DT_FLOAT, DataType::DT_HALF})); + + // Verify resize mode. Initialize resize mode if supported. 
++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + nvinfer1::ResizeMode resize_mode; ++#else ++ nvinfer1::InterpolationMode resize_mode; ++#endif + if (node_def.op() == "ResizeBilinear") { + #if IS_TRT_VERSION_GE(7, 1, 0, 0) + if (!align_corners) { +@@ -5605,9 +5750,17 @@ Status ConvertResize(const OpConverterParams* params) { + "Cannot Convert Bilinear Resize when align_corners=False"); + } + #endif ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + resize_mode = nvinfer1::ResizeMode::kLINEAR; ++#else ++ resize_mode = nvinfer1::InterpolationMode::kLINEAR; ++#endif + } else if (node_def.op() == "ResizeNearestNeighbor") { ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + resize_mode = nvinfer1::ResizeMode::kNEAREST; ++#else ++ resize_mode = nvinfer1::InterpolationMode::kNEAREST; ++#endif + } else { + return errors::Unimplemented(node_def.op(), " is not yet implemented"); + } +@@ -5643,6 +5796,14 @@ Status ConvertResize(const OpConverterParams* params) { + ITensorProxyPtr shape = params->converter->network() + ->addShape(*inputs_tensor->trt_tensor()) + ->getOutput(0); ++#if IS_TRT_VERSION_GE(10, 0, 0, 0) ++ // TODO(benbarsdell): Casting to int32 makes this match the pre-TRT10 ++ // behavior, but it would be better to instead cast all the other int32 ++ // tensors below to int64. ++ shape = params->converter->network() ++ ->addCast(*shape->trt_tensor(), nvinfer1::DataType::kINT32) ++ ->getOutput(0); ++#endif + ITensorProxyPtr batch_size = + params->converter->network() + ->addSlice(*shape->trt_tensor(), {1, {0}}, {1, {1}}, {1, {1}}) +@@ -5686,7 +5847,14 @@ Status ConvertResize(const OpConverterParams* params) { + + // Set layer parameters. + layer->setResizeMode(resize_mode); ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + layer->setAlignCorners(align_corners); ++#else ++ if (align_corners) { ++ layer->setCoordinateTransformation( ++ nvinfer1::ResizeCoordinateTransformation::kALIGN_CORNERS); ++ } ++#endif + + // Set output shape. + if (static_output_shape) { +@@ -5833,7 +6001,7 @@ Status ConvertGraphDefToEngine( + int max_batch_size, size_t max_workspace_size_bytes, + const std::vector& input_shapes, + nvinfer1::ILogger* trt_logger, nvinfer1::IGpuAllocator* allocator, +- TRTInt8Calibrator* calibrator, ++ nvinfer1::IRuntime* runtime, TRTInt8Calibrator* calibrator, + TrtUniquePtrType* engine, bool use_calibration, + const bool use_implicit_batch, bool* convert_successfully, + TrtShapeOptimizationProfile* profiles, absl::string_view engine_name, +@@ -6026,8 +6194,8 @@ Status ConvertGraphDefToEngine( + + // Build the engine. 
+ TF_RETURN_IF_ERROR(converter->BuildCudaEngine( +- engine, max_batch_size, max_workspace_size_bytes, allocator, calibrator, +- profiles)); ++ engine, max_batch_size, max_workspace_size_bytes, allocator, runtime, ++ calibrator, profiles)); + + VLOG(1) << "Finished conversion"; + return OkStatus(); +diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.h b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.h +index e9afd320be9..241de56c3ea 100644 +--- a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.h ++++ b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.h +@@ -156,7 +156,7 @@ Status ConvertGraphDefToEngine( + int max_batch_size, size_t max_workspace_size_bytes, + const std::vector& input_shapes, + nvinfer1::ILogger* logger, nvinfer1::IGpuAllocator* allocator, +- TRTInt8Calibrator* calibrator, ++ nvinfer1::IRuntime* runtime, TRTInt8Calibrator* calibrator, + TrtUniquePtrType* engine, bool use_calibration, + const bool use_implicit_batch, bool* convert_successfully, + TrtShapeOptimizationProfile* profiles, absl::string_view engine_name, +@@ -280,6 +280,7 @@ class Converter { + Status BuildCudaEngine(TrtUniquePtrType* engine, + int max_batch_size, size_t max_workspace_size_bytes, + nvinfer1::IGpuAllocator* allocator, ++ nvinfer1::IRuntime* runtime, + TRTInt8Calibrator* calibrator, + TrtShapeOptimizationProfile* profiles); + +diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc +index 332be3f50bf..90e8f207dcc 100644 +--- a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc ++++ b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc +@@ -135,9 +135,16 @@ using ::testing::PrintToString; + using ::tensorflow::testing::IsOk; + using ::tensorflow::testing::StatusIs; + ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + constexpr std::array ValidTrtModes = { +- TrtTestMode::kImplicitBatch, TrtTestMode::kExplicitBatch, ++ TrtTestMode::kImplicitBatch, ++ TrtTestMode::kExplicitBatch, + TrtTestMode::kDynamicShape}; ++#else ++constexpr std::array ValidTrtModes = { ++ TrtTestMode::kExplicitBatch, ++ TrtTestMode::kDynamicShape}; ++#endif + + bool TrtShapedWeightsEquals(const TRT_ShapedWeights& lhs, + const TRT_ShapedWeights& rhs) { +@@ -299,7 +306,11 @@ class ValidatorTest : public ::testing::Test { + + TrtNodeValidator validator(graph_properties, TrtPrecisionMode::FP32, + /*use_calibration=*/false, ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + /*use_implicit_batch=*/true, ++#else ++ /*use_implicit_batch=*/false, ++#endif + /*use_explicit_precision=*/false); + return validator.ConvertToTensorOrWeights(node->def(), output_port, + tensor_or_weights); +@@ -336,8 +347,15 @@ TEST_F(ValidatorTest, ConvertToTensorOrWeights) { + convert_to_tensor_or_weights( + std::vector(nvinfer1::Dims::MAX_DIMS + 2, 1), &output), + StatusIs(absl::StatusCode::kOutOfRange, +- HasSubstr("Input tensor rank is greater than 9"))); ++ HasSubstr("Input tensor rank is greater than " ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ "9" ++#else ++ "8" ++#endif ++ ))); + } ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + // Convert non-Const with #dims < 1. 
+ { + TRT_TensorOrWeights output; +@@ -360,6 +378,7 @@ TEST_F(ValidatorTest, ConvertToTensorOrWeights) { + EXPECT_NE(nullptr, output.tensor()->simple_tensor()); + EXPECT_THAT(output.GetTrtDims(), DimsAreArray({non_batch_dim})); + } ++#endif // !IS_TRT_VERSION_GE(10, 0, 0, 0) + } + + TEST_F(ValidatorTest, IsTensorRTCandidate_Basics) { +@@ -375,7 +394,11 @@ TEST_F(ValidatorTest, IsTensorRTCandidate_Basics) { + TF_EXPECT_OK(graph_properties.InferStatically(true)); + TrtNodeValidator validator(graph_properties, TrtPrecisionMode::FP32, + /*use_calibration=*/false, ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + /*use_implicit_batch=*/true, ++#else ++ /*use_implicit_batch=*/false, ++#endif + /*use_explicit_precision=*/false); + + // Override the Add converter. +@@ -462,15 +485,21 @@ TEST(TrtNodeValidator, IsTensorRTCandidate) { + {TrtPrecisionMode::FP32, TrtPrecisionMode::INT8}) { + TrtNodeValidator validator(graph_properties, precision_mode, + /*use_calibration=*/false, ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + /*use_implicit_batch=*/true, ++#else ++ /*use_implicit_batch=*/false, ++#endif + /*use_explicit_precision=*/false); + TF_EXPECT_OK(validator.IsTensorRTCandidate(matmul.operation.node())); ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + EXPECT_THAT( + validator.IsTensorRTCandidate(incompatible_matmul.operation.node()), + StatusIs(absl::StatusCode::kInvalidArgument, + HasSubstr("MatMul with 2D tensors requires explicit batch " + "mode, or that tensor A " + "is not transposed and B is a constant tensor."))); ++#endif + EXPECT_THAT(validator.IsTensorRTCandidate(unsupported_op.operation.node()), + StatusIs(absl::StatusCode::kUnimplemented, + HasSubstr("Op type Erfc is not supported"))); +@@ -503,7 +532,11 @@ class ConverterTest : public ::testing::Test { + converter_ = + std::move(Converter::Create(TrtPrecisionMode::FP32, + /*use_calibration=*/false, &logger_, ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + /*use_implicit_batch=*/true, ++#else ++ /*use_implicit_batch=*/false, ++#endif + /*engine_name=*/"TRTEngineOp_000_000", + /*use_explicit_precision=*/false) + .value()); +@@ -692,15 +725,23 @@ TEST_F(ConverterTest, TransposeTensor) { + "with that of the input"))); + + // Transpose at batch dimension. ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + EXPECT_THAT( + converter_->TransposeTensor(input_tensor, {1, 0, 2, 3}, &output_tensor, + dummy_node_def, "sub2"), + StatusIs(absl::StatusCode::kUnimplemented, + HasSubstr("Transpose at batch dimension is not supported."))); ++#endif + + // OK. + TF_EXPECT_OK(converter_->TransposeTensor( +- input_tensor, {0, 3, 1, 2}, &output_tensor, dummy_node_def, "sub3")); ++ input_tensor, ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ {0, 3, 1, 2}, ++#else ++ {2, 0, 1}, ++#endif ++ &output_tensor, dummy_node_def, "sub3")); + EXPECT_THAT(output_tensor->getDimensions(), DimsAreArray({5, 2, 3})); + EXPECT_THAT( + converter_->network(), +@@ -815,14 +856,18 @@ TEST_F(ConverterTest, AddAndGetTensorOrWeights) { + // Add a tensor. + ITensorProxyPtr simple_tensor; + TRT_TensorOrWeights tensor(simple_tensor); ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + EXPECT_EQ(-1, tensor.batch_size()); ++#endif + TF_EXPECT_OK(MaybeUpdateBatchSize(123)); + TF_EXPECT_OK(AddTensorOrWeights("my_tensor", tensor)); + + // Get the added tensor. + TRT_TensorOrWeights added_tensor; + TF_EXPECT_OK(GetTensorOrWeights("my_tensor", &added_tensor)); ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + EXPECT_EQ(123, added_tensor.batch_size()); ++#endif + + // Add the same tensor again. 
+ EXPECT_THAT(AddTensorOrWeights("my_tensor", tensor), +@@ -875,7 +920,11 @@ TEST_F(ConverterTest, MaybeApplyQuantizationRanges) { + Logger& logger = *Logger::GetLogger(); + auto int8_converter = Converter::Create(TrtPrecisionMode::INT8, + /*use_calibration=*/true, &logger, ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + /*use_implicit_batch=*/true, ++#else ++ /*use_implicit_batch=*/false, ++#endif + /*engine_name=*/"") + .value(); + int8_converter->ProvideQuantizationRange(&input, -5.0f, 5.0f); +@@ -1016,6 +1065,10 @@ TEST_F(ConverterTest, CreateConstantLayer) { + + class ConvertGraphDefToEngineTest : public ::testing::Test { + public: ++ ConvertGraphDefToEngineTest() { ++ runtime_.reset(nvinfer1::createInferRuntime(logger_)); ++ } ++ + Status RunConvertGraphDefToEngine(Scope* s) { + GraphDef gdef; + TF_EXPECT_OK(s->ToGraphDef(&gdef)); +@@ -1040,13 +1093,20 @@ class ConvertGraphDefToEngineTest : public ::testing::Test { + return ConvertGraphDefToEngine( + gdef, /*ctx=*/nullptr, TrtPrecisionMode::FP32, /*max_batch_size=*/1, + /*max_workspace_size_bytes=*/64 << 20, input_shapes, &logger_, +- /*allocator=*/nullptr, /*calibrator=*/nullptr, &engine_, +- /*use_calibration=*/false, /*use_implicit_batch=*/true, ++ /*allocator=*/nullptr, runtime_.get(), ++ /*calibrator=*/nullptr, &engine_, ++ /*use_calibration=*/false, ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ /*use_implicit_batch=*/true, ++#else ++ /*use_implicit_batch=*/false, ++#endif + /*convert_successfully=*/nullptr, /*profiles=*/nullptr, + "TRTEngineOp_000_000", /*use_explicit_precision=*/false); + } + + protected: ++ TrtUniquePtrType runtime_; + TrtUniquePtrType engine_; + + private: +@@ -1127,11 +1187,17 @@ class OpConverterTest : public ::testing::Test { + } + + void Reset(TrtPrecisionMode precision_mode_to_test = TrtPrecisionMode::FP32, +- TrtTestMode trt_mode = TrtTestMode::kImplicitBatch, ++ TrtTestMode trt_mode = ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ TrtTestMode::kImplicitBatch, ++#else ++ TrtTestMode::kDynamicShape, ++#endif + OpKernelContext* ctx = nullptr) { + // Destroy existing TRT objects in a proper order. + converter_.reset(nullptr); + engine_.reset(nullptr); ++ runtime_.reset(nullptr); + + // Re-create them in proper order. + converter_ = +@@ -1145,6 +1211,8 @@ class OpConverterTest : public ::testing::Test { + + // Reset other related artifacts. + scope_ = Scope::NewRootScope(); ++ ++ runtime_.reset(nvinfer1::createInferRuntime(logger_)); + } + + // Constructs a flat tensor with 'vals' in Unified Memory. 
+@@ -1230,18 +1298,32 @@ class OpConverterTest : public ::testing::Test { + + void CheckDataTypeMatches(const DataVec& datas) { + if (VLOG_IS_ON(2)) { ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + int nbBindings = engine_->getNbBindings(); ++#else ++ int nbBindings = engine_->getNbIOTensors(); ++#endif + VLOG(2) << "Number of engine bindings: " << nbBindings; + for (int i = 0; i < nbBindings; i++) { +- VLOG(2) << "Binding " << i << " name: " << engine_->getBindingName(i); ++ VLOG(2) << "Binding " << i << " name: " << ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ engine_->getBindingName(i); ++#else ++ engine_->getIOTensorName(i); ++#endif + } + } + for (const auto& data : datas) { + VLOG(2) << "Checking if data type matches for tensor " << data.name; ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + const int input_index = engine_->getBindingIndex(data.name.c_str()); + ASSERT_NE(-1, input_index); + const nvinfer1::DataType trt_dtype = + engine_->getBindingDataType(input_index); ++#else ++ const nvinfer1::DataType trt_dtype = ++ engine_->getTensorDataType(data.name.c_str()); ++#endif + DataType tf_type; + TF_ASSERT_OK(TrtTypeToTfType(trt_dtype, &tf_type)); + ASSERT_EQ(data.tensor.dtype(), tf_type) +@@ -1287,7 +1369,7 @@ class OpConverterTest : public ::testing::Test { + converter_->BuildCudaEngine(&engine_, + /*max_batch_size=*/batch_size, + /*max_workspace_size_bytes=*/1 << 26, +- /*allocator=*/nullptr, ++ /*allocator=*/nullptr, runtime_.get(), + /*calibrator=*/nullptr, + /*profiles=*/&profiles)); + CHECK_NOTNULL(engine_.get()); +@@ -1297,7 +1379,12 @@ class OpConverterTest : public ::testing::Test { + const int num_bindings = input_data.size() + output_data->size(); + std::vector buffers(num_bindings); + +- if (engine_->getNbBindings() != num_bindings) { ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ const int actual_num_bindings = engine_->getNbBindings(); ++#else ++ const int actual_num_bindings = engine_->getNbIOTensors(); ++#endif ++ if (actual_num_bindings != num_bindings) { + return errors::Internal("Number of bindings do not match"); + } + // Since we have only 1 optimization profile (which is enabled by default) +@@ -1308,16 +1395,25 @@ class OpConverterTest : public ::testing::Test { + + // Prepare input bindings. + TF_RETURN_IF_ERROR( +- SetTrtEngineInputs(engine_.get(), execution_context.get(), 0, buffers, ++ SetTrtEngineInputs(engine_.get(), execution_context.get(), 0, ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ buffers, ++#endif + converter_->use_implicit_batch(), batch_size, + profiles, nullptr, &input_data)); + // Prepare output bindings. + TF_RETURN_IF_ERROR(SetTrtEngineOutputs( +- engine_.get(), execution_context.get(), 0, buffers, ++ engine_.get(), execution_context.get(), 0, ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ buffers, ++#endif + converter_->use_implicit_batch(), batch_size, nullptr, output_data)); + // Execute the TRT engine. 
+- TF_RETURN_IF_ERROR(TrtEnqueue(execution_context.get(), buffers, stream_, +- converter_->use_implicit_batch(), ++ TF_RETURN_IF_ERROR(TrtEnqueue(execution_context.get(), ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ buffers, ++#endif ++ stream_, converter_->use_implicit_batch(), + batch_size)); + cudaStreamSynchronize(stream_); + return OkStatus(); +@@ -1372,9 +1468,11 @@ class OpConverterTest : public ::testing::Test { + std::vector dims_vec; + TF_CHECK_OK(adap.Prepend(batch_size).Vector(&dims_vec)); + AddTestTensorWithTFDims(name, dims_vec, trt_dtype); ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + if (adap.IsStatic()) { + ASSERT_EQ(batch_size, converter_->batch_size_); + } ++#endif + } + + // Adds weights for both validation and conversion. The type of the weight is +@@ -1570,6 +1668,7 @@ class OpConverterTest : public ::testing::Test { + Logger& logger_ = *Logger::GetLogger(); + + private: ++ TrtUniquePtrType runtime_; + TrtUniquePtrType engine_; + cudaStream_t stream_; + std::unique_ptr tensor_buffer_allocator_; +@@ -1592,7 +1691,13 @@ class OpConverterTest : public ::testing::Test { + class VariableOpConverterTest : public OpConverterTest { + public: + void Reset(TrtPrecisionMode precision_mode_to_test = TrtPrecisionMode::FP32, +- TrtTestMode trt_mode = TrtTestMode::kImplicitBatch) { ++ TrtTestMode trt_mode = ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ TrtTestMode::kImplicitBatch ++#else ++ TrtTestMode::kDynamicShape ++#endif ++ ) { + OpConverterTest::Reset(precision_mode_to_test, trt_mode, context_.get()); + } + +@@ -8046,6 +8151,9 @@ void TestConvertSplit(OpConverterTest* test) { + } + } + ++// TODO(benbarsdell): This test needs to be fixed in many places to support ++// non-implicit-batch for TRT10. ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + TEST_F(OpConverterTest, ConvertSplit) { + { + // Axis is a tensor, should fail. +@@ -8122,6 +8230,7 @@ TEST_F(OpConverterTest, ConvertSplit) { + TestConvertSplit(this); + TestConvertSplit(this); + } ++#endif + + // Get the NodeDef for Unpack (Unstack in TF API). + auto get_unpack_nodedef = [](DataType dtype, int num, int axis) -> NodeDef { +diff --git a/tensorflow/compiler/tf2tensorrt/convert/ops/einsum.cc b/tensorflow/compiler/tf2tensorrt/convert/ops/einsum.cc +index dea0eca7326..8f9d6cc13eb 100755 +--- a/tensorflow/compiler/tf2tensorrt/convert/ops/einsum.cc ++++ b/tensorflow/compiler/tf2tensorrt/convert/ops/einsum.cc +@@ -311,6 +311,14 @@ class EinsumDescriptor { + builder->Shape(operand.tensor()->trt_tensor()); + TRT_ENSURE_PTR_OK(shape_layer); + nvinfer1::ITensor* shape = (*shape_layer)->getOutput(0); ++#if IS_TRT_VERSION_GE(10, 0, 0, 0) ++ // TODO(benbarsdell): Casting to int32 makes this match the pre-TRT10 ++ // behavior, but it would be better to instead cast the other int32 ++ // tensors to int64. ++ shape = builder->Network() ++ ->addCast(*shape, nvinfer1::DataType::kINT32) ++ ->getOutput(0); ++#endif + for (int i = 0; i < operand.GetTrtDims().nbDims; i++) { + int idx = permute.empty() ? 
i : permute.at(i); + StatusOr slice_layer = +diff --git a/tensorflow/compiler/tf2tensorrt/convert/ops/layer_utils.h b/tensorflow/compiler/tf2tensorrt/convert/ops/layer_utils.h +index e3aadc279d9..3c656bff4ac 100644 +--- a/tensorflow/compiler/tf2tensorrt/convert/ops/layer_utils.h ++++ b/tensorflow/compiler/tf2tensorrt/convert/ops/layer_utils.h +@@ -425,6 +425,14 @@ class TRTNetworkBuilder { + StatusOr shape_layer = this->Shape(input); + TRT_ENSURE_PTR_OK(shape_layer); + nvinfer1::ITensor* runtime_shape = (*shape_layer)->getOutput(0); ++#if IS_TRT_VERSION_GE(10, 0, 0, 0) ++ // TODO(benbarsdell): Casting to int32 makes this match the pre-TRT10 ++ // behavior, but it would be better to instead cast the other int32 ++ // tensors to int64. ++ runtime_shape = ++ network_->addCast(*runtime_shape, nvinfer1::DataType::kINT32) ++ ->getOutput(0); ++#endif + + if (sub_one) { + StatusOr ones = this->Constant(1, 1); +@@ -601,9 +609,9 @@ class TRTNetworkBuilder { + nvinfer1::ITensor* input, float quantize_scale, float dequantize_scale, + const std::string& name) { + TRT_ENSURE(input); +- if (!IS_TRT_VERSION_GE(8, 0, 0, 0)) { +- TRT_ENSURE(network_->hasExplicitPrecision()); +- } ++#if !IS_TRT_VERSION_GE(8, 0, 0, 0) ++ TRT_ENSURE(network_->hasExplicitPrecision()); ++#endif + TRT_ENSURE(IS_TRT_VERSION_GE(7, 1, 0, 0)); + + static int count = 0; +diff --git a/tensorflow/compiler/tf2tensorrt/convert/ops/like_ops.cc b/tensorflow/compiler/tf2tensorrt/convert/ops/like_ops.cc +index dd9dd0f2304..646950c00a3 100644 +--- a/tensorflow/compiler/tf2tensorrt/convert/ops/like_ops.cc ++++ b/tensorflow/compiler/tf2tensorrt/convert/ops/like_ops.cc +@@ -64,6 +64,15 @@ class ConvertLikeOps : public OpConverterBase> { + builder->Shape(input.tensor()->trt_tensor()); + TF_RETURN_IF_ERROR(shape_layer.status()); + dims_input_tensor = (*shape_layer)->getOutput(0); ++#if IS_TRT_VERSION_GE(10, 0, 0, 0) ++ // TODO(benbarsdell): Casting to int32 makes this match the pre-TRT10 ++ // behavior, but it would be better to instead cast the other int32 ++ // tensors to int64. ++ dims_input_tensor = network ++ ->addCast(*dims_input_tensor->trt_tensor(), ++ nvinfer1::DataType::kINT32) ++ ->getOutput(0); ++#endif + dims.nbDims = 0; + } + +diff --git a/tensorflow/compiler/tf2tensorrt/convert/ops/tile.cc b/tensorflow/compiler/tf2tensorrt/convert/ops/tile.cc +index 85c9c6a0292..f71dc70344c 100644 +--- a/tensorflow/compiler/tf2tensorrt/convert/ops/tile.cc ++++ b/tensorflow/compiler/tf2tensorrt/convert/ops/tile.cc +@@ -169,6 +169,13 @@ class ConvertTile : public OpConverterBase { + + nvinfer1::ITensor *shape = + network->addShape(input_trt_tensor)->getOutput(0); ++#if IS_TRT_VERSION_GE(10, 0, 0, 0) ++ // TODO(benbarsdell): Casting to int32 makes this match the pre-TRT10 ++ // behavior, but it would be better to instead cast all the other int32 ++ // tensors to int64. 
++ shape = ++ network->addCast(*shape, nvinfer1::DataType::kINT32)->getOutput(0); ++#endif + target_shape = network + ->addElementWise(*shape, *mult, + nvinfer1::ElementWiseOperation::kPROD) +@@ -179,7 +186,11 @@ class ConvertTile : public OpConverterBase { + DimsAdapter stride(std::vector(nb_dims, 1)); + auto layer = network->addSlice(input_trt_tensor, start, output_size, + stride.AsTrtDims()); ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + layer->setMode(nvinfer1::SliceMode::kWRAP); ++#else ++ layer->setMode(nvinfer1::SampleMode::kWRAP); ++#endif + if (target_shape) layer->setInput(2, *target_shape); + + converter->SetLayerName(layer, params.node_def.name(), "to_tile"); +diff --git a/tensorflow/compiler/tf2tensorrt/convert/timing_cache.cc b/tensorflow/compiler/tf2tensorrt/convert/timing_cache.cc +index d9b4a9dc5e8..536d09d2eb5 100644 +--- a/tensorflow/compiler/tf2tensorrt/convert/timing_cache.cc ++++ b/tensorflow/compiler/tf2tensorrt/convert/timing_cache.cc +@@ -70,7 +70,11 @@ void TimingCacheRegistry::Upsert(const string& name, TimingCache* cache) { + std::copy_n(static_cast(memory->data()), memory->size(), + mem.begin()); + } ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + memory->destroy(); ++#else // IS_TRT_VERSION_GE(10, 0, 0, 0) ++ delete memory; ++#endif // IS_TRT_VERSION_GE(10, 0, 0, 0) + #endif // IS_TRT_VERSION_GE(8, 0, 0, 0) + } + +diff --git a/tensorflow/compiler/tf2tensorrt/convert/trt_optimization_pass.cc b/tensorflow/compiler/tf2tensorrt/convert/trt_optimization_pass.cc +index 5c49346940a..49faef71b16 100644 +--- a/tensorflow/compiler/tf2tensorrt/convert/trt_optimization_pass.cc ++++ b/tensorflow/compiler/tf2tensorrt/convert/trt_optimization_pass.cc +@@ -47,7 +47,7 @@ using absl::StrCat; + namespace { + + bool ShouldUseExplicitPrecision(const GraphDef& gdef) { +- if (!IS_TRT_VERSION_GE(8, 0, 0, 0)) { ++ if (!IS_TRT_VERSION_GE(8, 0, 0, 0) || IS_TRT_VERSION_GE(10, 0, 0, 0)) { + return false; + } + return absl::c_any_of(gdef.node(), [](const auto& node) { +diff --git a/tensorflow/compiler/tf2tensorrt/convert/utils.cc b/tensorflow/compiler/tf2tensorrt/convert/utils.cc +index f2cc8be2fd0..bfc4f5dacaf 100644 +--- a/tensorflow/compiler/tf2tensorrt/convert/utils.cc ++++ b/tensorflow/compiler/tf2tensorrt/convert/utils.cc +@@ -221,10 +221,21 @@ Status TrtTypeToTfType(nvinfer1::DataType trt_type, DataType* tf_type) { + } + + int GetNumberOfEngineInputs(const nvinfer1::ICudaEngine* engine) { ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + int n_bindings = engine->getNbBindings(); ++#else ++ int n_bindings = engine->getNbIOTensors(); ++#endif + int n_input = 0; + for (int i = 0; i < n_bindings; i++) { ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + if (engine->bindingIsInput(i)) n_input++; ++#else ++ if (engine->getTensorIOMode(engine->getIOTensorName(i)) == ++ nvinfer1::TensorIOMode::kINPUT) { ++ n_input++; ++ } ++#endif + } + // According to TensorRT 7 doc: "If the engine has been built for K profiles, + // the first getNbBindings() / K bindings are used by profile number 0, the +@@ -232,7 +243,11 @@ int GetNumberOfEngineInputs(const nvinfer1::ICudaEngine* engine) { + // Therefore, to get the number of input tensors, we need to divide by the + // the number of profiles. 
+ int n_profiles = engine->getNbOptimizationProfiles(); ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + return n_input / n_profiles; ++#else ++ return n_input; ++#endif + } + + absl::string_view GetDeviceName(const Node* node) { +diff --git a/tensorflow/compiler/tf2tensorrt/convert/utils.h b/tensorflow/compiler/tf2tensorrt/convert/utils.h +index 9a03d2f9093..75f9a5218fa 100644 +--- a/tensorflow/compiler/tf2tensorrt/convert/utils.h ++++ b/tensorflow/compiler/tf2tensorrt/convert/utils.h +@@ -228,7 +228,11 @@ class DimsAdapter { + // in via the result pointer. + void TrtDims(nvinfer1::Dims* result) const { + result->nbDims = num_dims_; ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + absl::c_copy(storage_, static_cast(result->d)); ++#else ++ absl::c_copy(storage_, static_cast(result->d)); ++#endif + } + + // Converts to an nvinfer1::Dims and return by value. +diff --git a/tensorflow/compiler/tf2tensorrt/convert/weights.cc b/tensorflow/compiler/tf2tensorrt/convert/weights.cc +index da2157096b5..5b76fee995f 100644 +--- a/tensorflow/compiler/tf2tensorrt/convert/weights.cc ++++ b/tensorflow/compiler/tf2tensorrt/convert/weights.cc +@@ -59,11 +59,19 @@ Status TRT_ShapedWeights::SetShape(DimsAdapter dims) { + size_t TRT_ShapedWeights::size_bytes() const { + size_t data_type_size = -1; + switch (type_) { ++#if IS_TRT_VERSION_GE(10, 0, 0, 0) ++ case nvinfer1::DataType::kINT64: ++ data_type_size = 8; ++ break; ++#endif + case nvinfer1::DataType::kFLOAT: + case nvinfer1::DataType::kINT32: + data_type_size = 4; + break; + case nvinfer1::DataType::kHALF: ++#if IS_TRT_VERSION_GE(10, 0, 0, 0) ++ case nvinfer1::DataType::kBF16: ++#endif + data_type_size = 2; + break; + #if IS_TRT_VERSION_GE(8, 5, 0, 0) +@@ -76,6 +84,10 @@ size_t TRT_ShapedWeights::size_bytes() const { + case nvinfer1::DataType::kBOOL: + data_type_size = 1; + break; ++#if IS_TRT_VERSION_GE(10, 0, 0, 0) ++ case nvinfer1::DataType::kINT4: // Not supported ++ return 0; ++#endif + } + return volume_ * data_type_size; + } +diff --git a/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_op.cc b/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_op.cc +index 7a74a43d88a..cfa92bbdaee 100644 +--- a/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_op.cc ++++ b/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_op.cc +@@ -77,7 +77,11 @@ class ContextDeviceMemory { + + ~ContextDeviceMemory() { + if (device_memory_) { ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + device_memory_allocator_->free(device_memory_); ++#else ++ device_memory_allocator_->deallocate(device_memory_); ++#endif + } + } + +@@ -194,7 +198,8 @@ class TRTEngineOp : public AsyncOpKernel { + StatusOr> BuildEngine( + const std::vector& input_concrete_shapes, int batch_size, + bool use_calibration, TRTInt8Calibrator* calibrator, +- TRTEngineCacheResource* cache_resource, OpKernelContext* ctx); ++ TRTEngineCacheResource* cache_resource, OpKernelContext* ctx, ++ nvinfer1::IRuntime* runtime); + + // Verify that the input shapes are consistent and can be handled by this op. + Status VerifyInputShapes(const std::vector& shapes); +@@ -222,6 +227,7 @@ class TRTEngineOp : public AsyncOpKernel { + bool calibration_mode_; + + // Whether to use implicit batch dimension for TensorRT. ++ // Note that this is no longer supported since TensorRT 10.0. 
+ bool use_implicit_batch_; + + // Whether to collect optimization profiles for TensorRT, only used when +@@ -498,6 +504,12 @@ TRTEngineOp::TRTEngineOp(OpKernelConstruction* context) + << ", thus setting _use_implicit_batch=true"; + use_implicit_batch_ = true; + } ++#if IS_TRT_VERSION_GE(10, 0, 0, 0) ++ OP_REQUIRES( ++ context, !use_implicit_batch_, ++ errors::InvalidArgument( ++ "_use_implicit_batch must be false when using TensorRT >= 10.0")); ++#endif + + status = + context->GetAttr("_profile_generation_mode", &profile_generation_mode_); +@@ -1003,18 +1015,35 @@ Status TRTEngineOp::ExecuteTrtEngine( + VLOG(2) << " Workspace size: " << cuda_engine->getWorkspaceSize() + << " bytes"; + #endif // #if !IS_TRT_VERSION_GE(8, 0, 0, 0) ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + VLOG(2) << " Datatype of " << cuda_engine->getNbBindings() ++#else ++ VLOG(2) << " Datatype of " << cuda_engine->getNbIOTensors() ++#endif + << " inputs/outputs"; + string binding_types = ""; ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + for (int i = 0; i < cuda_engine->getNbBindings(); i++) { + binding_types += " " + string(cuda_engine->getBindingName(i)) + ": " + + DebugString(cuda_engine->getBindingDataType(i)) + "\n"; + } ++#else // IS_TRT_VERSION_GE(10, 0, 0, 0) ++ for (int i = 0; i < cuda_engine->getNbIOTensors(); i++) { ++ binding_types += " " + string(cuda_engine->getIOTensorName(i)) + ": " + ++ DebugString(cuda_engine->getTensorDataType( ++ cuda_engine->getIOTensorName(i))) + ++ "\n"; ++ } ++#endif // IS_TRT_VERSION_GE(10, 0, 0, 0) + VLOG(2) << binding_types; + } + ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + const int num_binding = cuda_engine->getNbBindings(); + std::vector buffers(num_binding); ++#else ++ const int num_binding = cuda_engine->getNbIOTensors(); ++#endif + + // nvinfer1::IExecutionContext::enqueue is not thread safe and we need a mutex + // for it. +@@ -1031,11 +1060,17 @@ Status TRTEngineOp::ExecuteTrtEngine( + use_implicit_batch_ ? ctx->input(0).shape().dim_size(0) : 0; + + TF_RETURN_IF_ERROR(SetTrtEngineInputs( +- cuda_engine, execution_context, trt_context_idx, buffers, ++ cuda_engine, execution_context, trt_context_idx, ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ buffers, ++#endif + use_implicit_batch_, num_batch, profiles, ctx)); + + TF_RETURN_IF_ERROR(SetTrtEngineOutputs(cuda_engine, execution_context, +- trt_context_idx, buffers, ++ trt_context_idx, ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ buffers, ++#endif + use_implicit_batch_, num_batch, ctx)); + + // Copied from gpu_kernel_helper.h as the header can only be used in *.cu.cc +@@ -1054,8 +1089,11 @@ Status TRTEngineOp::ExecuteTrtEngine( + execution_context, allocator, engine_context->GetDeviceMemorySize())); + } + // Enqueue the TensorRT engine for execution. 
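(Editor's note, not part of the patch: the hunks above swap TensorRT's removed binding-index API, getNbBindings/getBindingName/getBindingDataType, for the named I/O-tensor API that TensorRT 10 requires. As a rough illustration of that API only, here is a minimal standalone sketch assuming a valid `nvinfer1::ICudaEngine*`; the function name is hypothetical and this is plain TensorRT, not TensorFlow code.)

```cpp
#include <iostream>

#include "NvInfer.h"

// Counts engine inputs and prints each I/O tensor's name and dtype using the
// TensorRT >= 10 named-tensor API (getNbIOTensors / getIOTensorName /
// getTensorIOMode / getTensorDataType).
int CountEngineInputs(const nvinfer1::ICudaEngine* engine) {
  int n_inputs = 0;
  for (int i = 0; i < engine->getNbIOTensors(); ++i) {
    const char* name = engine->getIOTensorName(i);
    if (engine->getTensorIOMode(name) == nvinfer1::TensorIOMode::kINPUT) {
      ++n_inputs;
    }
    std::cout << name << " dtype="
              << static_cast<int>(engine->getTensorDataType(name)) << "\n";
  }
  return n_inputs;
}
```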
+- return TrtEnqueue(execution_context, buffers, stream, use_implicit_batch_, +- num_batch); ++ return TrtEnqueue(execution_context, ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ buffers, ++#endif ++ stream, use_implicit_batch_, num_batch); + } + + Status TRTEngineOp::GetEngineCacheResource(OpKernelContext* ctx, +@@ -1087,7 +1125,8 @@ Status TRTEngineOp::GetEngineCacheResource(OpKernelContext* ctx, + StatusOr> TRTEngineOp::BuildEngine( + const std::vector& input_concrete_shapes, int batch_size, + bool use_calibration, TRTInt8Calibrator* calibrator, +- TRTEngineCacheResource* cache_resource, OpKernelContext* ctx) { ++ TRTEngineCacheResource* cache_resource, OpKernelContext* ctx, ++ nvinfer1::IRuntime* runtime) { + tensorflow::profiler::TraceMe activity( + "TRTEngineOp::BuildEngine", tensorflow::profiler::TraceMeLevel::kInfo); + TRT_ENSURE(cache_resource); +@@ -1116,9 +1155,9 @@ StatusOr> TRTEngineOp::BuildEngine( + auto status = convert::ConvertGraphDefToEngine( + segment_graph_def_, ctx, precision_mode_, batch_size, workspace_size_, + conversion_input_shapes, &logger, cache_resource->allocator_.get(), +- calibrator, &engine, use_calibration, use_implicit_batch_, nullptr, +- &cache_resource->profiles_, name(), use_explicit_precision_, &cluster, +- ctx->device()->name()); ++ runtime, calibrator, &engine, use_calibration, use_implicit_batch_, ++ nullptr, &cache_resource->profiles_, name(), use_explicit_precision_, ++ &cluster, ctx->device()->name()); + if (!status.ok()) { + LOG_FIRST_FEW_WARNING_WITH_PREFIX + << "Engine creation for " << name() << " failed. " +@@ -1152,6 +1191,9 @@ StatusOr> TRTEngineOp::GetEngine( + return std::pair(&empty_context, 0); + } + ++ TrtUniquePtrType infer(nvinfer1::createInferRuntime(logger)); ++ infer->setGpuAllocator(allocator); ++ + // Handle the static engine case. For static engines, the cache will have a + // single element containing the only engine. + if (static_engine_) { +@@ -1172,14 +1214,17 @@ StatusOr> TRTEngineOp::GetEngine( + return std::pair(&empty_context, 0); + } + +- TrtUniquePtrType infer(nvinfer1::createInferRuntime(logger)); +- infer->setGpuAllocator(allocator); + // Need to initialize plugins in order to deserialize engines that contain + // plugins. + MaybeInitializeTrtPlugins(&logger); + TrtUniquePtrType static_engine( + infer->deserializeCudaEngine(serialized_segment_.c_str(), +- serialized_segment_.size(), nullptr)); ++ serialized_segment_.size() ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ , ++ nullptr ++#endif ++ )); + int profile_id = 0; + if (static_engine && !use_implicit_batch_) { + // load profiles +@@ -1189,7 +1234,8 @@ StatusOr> TRTEngineOp::GetEngine( + TF_RETURN_IF_ERROR(cache_res->profiles_.CreateExecutionContexts( + static_engine.get(), &exec_contexts)); + cache.emplace(input_concrete_shapes, +- std::make_unique(std::move(static_engine), ++ std::make_unique(std::move(infer), ++ std::move(static_engine), + std::move(exec_contexts))); + VLOG(1) << "Added new engine to cache of " << name() + << ". 
Cache size: " << cache.size(); +@@ -1218,9 +1264,10 @@ StatusOr> TRTEngineOp::GetEngine( + << "Reason: " << status; + } + } +- auto result = BuildEngine(input_concrete_shapes, batch_size, +- /*use_calibration=*/false, +- /*calibrator=*/nullptr, cache_res, ctx); ++ auto result = ++ BuildEngine(input_concrete_shapes, batch_size, ++ /*use_calibration=*/false, ++ /*calibrator=*/nullptr, cache_res, ctx, infer.get()); + if (!result.ok()) { + return std::pair(&empty_context, 0); + } +@@ -1232,20 +1279,27 @@ StatusOr> TRTEngineOp::GetEngine( + + int max_batch_size = 1; + if (use_implicit_batch_) { ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + max_batch_size = raw_static_engine->getMaxBatchSize(); + // Static engine will have max_batch_size for batch size so that all + // inputs will map to this single engine. + for (int i = 0; i < engine_input_shapes.size(); i++) { + engine_input_shapes[i].set_dim(0, max_batch_size); + } ++#else ++ return errors::Internal( ++ "Implicit batch is not supported since TensorRT 10.0. Pass " ++ "use_dynamic_shape=True to TrtGraphConverterV2 to avoid this error."); ++#endif + } + + ExecutionContext context = ExecutionContext::Create(raw_static_engine); + // TODO(laigd): here we assume engine_input_shapes matches the actual input + // shapes of the engine, we should verify that. +- cache.emplace(engine_input_shapes, +- std::make_unique(std::move(static_engine), +- std::move(context))); ++ cache.emplace( ++ engine_input_shapes, ++ std::make_unique( ++ std::move(infer), std::move(static_engine), std::move(context))); + // Runtime is safe to delete after engine creation + VLOG(1) << "Size of serialized TRT engine: " + << serialized_segment_.capacity(); +@@ -1294,7 +1348,7 @@ StatusOr> TRTEngineOp::GetEngine( + // means calibration_mode_ is true and this path won't get executed. + auto result = + BuildEngine(input_concrete_shapes, batch_size, use_calibration_, +- calibrator_.get(), cache_res, ctx); ++ calibrator_.get(), cache_res, ctx, infer.get()); + if (!result.ok()) { + return std::pair(&empty_context, 0); + } +@@ -1302,9 +1356,10 @@ StatusOr> TRTEngineOp::GetEngine( + std::vector exec_contexts; + TF_RETURN_IF_ERROR(cache_res->profiles_.CreateExecutionContexts( + engine.get(), &exec_contexts)); +- cache.emplace(input_concrete_shapes, +- std::make_unique(std::move(engine), +- std::move(exec_contexts))); ++ cache.emplace( ++ input_concrete_shapes, ++ std::make_unique(std::move(infer), std::move(engine), ++ std::move(exec_contexts))); + VLOG(1) << "Added new engine to cache of " << name() + << ". Cache size: " << cache.size(); + engine_contexts = cache.at(input_concrete_shapes).get(); +@@ -1390,6 +1445,9 @@ Status TRTEngineOp::AllocateCalibrationResources( + grappler::GetDeviceInfo(full_parsed_name)); + tensorflow::grappler::VirtualCluster cluster(device_map); + ++ TrtUniquePtrType infer(nvinfer1::createInferRuntime(logger)); ++ infer->setGpuAllocator(cache_res->allocator_.get()); ++ + // ConvertGraphDefToEngine() will try to build the engine. 
This thread + // will loop inside buildCudaEngine() consuming the calibration data + // that is set by the TF op, and drive the builder until calibrator +@@ -1402,7 +1460,8 @@ Status TRTEngineOp::AllocateCalibrationResources( + this->segment_graph_def_, ctx, TrtPrecisionMode::INT8, + cres->calibrator_->getBatchSize(), this->workspace_size_, + conversion_input_shapes, &cache_res->GetLogger(), +- cache_res->allocator_.get(), cres->calibrator_.get(), &cres->engine_, ++ cache_res->allocator_.get(), infer.get(), ++ cres->calibrator_.get(), &cres->engine_, + /*use_calibration=*/true, this->use_implicit_batch_, + /*convert_successfully=*/nullptr, + /*profiles=*/&cache_res->profiles_, name(), +@@ -1423,13 +1482,15 @@ Status TRTEngineOp::AllocateCalibrationResources( + auto calib_result = cache_res->profiles_.CreateExecutionContexts( + cres->engine_.get(), &exec_contexts); + cache_res->cache_.emplace( +- shapes, std::make_unique(std::move(cres->engine_), ++ shapes, std::make_unique(std::move(infer), ++ std::move(cres->engine_), + std::move(exec_contexts))); + } else { + ExecutionContext context = + ExecutionContext::Create(cres->engine_.get()); + cache_res->cache_.emplace( +- shapes, std::make_unique(std::move(cres->engine_), ++ shapes, std::make_unique(std::move(infer), ++ std::move(cres->engine_), + std::move(context))); + } + } +diff --git a/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_op_test.cc b/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_op_test.cc +index 317f3a54357..3368d3d4754 100644 +--- a/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_op_test.cc ++++ b/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_op_test.cc +@@ -66,7 +66,12 @@ class TRTEngineOpTestBase : public OpsTestBase { + public: + void AddSimpleTrtOp(DataType dtype, int max_cached_engines_count = 1, + PartialTensorShape shape = PartialTensorShape({-1, -1}), +- bool use_implicit_batch = true, ++ bool use_implicit_batch ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ = true, ++#else ++ = false, ++#endif + bool allow_build_at_runtime = true, + bool static_engine = false) { + // Create the GPU device. 
+@@ -207,6 +212,7 @@ constexpr std::array TestParameters{TestParam{false}, + INSTANTIATE_TEST_CASE_P(TRTEngineOpTestInstantiation, TRTEngineOpTestWithParam, + ::testing::ValuesIn(TestParameters)); + ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + TEST_F(TRTEngineOpTestBase, DynamicEngines) { + // Test dynamic engine creation during inference time + TRTEngineOpTestBase::AddSimpleTrtOp(DT_FLOAT, /*max_cached_engines_count=*/4); +@@ -256,11 +262,16 @@ TEST_F(TRTEngineOpTestBase, DynamicEngines) { + EXPECT_EQ(1, cache->count({TensorShape({3, 2})})); + EXPECT_EQ(1, cache->count({TensorShape({10, 10})})); + } ++#endif // !IS_TRT_VERSION_GE(10, 0, 0, 0) + + TEST_F(TRTEngineOpTestBase, AllowBuildAtRuntime) { + TRTEngineOpTestBase::AddSimpleTrtOp(DT_FLOAT, /*max_cached_engines_count=*/1, + PartialTensorShape({-1, -1}), ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + /*use_implicit_batch=*/true, ++#else ++ /*use_implicit_batch=*/false, ++#endif + /*allow_build_at_runtime=*/false); + + // Execute the op +diff --git a/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_resource_ops.cc b/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_resource_ops.cc +index 234330e328a..23ca2fc5b53 100644 +--- a/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_resource_ops.cc ++++ b/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_resource_ops.cc +@@ -147,7 +147,12 @@ class InitializeTRTResource : public OpKernel { + TrtUniquePtrType engine( + infer->deserializeCudaEngine( + engine_instance.serialized_engine().c_str(), +- engine_instance.serialized_engine().size(), nullptr)); ++ engine_instance.serialized_engine().size() ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ , ++ nullptr ++#endif ++ )); + auto raw_engine = engine.get(); + std::vector ctx_vec; + if (num_loaded_engine == 0) { +@@ -163,9 +168,10 @@ class InitializeTRTResource : public OpKernel { + // we have only a single execution context. 
+ ctx_vec.push_back(ExecutionContext::Create(raw_engine)); + } +- resource->cache_.emplace(engine_input_shapes, +- std::make_unique( +- std::move(engine), std::move(ctx_vec))); ++ resource->cache_.emplace( ++ engine_input_shapes, ++ std::make_unique(std::move(infer), std::move(engine), ++ std::move(ctx_vec))); + ++num_loaded_engine; + } while (1); + VLOG(1) << "Loaded " << num_loaded_engine << " TRT engines for op " +diff --git a/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_resource_ops_test.cc b/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_resource_ops_test.cc +index 987b01eebcb..28debd542fd 100644 +--- a/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_resource_ops_test.cc ++++ b/tensorflow/compiler/tf2tensorrt/kernels/trt_engine_resource_ops_test.cc +@@ -119,7 +119,11 @@ class TRTEngineResourceOpsTest + return layer->getOutput(0); + } + +- TrtUniquePtrType CreateTRTEngine() { ++ std::pair, ++ TrtUniquePtrType> ++ CreateTRTEngine() { ++ TrtUniquePtrType runtime( ++ nvinfer1::createInferRuntime(logger_)); + TrtUniquePtrType builder( + nvinfer1::createInferBuilder(logger_)); + TrtUniquePtrType network; +@@ -155,8 +159,13 @@ class TRTEngineResourceOpsTest + // Build the engine + TrtUniquePtrType builder_config( + builder->createBuilderConfig()); ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + builder_config->setMaxWorkspaceSize(1 << 10); + builder->setMaxBatchSize(1); ++#else // IS_TRT_VERSION_GE(10, 0, 0, 0) ++ builder_config->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kWORKSPACE, ++ 1 << 10); ++#endif // IS_TRT_VERSION_GE(10, 0, 0, 0) + + if (this->param_.dynamic_shape) { + TrtShapeOptimizationProfile profile; +@@ -205,11 +214,18 @@ class TRTEngineResourceOpsTest + network.get())); + } + VLOG(2) << "ConfigureBuilder Finished"; ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + TrtUniquePtrType engine( + builder->buildEngineWithConfig(*network, *builder_config)); ++#else ++ TrtUniquePtrType serialized( ++ builder->buildSerializedNetwork(*network, *builder_config)); ++ TrtUniquePtrType engine( ++ runtime->deserializeCudaEngine(serialized->data(), serialized->size())); ++#endif + VLOG(2) << "Engine constructed"; + EXPECT_NE(nullptr, engine); +- return engine; ++ return {std::move(engine), std::move(runtime)}; + } + Logger& logger_ = *Logger::GetLogger(); + TestParam param_; +@@ -278,7 +294,11 @@ TEST_P(TRTEngineResourceOpsTest, Basic) { + EXPECT_EQ(0, resource->cache_.size()); + + // Create an engine and add it to the cache of the resource. +- TrtUniquePtrType engine = CreateTRTEngine(); ++ auto engine_and_runtime = CreateTRTEngine(); ++ TrtUniquePtrType engine = ++ std::move(engine_and_runtime.first); ++ TrtUniquePtrType runtime = ++ std::move(engine_and_runtime.second); + ExecutionContext context = ExecutionContext::Create(engine.get()); + + std::vector engine_input_shape(1); +@@ -288,7 +308,8 @@ TEST_P(TRTEngineResourceOpsTest, Basic) { + } + resource->cache_.emplace( + engine_input_shape, +- std::make_unique(std::move(engine), std::move(context))); ++ std::make_unique(std::move(runtime), std::move(engine), ++ std::move(context))); + // Check that the resource has multiple references before it is unregistered + // from the resource manager. 
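(Editor's note, not part of the patch: the test changes above reflect that TensorRT 10 drops IBuilder::buildEngineWithConfig and IBuilderConfig::setMaxWorkspaceSize, so an engine is now built by serializing the network and deserializing the plan through an IRuntime that must outlive the engine, with the workspace capped via setMemoryPoolLimit. A minimal sketch of that build flow follows; the helper name is hypothetical and error handling is mostly omitted.)

```cpp
#include <memory>

#include "NvInfer.h"

// Builds an engine the TensorRT >= 10 way: serialize the network, then
// deserialize the plan through a runtime that must outlive the engine.
std::unique_ptr<nvinfer1::ICudaEngine> BuildEngineTrt10(
    nvinfer1::IBuilder& builder, nvinfer1::INetworkDefinition& network,
    nvinfer1::IRuntime& runtime) {
  std::unique_ptr<nvinfer1::IBuilderConfig> config(
      builder.createBuilderConfig());
  // Replaces the removed setMaxWorkspaceSize(1 << 20).
  config->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kWORKSPACE, 1 << 20);
  std::unique_ptr<nvinfer1::IHostMemory> plan(
      builder.buildSerializedNetwork(network, *config));
  if (!plan) return nullptr;
  return std::unique_ptr<nvinfer1::ICudaEngine>(
      runtime.deserializeCudaEngine(plan->data(), plan->size()));
}
```

Keeping the IRuntime alive for at least as long as the engine is also why CreateTRTEngine in the test now returns the runtime alongside the engine.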
+ EXPECT_FALSE(resource->RefCountIsOne()); +diff --git a/tensorflow/compiler/tf2tensorrt/stub/nvinfer_plugin_stub.cc b/tensorflow/compiler/tf2tensorrt/stub/nvinfer_plugin_stub.cc +index 8fc3c6e478f..c083e9f00a7 100644 +--- a/tensorflow/compiler/tf2tensorrt/stub/nvinfer_plugin_stub.cc ++++ b/tensorflow/compiler/tf2tensorrt/stub/nvinfer_plugin_stub.cc +@@ -52,7 +52,7 @@ void LogFatalSymbolNotFound(const char* symbol_name) { + + #if NV_TENSORRT_MAJOR < 7 + #error TensorRT version earlier than 7 is not supported. +-#elif NV_TENSORRT_MAJOR == 7 || NV_TENSORRT_MAJOR == 8 ++#elif NV_TENSORRT_MAJOR == 7 || NV_TENSORRT_MAJOR == 8 || NV_TENSORRT_MAJOR == 10 + #include "tensorflow/compiler/tf2tensorrt/stub/NvInferPlugin_7_0.inc" + #else + #error This version of TensorRT is not supported. +diff --git a/tensorflow/compiler/tf2tensorrt/stub/nvinfer_stub.cc b/tensorflow/compiler/tf2tensorrt/stub/nvinfer_stub.cc +index 1a4964032ba..a80e338b13e 100644 +--- a/tensorflow/compiler/tf2tensorrt/stub/nvinfer_stub.cc ++++ b/tensorflow/compiler/tf2tensorrt/stub/nvinfer_stub.cc +@@ -56,6 +56,8 @@ void LogFatalSymbolNotFound(const char* symbol_name) { + #include "tensorflow/compiler/tf2tensorrt/stub/NvInfer_7_0.inc" + #elif NV_TENSORRT_MAJOR == 8 + #include "tensorflow/compiler/tf2tensorrt/stub/NvInfer_8_0.inc" ++#elif NV_TENSORRT_MAJOR == 10 ++#include "tensorflow/compiler/tf2tensorrt/stub/NvInfer_10_0.inc" + #else + #error This version of TensorRT is not supported. + #endif +diff --git a/tensorflow/compiler/tf2tensorrt/tensorrt_test.cc b/tensorflow/compiler/tf2tensorrt/tensorrt_test.cc +index 28da5e81da7..732e758bc57 100644 +--- a/tensorflow/compiler/tf2tensorrt/tensorrt_test.cc ++++ b/tensorflow/compiler/tf2tensorrt/tensorrt_test.cc +@@ -157,27 +157,50 @@ TrtUniquePtrType CreateSerializedEngine() { + #endif + + // Build the engine. ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + builder->setMaxBatchSize(1); ++#endif + TrtUniquePtrType builderConfig( + builder->createBuilderConfig()); ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + builderConfig->setMaxWorkspaceSize(1 << 20); + TrtUniquePtrType engine( + builder->buildEngineWithConfig(*network, *builderConfig)); + EXPECT_NE(engine, nullptr); + // Serialize the engine to create a model, then close everything. + TrtUniquePtrType model(engine->serialize()); ++#else // IS_TRT_VERSION_GE(10, 0, 0, 0) ++ builderConfig->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kWORKSPACE, ++ 1 << 20); ++ TrtUniquePtrType model( ++ builder->buildSerializedNetwork(*network, *builderConfig)); ++#endif // IS_TRT_VERSION_GE(10, 0, 0, 0) + return model; + } + + template +-unsigned GetBindingSizeBytes(const nvinfer1::ICudaEngine& engine, int index, ++unsigned GetBindingSizeBytes(const nvinfer1::ICudaEngine& engine, ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ int index, ++#else ++ const char* name, ++#endif + unsigned batch_size) { + unsigned vol = batch_size; ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + auto dims = engine.getBindingDimensions(index); +- int vecDim = engine.getBindingVectorizedDim(index); ++ int vecDim = engine.getBindingVectorizedDim(name); ++#else ++ auto dims = engine.getTensorShape(name); ++ int vecDim = engine.getTensorVectorizedDim(name); ++#endif + if (-1 != vecDim) // i.e., 0 != lgScalarsPerVector + { ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + int scalarsPerVec = engine.getBindingComponentsPerElement(index); ++#else ++ int scalarsPerVec = engine.getTensorComponentsPerElement(name); ++#endif + // Divide round up. 
+ dims.d[vecDim] = (dims.d[vecDim] + scalarsPerVec - 1 / scalarsPerVec); + vol *= scalarsPerVec; +@@ -192,17 +215,32 @@ void Execute(nvinfer1::IExecutionContext* context, const float* input1, + const nvinfer1::ICudaEngine& engine = context->getEngine(); + + // We have two bindings: input and output. +- ASSERT_EQ(engine.getNbBindings(), 4); ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ int num_bindings = engine.getNbBindings(); + const int input_index1 = engine.getBindingIndex(kInputTensor1); + const int input_index2 = engine.getBindingIndex(kInputTensor2); + const int output_index1 = engine.getBindingIndex(kOutputTensor1); + const int output_index2 = engine.getBindingIndex(kOutputTensor2); ++#else ++ int num_bindings = engine.getNbIOTensors(); ++#endif ++ ASSERT_EQ(num_bindings, 4); + + // Create GPU buffers and a stream +- std::vector buffers(engine.getNbBindings()); ++ std::vector buffers(num_bindings); + for (int i = 0; i < buffers.size(); i++) { +- ASSERT_EQ( +- 0, cudaMalloc(&buffers[i], GetBindingSizeBytes(engine, i, 1))); ++ ASSERT_EQ(0, cudaMalloc(&buffers[i], GetBindingSizeBytes(engine, ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ i, ++#else ++ engine ++ .getIOTensorName( ++ i), ++#endif ++ 1))); ++#if IS_TRT_VERSION_GE(10, 0, 0, 0) ++ context->setTensorAddress(engine.getIOTensorName(i), buffers[i]); ++#endif + } + + cudaStream_t stream; +@@ -213,17 +251,26 @@ void Execute(nvinfer1::IExecutionContext* context, const float* input1, + // Note that since the host buffer was not created as pinned memory, these + // async copies are turned into sync copies. So the following synchronization + // could be removed. +- ASSERT_EQ(0, cudaMemcpyAsync(buffers[input_index1], input1, sizeof(float), ++ ASSERT_EQ(0, cudaMemcpyAsync(buffers[0], input1, sizeof(float), + cudaMemcpyHostToDevice, stream)); +- ASSERT_EQ(0, cudaMemcpyAsync(buffers[input_index2], input2, sizeof(float), ++ ASSERT_EQ(0, cudaMemcpyAsync(buffers[1], input2, sizeof(float), + cudaMemcpyHostToDevice, stream)); ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + context->enqueueV2(buffers.data(), stream, nullptr); +- ASSERT_EQ(0, cudaMemcpyAsync(output1, buffers[output_index1], sizeof(float), ++#else ++ context->enqueueV3(stream); ++#endif ++ ASSERT_EQ(0, cudaMemcpyAsync(output1, buffers[2], sizeof(float), ++ cudaMemcpyDeviceToHost, stream)); ++ ASSERT_EQ(0, cudaMemcpyAsync(output2, buffers[3], ++ GetBindingSizeBytes(engine, ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ output_index2, ++#else ++ kOutputTensor2, ++#endif ++ 1), + cudaMemcpyDeviceToHost, stream)); +- ASSERT_EQ( +- 0, cudaMemcpyAsync(output2, buffers[output_index2], +- GetBindingSizeBytes(engine, output_index2, 1), +- cudaMemcpyDeviceToHost, stream)); + cudaStreamSynchronize(stream); + + // Release the stream and the buffers +@@ -253,8 +300,13 @@ TEST(TensorrtTest, BasicFunctions) { + Logger& logger = *Logger::GetLogger(); + TrtUniquePtrType runtime( + nvinfer1::createInferRuntime(logger)); +- TrtUniquePtrType engine( +- runtime->deserializeCudaEngine(model->data(), model->size(), nullptr)); ++ TrtUniquePtrType engine(runtime->deserializeCudaEngine( ++ model->data(), model->size() ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ , ++ nullptr ++#endif ++ )); + TrtUniquePtrType context( + engine->createExecutionContext()); + +@@ -262,11 +314,25 @@ TEST(TensorrtTest, BasicFunctions) { + float input1 = 1234; + float input2 = 567; + +- std::vector output1( +- GetBindingSizeBytes(*engine, 2, 1) / sizeof(float), 0.0f); ++ std::vector output1(GetBindingSizeBytes(*engine, ++#if !IS_TRT_VERSION_GE(10, 0, 0, 
0) ++ 2, ++#else ++ kOutputTensor1, ++#endif ++ 1) / ++ sizeof(float), ++ 0.0f); + +- std::vector output2( +- GetBindingSizeBytes(*engine, 3, 1) / sizeof(int32), 0.0f); ++ std::vector output2(GetBindingSizeBytes(*engine, ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ 3, ++#else ++ kOutputTensor2, ++#endif ++ 1) / ++ sizeof(int32), ++ 0.0f); + + ASSERT_EQ(output1.size(), 1); + ASSERT_EQ(output2.size(), 1); +diff --git a/tensorflow/compiler/tf2tensorrt/trt_convert_api_test.cc b/tensorflow/compiler/tf2tensorrt/trt_convert_api_test.cc +index 74415d85686..10264da8e98 100644 +--- a/tensorflow/compiler/tf2tensorrt/trt_convert_api_test.cc ++++ b/tensorflow/compiler/tf2tensorrt/trt_convert_api_test.cc +@@ -297,6 +297,7 @@ INSTANTIATE_TEST_CASE_P( + true // convert_to_static_engine + }, + {{1, 2}, {4, 2}}}, ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + // Implicit batch mode test with conver_to_static_engine=true. + TestParam{TfTrtConversionParams{ + 1 << 20, // max workspace size +@@ -310,6 +311,7 @@ INSTANTIATE_TEST_CASE_P( + true // convert_to_static_engine + }, + {{1, 2}}}, ++#endif // !IS_TRT_VERSION_GE(10, 0, 0, 0) + // Dynamic shape mode test convert_to_static_engine=false: we cannot + // save the engines, therefore we do not generate profiles. A single + // engine will be built during runtime, with profile that matches +@@ -326,7 +328,9 @@ INSTANTIATE_TEST_CASE_P( + true, // allow_build_at_runtime + false // convert_to_static_engine + }, +- {{1, 2}, {4, 2}}}, ++ {{1, 2}, {4, 2}}} ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ , + // Implicit batch mode test with convert_to_static_engine=false. + // We will have two engines in the cache to handle the two shapes. + TestParam{TfTrtConversionParams{ +@@ -340,7 +344,9 @@ INSTANTIATE_TEST_CASE_P( + true, // allow_build_at_runtime + false // convert_to_static_engine + }, +- {{1, 2}, {4, 2}}}), ++ {{1, 2}, {4, 2}}} ++#endif // !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ ), + ::testing::Values(false, true), // use_variables + ::testing::Values(false, true))); // use_function + +diff --git a/tensorflow/compiler/tf2tensorrt/utils/trt_allocator.cc b/tensorflow/compiler/tf2tensorrt/utils/trt_allocator.cc +index 832154940f3..ad3da4fbe3e 100644 +--- a/tensorflow/compiler/tf2tensorrt/utils/trt_allocator.cc ++++ b/tensorflow/compiler/tf2tensorrt/utils/trt_allocator.cc +@@ -95,7 +95,11 @@ TRTDeviceAllocator::TRTDeviceAllocator(Allocator* allocator) + VLOG(1) << "Using " << allocator->Name() << " allocator from TensorFlow"; + } + ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + void TRTDeviceAllocator::free(void* memory) noexcept { ++#else ++bool TRTDeviceAllocator::deallocate(void* memory) noexcept { ++#endif + mutex_lock lock(mu_); + VLOG(2) << "Deallocating @ " << memory; + // allocated memory adjusted for alignment, restore the original pointer +@@ -107,6 +111,9 @@ void TRTDeviceAllocator::free(void* memory) noexcept { + } + allocator_->DeallocateRaw(memory); + } ++#if IS_TRT_VERSION_GE(10, 0, 0, 0) ++ return true; ++#endif + } + + } // namespace tensorrt +diff --git a/tensorflow/compiler/tf2tensorrt/utils/trt_allocator.h b/tensorflow/compiler/tf2tensorrt/utils/trt_allocator.h +index 2812aa06457..3beaf368e68 100644 +--- a/tensorflow/compiler/tf2tensorrt/utils/trt_allocator.h ++++ b/tensorflow/compiler/tf2tensorrt/utils/trt_allocator.h +@@ -18,6 +18,7 @@ limitations under the License. 
+ + #include + ++#include "tensorflow/compiler/tf2tensorrt/common/utils.h" + #include "tensorflow/core/framework/allocator.h" + #include "tensorflow/core/platform/mutex.h" + +@@ -56,7 +57,11 @@ class TRTDeviceAllocator : public TRTBaseAllocator { + } + void* allocate(uint64_t size, uint64_t alignment, + uint32_t flags) noexcept override; ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + void free(void* memory) noexcept override; ++#else ++ bool deallocate(void* memory) noexcept override; ++#endif + + private: + mutex mu_; +diff --git a/tensorflow/compiler/tf2tensorrt/utils/trt_engine_utils.cc b/tensorflow/compiler/tf2tensorrt/utils/trt_engine_utils.cc +index 798ebd8bd0c..38ea076fe5b 100755 +--- a/tensorflow/compiler/tf2tensorrt/utils/trt_engine_utils.cc ++++ b/tensorflow/compiler/tf2tensorrt/utils/trt_engine_utils.cc +@@ -38,9 +38,16 @@ using absl::StrCat; + + ExecutionContext ExecutionContext::Create(nvinfer1::ICudaEngine* cuda_engine) { + bool has_int32_output = false; ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + for (int i = 0; i < cuda_engine->getNbBindings(); i++) { + if (!cuda_engine->bindingIsInput(i) && + cuda_engine->getBindingDataType(i) == nvinfer1::DataType::kINT32) { ++#else ++ for (int i = 0; i < cuda_engine->getNbIOTensors(); i++) { ++ const char* tensor_name = cuda_engine->getIOTensorName(i); ++ if (cuda_engine->getTensorIOMode(tensor_name) == nvinfer1::TensorIOMode::kOUTPUT && ++ cuda_engine->getTensorDataType(tensor_name) == nvinfer1::DataType::kINT32) { ++#endif + has_int32_output = true; + break; + } +@@ -59,14 +66,24 @@ ExecutionContext ExecutionContext::Create(nvinfer1::ICudaEngine* cuda_engine) { + + Status GetTrtBindingShape(const nvinfer1::ICudaEngine* cuda_engine, + const nvinfer1::IExecutionContext* execution_context, +- int binding_index, bool use_implicit_batch, +- int batch_size, TensorShape& shape) { ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ int binding_index, ++#else ++ const char* tensor_name, ++#endif ++ bool use_implicit_batch, int batch_size, ++ TensorShape& shape) { + tensorflow::profiler::TraceMe activity( + "getBindingDimensions", tensorflow::profiler::TraceMeLevel::kInfo); + nvinfer1::Dims dims = + use_implicit_batch ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + ? cuda_engine->getBindingDimensions(binding_index) + : execution_context->getBindingDimensions(binding_index); ++#else ++ ? 
cuda_engine->getTensorShape(tensor_name) ++ : execution_context->getTensorShape(tensor_name); ++#endif + if (!use_implicit_batch) { + if (dims.nbDims == -1) { + return errors::Internal( +@@ -80,39 +97,83 @@ Status GetTrtBindingShape(const nvinfer1::ICudaEngine* cuda_engine, + return OkStatus(); + } + ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + Status SetupBindings(nvinfer1::ICudaEngine* cuda_engine, const Tensor& tensor, + std::vector& buffers, int binding_index) { ++#else ++Status SetupBindings(nvinfer1::ICudaEngine* cuda_engine, ++ nvinfer1::IExecutionContext* execution_context, ++ const Tensor& tensor, const char* tensor_name) { ++#endif + tensorflow::profiler::TraceMe activity( + "SetBindingPointers", tensorflow::profiler::TraceMeLevel::kInfo); ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + const auto dtype = cuda_engine->getBindingDataType(binding_index); ++#else ++ const auto dtype = cuda_engine->getTensorDataType(tensor_name); ++#endif + VLOG(2) << "<<<<<<<<< SetupBindings with dtype = " << (int)dtype; + switch (dtype) { + case nvinfer1::DataType::kFLOAT: ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + buffers[binding_index] = const_cast(tensor.flat().data()); ++#else ++ execution_context->setTensorAddress( ++ tensor_name, const_cast(tensor.flat().data())); ++#endif + break; + case nvinfer1::DataType::kHALF: ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + buffers[binding_index] = + const_cast(tensor.flat().data()); ++#else ++ execution_context->setTensorAddress( ++ tensor_name, ++ const_cast(tensor.flat().data())); ++#endif + break; + case nvinfer1::DataType::kINT8: + return errors::Internal("INT8 inputs are not supported yet!"); + case nvinfer1::DataType::kINT32: ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + buffers[binding_index] = const_cast(tensor.flat().data()); ++#else ++ execution_context->setTensorAddress( ++ tensor_name, const_cast(tensor.flat().data())); ++#endif + break; + #if IS_TRT_VERSION_GE(8, 2, 0, 0) + case nvinfer1::DataType::kBOOL: ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + buffers[binding_index] = const_cast(tensor.flat().data()); ++#else ++ execution_context->setTensorAddress( ++ tensor_name, const_cast(tensor.flat().data())); ++#endif + break; + #endif + #if IS_TRT_VERSION_GE(8, 5, 0, 0) + case nvinfer1::DataType::kUINT8: ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + buffers[binding_index] = const_cast(tensor.flat().data()); ++#else // IS_TRT_VERSION_GE(10, 0, 0, 0) ++ execution_context->setTensorAddress( ++ tensor_name, const_cast(tensor.flat().data())); ++#endif // !IS_TRT_VERSION_GE(10, 0, 0, 0) + break; + #endif + #if IS_TRT_VERSION_GE(8, 6, 0, 0) + case nvinfer1::DataType::kFP8: + return errors::Internal("FP8 inputs are not supported yet!"); + #endif ++#if IS_TRT_VERSION_GE(10, 0, 0, 0) ++ case nvinfer1::DataType::kBF16: ++ return errors::Internal("BF16 inputs are not supported yet!"); ++ case nvinfer1::DataType::kINT64: ++ return errors::Internal("INT64 inputs are not supported yet!"); ++ case nvinfer1::DataType::kINT4: ++ return errors::Internal("INT4 inputs are not supported yet!"); ++#endif // IS_TRT_VERSION_GE(10, 0, 0, 0) + default: + return errors::Internal("Unknown TRT data type: ", + static_cast(dtype)); +@@ -124,8 +185,10 @@ Status SetupBindings(nvinfer1::ICudaEngine* cuda_engine, const Tensor& tensor, + Status SetTrtEngineInputs(nvinfer1::ICudaEngine* cuda_engine, + nvinfer1::IExecutionContext* execution_context, + const int trt_profile_idx, +- std::vector& buffers, bool use_implicit_batch, +- int num_batch, ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ std::vector& buffers, ++#endif ++ bool 
use_implicit_batch, int num_batch, + const TrtShapeOptimizationProfile& profiles, + OpKernelContext* ctx, const DataVec* input_vec) { + tensorflow::profiler::TraceMe activity( +@@ -143,6 +206,7 @@ Status SetTrtEngineInputs(nvinfer1::ICudaEngine* cuda_engine, + + const string input_name = + ctx ? StrCat(IONamePrefixes::kInputPHName, i) : input_vec->at(i).name; ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + int binding_index; + Status status = GetTrtBindingIndex(input_name.c_str(), trt_profile_idx, + cuda_engine, &binding_index); +@@ -155,6 +219,7 @@ Status SetTrtEngineInputs(nvinfer1::ICudaEngine* cuda_engine, + VLOG(2) << "Skipping pruned input " << input_name; + continue; + } ++#endif // !IS_TRT_VERSION_GE(10, 0, 0, 0) + + if (use_implicit_batch && ctx) { + // Ensure all inputs have the same batch size +@@ -168,16 +233,28 @@ Status SetTrtEngineInputs(nvinfer1::ICudaEngine* cuda_engine, + // Set known input dimensions. This is necessary because TRT network + // could be made with dynamic dimensions. + if (!use_implicit_batch) { +- TF_RETURN_IF_ERROR(profiles.SetInputShapeBinding( +- i, binding_index, cuda_engine, execution_context)); ++ TF_RETURN_IF_ERROR(profiles.SetInputShapeBinding(i, ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ binding_index, ++#else ++ input_name.c_str(), ++#endif ++ cuda_engine, ++ execution_context)); + +- if (cuda_engine->isExecutionBinding(binding_index)) { ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ if (cuda_engine->isExecutionBinding(binding_index)) ++#else ++ if (true) ++#endif ++ { + tensorflow::profiler::TraceMe activity( + "SetTrtEngineInputs::setBindingDimensions", + tensorflow::profiler::TraceMeLevel::kInfo); + auto adap = DimsAdapter::Create(input_shape); + TRT_ENSURE_OK(adap); + nvinfer1::Dims trt_dims = adap->AsTrtDims(); ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + if (execution_context->getBindingDimensions(binding_index) != + trt_dims) { + VLOG(2) << "Setting binding dimensions for idx " << binding_index; +@@ -190,11 +267,30 @@ Status SetTrtEngineInputs(nvinfer1::ICudaEngine* cuda_engine, + "Binding dimension does not fit selected profile."); + } + } ++#else // IS_TRT_VERSION_GE(10, 0, 0, 0) ++ if (execution_context->getTensorShape(input_name.c_str()) != trt_dims) { ++ VLOG(2) << "Setting binding dimensions for input " << input_name; ++ bool ret = ++ execution_context->setInputShape(input_name.c_str(), trt_dims); ++ if (!ret) { ++ VLOG(2) << "Error setting engine input " << input_name << " " ++ << DebugString(trt_dims); ++ return errors::Internal( ++ "Binding dimension does not fit selected profile."); ++ } ++ } ++#endif // IS_TRT_VERSION_GE(10, 0, 0, 0) + } + } + // Setup input bindings. 
+ TF_RETURN_IF_ERROR( +- SetupBindings(cuda_engine, input_tensor, buffers, binding_index)); ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ SetupBindings(cuda_engine, input_tensor, buffers, binding_index) ++#else ++ SetupBindings(cuda_engine, execution_context, input_tensor, ++ input_name.c_str()) ++#endif ++ ); + } + + // Ensure all network dynamic dimensions (if any) are set in execution +@@ -212,7 +308,10 @@ Status SetTrtEngineInputs(nvinfer1::ICudaEngine* cuda_engine, + + Status SetTrtEngineOutputs(nvinfer1::ICudaEngine* cuda_engine, + nvinfer1::IExecutionContext* execution_context, +- int trt_profile_idx, std::vector& buffers, ++ int trt_profile_idx, ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ std::vector& buffers, ++#endif + bool use_implicit_batch, int batch_size, + OpKernelContext* ctx, DataVec* outputs) { + tensorflow::profiler::TraceMe activity( +@@ -222,15 +321,22 @@ Status SetTrtEngineOutputs(nvinfer1::ICudaEngine* cuda_engine, + for (int i = 0; i < n_outputs; i++) { + const string output_name = + ctx ? StrCat(IONamePrefixes::kOutputPHName, i) : outputs->at(i).name; ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + int binding_index; + TF_RETURN_IF_ERROR(GetTrtBindingIndex(output_name.c_str(), trt_profile_idx, + cuda_engine, &binding_index)); ++#endif + + // Get TRT output shapes for allocating output memory. + TensorShape output_shape; + TF_RETURN_IF_ERROR(GetTrtBindingShape(cuda_engine, execution_context, +- binding_index, use_implicit_batch, +- batch_size, output_shape)); ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ binding_index, ++#else ++ output_name.c_str(), ++#endif ++ use_implicit_batch, batch_size, ++ output_shape)); + + // Allocate output tensor of TRTEngineOp. + Tensor* output_tensor = nullptr; +@@ -255,23 +361,40 @@ Status SetTrtEngineOutputs(nvinfer1::ICudaEngine* cuda_engine, + + // Set up output bindings. 
+ TF_RETURN_IF_ERROR( ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + SetupBindings(cuda_engine, *output_tensor, buffers, binding_index)); ++#else ++ SetupBindings(cuda_engine, execution_context, *output_tensor, ++ output_name.c_str())); ++#endif + } + return OkStatus(); + } + + Status TrtEnqueue(nvinfer1::IExecutionContext* execution_context, +- std::vector& buffers, cudaStream_t stream, ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ std::vector& buffers, ++#endif ++ cudaStream_t stream, + bool use_implicit_batch, int batch_size) { + tensorflow::profiler::TraceMe activity( + "TrtEnqueue", tensorflow::profiler::TraceMeLevel::kInfo); + bool ret = false; + if (use_implicit_batch) { ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + ret = execution_context->enqueue(batch_size, &buffers[0], stream, nullptr); + VLOG(1) << "Called IExecutionContext::enqueue"; ++#else ++ return errors::Internal("Implicit batch is not supported with TensorRT >=10"); ++#endif + } else { ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + ret = execution_context->enqueueV2(&buffers[0], stream, nullptr); + VLOG(1) << "Called IExecutionContext::enqueueV2"; ++#else ++ ret = execution_context->enqueueV3(stream); ++ VLOG(1) << "Called IExecutionContext::enqueueV3"; ++#endif + } + if (!ret) { + return errors::Internal("Failed to enqueue batch for TRT engine"); +diff --git a/tensorflow/compiler/tf2tensorrt/utils/trt_engine_utils.h b/tensorflow/compiler/tf2tensorrt/utils/trt_engine_utils.h +index b0935afb5b2..0b0293f02fe 100644 +--- a/tensorflow/compiler/tf2tensorrt/utils/trt_engine_utils.h ++++ b/tensorflow/compiler/tf2tensorrt/utils/trt_engine_utils.h +@@ -42,7 +42,10 @@ ExecutionContext CreateExecutionContext(nvinfer1::ICudaEngine* cuda_engine); + Status SetTrtEngineInputs(nvinfer1::ICudaEngine* cuda_engine, + nvinfer1::IExecutionContext* execution_context, + const int trt_profile_idx, +- std::vector& buffers, bool use_implicit_batch, ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ std::vector& buffers, ++#endif ++ bool use_implicit_batch, + int num_batch, + const TrtShapeOptimizationProfile& profiles, + OpKernelContext* ctx = nullptr, +@@ -63,7 +66,10 @@ Status GetTrtBindingShape(const nvinfer1::ICudaEngine* cuda_engine, + // the Tensors in outputs are already allocated. + Status SetTrtEngineOutputs(nvinfer1::ICudaEngine* cuda_engine, + nvinfer1::IExecutionContext* execution_context, +- int trt_profile_idx, std::vector& buffers, ++ int trt_profile_idx, ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ std::vector& buffers, ++#endif + bool use_implicit_batch, int batch_size = 0, + OpKernelContext* ctx = nullptr, + DataVec* outputs = nullptr); +@@ -71,7 +77,10 @@ Status SetTrtEngineOutputs(nvinfer1::ICudaEngine* cuda_engine, + // Enqueues TensorRT inference job. The batch_size argument is only relevant in + // implicit batch mode. + Status TrtEnqueue(nvinfer1::IExecutionContext* execution_context, +- std::vector& buffers, cudaStream_t stream, ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ std::vector& buffers, ++#endif ++ cudaStream_t stream, + bool use_implicit_batch, int batch_size = 1); + + } // namespace tensorrt +diff --git a/tensorflow/compiler/tf2tensorrt/utils/trt_lru_cache.h b/tensorflow/compiler/tf2tensorrt/utils/trt_lru_cache.h +index 31c3b9c9a90..b4269d24070 100644 +--- a/tensorflow/compiler/tf2tensorrt/utils/trt_lru_cache.h ++++ b/tensorflow/compiler/tf2tensorrt/utils/trt_lru_cache.h +@@ -120,16 +120,19 @@ class LRUCache { + + struct EngineContext { + EngineContext() {} // Creates an empty context. 
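(Editor's note, not part of the patch: the TrtEnqueue changes above replace the buffers-vector enqueueV2 call with TensorRT 10's enqueueV3, where each device pointer is first attached to its tensor by name through IExecutionContext::setTensorAddress, and dynamic input shapes are set with setInputShape. A minimal sketch of that execution flow follows; the function name and the `device_buffers` map are hypothetical, and buffer allocation and shape setup are assumed to have happened elsewhere.)

```cpp
#include <map>
#include <string>

#include <cuda_runtime_api.h>

#include "NvInfer.h"

// Runs one inference with TensorRT >= 10: attach each device buffer to its
// tensor name, then launch with enqueueV3 instead of enqueueV2.
bool RunTrt10(const nvinfer1::ICudaEngine& engine,
              nvinfer1::IExecutionContext& context,
              const std::map<std::string, void*>& device_buffers,
              cudaStream_t stream) {
  for (int i = 0; i < engine.getNbIOTensors(); ++i) {
    const char* name = engine.getIOTensorName(i);
    if (!context.setTensorAddress(name, device_buffers.at(name))) {
      return false;
    }
  }
  return context.enqueueV3(stream);
}
```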
+- EngineContext(TrtUniquePtrType&& cuda_engine, ++ EngineContext(TrtUniquePtrType runtime, ++ TrtUniquePtrType&& cuda_engine, + ExecutionContext&& execution_context) +- : cuda_engine_(std::move(cuda_engine)) { ++ : runtime_(std::move(runtime)), cuda_engine_(std::move(cuda_engine)) { + execution_contexts.push_back(std::move(execution_context)); + device_memory_size_ = + cuda_engine_ ? cuda_engine_->getDeviceMemorySize() : 0; + } +- EngineContext(TrtUniquePtrType&& cuda_engine, ++ EngineContext(TrtUniquePtrType runtime, ++ TrtUniquePtrType&& cuda_engine, + std::vector&& execution_contexts) +- : cuda_engine_(std::move(cuda_engine)), ++ : runtime_(std::move(runtime)), ++ cuda_engine_(std::move(cuda_engine)), + execution_contexts(std::move(execution_contexts)) { + device_memory_size_ = + cuda_engine_ ? cuda_engine_->getDeviceMemorySize() : 0; +@@ -137,6 +140,8 @@ struct EngineContext { + + mutex mu; + ++ nvinfer1::IRuntime* GetRuntime() { return runtime_.get(); } ++ + nvinfer1::ICudaEngine* GetCudaEngine() { return cuda_engine_.get(); } + + Status GetExecutionContext(int idx, nvinfer1::IExecutionContext** exec_ctx, +@@ -160,6 +165,8 @@ struct EngineContext { + size_t GetDeviceMemorySize() { return device_memory_size_; } + + private: ++ // Note: Must out-live the engine object. ++ TrtUniquePtrType runtime_; + // Note: declaration has to come before execution_contexts, to ensure proper + // order of destruction. + TrtUniquePtrType cuda_engine_; +diff --git a/tensorflow/compiler/tf2tensorrt/utils/trt_shape_optimization_profiles.cc b/tensorflow/compiler/tf2tensorrt/utils/trt_shape_optimization_profiles.cc +index 57b222826b1..73b38c7032d 100644 +--- a/tensorflow/compiler/tf2tensorrt/utils/trt_shape_optimization_profiles.cc ++++ b/tensorflow/compiler/tf2tensorrt/utils/trt_shape_optimization_profiles.cc +@@ -431,12 +431,16 @@ void TrtShapeOptimizationProfile::SetShapeTensorMask( + const nvinfer1::ICudaEngine* engine, int n_inputs) { + is_shape_tensor_.resize(n_inputs, false); + for (int i = 0; i < n_inputs; i++) { ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + int binding_index; + Status status = GetTrtBindingIndex(i, 0, engine, &binding_index); + if (!status.ok()) { + continue; + } + is_shape_tensor_[i] = engine->isShapeBinding(binding_index); ++#else // IS_TRT_VERSION_GE(10, 0, 0, 0) ++ is_shape_tensor_[i] = engine->isShapeInferenceIO(GetTrtInputName(i).c_str()); ++#endif // IS_TRT_VERSION_GE(10, 0, 0, 0) + if (is_shape_tensor_[i]) { + VLOG(2) << "Found shape tensor at " << i; + } +@@ -516,7 +520,11 @@ Status TrtShapeOptimizationProfile::CreateExecutionContexts( + // set optimizationprofiles. + // - The 0th profile is set implicitly for the first execution context + // therefore we do not need to set. 
++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + if (!context->setOptimizationProfile(i)) { ++#else ++ if (!context->setOptimizationProfileAsync(i, /*stream=*/0)) { ++#endif + return errors::Internal("Could not set TRT optimization profile."); + } + } +@@ -528,24 +536,47 @@ Status TrtShapeOptimizationProfile::CreateExecutionContexts( + } + + Status TrtShapeOptimizationProfile::SetInputShapeBinding( +- int input_index, int binding_index, nvinfer1::ICudaEngine* cuda_engine, ++ int input_index, ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ int binding_index, ++#else ++ const char* tensor_name, ++#endif ++ nvinfer1::ICudaEngine* cuda_engine, + nvinfer1::IExecutionContext* exec_context) const { + tensorflow::profiler::TraceMe activity( + "TrtShapeOptimizationProfile::SetInputShapeBinding", + tensorflow::profiler::TraceMeLevel::kInfo); ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + if (cuda_engine->isShapeBinding(binding_index)) { ++#else ++ if (cuda_engine->isShapeInferenceIO(tensor_name)) { ++#endif + // Input shape binding data has to be in host memory. That is the reason + // we can't use input_tensor.flat().data(). which contains the same + // values in device memory. Instead, we use data that was copied to host + // by CollectShapeValues. ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + VLOG(2) << "Setting input shape binding for idx " << binding_index ++#else ++ VLOG(2) << "Setting input shape binding for IO tensor " << tensor_name ++#endif + << ", with values " + << DebugString(actual_shape_values_.at(input_index)); ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + bool ret = exec_context->setInputShapeBinding( + binding_index, actual_shape_values_.at(input_index).d); ++#else ++ bool ret = exec_context->setInputTensorAddress( ++ tensor_name, actual_shape_values_.at(input_index).d); ++#endif + if (!ret) { +- return errors::Internal("Could not set input shape binding for idx ", +- binding_index); ++ return errors::Internal( ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ "Could not set input shape binding for idx ", binding_index); ++#else ++ "Could not set input shape binding for tensor ", tensor_name); ++#endif + } + } + return OkStatus(); +@@ -553,16 +584,37 @@ Status TrtShapeOptimizationProfile::SetInputShapeBinding( + + // If binding_idx is a shape tensor, then returns the associated min/max/opt + // shape values from prof_idx. +-nvinfer1::Dims GetDimsFromShapeVal(int prof_idx, int binding_idx, ++nvinfer1::Dims GetDimsFromShapeVal(int prof_idx, ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) ++ int binding_idx, ++#else ++ const char* tensor_name, ++#endif + nvinfer1::OptProfileSelector selector, + const nvinfer1::ICudaEngine* engine) { ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + if (engine->isShapeBinding(binding_idx)) { ++#else ++ if (engine->isShapeInferenceIO(tensor_name)) { ++#endif + const int32* shape_val_ptr = ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + engine->getProfileShapeValues(binding_idx, prof_idx, selector); ++#else ++ engine->getProfileTensorValues(tensor_name, prof_idx, selector); ++#endif + if (shape_val_ptr) { + VLOG(2) << "Found shape value in prof " << prof_idx << ", binding " ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + << binding_idx; ++#else ++ << tensor_name; ++#endif ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + nvinfer1::Dims dims = engine->getBindingDimensions(binding_idx); ++#else ++ nvinfer1::Dims dims = engine->getTensorShape(tensor_name); ++#endif + // nbDims == 0 represent scalar, -1 represents invalid dim + int n_values = (dims.nbDims == 0) ? 
1 : dims.d[0]; + if (n_values > 0) { +@@ -580,6 +632,7 @@ Status TrtShapeOptimizationProfile::SetPrunedMask( + is_pruned_input_.resize(n_network_inputs); + absl::c_fill(is_pruned_input_, false); + for (int j = 0; j < n_network_inputs; j++) { ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + int binding_idx; + Status status = GetTrtBindingIndex(j, 0, engine, &binding_idx); + if (!status.ok()) { +@@ -590,6 +643,13 @@ Status TrtShapeOptimizationProfile::SetPrunedMask( + VLOG(2) << "Skipping pruned input " << j; + continue; + } ++#else // IS_TRT_VERSION_GE(10, 0, 0, 0) ++ if (engine->getTensorIOMode(GetTrtInputName(j).c_str()) == ++ nvinfer1::TensorIOMode::kNONE) { ++ is_pruned_input_[j] = true; ++ VLOG(2) << "Skipping pruned input " << j; ++ } ++#endif // IS_TRT_VERSION_GE(10, 0, 0, 0) + } + return OkStatus(); + } +@@ -601,10 +661,12 @@ Status TrtShapeOptimizationProfile::RestoreProfiles( + // We do not need to restore profiles for an empty engine. + return OkStatus(); + } ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + if (engine->hasImplicitBatchDimension()) { + // Nothing to do, we cannot have profiles in implicit batch mode. + return OkStatus(); + } ++#endif + int n_profiles = engine->getNbOptimizationProfiles(); + need_profiles_ = n_profiles > 0; + int n_inputs = GetNumberOfEngineInputs(engine); +@@ -626,6 +688,7 @@ Status TrtShapeOptimizationProfile::RestoreProfiles( + // restore shape values + for (int j = 0; j < n_network_inputs; j++) { + if (is_pruned_input_[j]) continue; ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + int binding_idx; + TF_RETURN_IF_ERROR(GetTrtBindingIndex(j, 0, engine, &binding_idx)); + +@@ -635,16 +698,36 @@ Status TrtShapeOptimizationProfile::RestoreProfiles( + binding_idx, prof_idx, nvinfer1::OptProfileSelector::kMAX); + nvinfer1::Dims opt = engine->getProfileDimensions( + binding_idx, prof_idx, nvinfer1::OptProfileSelector::kOPT); ++#else // IS_TRT_VERSION_GE(10, 0, 0, 0) ++ string tensor_name = GetTrtInputName(j); ++ ++ nvinfer1::Dims min = engine->getProfileShape( ++ tensor_name.c_str(), prof_idx, nvinfer1::OptProfileSelector::kMIN); ++ nvinfer1::Dims max = engine->getProfileShape( ++ tensor_name.c_str(), prof_idx, nvinfer1::OptProfileSelector::kMAX); ++ nvinfer1::Dims opt = engine->getProfileShape( ++ tensor_name.c_str(), prof_idx, nvinfer1::OptProfileSelector::kOPT); ++#endif // IS_TRT_VERSION_GE(10, 0, 0, 0) ++ + cfg.min[j] = min; + cfg.max[j] = max; + cfg.opt[j] = opt; + ++#if !IS_TRT_VERSION_GE(10, 0, 0, 0) + cfg.min[j + n_inputs] = GetDimsFromShapeVal( + prof_idx, binding_idx, nvinfer1::OptProfileSelector::kMIN, engine); + cfg.max[j + n_inputs] = GetDimsFromShapeVal( + prof_idx, binding_idx, nvinfer1::OptProfileSelector::kMAX, engine); + cfg.opt[j + n_inputs] = GetDimsFromShapeVal( + prof_idx, binding_idx, nvinfer1::OptProfileSelector::kOPT, engine); ++#else // IS_TRT_VERSION_GE(10, 0, 0, 0) ++ cfg.min[j + n_inputs] = GetDimsFromShapeVal( ++ prof_idx, tensor_name.c_str(), nvinfer1::OptProfileSelector::kMIN, engine); ++ cfg.max[j + n_inputs] = GetDimsFromShapeVal( ++ prof_idx, tensor_name.c_str(), nvinfer1::OptProfileSelector::kMAX, engine); ++ cfg.opt[j + n_inputs] = GetDimsFromShapeVal( ++ prof_idx, tensor_name.c_str(), nvinfer1::OptProfileSelector::kOPT, engine); ++#endif // IS_TRT_VERSION_GE(10, 0, 0, 0) + } + VLOG(2) << "Restored profile " << cfg.DebugString(); + profiles_.push_back(std::move(cfg)); +diff --git a/tensorflow/compiler/tf2tensorrt/utils/trt_shape_optimization_profiles.h b/tensorflow/compiler/tf2tensorrt/utils/trt_shape_optimization_profiles.h +index 
e2d8fdb655b..7d556c34d2e 100644
+--- a/tensorflow/compiler/tf2tensorrt/utils/trt_shape_optimization_profiles.h
++++ b/tensorflow/compiler/tf2tensorrt/utils/trt_shape_optimization_profiles.h
+@@ -111,12 +111,48 @@ struct OptimizationProfileConfig {
+         int idx = i + n_inputs_tf;
+         VLOG(2) << "Setting shape values for " << name << ", "
+                 << ::tensorflow::tensorrt::DebugString(opt[idx]);
++#if !IS_TRT_VERSION_GE(10, 0, 0, 0)
+         profile->setShapeValues(name, nvinfer1::OptProfileSelector::kMIN,
+                                 min[idx].d, min[idx].nbDims);
+         profile->setShapeValues(name, nvinfer1::OptProfileSelector::kOPT,
+                                 opt[idx].d, opt[idx].nbDims);
+         profile->setShapeValues(name, nvinfer1::OptProfileSelector::kMAX,
+                                 max[idx].d, max[idx].nbDims);
++#else
++        std::vector<int32_t> vals32;
++        vals32.resize(min[idx].nbDims);
++        for (int dim = 0; dim < min[idx].nbDims; ++dim) {
++          vals32[dim] = static_cast<int32_t>(min[idx].d[dim]);
++          if (vals32[dim] != min[idx].d[dim]) {
++            return errors::Internal("min value does not fit in int32: ",
++                                    min[idx].d[dim]);
++          }
++        }
++        profile->setShapeValues(name, nvinfer1::OptProfileSelector::kMIN,
++                                vals32.data(), min[idx].nbDims);
++
++        vals32.resize(opt[idx].nbDims);
++        for (int dim = 0; dim < opt[idx].nbDims; ++dim) {
++          vals32[dim] = static_cast<int32_t>(opt[idx].d[dim]);
++          if (vals32[dim] != opt[idx].d[dim]) {
++            return errors::Internal("opt value does not fit in int32: ",
++                                    opt[idx].d[dim]);
++          }
++        }
++        profile->setShapeValues(name, nvinfer1::OptProfileSelector::kOPT,
++                                vals32.data(), opt[idx].nbDims);
++
++        vals32.resize(max[idx].nbDims);
++        for (int dim = 0; dim < max[idx].nbDims; ++dim) {
++          vals32[dim] = static_cast<int32_t>(max[idx].d[dim]);
++          if (vals32[dim] != max[idx].d[dim]) {
++            return errors::Internal("max value does not fit in int32: ",
++                                    max[idx].d[dim]);
++          }
++        }
++        profile->setShapeValues(name, nvinfer1::OptProfileSelector::kMAX,
++                                vals32.data(), max[idx].nbDims);
++#endif
+       }
+       VLOG(2) << "Setting input dimensions for " << name << ", "
+               << ::tensorflow::tensorrt::DebugString(opt[i]);
+@@ -241,7 +277,12 @@ class TrtShapeOptimizationProfile {
+   Status CreateExecutionContexts(nvinfer1::ICudaEngine* engine,
+                                  std::vector<ExecutionContext>* exec_contexts);
+ 
+-  Status SetInputShapeBinding(int input_index, int binding_index,
++  Status SetInputShapeBinding(int input_index,
++#if !IS_TRT_VERSION_GE(10, 0, 0, 0)
++                              int binding_index,
++#else
++                              const char* tensor_name,
++#endif
+                               nvinfer1::ICudaEngine* cuda_engine,
+                               nvinfer1::IExecutionContext* exec_context) const;
+ 
+diff --git a/tensorflow/compiler/tf2tensorrt/utils/trt_shape_optimization_profiles_test.cc b/tensorflow/compiler/tf2tensorrt/utils/trt_shape_optimization_profiles_test.cc
+index 87e17a9fc3f..a4d53b683e4 100644
+--- a/tensorflow/compiler/tf2tensorrt/utils/trt_shape_optimization_profiles_test.cc
++++ b/tensorflow/compiler/tf2tensorrt/utils/trt_shape_optimization_profiles_test.cc
+@@ -77,13 +77,21 @@ class TrtShapeOptimizationProfileTest
+  protected:
+   TrtShapeOptimizationProfileTest() {
+     strategy_ = GetParam();
++#if IS_TRT_VERSION_GE(10, 0, 0, 0)
++    runtime_.reset(nvinfer1::createInferRuntime(logger_));
++#endif
+     builder_ = TrtUniquePtrType<nvinfer1::IBuilder>(
+         nvinfer1::createInferBuilder(logger_));
+     network_ = TrtUniquePtrType<nvinfer1::INetworkDefinition>(
+         builder_->createNetworkV2(flags_));
+     builder_config_ = TrtUniquePtrType<nvinfer1::IBuilderConfig>(
+         builder_->createBuilderConfig());
++#if !IS_TRT_VERSION_GE(10, 0, 0, 0)
+     builder_config_->setMaxWorkspaceSize(1 << 10);
++#else  // IS_TRT_VERSION_GE(10, 0, 0, 0)
++    builder_config_->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kWORKSPACE,
++                                        1 << 10);
++#endif  // IS_TRT_VERSION_GE(10, 0, 0, 0)
+   }
+ 
+   // Defines a simple network: output = input1 + input2.
+@@ -117,12 +125,24 @@ class TrtShapeOptimizationProfileTest
+     int prof_idx = exec_contexts_[idx]->getOptimizationProfile();
+     ASSERT_GE(prof_idx, 0);
+     for (int j = 0; j < dimvec.size(); j++) {
++#if !IS_TRT_VERSION_GE(10, 0, 0, 0)
+       nvinfer1::Dims min = engine->getProfileDimensions(
+           j, prof_idx, nvinfer1::OptProfileSelector::kMIN);
+       nvinfer1::Dims max = engine->getProfileDimensions(
+           j, prof_idx, nvinfer1::OptProfileSelector::kMAX);
+       nvinfer1::Dims opt = engine->getProfileDimensions(
+           j, prof_idx, nvinfer1::OptProfileSelector::kOPT);
++#else  // IS_TRT_VERSION_GE(10, 0, 0, 0)
++      nvinfer1::Dims min =
++          engine->getProfileShape(engine->getIOTensorName(j), prof_idx,
++                                  nvinfer1::OptProfileSelector::kMIN);
++      nvinfer1::Dims max =
++          engine->getProfileShape(engine->getIOTensorName(j), prof_idx,
++                                  nvinfer1::OptProfileSelector::kMAX);
++      nvinfer1::Dims opt =
++          engine->getProfileShape(engine->getIOTensorName(j), prof_idx,
++                                  nvinfer1::OptProfileSelector::kOPT);
++#endif  // IS_TRT_VERSION_GE(10, 0, 0, 0)
+ 
+       // This should always hold.
+       EXPECT_TRUE(DimsContained(dimvec[j], min, max));
+@@ -135,6 +155,7 @@ class TrtShapeOptimizationProfileTest
+   }
+ 
+   Logger& logger_ = *Logger::GetLogger();
++  TrtUniquePtrType<nvinfer1::IRuntime> runtime_;
+   TrtUniquePtrType<nvinfer1::IBuilder> builder_;
+   TrtUniquePtrType<nvinfer1::INetworkDefinition> network_;
+   TrtUniquePtrType<nvinfer1::IBuilderConfig> builder_config_;
+@@ -168,8 +189,16 @@ TEST_P(TrtShapeOptimizationProfileTest, Static) {
+   TF_CHECK_OK(profile.ConfigureBuilder(builder_.get(), builder_config_.get(),
+                                        network_.get()));
+ 
++#if !IS_TRT_VERSION_GE(10, 0, 0, 0)
+   engine = TrtUniquePtrType<nvinfer1::ICudaEngine>(
+       builder_->buildEngineWithConfig(*network_, *builder_config_));
++#else  // IS_TRT_VERSION_GE(10, 0, 0, 0)
++  TrtUniquePtrType<nvinfer1::IHostMemory> serialized(
++      builder_->buildSerializedNetwork(*network_, *builder_config_));
++  engine.reset(
++      runtime_->deserializeCudaEngine(serialized->data(), serialized->size()));
++#endif  // IS_TRT_VERSION_GE(10, 0, 0, 0)
++
+   EXPECT_NE(nullptr, engine);
+   TF_CHECK_OK(profile.CreateExecutionContexts(engine.get(), &exec_contexts_));
+   // A single execution context should be created for a graph with static input.
+@@ -213,8 +242,16 @@ TEST_P(TrtShapeOptimizationProfileTest, Dynamic) {
+   // Configure and build engine.
+   TF_CHECK_OK(profile.ConfigureBuilder(builder_.get(), builder_config_.get(),
+                                        network_.get()));
++#if !IS_TRT_VERSION_GE(10, 0, 0, 0)
+   engine = TrtUniquePtrType<nvinfer1::ICudaEngine>(
+       builder_->buildEngineWithConfig(*network_.get(), *builder_config_.get()));
++#else  // IS_TRT_VERSION_GE(10, 0, 0, 0)
++  TrtUniquePtrType<nvinfer1::IHostMemory> serialized(
++      builder_->buildSerializedNetwork(*network_.get(),
++                                       *builder_config_.get()));
++  engine.reset(
++      runtime_->deserializeCudaEngine(serialized->data(), serialized->size()));
++#endif  // IS_TRT_VERSION_GE(10, 0, 0, 0)
+   ASSERT_NE(nullptr, engine);
+ 
+   TF_CHECK_OK(profile.CreateExecutionContexts(engine.get(), &exec_contexts_));
+diff --git a/tensorflow/compiler/tf2tensorrt/utils/trt_testutils.h b/tensorflow/compiler/tf2tensorrt/utils/trt_testutils.h
+index e0b9a0366a5..bbbe9512a5c 100644
+--- a/tensorflow/compiler/tf2tensorrt/utils/trt_testutils.h
++++ b/tensorflow/compiler/tf2tensorrt/utils/trt_testutils.h
+@@ -83,7 +83,7 @@ nvinfer1::Dims CreateDims(const std::vector<int>& d);
+ // matches nvinfer1::Dims to initializer list or vector of ints
+ // Example: EXPECT_THAT(my_dims, DimsAreArray({1, 2, 3}))
+ MATCHER_P(DimsAreArrayHelper, array_value,
+-          absl::StrFormat("%s [%s]", negation ? "are" : "are not",
++          absl::StrFormat("%s [%s]", negation ? "are not" : "are",
+                           ::testing::PrintToString(array_value))) {
+   if (arg.nbDims != array_value.size()) return false;
+   for (int i = 0; i < arg.nbDims; ++i) {
+@@ -100,7 +100,7 @@ using DimsAreArray = DimsAreArrayHelperMatcherP<std::vector<int>>;
+ // Checks that layer names are equal to initializer list or vector of strings.
+ // Example: EXPECT_THAT(my_network, LayerNamesAreArray({"conv1", "conv2"}))
+ MATCHER_P(LayerNamesAreArrayHelper, array_value,
+-          absl::StrFormat("layer names %s [%s]", negation ? "are" : "are not",
++          absl::StrFormat("layer names %s [%s]", negation ? "are not" : "are",
+                           ::testing::PrintToString(array_value))) {
+   if (array_value.size() != arg->getNbLayers()) return false;
+   for (int i = 0; i < arg->getNbLayers(); ++i) {
+diff --git a/tensorflow/compiler/tf2tensorrt/utils/trt_testutils_test.cc b/tensorflow/compiler/tf2tensorrt/utils/trt_testutils_test.cc
+index d5d9fcf99f5..8a442478349 100644
+--- a/tensorflow/compiler/tf2tensorrt/utils/trt_testutils_test.cc
++++ b/tensorflow/compiler/tf2tensorrt/utils/trt_testutils_test.cc
+@@ -77,7 +77,14 @@ TEST(INetworkDefinitionMatchers, CorrectlyMatch) {
+   ASSERT_NE(input, nullptr);
+ 
+   const char* fc_layer_name = "my-fc-layer";
++#if !IS_TRT_VERSION_GE(10, 0, 0, 0)
+   auto layer = network->addFullyConnected(*input, 1, weights, weights);
++#else  // IS_TRT_VERSION_GE(10, 0, 0, 0)
++  auto layer =
++      network->addMatrixMultiply(*input, nvinfer1::MatrixOperation::kNONE,
++                                 *input, nvinfer1::MatrixOperation::kNONE);
++  (void)weights;  // Not used
++#endif  // IS_TRT_VERSION_GE(10, 0, 0, 0)
+   ASSERT_NE(layer, nullptr);
+   layer->setName(fc_layer_name);
+ 
+@@ -86,7 +93,12 @@ TEST(INetworkDefinitionMatchers, CorrectlyMatch) {
+               AllOf(LayerNamesNonEmpty(), LayerNamesAreArray({fc_layer_name})));
+ 
+   // Add layer with default name and check layer name.
++#if !IS_TRT_VERSION_GE(10, 0, 0, 0)
+   layer = network->addFullyConnected(*input, 1, weights, weights);
++#else  // IS_TRT_VERSION_GE(10, 0, 0, 0)
++  layer = network->addMatrixMultiply(*input, nvinfer1::MatrixOperation::kNONE,
++                                     *input, nvinfer1::MatrixOperation::kNONE);
++#endif  // IS_TRT_VERSION_GE(10, 0, 0, 0)
+   EXPECT_THAT(network.get(), AllOf(LayerNamesNonEmpty(),
+                                    Not(LayerNamesAreArray({fc_layer_name}))));
+ }
+diff --git a/tensorflow/lite/python/convert.py b/tensorflow/lite/python/convert.py
+index cfaff27a849..a4eeb02fd6a 100644
+--- a/tensorflow/lite/python/convert.py
++++ b/tensorflow/lite/python/convert.py
+@@ -14,7 +14,6 @@
+ # ==============================================================================
+ """Converts a frozen graph into a TFLite FlatBuffer."""
+ 
+-import distutils.spawn
+ import enum
+ import hashlib
+ import os as _os
+@@ -45,6 +44,10 @@ from tensorflow.python.platform import resource_loader as _resource_loader
+ from tensorflow.python.util import deprecation
+ from tensorflow.python.util.tf_export import tf_export as _tf_export
+ 
++try:
++  from shutil import which
++except ImportError:
++  from distutils.spawn import find_executable as which
+ 
+ def _is_quantized_input_stats_required(
+     conversion_flags: _conversion_flags_pb2.TocoFlags,
+@@ -399,7 +402,7 @@ def _run_deprecated_conversion_binary(
+     RuntimeError: When conversion fails, an exception is raised with the error
+       message embedded.
+ """ +- if distutils.spawn.find_executable(_deprecated_conversion_binary) is None: ++ if which(_deprecated_conversion_binary) is None: + raise ConverterError("""Could not find `toco_from_protos` binary, make sure + your virtualenv bin directory or pip local bin directory is in your path. + In particular, if you have installed TensorFlow with --user, make sure you +diff --git a/tensorflow/python/compiler/tensorrt/test/tf_trt_integration_test_base.py b/tensorflow/python/compiler/tensorrt/test/tf_trt_integration_test_base.py +index 06784c09106..8290ec796ef 100644 +--- a/tensorflow/python/compiler/tensorrt/test/tf_trt_integration_test_base.py ++++ b/tensorflow/python/compiler/tensorrt/test/tf_trt_integration_test_base.py +@@ -1117,7 +1117,7 @@ def _GetTestConfigsV1(): + convert_online, convert_offline = True, False + dynamic_engine, static_engine = True, False + use_calibration, no_calibration = True, False +- implicit_batch = False ++ dynamic_shape = False + + # Add all possible test cases and let the derived test class to decide + # whether to run specific ones with ShouldRunTest(). +@@ -1126,11 +1126,11 @@ def _GetTestConfigsV1(): + opts = list( + itertools.product([FP32, FP16, INT8], [convert_online, convert_offline], + [dynamic_engine, static_engine], [no_calibration], +- [implicit_batch])) ++ [dynamic_shape])) + # We always run calibration with offline tool. + # TODO(aaroey): static calibration engine is not supported yet. + opts.append( +- (INT8, convert_offline, dynamic_engine, use_calibration, implicit_batch)) ++ (INT8, convert_offline, dynamic_engine, use_calibration, dynamic_shape)) + return opts + + +@@ -1142,6 +1142,11 @@ def _GetTestConfigsV2(): + # TODO(laigd): add support for calibration. + no_calibration = False + use_calibration = True ++ dynamic_shape_opts = [False, True] ++ ++ if trt_utils.is_loaded_tensorrt_version_greater_equal(10, 0, 0): ++ # Implicit batch mode is not supported since TensorRT 10.0. ++ dynamic_shape_opts = [True] + + # Add all possible test cases and let the derived test class to decide + # whether to run specific ones with ShouldRunTest(). +@@ -1154,10 +1159,11 @@ def _GetTestConfigsV2(): + # - INT8 without calibration behaves like FP32/FP16. + opts = list( + itertools.product([FP32, FP16], [convert_offline], [dynamic_engine], +- [no_calibration], [False, True])) ++ [no_calibration], dynamic_shape_opts)) + # We always run calibration with offline tool. +- opts.append((INT8, convert_offline, dynamic_engine, use_calibration, False)) +- opts.append((INT8, convert_offline, dynamic_engine, use_calibration, True)) ++ for dynamic_shape in dynamic_shape_opts: ++ opts.append(( ++ INT8, convert_offline, dynamic_engine, use_calibration, dynamic_shape)) + return opts + + +diff --git a/tensorflow/python/compiler/tensorrt/trt_convert.py b/tensorflow/python/compiler/tensorrt/trt_convert.py +index 746f910e407..5fab7b0273b 100644 +--- a/tensorflow/python/compiler/tensorrt/trt_convert.py ++++ b/tensorflow/python/compiler/tensorrt/trt_convert.py +@@ -1214,7 +1214,7 @@ class TrtGraphConverterV2(object): + input_saved_model_signature_key: the key of the signature to optimize the + graph for. + use_dynamic_shape: whether to enable dynamic shape support. None is +- equivalent to False in the current implementation. ++ equivalent to True in the current implementation. + dynamic_shape_profile_strategy: one of the strings in + supported_profile_strategies(). None is equivalent to Range in the + current implementation. 
+@@ -1284,7 +1284,7 @@ class TrtGraphConverterV2(object): + self._calibrated = False + + if use_dynamic_shape is None: +- self._use_dynamic_shape = False ++ self._use_dynamic_shape = True + else: + self._use_dynamic_shape = use_dynamic_shape + +diff --git a/third_party/tensorrt/tensorrt_configure.bzl b/third_party/tensorrt/tensorrt_configure.bzl +index 3d127795638..28c222ab8ad 100644 +--- a/third_party/tensorrt/tensorrt_configure.bzl ++++ b/third_party/tensorrt/tensorrt_configure.bzl +@@ -26,7 +26,7 @@ _TF_TENSORRT_VERSION = "TF_TENSORRT_VERSION" + _TF_NEED_TENSORRT = "TF_NEED_TENSORRT" + + _TF_TENSORRT_LIBS = ["nvinfer", "nvinfer_plugin"] +-_TF_TENSORRT_HEADERS = ["NvInfer.h", "NvUtils.h", "NvInferPlugin.h"] ++_TF_TENSORRT_HEADERS = ["NvInfer.h", "NvInferPlugin.h"] + _TF_TENSORRT_HEADERS_V6 = [ + "NvInfer.h", + "NvUtils.h", +@@ -63,6 +63,21 @@ _TF_TENSORRT_HEADERS_V8_6 = [ + "NvInferVersion.h", + "NvUtils.h", + ] ++_TF_TENSORRT_HEADERS_V10 = [ ++ "NvInfer.h", ++ "NvInferConsistency.h", ++ "NvInferConsistencyImpl.h", ++ "NvInferImpl.h", ++ "NvInferLegacyDims.h", ++ "NvInferPlugin.h", ++ "NvInferPluginUtils.h", ++ "NvInferRuntime.h", ++ "NvInferRuntimeBase.h", ++ "NvInferRuntimeCommon.h", ++ "NvInferRuntimePlugin.h", ++ "NvInferSafeRuntime.h", ++ "NvInferVersion.h", ++] + + _DEFINE_TENSORRT_SONAME_MAJOR = "#define NV_TENSORRT_SONAME_MAJOR" + _DEFINE_TENSORRT_SONAME_MINOR = "#define NV_TENSORRT_SONAME_MINOR" +@@ -89,6 +104,8 @@ def _at_least_version(actual_version, required_version): + return actual >= required + + def _get_tensorrt_headers(tensorrt_version): ++ if _at_least_version(tensorrt_version, "10"): ++ return _TF_TENSORRT_HEADERS_V10 + if _at_least_version(tensorrt_version, "8.6"): + return _TF_TENSORRT_HEADERS_V8_6 + if _at_least_version(tensorrt_version, "8"): +diff --git a/third_party/xla/third_party/tsl/third_party/tensorrt/tensorrt_configure.bzl b/third_party/xla/third_party/tsl/third_party/tensorrt/tensorrt_configure.bzl +index 91b214fd990..d63828fd29b 100644 +--- a/third_party/xla/third_party/tsl/third_party/tensorrt/tensorrt_configure.bzl ++++ b/third_party/xla/third_party/tsl/third_party/tensorrt/tensorrt_configure.bzl +@@ -26,7 +26,7 @@ _TF_TENSORRT_VERSION = "TF_TENSORRT_VERSION" + _TF_NEED_TENSORRT = "TF_NEED_TENSORRT" + + _TF_TENSORRT_LIBS = ["nvinfer", "nvinfer_plugin"] +-_TF_TENSORRT_HEADERS = ["NvInfer.h", "NvUtils.h", "NvInferPlugin.h"] ++_TF_TENSORRT_HEADERS = ["NvInfer.h", "NvInferPlugin.h"] + _TF_TENSORRT_HEADERS_V6 = [ + "NvInfer.h", + "NvUtils.h", diff --git a/patches/io/fix-boringssl.patch b/patches/io/fix-boringssl.patch new file mode 100644 index 0000000..6081ad8 --- /dev/null +++ b/patches/io/fix-boringssl.patch @@ -0,0 +1,12 @@ +diff --git a/WORKSPACE b/WORKSPACE +index 19c30f7e..8a2e0770 100644 +--- a/WORKSPACE ++++ b/WORKSPACE +@@ -32,6 +32,7 @@ http_archive( + name = "boringssl", + patch_cmds = [ + """sed -i.bak 's/bio.c",/bio.c","src\\/decrepit\\/bio\\/base64_bio.c",/g' BUILD.generated.bzl""", ++ """sed -i.bak 's/-Werror//g' BUILD""" + ], + sha256 = "a9c3b03657d507975a32732f04563132b4553c20747cec6dc04de475c8bdf29f", + strip_prefix = "boringssl-80ca9f9f6ece29ab132cce4cf807a9465a18cfac", diff --git a/patches/text/tf-2.16.2.patch b/patches/text/tf-2.16.2.patch new file mode 100644 index 0000000..19c2067 --- /dev/null +++ b/patches/text/tf-2.16.2.patch @@ -0,0 +1,40 @@ +diff --git a/WORKSPACE b/WORKSPACE +index 3ac6e5c..a458efb 100644 +--- a/WORKSPACE ++++ b/WORKSPACE +@@ -58,10 +58,9 @@ http_archive( + name = "org_tensorflow", + patch_args = ["-p1"], + 
patches = ["//third_party/tensorflow:tf.patch"], +- strip_prefix = "tensorflow-2.16.1", +- sha256 = "54c976f828182f85e10e03840dd3b0504109f57760a498075574e35e9aa983fe", ++ strip_prefix = "tensorflow-810f233968cec850915324948bbbc338c97cf57f", + urls = [ +- "https://github.com/tensorflow/tensorflow/archive/v2.16.1.zip" ++ "https://github.com/tensorflow/tensorflow/archive/810f233968cec850915324948bbbc338c97cf57f.zip" + ], + ) + +diff --git a/oss_scripts/pip_package/setup.nightly.py b/oss_scripts/pip_package/setup.nightly.py +index d3a00b6..67cd584 100644 +--- a/oss_scripts/pip_package/setup.nightly.py ++++ b/oss_scripts/pip_package/setup.nightly.py +@@ -32,7 +32,7 @@ from setuptools.command.install import install + from setuptools.dist import Distribution + + project_name = 'tensorflow-text-nightly' +-project_version = 'REPLACE_ME' ++project_version = '2.16.2' + + + class BinaryDistribution(Distribution): +diff --git a/tensorflow_text/__init__.py b/tensorflow_text/__init__.py +index 76f52fb..81928d4 100644 +--- a/tensorflow_text/__init__.py ++++ b/tensorflow_text/__init__.py +@@ -110,4 +110,4 @@ tflite_registrar.SELECT_TFTEXT_OPS = [ + ] + + remove_undocumented(__name__, _allowed_symbols) +-__version__ = "2.16.1" ++__version__ = "2.16.2" diff --git a/sanity-check.sh b/sanity-check.sh index 5f678be..0dcd6c7 100755 --- a/sanity-check.sh +++ b/sanity-check.sh @@ -3,41 +3,41 @@ set -e usage() { - echo "Usage: $0 -p -t -k " - echo " -p Python version to use (6-11)" - echo " -t TensorFlow version to use (2.3.0, 2.4.0, etc.)" - echo " -k Keras version to use (2.4.3, 2.4.0, etc.)" + echo "Usage: $0 -p -t -k -x " + echo " -p Python version to use (6-12)" + echo " -t TensorFlow version to use (e.g. 2.3.0)" + echo " -x TF-Text version to use" } if [ $# -lt 6 ]; then - usage - exit + usage + exit fi -while getopts "ht:p:k:" opt; do +while getopts "ht:p:x:" opt; do case $opt in - p) - py3_ver=$OPTARG - ;; - t) - tf_ver=$OPTARG - ;; - k) - keras_ver=$OPTARG - ;; - h) - usage - exit - ;; - \?) - echo "Invalid option: -$OPTARG" >&2 - usage - exit - ;; + p) + py3_ver=$OPTARG + ;; + t) + tf_ver=$OPTARG + ;; + x) + text_ver=$OPTARG + ;; + h) + usage + exit + ;; + \?) + echo "Invalid option: -$OPTARG" >&2 + usage + exit + ;; esac done -if [ "$py3_ver" -lt 6 ] || [ "$py3_ver" -gt 11 ]; then - echo "Python version must be between 6 and 11" +if [ "$py3_ver" -lt 6 ] || [ "$py3_ver" -gt 12 ]; then + echo "Python version must be between 6 and 12" usage exit fi @@ -47,21 +47,21 @@ python3.$py3_ver -m venv venvs/py3$py3_ver . venvs/py3${py3_ver}/bin/activate tf_wheel=$(ls wheels/tensorflow/tensorflow-${tf_ver}-cp3${py3_ver}-*-linux_x86_64.whl) if [ ! -f "$tf_wheel" ]; then - echo "TensorFlow wheel $tf_wheel not found" - exit 1 + echo "TensorFlow wheel $tf_wheel not found" + exit 1 fi -keras_wheel=$(ls wheels/keras/keras-${keras_ver}-*.whl) -if [ ! -f "$keras_wheel" ]; then - echo "Keras wheel $keras_wheel not found" - exit 1 +text_wheel=$(ls wheels/text/tensorflow_text-${text_ver}-cp3${py3_ver}-*-linux_x86_64.whl) +if [ ! 
-f "$text_wheel" ]; then + echo "Tensorflow Text wheel $text_wheel not found" + exit 1 fi PIP_OPTS=(--disable-pip-version-check --no-cache-dir) python -m pip "${PIP_OPTS[@]}" install -q -U pip -pip "${PIP_OPTS[@]}" install -q -U "$tf_wheel" "$keras_wheel" +pip "${PIP_OPTS[@]}" install -q -U "$tf_wheel" "$text_wheel" -python -c 'import tensorflow as tf; print(tf.__version__); print(tf.keras.__version__); print(tf.constant(1));' +python -c 'import tensorflow as tf; import tensorflow_text; print(tf.__version__); print(tf.keras.__version__); print(tf.constant(1));' deactivate rm -rf venvs