From 39254f7f1415f19e67114c9aab8083fc572bf6f3 Mon Sep 17 00:00:00 2001 From: facebook-github-bot Date: Fri, 27 Sep 2024 06:30:26 +0000 Subject: [PATCH] =?UTF-8?q?Deploying=20to=20gh-pages=20from=20@=20pytorch/?= =?UTF-8?q?FBGEMM@d056aa3689380f7decad83c90bfc36f5dcf04195=20=F0=9F=9A=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- output.json | 56 +++++++++++++++++++++++++------------------------- searchindex.js | 2 +- 2 files changed, 29 insertions(+), 29 deletions(-) diff --git a/output.json b/output.json index 871275e8e2..19bf34092c 100644 --- a/output.json +++ b/output.json @@ -11,67 +11,67 @@ {"filename": "general/documentation/Overview.rst", "lineno": 71, "status": "unchecked", "code": 0, "uri": "#fbgemm-gpu-build-setup-tools-install", "info": ""} {"filename": "general/documentation/Cpp.rst", "lineno": 60, "status": "unchecked", "code": 0, "uri": "#fbgemm-gpu-toc-api-cpp", "info": ""} {"filename": "general/documentation/Cpp.rst", "lineno": 68, "status": "unchecked", "code": 0, "uri": "#general-docs-build", "info": ""} -{"filename": "fbgemm-development/BuildInstructions.rst", "lineno": 4, "status": "working", "code": 0, "uri": "https://github.com/pytorch/FBGEMM/blob/main/.github/scripts/setup_env.bash", "info": ""} -{"filename": "general/Contributing.rst", "lineno": 27, "status": "working", "code": 0, "uri": "https://code.facebook.com/cla", "info": ""} +{"filename": "fbgemm-development/BuildInstructions.rst", "lineno": 41, "status": "working", "code": 0, "uri": "https://github.com/asmjit/asmjit", "info": ""} {"filename": "fbgemm-development/BuildInstructions.rst", "lineno": 55, "status": "working", "code": 0, "uri": "https://github.com/google/googletest", "info": ""} +{"filename": "fbgemm-development/BuildInstructions.rst", "lineno": 4, "status": "working", "code": 0, "uri": "https://github.com/pytorch/FBGEMM/blob/main/.github/scripts/setup_env.bash", "info": ""} +{"filename": "fbgemm_gpu-development/BuildInstructions.rst", "lineno": 139, "status": "working", "code": 0, "uri": "https://developer.nvidia.com/cudnn", "info": ""} +{"filename": "fbgemm-development/BuildInstructions.rst", "lineno": 48, "status": "working", "code": 0, "uri": "https://github.com/pytorch/cpuinfo", "info": ""} {"filename": "general/ContactUs.rst", "lineno": 17, "status": "redirected", "code": 301, "uri": "https://bit.ly/ptslack", "info": "https://docs.google.com/forms/d/e/1FAIpQLSeADnUNW36fjKjYzyHDOzEB_abKQE9b6gqqW9NXse6O0MWh0A/viewform"} -{"filename": "fbgemm-development/BuildInstructions.rst", "lineno": 41, "status": "working", "code": 0, "uri": "https://github.com/asmjit/asmjit", "info": ""} +{"filename": "fbgemm_gpu-development/InstallationInstructions.rst", "lineno": 40, "status": "redirected", "code": 301, "uri": "https://docs.nvidia.com/datacenter/tesla/tesla-installation-notes/index.html", "info": "https://docs.nvidia.com/cuda/cuda-installation-guide-linux/"} {"filename": "fbgemm_gpu-development/BuildInstructions.rst", "lineno": 31, "status": "working", "code": 0, "uri": "https://docs.conda.io/en/latest/miniconda.html", "info": ""} {"filename": "fbgemm_gpu-python-api/pooled_embedding_modules.rst", "lineno": 1, "status": "working", "code": 0, "uri": "https://docs.python.org/3/library/constants.html#None", "info": ""} -{"filename": "fbgemm_gpu-development/InstallationInstructions.rst", "lineno": 40, "status": "redirected", "code": 301, "uri": "https://docs.nvidia.com/datacenter/tesla/tesla-installation-notes/index.html", "info": "https://docs.nvidia.com/cuda/cuda-installation-guide-linux/"} +{"filename": "fbgemm_gpu-development/BuildInstructions.rst", "lineno": 114, "status": "redirected", "code": 301, "uri": "https://developer.nvidia.com/nvidia-management-library-nvml", "info": "https://developer.nvidia.com/management-library-nvml"} {"filename": "general/documentation/Python.rst", "lineno": 1, "status": "working", "code": 0, "uri": "https://docs.python.org/3/library/exceptions.html#AttributeError", "info": ""} -{"filename": "general/documentation/Python.rst", "lineno": 1, "status": "working", "code": 0, "uri": "https://docs.python.org/3/library/ctypes.html#ctypes.c_ulong", "info": ""} -{"filename": "fbgemm_gpu-python-api/table_batched_embedding_ops.rst", "lineno": 1, "status": "working", "code": 0, "uri": "https://docs.python.org/3/library/functions.html#bool", "info": ""} +{"filename": "general/Contributing.rst", "lineno": 27, "status": "working", "code": 0, "uri": "https://code.facebook.com/cla", "info": ""} {"filename": "general/documentation/Python.rst", "lineno": 1, "status": "working", "code": 0, "uri": "https://docs.python.org/3/library/exceptions.html#ValueError", "info": ""} +{"filename": "fbgemm_gpu-python-api/table_batched_embedding_ops.rst", "lineno": 1, "status": "working", "code": 0, "uri": "https://docs.python.org/3/library/functions.html#bool", "info": ""} +{"filename": "general/documentation/Python.rst", "lineno": 1, "status": "working", "code": 0, "uri": "https://docs.python.org/3/library/ctypes.html#ctypes.c_ulong", "info": ""} {"filename": "fbgemm_gpu-python-api/jagged_tensor_ops.rst", "lineno": 1, "status": "working", "code": 0, "uri": "https://docs.python.org/3/library/functions.html#int", "info": ""} {"filename": "fbgemm_gpu-python-api/jagged_tensor_ops.rst", "lineno": 1, "status": "working", "code": 0, "uri": "https://docs.python.org/3/library/functions.html#float", "info": ""} -{"filename": "fbgemm_gpu-development/InstallationInstructions.rst", "lineno": 110, "status": "working", "code": 0, "uri": "https://docs.amd.com/bundle/ROCm-Installation-Guide-v5.5/page/How_to_Install_ROCm.html", "info": ""} {"filename": "fbgemm_gpu-python-api/table_batched_embedding_ops.rst", "lineno": 1, "status": "working", "code": 0, "uri": "https://docs.python.org/3/library/stdtypes.html#str", "info": ""} -{"filename": "fbgemm_gpu-python-api/table_batched_embedding_ops.rst", "lineno": 1, "status": "working", "code": 0, "uri": "https://docs.python.org/3/library/typing.html#typing.Dict", "info": ""} {"filename": "fbgemm_gpu-overview/jagged-tensor-ops/JaggedTensorOps.rst", "lineno": 172, "status": "working", "code": 0, "uri": "https://en.wikipedia.org/wiki/Hadamard_product_(matrices)", "info": ""} +{"filename": "fbgemm_gpu-python-api/table_batched_embedding_ops.rst", "lineno": 1, "status": "working", "code": 0, "uri": "https://docs.python.org/3/library/typing.html#typing.Dict", "info": ""} +{"filename": "fbgemm_gpu-development/InstallationInstructions.rst", "lineno": 110, "status": "working", "code": 0, "uri": "https://docs.amd.com/bundle/ROCm-Installation-Guide-v5.5/page/How_to_Install_ROCm.html", "info": ""} {"filename": "fbgemm_gpu-python-api/pooled_embedding_modules.rst", "lineno": 1, "status": "working", "code": 0, "uri": "https://docs.python.org/3/library/typing.html#typing.List", "info": ""} {"filename": "fbgemm_gpu-python-api/table_batched_embedding_ops.rst", "lineno": 1, "status": "working", "code": 0, "uri": "https://docs.python.org/3/library/typing.html#typing.Tuple", "info": ""} -{"filename": "fbgemm_gpu-development/BuildInstructions.rst", "lineno": 139, "status": "working", "code": 0, "uri": "https://developer.nvidia.com/cudnn", "info": ""} -{"filename": "fbgemm_gpu-development/BuildInstructions.rst", "lineno": 114, "status": "redirected", "code": 301, "uri": "https://developer.nvidia.com/nvidia-management-library-nvml", "info": "https://developer.nvidia.com/management-library-nvml"} {"filename": "general/ContactUs.rst", "lineno": 11, "status": "working", "code": 0, "uri": "https://github.com/pytorch/FBGEMM/discussions", "info": ""} -{"filename": "general/Contributing.rst", "lineno": 8, "status": "working", "code": 0, "uri": "https://github.com/pytorch/FBGEMM/blob/main/CODE_OF_CONDUCT.md", "info": ""} {"filename": "general/ContactUs.rst", "lineno": 7, "status": "working", "code": 0, "uri": "https://github.com/pytorch/FBGEMM/issues", "info": ""} +{"filename": "general/Contributing.rst", "lineno": 8, "status": "working", "code": 0, "uri": "https://github.com/pytorch/FBGEMM/blob/main/CODE_OF_CONDUCT.md", "info": ""} {"filename": "fbgemm_gpu-development/BuildInstructions.rst", "lineno": 221, "status": "redirected", "code": 301, "uri": "https://github.com/ROCmSoftwarePlatform/MIOpen", "info": "https://github.com/ROCm/MIOpen"} -{"filename": "fbgemm_gpu-development/InstallationInstructions.rst", "lineno": 294, "status": "working", "code": 0, "uri": "https://github.com/pytorch/FBGEMM/issues/1618", "info": ""} {"filename": "fbgemm-development/BuildInstructions.rst", "lineno": 171, "status": "working", "code": 0, "uri": "https://github.com/pytorch/FBGEMM/issues/1094", "info": ""} -{"filename": "fbgemm-development/BuildInstructions.rst", "lineno": 48, "status": "working", "code": 0, "uri": "https://github.com/pytorch/cpuinfo", "info": ""} +{"filename": "fbgemm_gpu-development/InstallationInstructions.rst", "lineno": 294, "status": "working", "code": 0, "uri": "https://github.com/pytorch/FBGEMM/issues/1618", "info": ""} {"filename": "fbgemm-development/BuildInstructions.rst", "lineno": 171, "status": "working", "code": 0, "uri": "https://github.com/pytorch/FBGEMM/issues/1666", "info": ""} -{"filename": "fbgemm_gpu-development/InstallationInstructions.rst", "lineno": 40, "status": "working", "code": 0, "uri": "https://github.com/pytorch/test-infra/blob/main/.github/actions/setup-nvidia/action.yml", "info": ""} +{"filename": "fbgemm_gpu-development/InstallationInstructions.rst", "lineno": 188, "status": "working", "code": 0, "uri": "https://github.com/pytorch/pytorch/blob/main/RELEASE.md", "info": ""} {"filename": "fbgemm_gpu-overview/jagged-tensor-ops/JaggedTensorOps.rst", "lineno": 7, "status": "working", "code": 0, "uri": "https://github.com/pytorch/pytorch/issues/25032", "info": ""} -{"filename": "fbgemm_gpu-development/BuildInstructions.rst", "lineno": 96, "status": "working", "code": 0, "uri": "https://hub.docker.com/r/nvidia/cuda", "info": ""} {"filename": "fbgemm-development/BuildInstructions.rst", "lineno": 171, "status": "working", "code": 0, "uri": "https://github.com/pytorch/pytorch/issues/77939", "info": ""} {"filename": "general/documentation/Sphinx.rst", "lineno": 149, "status": "working", "code": 0, "uri": "https://graphviz.org/documentation/", "info": ""} -{"filename": "index.rst", "lineno": 7, "status": "redirected", "code": 302, "uri": "https://github.com/pytorch/pytorch/tree/master/aten/src/ATen/native/quantized/cpu", "info": "https://github.com/pytorch/pytorch/tree/main/aten/src/ATen/native/quantized/cpu"} -{"filename": "fbgemm_gpu-development/BuildInstructions.rst", "lineno": 174, "status": "working", "code": 0, "uri": "https://hub.docker.com/r/rocm/rocm-terminal", "info": ""} +{"filename": "fbgemm_gpu-development/BuildInstructions.rst", "lineno": 96, "status": "working", "code": 0, "uri": "https://hub.docker.com/r/nvidia/cuda", "info": ""} {"filename": "fbgemm_gpu-development/BuildInstructions.rst", "lineno": 183, "status": "working", "code": 0, "uri": "https://hub.docker.com/r/rocm/dev-ubuntu-20.04", "info": ""} +{"filename": "fbgemm_gpu-development/BuildInstructions.rst", "lineno": 174, "status": "working", "code": 0, "uri": "https://hub.docker.com/r/rocm/rocm-terminal", "info": ""} +{"filename": "fbgemm_gpu-development/InstallationInstructions.rst", "lineno": 40, "status": "working", "code": 0, "uri": "https://github.com/pytorch/test-infra/blob/main/.github/actions/setup-nvidia/action.yml", "info": ""} +{"filename": "index.rst", "lineno": 7, "status": "redirected", "code": 302, "uri": "https://github.com/pytorch/pytorch/tree/master/aten/src/ATen/native/quantized/cpu", "info": "https://github.com/pytorch/pytorch/tree/main/aten/src/ATen/native/quantized/cpu"} {"filename": "general/documentation/Python.rst", "lineno": 55, "status": "working", "code": 0, "uri": "https://peps.python.org/pep-0287/", "info": ""} {"filename": "fbgemm_gpu-cpp-api/experimental_ops.rst", "lineno": 6, "status": "working", "code": 0, "uri": "https://pytorch.org/cppdocs/api/classat_1_1_tensor.html#_CPPv4N2at6TensorE", "info": ""} {"filename": "fbgemm_gpu-cpp-api/quantize_ops.rst", "lineno": 11, "status": "working", "code": 0, "uri": "https://pytorch.org/cppdocs/api/classc10_1_1_error.html#_CPPv4N3c105ErrorE", "info": ""} -{"filename": "fbgemm_gpu-cpp-api/memory_utils.rst", "lineno": 4, "status": "working", "code": 0, "uri": "https://man7.org/linux/man-pages/man2/madvise.2.html", "info": ""} -{"filename": "fbgemm_gpu-development/InstallationInstructions.rst", "lineno": 188, "status": "working", "code": 0, "uri": "https://github.com/pytorch/pytorch/blob/main/RELEASE.md", "info": ""} -{"filename": "fbgemm_gpu-cpp-api/memory_utils.rst", "lineno": 4, "status": "working", "code": 0, "uri": "https://nvidia.github.io/cuda-python/module/cudart.html#cuda.cudart.cudaMemPrefetchAsync", "info": ""} {"filename": "fbgemm_gpu-python-api/pooled_embedding_modules.rst", "lineno": 1, "status": "working", "code": 0, "uri": "https://pytorch.org/docs/main/tensor_attributes.html#torch.device", "info": ""} -{"filename": "fbgemm_gpu-cpp-api/memory_utils.rst", "lineno": 4, "status": "working", "code": 0, "uri": "https://nvidia.github.io/cuda-python/module/cudart.html#cuda.cudart.cudaMemAdvise", "info": ""} -{"filename": "fbgemm_gpu-development/BuildInstructions.rst", "lineno": 352, "status": "working", "code": 0, "uri": "https://pytorch.org/get-started/locally/", "info": ""} +{"filename": "fbgemm_gpu-cpp-api/memory_utils.rst", "lineno": 4, "status": "working", "code": 0, "uri": "https://nvidia.github.io/cuda-python/module/cudart.html#cuda.cudart.cudaMemPrefetchAsync", "info": ""} {"filename": "fbgemm_gpu-python-api/pooled_embedding_modules.rst", "lineno": 1, "status": "working", "code": 0, "uri": "https://pytorch.org/docs/main/tensors.html#torch.Tensor", "info": ""} -{"filename": "fbgemm-development/BuildInstructions.rst", "lineno": 82, "status": "working", "code": 0, "uri": "https://visualstudio.microsoft.com/vs/older-downloads/", "info": ""} +{"filename": "fbgemm_gpu-development/BuildInstructions.rst", "lineno": 352, "status": "working", "code": 0, "uri": "https://pytorch.org/get-started/locally/", "info": ""} +{"filename": "fbgemm_gpu-cpp-api/memory_utils.rst", "lineno": 4, "status": "working", "code": 0, "uri": "https://man7.org/linux/man-pages/man2/madvise.2.html", "info": ""} +{"filename": "fbgemm_gpu-cpp-api/memory_utils.rst", "lineno": 4, "status": "working", "code": 0, "uri": "https://nvidia.github.io/cuda-python/module/cudart.html#cuda.cudart.cudaMemAdvise", "info": ""} {"filename": "fbgemm_gpu-development/BuildInstructions.rst", "lineno": 197, "status": "working", "code": 0, "uri": "https://rocm.docs.amd.com/en/latest/", "info": ""} {"filename": "general/documentation/Cpp.rst", "lineno": 6, "status": "working", "code": 0, "uri": "https://www.breathe-doc.org/", "info": ""} +{"filename": "fbgemm-development/BuildInstructions.rst", "lineno": 82, "status": "working", "code": 0, "uri": "https://visualstudio.microsoft.com/vs/older-downloads/", "info": ""} {"filename": "general/documentation/Overview.rst", "lineno": 142, "status": "working", "code": 0, "uri": "https://www.netlify.com/", "info": ""} -{"filename": "general/documentation/Cpp.rst", "lineno": 6, "status": "working", "code": 0, "uri": "https://www.doxygen.nl/", "info": ""} -{"filename": "general/documentation/Python.rst", "lineno": 6, "status": "working", "code": 0, "uri": "https://www.sphinx-doc.org/en/master/", "info": ""} -{"filename": "general/Contributing.rst", "lineno": 34, "status": "redirected", "code": 301, "uri": "https://www.facebook.com/whitehat/", "info": "https://bugbounty.meta.com/?utm_source=facebook.com&utm_medium=redirect"} {"filename": "general/documentation/Cpp.rst", "lineno": 6, "status": "working", "code": 0, "uri": "https://www.oracle.com/java/technologies/javase/javadoc-tool.html", "info": ""} -{"filename": "general/documentation/Sphinx.rst", "lineno": 149, "status": "working", "code": 0, "uri": "https://www.sphinx-doc.org/en/master/usage/extensions/graphviz.html", "info": ""} +{"filename": "general/documentation/Python.rst", "lineno": 6, "status": "working", "code": 0, "uri": "https://www.sphinx-doc.org/en/master/", "info": ""} +{"filename": "general/documentation/Cpp.rst", "lineno": 6, "status": "working", "code": 0, "uri": "https://www.doxygen.nl/", "info": ""} {"filename": "general/documentation/Python.rst", "lineno": 6, "status": "working", "code": 0, "uri": "https://www.sphinx-doc.org/en/master/usage/extensions/example_google.html", "info": ""} -{"filename": "general/documentation/Cpp.rst", "lineno": 75, "status": "working", "code": 0, "uri": "https://www.doxygen.nl/manual/commands.html#cmdlink", "info": ""} +{"filename": "general/documentation/Sphinx.rst", "lineno": 149, "status": "working", "code": 0, "uri": "https://www.sphinx-doc.org/en/master/usage/extensions/graphviz.html", "info": ""} {"filename": "general/documentation/Sphinx.rst", "lineno": 115, "status": "working", "code": 0, "uri": "https://www.sphinx-doc.org/en/master/usage/extensions/math.html#module-sphinx.ext.mathjax", "info": ""} {"filename": "general/documentation/Python.rst", "lineno": 55, "status": "working", "code": 0, "uri": "https://www.sphinx-doc.org/en/master/usage/restructuredtext/basics.html", "info": ""} +{"filename": "general/documentation/Cpp.rst", "lineno": 75, "status": "working", "code": 0, "uri": "https://www.doxygen.nl/manual/commands.html#cmdlink", "info": ""} +{"filename": "general/Contributing.rst", "lineno": 34, "status": "redirected", "code": 301, "uri": "https://www.facebook.com/whitehat/", "info": "https://bugbounty.meta.com/?utm_source=facebook.com&utm_medium=redirect"} {"filename": "general/documentation/Sphinx.rst", "lineno": 82, "status": "working", "code": 0, "uri": "https://www.sphinx-doc.org/en/master/usage/restructuredtext/directives.html#directive-literalinclude", "info": ""} {"filename": "general/documentation/Sphinx.rst", "lineno": 115, "status": "working", "code": 0, "uri": "https://www.sphinx-doc.org/en/master/usage/restructuredtext/directives.html#math", "info": ""} {"filename": "fbgemm_gpu-overview/jagged-tensor-ops/JaggedTensorOps.rst", "lineno": 7, "status": "working", "code": 0, "uri": "https://www.tensorflow.org/guide/ragged_tensor", "info": ""} diff --git a/searchindex.js b/searchindex.js index bf0c1f4a85..34b1e7f48b 100644 --- a/searchindex.js +++ b/searchindex.js @@ -1 +1 @@ -Search.setIndex({"docnames": ["fbgemm-cpp-api/QuantUtils", "fbgemm-cpp-api/tbe_cpu_autovec", "fbgemm-development/BuildInstructions", "fbgemm_gpu-cpp-api/embedding_ops", "fbgemm_gpu-cpp-api/experimental_ops", "fbgemm_gpu-cpp-api/input_combine", "fbgemm_gpu-cpp-api/jagged_tensor_ops", "fbgemm_gpu-cpp-api/layout_transform_ops", "fbgemm_gpu-cpp-api/memory_utils", "fbgemm_gpu-cpp-api/merge_pooled_embeddings", "fbgemm_gpu-cpp-api/quantize_ops", "fbgemm_gpu-cpp-api/sparse_ops", "fbgemm_gpu-cpp-api/split_table_batched_embeddings", "fbgemm_gpu-cpp-api/ssd_embedding_ops", "fbgemm_gpu-development/BuildInstructions", "fbgemm_gpu-development/InstallationInstructions", "fbgemm_gpu-development/TestInstructions", "fbgemm_gpu-overview/jagged-tensor-ops/JaggedTensorOps", "fbgemm_gpu-python-api/jagged_tensor_ops", "fbgemm_gpu-python-api/pooled_embedding_modules", "fbgemm_gpu-python-api/pooled_embedding_ops", "fbgemm_gpu-python-api/quantize_ops", "fbgemm_gpu-python-api/sparse_ops", "fbgemm_gpu-python-api/table_batched_embedding_ops", "general/ContactUs", "general/Contributing", "general/License", "general/documentation/Cpp", "general/documentation/Overview", "general/documentation/Python", "general/documentation/Sphinx", "index"], "filenames": ["fbgemm-cpp-api/QuantUtils.rst", "fbgemm-cpp-api/tbe_cpu_autovec.rst", "fbgemm-development/BuildInstructions.rst", "fbgemm_gpu-cpp-api/embedding_ops.rst", "fbgemm_gpu-cpp-api/experimental_ops.rst", "fbgemm_gpu-cpp-api/input_combine.rst", "fbgemm_gpu-cpp-api/jagged_tensor_ops.rst", "fbgemm_gpu-cpp-api/layout_transform_ops.rst", "fbgemm_gpu-cpp-api/memory_utils.rst", "fbgemm_gpu-cpp-api/merge_pooled_embeddings.rst", "fbgemm_gpu-cpp-api/quantize_ops.rst", "fbgemm_gpu-cpp-api/sparse_ops.rst", "fbgemm_gpu-cpp-api/split_table_batched_embeddings.rst", "fbgemm_gpu-cpp-api/ssd_embedding_ops.rst", "fbgemm_gpu-development/BuildInstructions.rst", "fbgemm_gpu-development/InstallationInstructions.rst", "fbgemm_gpu-development/TestInstructions.rst", "fbgemm_gpu-overview/jagged-tensor-ops/JaggedTensorOps.rst", "fbgemm_gpu-python-api/jagged_tensor_ops.rst", "fbgemm_gpu-python-api/pooled_embedding_modules.rst", "fbgemm_gpu-python-api/pooled_embedding_ops.rst", "fbgemm_gpu-python-api/quantize_ops.rst", "fbgemm_gpu-python-api/sparse_ops.rst", "fbgemm_gpu-python-api/table_batched_embedding_ops.rst", "general/ContactUs.rst", "general/Contributing.rst", "general/License.rst", "general/documentation/Cpp.rst", "general/documentation/Overview.rst", "general/documentation/Python.rst", "general/documentation/Sphinx.rst", "index.rst"], "titles": ["Quantization Utilities", "TBE CPU Autovectorization", "Build Instructions", "Embedding Operators", "Experimental Operators", "Combine Input Operators", "Jagged Tensor Operators", "Layout Transformation Operators", "CUDA Memory Operators", "Pooled Embeddings Operators", "Quantization Operators", "Sparse Data Operators", "Table Batched Embedding Operators", "SSD Embedding Operators", "Build Instructions", "Installation Instructions", "Test Instructions", "Jagged Tensor Operators", "Jagged Tensor Operators", "Pooled Embedding Modules", "Pooled Embedding Operators", "Quantization Operators", "Sparse Operators", "Table Batched Embedding (TBE) Training Module", "Contact Us", "Contributing", "License", "Adding Documentation to C++ Code", "Documentation", "Adding Documentation to Python Code", "Sphinx Documentation Pointers", "FBGEMM and FBGEMM_GPU Documentation Homepage"], "terms": {"templat": [0, 1, 14, 27], "typenam": [0, 1, 27], "t": [0, 2, 4, 8, 11, 14, 20, 22, 23, 25, 27, 28], "layout_t": 0, "layout": [0, 31], "kcx": 0, "void": [0, 3, 8, 10, 12, 13], "quantizegroupwis": 0, "const": [0, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 29], "float": [0, 1, 10, 18, 21, 22, 23, 27, 29], "src": 0, "int": [0, 1, 10, 18, 19, 20, 21, 22, 23, 27, 29], "k": [0, 4], "c": [0, 12, 15, 17, 26, 28, 29, 30], "x": [0, 6, 13, 17, 27, 29], "g": [0, 2, 11, 13, 14, 27, 29], "scale": [0, 1, 4, 10], "std": [0, 1, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 14, 27, 29], "int32_t": [0, 1, 27, 29], "zero_point": 0, "dst": 0, "point": [0, 10, 18, 27, 29], "data": [0, 1, 4, 8, 13, 17, 21, 22, 23, 26, 31], "type": [0, 1, 2, 4, 10, 15, 17, 18, 21, 22, 23, 27], "paramet": [0, 1, 4, 8, 10, 11, 13, 18, 19, 20, 21, 22, 23, 27, 28, 29], "output": [0, 1, 4, 6, 10, 11, 13, 18, 19, 20, 21, 22, 23, 27, 29], "int8_t": 0, "uint8_t": [0, 1, 10, 12], "ar": [0, 2, 6, 12, 13, 14, 15, 17, 18, 19, 20, 22, 23, 26, 27, 28, 29], "support": [0, 2, 4, 13, 14, 15, 17, 23, 29, 31], "input": [0, 1, 4, 6, 8, 10, 11, 13, 17, 18, 19, 20, 21, 22, 23, 27, 31], "tensor": [0, 3, 4, 5, 7, 8, 9, 10, 11, 12, 13, 19, 20, 21, 22, 23, 28, 29, 31], "kxc": 0, "correspond": [0, 11, 12, 13, 17, 22, 23, 27, 29], "kcr": 0, "kctr": 0, "weight": [0, 1, 3, 11, 12, 13, 22, 23], "time": [0, 2, 14, 15, 17], "dimens": [0, 4, 6, 8, 11, 17, 18, 19, 20, 21, 22, 23, 29], "krsc": 0, "ktrsc": 0, "channel": [0, 14, 15, 24], "number": [0, 1, 2, 4, 10, 11, 13, 14, 17, 18, 19, 20, 22, 23, 28], "r": [0, 16, 23, 28], "": [0, 2, 8, 14, 16, 17, 25, 27, 28, 29], "group": [0, 4, 17, 27], "function": [0, 2, 13, 14, 23, 27, 29], "perform": [0, 2, 10, 11, 13, 17, 19, 23, 31], "channelwis": 0, "1": [0, 1, 2, 4, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 28, 29, 30], "groupwis": 0, "per": [0, 17, 23], "size": [0, 2, 4, 8, 10, 11, 17, 18, 19, 20, 22, 23], "should": [0, 10, 11, 12, 14, 15, 17, 25, 27, 28, 29], "equal": [0, 17, 22, 23, 29], "zero": [0, 18, 23, 29], "reprsent": 0, "fusedquantizedequant": 0, "int64_t": [0, 1, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13], "len": [0, 17, 20, 23], "tensorquantizationparam": 0, "qparam": 0, "thread_id": 0, "0": [0, 2, 4, 10, 11, 12, 13, 14, 15, 17, 18, 19, 20, 21, 22, 23, 29], "num_thread": 0, "noise_ratio": 0, "0f": 0, "fuse": [0, 10, 23], "integ": [0, 8, 10, 17, 22, 23], "dequant": [0, 10], "kernel": [0, 2, 8, 10, 13, 16, 31], "acceler": 0, "awar": 0, "train": [0, 13, 31], "fp32": [0, 1, 10, 21, 23], "valu": [0, 6, 8, 10, 11, 12, 13, 18, 22, 23, 27, 28, 29], "u": [0, 14, 30, 31], "int8": [0, 21], "us": [0, 1, 2, 4, 8, 11, 13, 14, 15, 16, 17, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31], "provid": [0, 2, 13, 14, 15, 16, 22, 26, 27, 28, 29, 31], "back": [0, 8, 12, 14, 15], "inputtyp": 0, "floatorhalftofusednbitrowwisequantizedsbhalf": [0, 21], "bit_rat": [0, 10, 21], "size_t": [0, 10, 13, 27], "input_row": 0, "input_column": 0, "convert": [0, 8, 10, 13, 17, 18, 21, 29], "fp16": [0, 10, 21, 23], "rowwis": [0, 10, 21, 23], "bitrat": 0, "specifi": [0, 2, 4, 10, 11, 13, 14, 18, 23], "bit": [0, 1, 10, 21], "bia": [0, 1, 4, 10], "each": [0, 1, 4, 10, 11, 13, 14, 17, 18, 19, 20, 22, 23, 29], "row": [0, 1, 6, 10, 12, 13, 17, 18, 19, 20, 23, 29], "store": [0, 10, 11, 12, 13], "itself": [0, 17, 28], "end": [0, 1, 15, 17, 30], "can": [0, 1, 2, 10, 11, 13, 14, 15, 17, 22, 23, 27, 28, 29, 30], "4": [0, 10, 14, 15, 17, 18, 19, 20, 21, 22, 23, 29], "8": [0, 10, 14, 17, 19, 20, 21, 22, 23], "uint32_t": 0, "xor128": 0, "random": [0, 21], "gener": [0, 2, 11, 13, 14, 15, 19, 20, 22, 23, 27, 30], "9": [0, 13, 14, 17, 19, 20, 22, 23], "base": [0, 2, 11, 12, 13, 14, 17, 23], "thi": [0, 2, 6, 8, 9, 10, 11, 13, 14, 15, 17, 19, 22, 23, 24, 25, 26, 27, 29, 30, 31], "paper": 0, "findminmax": 0, "m": [0, 14, 15, 16], "min": 0, "max": [0, 4, 23], "find": [0, 12, 14], "matrix": [0, 2, 18, 31], "bool": [0, 1, 4, 8, 9, 10, 12, 13, 23], "a_symmetr": 0, "b_symmetr": 0, "quantizationgranular": 0, "q_gran": 0, "has_bia": 0, "fuse_relu": 0, "bias_typ": 0, "direct": [0, 12, 15, 26, 27, 29, 30], "fals": [0, 1, 8, 13, 23, 28], "requantizeoutputprocessingavx2": 0, "out": [0, 1, 14, 24, 26, 28], "inp": 0, "block_type_t": 0, "block": [0, 1, 22, 27, 29, 30], "ld_out": 0, "ld_in": 0, "requantizationparams_t": 0, "requant": 0, "avx2": [0, 2], "i": [0, 1, 2, 4, 6, 8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 25, 26, 27, 28, 29, 30, 31], "c_per_g": 0, "requantizeoutputprocessinggconvavx512": 0, "avx512": 0, "intyp": 1, "indextyp": 1, "offsettyp": 1, "outtyp": 1, "embeddingspmdm_autovec": 1, "block_siz": 1, "output_s": [1, 11, 22], "index_s": 1, "data_s": 1, "indic": [1, 3, 12, 13, 17, 20, 22, 23], "offsets_or_length": 1, "normalize_by_length": 1, "is_weight_posit": 1, "use_offset": 1, "true": [1, 8, 13, 23], "output_strid": 1, "input_strid": 1, "scale_bias_last": [1, 10], "no_bag": 1, "is_bf16_out": 1, "is_bf16_in": 1, "version": [1, 2, 15], "embeddingspmdm_ref": 1, "index": [1, 11, 12, 13, 14, 15, 17, 22, 23, 27, 29], "offset": [1, 3, 6, 11, 12, 18, 20, 22, 23], "element": [1, 10, 12, 13, 17, 22, 23], "address": [1, 2, 13, 14], "sum": [1, 4, 11, 13, 18, 19, 20, 22, 23], "option": [1, 2, 3, 6, 8, 12, 14, 18, 19, 22, 23], "null": 1, "non": [1, 4, 8, 23], "whether": [1, 4, 8, 13, 14, 26], "normal": [1, 17], "length": [1, 4, 6, 11, 13, 18, 19, 22, 23, 29], "If": [1, 2, 13, 14, 15, 22, 23, 25, 27, 28, 29], "posit": [1, 4, 11, 13, 19, 20, 22], "set": [1, 8, 12, 13, 16, 17, 18, 23], "instead": [1, 14, 28], "same": [1, 2, 4, 8, 11, 14, 17, 18, 19, 20, 22, 23, 27, 28, 29], "appear": [1, 15], "embed": [1, 2, 14, 15, 22, 28, 31], "bag": [1, 11, 22, 23, 31], "bfloat16": [1, 10], "embeddingspmdmfp8_autovec": 1, "exponent_bit": 1, "exponent_bia": [1, 10], "expon": 1, "note": [2, 12, 14, 15, 22, 23, 27, 28, 29, 30], "The": [2, 4, 8, 10, 11, 13, 15, 16, 17, 18, 19, 20, 22, 23, 25, 27, 28, 29, 30], "most": [2, 14, 15, 17, 28], "date": [2, 14, 15, 28], "script": [2, 14, 15, 28], "bundl": [2, 14, 15, 28], "repo": [2, 14, 15, 28, 29], "under": [2, 14, 15, 25, 26, 28, 29], "setup_env": [2, 14, 15, 28], "bash": [2, 14, 15, 28], "step": [2, 13, 14, 15, 17, 23, 28, 29], "fbgemm_gpu": [2, 8, 14, 17, 19, 23, 24, 25, 26, 27, 29], "follow": [2, 11, 14, 15, 17, 22, 23, 26, 27, 28, 29], "toolchain": [2, 14, 15], "run": [2, 14, 15, 19, 28], "cpu": [2, 8, 9, 16, 22, 23, 28, 31], "higher": 2, "In": [2, 11, 13, 14, 15, 17, 25, 27, 29], "doe": [2, 3, 15, 27, 28, 29], "have": [2, 10, 11, 12, 14, 17, 22, 23, 28], "ani": [2, 11, 14, 18, 22, 25, 26, 28, 29], "intel": 2, "mkl": 2, "howev": [2, 14, 17, 26], "comparison": 2, "some": [2, 14, 17, 28], "benchmark": 2, "found": [2, 14, 15, 28], "path": [2, 13, 14, 16, 27, 30], "through": [2, 25, 27, 29], "intel_mkl_dir": 2, "variabl": [2, 23], "built": [2, 14, 15, 28, 31], "report": [2, 15, 23], "otherwis": [2, 8, 13, 15, 23, 26], "subset": 2, "all": [2, 11, 12, 13, 14, 15, 17, 19, 20, 23, 26, 28], "three": [2, 17], "git": [2, 14], "submodul": [2, 14], "custom": [2, 30], "desir": [2, 14, 17, 18, 27], "thei": [2, 14, 28, 30], "asmjit_src_dir": 2, "cpuinfo_src_dir": 2, "googletest_source_dir": 2, "With": 2, "inner": [2, 17], "take": [2, 14], "one": [2, 4, 10, 11, 12, 14, 18, 23, 27, 29], "doesn": 2, "fit": [2, 26], "approach": 2, "so": [2, 11, 14, 15, 16, 17, 19, 20], "implement": [2, 4, 10, 13, 14, 17, 23], "dynam": 2, "effici": [2, 31], "shape": [2, 4, 17, 19, 20, 22, 23], "specif": [2, 11, 13, 14, 23, 26], "vector": [2, 5, 6, 7, 8, 9, 13, 18, 29], "code": [2, 13, 14, 26, 28], "third": 2, "parti": 2, "call": [2, 8, 13, 15], "detect": [2, 16], "runtim": [2, 14], "pytorch": [2, 13, 17, 24, 28, 29, 31], "project": [2, 25], "dispatch": [2, 8], "optim": [2, 10, 13, 23], "test": [2, 10, 14, 15, 25, 31], "you": [2, 25, 27, 29], "don": [2, 11, 14, 28], "want": [2, 25], "togeth": [2, 27, 28], "default": [2, 11, 14, 15, 23], "turn": [2, 28], "off": [2, 15, 24], "simpli": [2, 14], "fbgemm_build_test": 2, "conda": [2, 16, 28], "For": [2, 16, 17, 22, 24, 26, 27, 28, 29, 30], "platform": [2, 14, 26], "machin": [2, 14, 15, 16, 31], "microsoft": [2, 10], "visual": 2, "studio": 2, "2019": 2, "newer": [2, 14], "recommend": [2, 6, 10, 14, 15, 17, 22], "here": [2, 8, 14, 15, 25, 27, 28, 29, 30], "necessari": [2, 14, 23], "ninja": [2, 14], "etc": [2, 14, 23], "n": [2, 10, 14, 15, 30], "env_nam": [2, 14, 15], "y": [2, 6, 14, 15, 18, 28], "doxygen": [2, 27, 28], "make": [2, 12, 14, 25, 27, 28, 29], "openbla": 2, "packag": [2, 14, 16, 28], "onli": [2, 4, 10, 11, 12, 13, 16, 17, 23, 25, 27, 28, 30], "clone": [2, 14], "along": [2, 14, 15, 19, 20, 22], "its": [2, 8, 10, 11, 14, 22, 26, 28, 30], "insid": [2, 13, 14, 15, 16, 28, 30], "recurs": [2, 14], "http": [2, 14, 15, 25, 27, 28, 29], "github": [2, 14, 25], "com": [2, 14, 25], "cd": [2, 14, 16, 28], "assum": [2, 11, 23], "process": [2, 6, 13, 15, 17, 25, 29], "straightforward": 2, "creat": [2, 8, 14, 17, 25, 27, 29, 30], "directori": [2, 14, 16, 25, 27, 28], "mkdir": 2, "argument": [2, 11, 27, 28, 29], "build_arg": 2, "duse_sanit": 2, "dfbgemm_library_typ": 2, "share": [2, 8], "dpython_execut": 2, "which": [2, 11, 13, 14, 15, 17, 20, 22, 28], "python3": [2, 15], "document": [2, 8, 25, 26], "dfbgemm_build_doc": 2, "ON": [2, 26], "j": [2, 17], "verbos": 2, "As": [2, 11, 14, 15, 17], "write": [2, 13, 14, 15, 28, 29], "fail": [2, 15, 16, 27], "due": [2, 14], "known": [2, 14, 23], "regress": 2, "To": [2, 13, 14, 16, 30], "work": [2, 14, 15, 17, 25], "around": 2, "append": [2, 14, 27, 29], "export": [2, 14, 16], "prior": [2, 14, 15, 26], "cflag": 2, "wno": 2, "error": [2, 10, 15, 23, 27, 28, 29], "mayb": 2, "uniniti": 2, "restrict": 2, "cxxflag": 2, "pleas": [2, 25, 27, 29], "see": [2, 8, 14, 15, 17, 27, 29, 30], "77939": 2, "1094": 2, "1666": 2, "more": [2, 8, 14, 22, 23, 27, 29, 30], "detail": [2, 13, 15], "exactli": 2, "extra": 2, "need": [2, 13, 14, 15, 16, 17, 25, 27, 29, 30], "ad": [2, 14, 25, 28], "invoc": [2, 14, 28], "llvm": [2, 14], "standard": [2, 14], "libc": [2, 14], "openmp": [2, 14], "libomp": 2, "locat": [2, 8, 12, 13, 14, 17], "cc_path": 2, "cxx_path": 2, "dcmake_c_compil": 2, "dcmake_cxx_compil": 2, "dcmake_c_flag": [2, 14], "fopenmp": 2, "stdlib": [2, 14], "conda_prefix": [2, 14], "includ": [2, 9, 13, 14, 26, 27, 29], "dcmake_cxx_flag": [2, 14], "likewis": 2, "also": [2, 13, 14, 23, 30], "veri": [2, 14, 27, 28, 29], "target": [2, 8, 10, 11, 14, 17, 20, 27, 28, 29, 30], "architectur": [2, 14, 15], "bc": [2, 14], "x64": 2, "program": [2, 25], "file": [2, 14, 15, 24, 25, 27, 28, 29, 30], "x86": [2, 31], "enterpris": 2, "vc": 2, "auxiliari": 2, "vcvarsal": 2, "bat": 2, "build_dir": 2, "dfbgemm_build_benchmark": 2, "dcmake_build_typ": 2, "releas": [2, 15], "cl": 2, "ex": 2, "v": [2, 4, 6, 16, 18], "int_nbit_split_embedding_codegen_lookup_funct": 3, "dev_weight": [3, 12], "uvm_weight": [3, 12], "weights_plac": [3, 12], "weights_offset": [3, 12], "weights_ti": [3, 12], "d_offset": [3, 10, 12], "total_d": [3, 12, 23], "max_int2_d": 3, "max_int4_d": 3, "max_int8_d": 3, "max_float16_d": 3, "max_float32_d": 3, "pooling_mod": [3, 23], "indice_weight": 3, "output_dtyp": [3, 10, 23], "lxu_cache_weight": [3, 12, 13], "lxu_cache_loc": [3, 12, 13], "row_align": [3, 12], "max_float8_d": 3, "fp8_exponent_bit": 3, "fp8_exponent_bia": 3, "int_nbit_split_embedding_uvm_caching_codegen_lookup_funct": 3, "cache_hash_size_cumsum": [3, 12], "total_cache_hash_s": [3, 12], "cache_index_table_map": [3, 12], "lxu_cache_st": [3, 12], "lxu_stat": 3, "simlar": 3, "uvm_cach": 3, "lookup": [3, 12, 13, 23], "pruned_hashmap_lookup_cuda": 3, "hash_tabl": 3, "hash_table_offset": 3, "pruned_array_lookup_cuda": 3, "index_remap": 3, "index_remappings_offset": 3, "bounds_check_indices_cuda": 3, "rows_per_t": 3, "bounds_check_mod": [3, 23], "warn": [3, 23, 27], "b_ofset": 3, "max_b": [3, 12], "int_nbit_split_embedding_codegen_lookup_function_cpu": 3, "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu": 3, "pruned_hashmap_insert_unweighted_cpu": 3, "dense_indic": 3, "pruned_hashmap_lookup_unweighted_cpu": 3, "pruned_array_lookup_cpu": 3, "tupl": [4, 5, 6, 11, 12, 13, 22, 23], "gqa_attn_splitk": 4, "xq": 4, "cache_k": 4, "cache_v": 4, "seq_posit": 4, "doubl": [4, 6, 10, 11], "qk_scale": 4, "num_split_k": 4, "kv_cache_quant_num_group": 4, "use_tensor_cor": 4, "cache_logical_dtype_int": 4, "decod": 4, "queri": 4, "split": [4, 23], "w": [4, 16], "bf16": [4, 10], "int4": [4, 10, 21], "kv": 4, "cuda": [4, 9, 19, 20, 21, 22, 23, 31], "gqa": 4, "cach": [4, 12, 13, 14, 23], "It": [4, 13, 14, 15, 17, 19, 20, 22], "current": [4, 13, 14, 15, 17, 23], "context": 4, "16384": 4, "fix": [4, 11, 23], "head": 4, "128": 4, "an": [4, 8, 11, 13, 15, 16, 17, 19, 20, 21, 22, 23, 27, 28, 29, 30], "arbitrari": [4, 13], "b": [4, 11, 14, 17, 18, 22, 23, 27, 28, 29, 30], "h_q": 4, "d": [4, 17, 18, 30], "where": [4, 6, 8, 11, 13, 17, 18, 19, 20, 22, 23], "batch": [4, 6, 11, 17, 18, 19, 20, 22, 31], "num": [4, 22], "max_t": 4, "h_kv": 4, "sequenc": [4, 22, 23], "contain": [4, 8, 13, 14, 17, 18, 19, 20, 22, 23, 29], "actual": [4, 14, 22], "token": [4, 17], "appli": [4, 11, 14, 17, 23], "after": [4, 11, 13, 14, 15, 16, 17, 22, 28, 29, 30], "qk": 4, "control": [4, 23], "amount": [4, 22, 23], "parallel": [4, 13], "wise": [4, 17, 23], "fp8": [4, 10], "quantiz": [4, 31], "singl": [4, 8, 10, 13], "now": 4, "core": 4, "wmma": 4, "instruct": [4, 25, 27, 28, 29, 31], "fast": 4, "kv_cach": 4, "2": [4, 10, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 27, 29, 30], "return": [4, 8, 10, 11, 13, 18, 19, 20, 21, 22, 23, 27, 28, 29], "A": [4, 8, 10, 13, 14, 15, 17, 18, 19, 20, 22, 23, 26, 27, 28, 29], "combin": [4, 31], "metadata": [4, 13, 23], "softmax": 4, "tbe_input_combine_cpu": 5, "indices_list": 5, "offsets_list": 5, "per_sample_weight": [5, 23], "include_last_offset": 5, "padding_fused_tbe_input_combine_cpu": 5, "batch_siz": [5, 19, 20], "solv": 6, "issu": [6, 8, 14, 15, 24], "when": [6, 11, 13, 14, 16, 17, 23, 27, 28, 30], "differ": [6, 11, 13, 17, 20, 22, 23], "often": 6, "occur": [6, 13, 27], "spars": [6, 17, 23, 31], "featur": [6, 11, 14, 17, 19, 20, 22, 23, 24], "system": [6, 14, 15, 17], "well": [6, 11, 14, 27], "natur": [6, 17], "languag": [6, 17, 30], "jagged_to_padded_dense_forward": 6, "c10": [6, 10], "symintarrayref": 6, "max_length": [6, 18], "padding_valu": [6, 18], "jagged_dense_elementwise_add_jagged_output_cuda": 6, "x_valu": [6, 18], "x_offset": [6, 18, 29], "dens": [6, 18, 22, 29], "jagged_to_padded_dens": [6, 18], "jagged_dense_elementwise_add": [6, 18], "jagged_dense_elementwise_mul": [6, 18], "batched_dense_vec_jagged_2d_mul": [6, 18], "a_valu": [6, 18], "a_offset": [6, 18], "dense_to_jag": [6, 18], "symint": 6, "total_l": [6, 18], "jagged_dense_elementwise_add_jagged_output": [6, 18], "jagged_1d_to_dens": [6, 18], "max_l": 6, "jagged_2d_to_dens": [6, 14, 15, 18, 28, 29], "max_sequence_length": [6, 18, 29], "recat_embedding_grad_output_cuda": 7, "grad_output": 7, "num_features_per_rank": 7, "recat_embedding_grad_output_mixed_d_cuda": 7, "dim_sum_per_rank": 7, "recat_embedding_grad_output_mixed_d_batch_cuda": 7, "cumsum_dim_sum_per_rank": 7, "recat_embedding_grad_output_mixed_d_cpu": 7, "new_managed_tensor": 8, "self": [8, 13], "alloc": [8, 23, 27], "unifi": [8, 23], "manag": [8, 14, 15, 23], "uvm": [8, 16, 23], "Then": 8, "prefer": [8, 13, 15], "storag": [8, 10, 12, 13], "host": [8, 14, 20, 22, 23], "establish": 8, "map": [8, 11, 12, 13, 17, 22, 23], "devic": [8, 9, 14, 19, 20, 21, 22, 23], "new": [8, 10, 12, 27, 28, 29], "new_managed_tensor_meta": 8, "placehold": 8, "meta": [8, 26], "kei": [8, 13, 23], "empti": [8, 17, 18, 30], "new_host_mapped_tensor": 8, "new_unified_tensor": 8, "is_host_map": 8, "either": [8, 10, 11, 13, 14, 15, 21], "depend": [8, 10, 14, 15, 17], "new_unified_tensor_meta": 8, "new_vanilla_managed_tensor": 8, "allow": [8, 14], "automat": [8, 11, 16, 22, 28], "uvm_storag": 8, "check": [8, 23], "gpu": [8, 13, 14, 15, 16, 22, 23, 29, 31], "is_uvm_tensor": 8, "BUT": [8, 26], "uvm_to_cpu": 8, "effect": [8, 17], "move": [8, 13, 19, 20, 22], "from": [8, 10, 11, 12, 13, 14, 15, 16, 17, 19, 20, 22, 23, 25, 26, 27, 28, 29, 30], "uvm_to_devic": 8, "prototyp": 8, "whose": 8, "uvm_cuda_mem_advis": 8, "cuda_memory_advis": 8, "cudamemadvis": 8, "cudamemoryadvis": 8, "enum": [8, 10, 13], "avail": [8, 14, 15, 16, 23, 28], "python": [8, 13, 14, 16, 27, 28, 30], "side": [8, 13, 14, 27, 29, 31], "namespac": 8, "over": [8, 14, 22], "valid": 8, "inform": [8, 15, 17, 23, 29, 30], "uvm_cuda_mem_prefetch_async": 8, "device_t": 8, "cudamemprefetchasync": 8, "prefetch": [8, 13, 23], "destin": 8, "uvm_mem_advice_dont_fork": 8, "madvis": 8, "madv_dontfork": 8, "workaround": 8, "driver": [8, 14], "un": 8, "page": [8, 25, 30, 31], "tabl": [8, 11, 17, 31], "fork": [8, 25], "caus": [8, 14, 15, 22, 26, 28], "slowdown": 8, "next": [8, 13, 17, 27, 29], "access": [8, 13, 23], "uvm_to_cpu_clon": 8, "copi": 8, "contigu": [8, 11, 22], "thread": [8, 13], "memcpi": 8, "section": [9, 14, 15, 29], "variou": 9, "all_to_one_devic": 9, "inputtensor": 9, "target_devic": [9, 20], "permute_pooled_embs_split_gpu": 9, "pooled_emb": [9, 19, 20], "offset_dim_list": [9, 20], "permute_list": [9, 20], "inv_offset_dim_list": [9, 20], "inv_permute_list": [9, 20], "permute_pooled_embs_auto_grad_split_gpu": 9, "permute_pooled_embs_auto_grad_gpu": 9, "permute_pooled_embs_cpu_impl": 9, "allow_dupl": 9, "permute_pooled_embs_split_cpu": 9, "permute_pooled_embs_auto_grad_split_cpu": 9, "permute_pooled_embs_auto_grad": [9, 20], "permute_pooled_embs_auto_grad_cpu": 9, "model": [10, 11], "techniqu": 10, "reduc": [10, 13], "larg": [10, 14], "order": [10, 17, 23, 25], "achiev": [10, 15], "better": [10, 13, 27], "small": 10, "loss": [10, 26], "accuraci": 10, "_float_to_bfloat16_gpu": 10, "brain": 10, "_bfloat16_to_float_gpu": 10, "_float_to_fp8rowwise_gpu": 10, "forward": [10, 23], "dtype": [10, 19, 20, 21, 22, 23], "sparsetyp": [10, 23], "throw": [10, 23, 27], "_fp8rowwise_to_float_gpu": 10, "represent": [10, 17], "_float_to_fused8bitrowwise_gpu": 10, "_half_to_fused8bitrowwise_gpu": 10, "half": [10, 21], "_single_or_half_precision_to_fused8bitrowwise_gpu": 10, "_fused8bitrowwise_to_float_gpu": 10, "_fused8bitrowwise_to_half_gpu": 10, "_fused8bitrowwise_to_single_or_half_precision_gpu": 10, "quant_padding_float_typ": 10, "_fused8bitrowwise_to_float_mixed_dim_gpu": 10, "kfloat": 10, "khalf": 10, "_float_to_fusednbitrowwise_gpu": 10, "_half_to_fusednbitrowwise_gpu": 10, "_single_or_half_precision_to_fusednbitrowwise_gpu": 10, "_fusednbitrowwise_to_float_gpu": 10, "_fusednbitrowwise_to_half_gpu": 10, "_fusednbitrowwise_to_single_or_half_precision_gpu": 10, "_float_to_hfp8_gpu": 10, "ebit": 10, "max_po": 10, "hybrid": 10, "hfp8": 10, "_hfp8_to_float_gpu": 10, "_float_to_msfp_gpu": 10, "bounding_box_s": 10, "mbit": 10, "min_po": 10, "msfp": 10, "_msfp_to_float_gpu": 10, "_float_to_paddedfp8rowwise_gpu": 10, "row_dim": 10, "pad": [10, 13, 17, 18, 29], "_paddedfp8rowwise_to_float_gpu": 10, "output_last_dim": 10, "_fused8bitrowwise_to_float_cpu_out": 10, "_float_to_fused8bitrowwise_cpu_out": 10, "float_to_fused8bitrowwise_cpu": 10, "half_to_fused8bitrowwise_cpu": 10, "float_or_half_to_fused8bitrowwise_cpu": 10, "fused8bitrowwise_to_float_cpu": 10, "fused8bitrowwise_to_half_cpu": 10, "fused8bitrowwise_to_float_or_half_cpu": 10, "float_to_fp8rowwise_cpu": 10, "fp8rowwise_to_float_cpu": 10, "fusednbitrowwise_to_float_cpu": 10, "fusednbitrowwise_sbfront_to_float_cpu": 10, "int2": [10, 21], "front": 10, "float32": [10, 19, 20, 21], "torch": [10, 13, 14, 15, 18, 19, 20, 21, 22, 23, 28, 29], "quint4x2": 10, "quint2x4": 10, "quantizedcpu": 10, "backend": [10, 31], "purpos": [10, 17, 18, 23, 26], "becaus": [10, 14, 17, 23], "refer": [10, 14, 17, 22, 28, 29], "rate": [10, 21, 23], "hold": [10, 13, 17], "fusednbitrowwise_to_half_cpu": 10, "fusednbitrowwise_to_float_or_half_cpu": 10, "floattofp8quantized_ref": 10, "nrow": 10, "ncol": 10, "fp8quantizedtofloat_ref": 10, "expand_into_jagged_permute_cuda": 11, "permut": [11, 19, 20, 22], "input_offset": [11, 22], "output_offset": [11, 22], "expand_into_jagged_permut": [11, 22], "expand": [11, 22], "case": [11, 14, 15, 17, 22, 25], "ha": [11, 13, 15, 17, 20, 22, 23, 25, 27, 28], "across": [11, 14, 19, 20, 22], "rank": [11, 17, 22, 23], "level": [11, 22], "exclus": [11, 13, 22], "op": [11, 15, 18, 20, 21, 22, 29], "sit": [11, 22], "we": [11, 13, 14, 17, 25], "deriv": [11, 17, 22, 26], "arrai": [11, 18, 22, 29], "comput": [11, 14, 15, 20, 22, 23], "formula": [11, 22], "output_permut": [11, 22], "table_offset": 11, "bag_offset": [11, 22], "histogram_binning_calibration_cpu": 11, "logit": 11, "bin_num_exampl": 11, "bin_num_posit": 11, "positive_weight": 11, "lower_bound": 11, "upper_bound": 11, "bin_ctr_in_use_aft": 11, "bin_ctr_weight_valu": 11, "divid": [11, 17], "predict": 11, "rang": [11, 13, 17, 22], "e": [11, 13, 14, 17, 19, 20, 27, 29, 30], "bin": [11, 14], "two": [11, 17, 18, 22, 23, 28], "exampl": [11, 13, 14, 15, 16, 18, 19, 20, 21, 22, 23, 27, 28, 29, 30], "fall": [11, 14, 15], "bucket": [11, 14], "basic": [11, 13, 29], "histogram": 11, "result": [11, 13, 14, 18], "statist": [11, 23], "real": 11, "ctr": 11, "num_po": 11, "num_exampl": 11, "final": 11, "calibr": 11, "pre": [11, 15], "cali": 11, "wai": [11, 22, 26], "within": [11, 23], "suffici": [11, 25, 28], "That": 11, "fine": 11, "grain": 11, "modul": [11, 14, 15, 29], "theoret": 11, "layer": [11, 13], "uncalibr": 11, "befor": [11, 13, 14, 30], "sigmoid": 11, "calibart": 11, "pass": [11, 23, 25, 28], "lower": 11, "bound": [11, 17, 23], "calibration_target": 11, "observ": 11, "statisct": 11, "final_calibrated_predict": 11, "bin_ctr_weight": 11, "bin_ctr": 11, "calibrated_predict": 11, "bin_id": 11, "generic_histogram_binning_calibration_by_feature_cpu": 11, "segment_valu": 11, "segment_length": 11, "num_seg": 11, "bin_boundari": 11, "extens": [11, 27, 28], "ectr": 11, "abov": [11, 13, 15, 17, 26, 27, 29, 30], "accept": [11, 25], "sort": [11, 12, 13, 14], "keyjaggedtensor": 11, "num_bin": 11, "longer": [11, 24, 27], "still": [11, 14], "parambin_ctr_weight_valu": 11, "get_unique_indices_cuda": 12, "linear_indic": 12, "max_indic": 12, "compute_count": 12, "dedupl": 12, "get_unique_indices_with_inverse_cuda": 12, "compute_inverse_indic": 12, "lru_cache_find_uncached_cuda": 12, "unique_indic": 12, "unique_indices_length": [12, 13], "time_stamp": 12, "lru_stat": 12, "gather_cache_stat": 12, "uvm_cache_stat": 12, "lock_cache_lin": 12, "lxu_cache_locking_count": 12, "lru": [12, 13, 23], "uncach": [12, 13], "them": 12, "host_lxu_cache_slot": 12, "h_in": 12, "cache_set": [12, 23], "linearize_cache_indices_cuda": 12, "b_offset": 12, "indices_base_offset": 12, "linear": [12, 13], "uniqu": [12, 13, 23, 30], "linearize_cache_indices_from_row_idx_cuda": 12, "update_table_indic": 12, "update_row_indic": 12, "format": [12, 19, 20, 28, 29], "inplac": 12, "updat": [12, 13, 14, 15, 16, 23, 25], "lru_cache_populate_cuda": 12, "hash_size_cumsum": 12, "linear_cache_indic": 12, "stochastic_round": [12, 23], "fetch": [12, 13], "insert": [12, 13, 30], "timestep": 12, "lru_cache_populate_byte_cuda": 12, "byte": [12, 13, 21], "direct_mapped_lru_cache_populate_byte_cuda": 12, "lxu_cache_miss_timestamp": 12, "assoc": 12, "variant": [12, 14, 15, 28], "lfu_cache_populate_cuda": 12, "lfu_stat": 12, "lfu": [12, 23], "lfu_cache_populate_byte_cuda": 12, "lxu_cache_lookup_cuda": 12, "invalid_index": 12, "num_uniq_cache_indic": 12, "lxu_cache_locations_output": 12, "look": [12, 23], "up": [12, 13, 16, 23], "slot": [12, 13], "sentinel": [12, 13], "miss": [12, 13, 14], "direct_mapped_lxu_cache_lookup_cuda": 12, "lxu_cache_flush_cuda": 12, "flush": [12, 13], "reset_weight_momentum_cuda": 12, "momentum1_dev": 12, "momentum1_uvm": 12, "momentum1_plac": 12, "momentum1_offset": 12, "pruned_indic": 12, "pruned_indices_offset": 12, "logical_table_id": 12, "buffer_id": 12, "lxu_cache_locking_counter_decrement_cuda": 12, "decrement": 12, "counter": [12, 23], "lxu_cache_locations_update_cuda": 12, "lxu_cache_locations_new": 12, "rocksdbwritemod": 13, "rocksdb": 13, "mode": [13, 16, 23], "offload": 13, "3": [13, 14, 15, 17, 18, 19, 20, 22, 23, 26, 29], "iter": 13, "fwd_rocksdb_read": 13, "l2": [13, 23], "fwd": 13, "fwd_l1_evict": 13, "l1": 13, "eviciton": 13, "evict": 13, "bwd_l1_cnflct_miss_write_back": 13, "conflict": 13, "bwd": 13, "fill": 13, "potenti": 13, "trigger": 13, "onc": [13, 15, 25], "full": [13, 14, 15, 30], "addition": 13, "do": [13, 14, 15, 23, 25], "io": 13, "enumer": [13, 20], "inlin": [13, 30], "hash_shard": 13, "id": [13, 15], "num_shard": 13, "hash": 13, "shard": 13, "algorithm": [13, 23], "cuda_callback_func": 13, "cudastream_t": 13, "stream": [13, 14, 23], "cudaerror_t": 13, "statu": 13, "functor": 13, "callback": 13, "cudastreamaddcallback": 13, "common": [13, 14, 15, 17, 29], "cudastreamcallback_t": 13, "cast": 13, "invok": [13, 14, 19, 22, 23, 28], "delet": 13, "anoth": [13, 30], "none": [13, 19, 22, 23], "masked_index_put_cuda": 13, "count": 13, "use_pipelin": 13, "preferred_sm": 13, "similar": [13, 14, 17, 23], "index_put": 13, "ignor": [13, 16, 23, 28], "2d": [13, 17, 18, 19, 20, 22, 23, 29], "put": [13, 28], "equival": [13, 17], "filter_": 13, "indices_": 13, "nonzero": 13, "flatten": 13, "1d": [13, 18, 22, 23, 29], "flag": [13, 14, 28], "overlap": 13, "other": [13, 15, 17, 22, 26, 27, 28, 29], "fraction": 13, "sm": 13, "resourc": 13, "competit": 13, "masked_index_select_cuda": 13, "index_select": 13, "ssd_generate_row_addrs_cuda": 13, "assigned_cache_slot": 13, "linear_index_inverse_indic": 13, "unique_indices_count_cumsum": 13, "cache_set_inverse_indic": 13, "inserted_ssd_weight": 13, "cache_set_sorted_unique_indic": 13, "memori": [13, 15, 23, 31], "tbe": [13, 31], "retriev": 13, "scratch": [13, 15], "hbm": [13, 23], "lxu": 13, "associ": 13, "enabl": [13, 14, 16, 23], "conveni": 13, "first": [13, 14, 22, 27, 29, 30], "pointer": [13, 28], "moreov": 13, "list": [13, 14, 17, 18, 19, 20, 23, 26, 27, 29], "post": 13, "backward": [13, 23], "origin": 13, "being": [13, 14, 28], "prefix": [13, 14, 30], "ssd_update_row_addrs_cuda": 13, "ssd_row_addrs_curr": 13, "inserted_ssd_weights_curr_next_map": 13, "lxu_cache_locations_curr": 13, "linear_index_inverse_indices_curr": 13, "unique_indices_count_cumsum_curr": 13, "cache_set_inverse_indices_curr": 13, "inserted_ssd_weights_next": 13, "unique_indices_length_curr": 13, "pipelin": [13, 23], "dure": [13, 14, 17, 23, 29], "reloc": 13, "correct": [13, 14], "between": [13, 17, 27, 28, 30], "been": [13, 14, 27], "compact_indices_cuda": 13, "compact_indic": 13, "compact_count": 13, "mask": 13, "compact": 13, "given": [13, 14, 17, 18], "operat": 13, "remov": 13, "7": [13, 14, 15, 17, 18, 19, 20, 22, 23], "5": [13, 14, 17, 19, 20, 22, 23], "repres": [13, 17, 19, 20, 22, 23], "keep": [13, 14], "class": [13, 19, 23, 28, 29], "cachelibcach": 13, "cachelib_cach": 13, "h": [13, 14, 18, 27], "cachelib": 13, "wrapper": 13, "cachlib": 13, "interact": 13, "maintain": 13, "relat": [13, 17], "initi": 13, "state": [13, 14, 23], "logic": [13, 17, 27], "caller": 13, "reset": 13, "captur": 13, "delai": 13, "markus": 13, "boost": 13, "get": 13, "handl": [13, 17], "read": [13, 17], "done": [13, 14, 15], "embeddingparameterserv": 13, "public": [13, 25, 28], "embeddingkvdb": 13, "ps_table_batched_embed": 13, "servic": [13, 26], "tp": 13, "client": 13, "cachecontext": 13, "kv_db_table_batched_embed": 13, "l2cach": 13, "num_miss": 13, "cached_addr_list": 13, "prealloc": 13, "invalid": [13, 23], "spot": 13, "stai": 13, "struct": 13, "queueitem": 13, "queue": 13, "item": [13, 19, 20, 22, 29], "background": 13, "param": [13, 27, 29], "read_handl": 13, "abstract": 13, "pair": [13, 30], "later": [13, 14], "separ": [13, 23, 28], "get_cach": 13, "monitor": 13, "checkout": 13, "explan": 13, "enable_shared_from_thi": 13, "execut": [13, 15, 16], "dram": [13, 23], "remot": 13, "scalabl": 13, "without": [13, 14, 22, 26], "blow": 13, "subclass": 13, "embeddingrocksdb": 13, "ssd_table_batched_embed": 13, "fbgemm": [14, 15, 18, 20, 21, 22, 24, 25, 26, 28, 29], "experiment": [14, 15, 31], "reproduc": [14, 15, 25, 26], "platform_nam": 14, "unam": 14, "miniconda_prefix": 14, "home": 14, "download": [14, 15], "wget": 14, "q": 14, "anaconda": 14, "miniconda3": 14, "latest": 14, "sh": 14, "o": [14, 15], "p": [14, 20], "load": [14, 17, 29], "shortcut": 14, "bashrc": 14, "command": [14, 15, 27, 28], "against": [14, 16], "env": [14, 15], "name": [14, 15, 23, 26, 27, 29], "python_vers": 14, "12": [14, 17, 19, 20, 22, 23], "upgrad": 14, "pyopenssl": 14, "22": [14, 17, 19, 20], "requir": [14, 15, 16, 17, 23, 28, 29], "recent": [14, 15, 23], "nvcc": 14, "capabl": [14, 16], "bare": 14, "metal": 14, "neither": [14, 26], "nor": [14, 26], "nvidia": [14, 23], "present": [14, 29], "sinc": [14, 17], "pull": [14, 15, 28], "linux": [14, 15], "distribut": [14, 26], "ubuntu": 14, "04": 14, "11": [14, 15, 17, 19, 20, 21], "entrypoint": 14, "devel": 14, "ubuntu22": 14, "rest": [14, 15], "mai": [14, 15, 17, 22, 26], "construct": [14, 15, 17], "mechan": 14, "nvml": 14, "org": [14, 15, 29], "cuda_vers": 14, "label": 14, "verifi": [14, 15, 27, 29], "cuda_runtim": 14, "libnvidia": [14, 15], "ml": [14, 15], "libnccl": [14, 16], "printenv": 14, "extract": 14, "url": [14, 15], "builder": 14, "blob": 14, "main": [14, 25], "install_cuda": 14, "cudnn_url": 14, "redist": 14, "x86_64": 14, "26_cuda12": 14, "archiv": 14, "tar": 14, "xz": 14, "unpack": 14, "xvf": 14, "applic": [14, 15, 23, 27, 29], "alreadi": [14, 15, 25, 27, 29], "repositori": [14, 25], "cmake": 14, "configur": [14, 27], "amd": [14, 15], "minim": 14, "6": [14, 15, 17, 19, 20, 22], "termin": 14, "while": [14, 28], "come": [14, 15], "reason": [14, 15, 28], "oper": [14, 15, 16, 23], "guid": [14, 29], "disabl": 14, "apt": 14, "prompt": 14, "debian_frontend": 14, "noninteract": 14, "db": 14, "radeon": 14, "amdgpu": 14, "focal": 14, "install_5": 14, "50601": 14, "1_all": 14, "deb": 14, "usecas": 14, "hiplibsdk": 14, "dkm": 14, "hipifi": 14, "hip": 14, "dev": 14, "20": [14, 19, 20, 22], "sysroot": 14, "avoid": [14, 22], "glibcxx": 14, "fbgemm_cpu": 14, "10": [14, 15, 17, 19, 20, 23], "older": [14, 15], "accompani": [14, 28], "appropri": 14, "sysroot_linux": 14, "gcc_version": 14, "forg": [14, 28], "gxx_linux": 14, "64": [14, 17], "17": [14, 19, 20, 22], "binari": [14, 26], "cento": 14, "librari": [14, 28, 31], "libstdc": 14, "what": [14, 28], "libcxx_path": 14, "print": [14, 15, 19, 20, 21, 22, 23, 29], "objdump": 14, "tc": 14, "grep": 14, "glibc_": 14, "sed": 14, "vu": 14, "cat": 14, "glibcxx_": 14, "possibl": [14, 17, 25, 26], "just": 14, "minimum": [14, 27, 28, 29], "llvm_version": 14, "16": [14, 17, 19, 20, 21], "libcxx": 14, "outdat": 14, "aarch64": [14, 15], "cannot": 14, "explicitli": [14, 23], "clangxx": 14, "rt": 14, "lib": [14, 15, 16], "ld_library_path": [14, 15, 16], "config": [14, 23], "var": 14, "nvcc_prepend_flag": 14, "correctli": [14, 15, 16, 27, 28], "xcompil": 14, "ccbin": 14, "clangxx_path": 14, "unsupport": 14, "even": [14, 26], "though": [14, 15], "libstd": 14, "mean": [14, 17, 23], "regardless": 14, "scenario": 14, "binpath": 14, "overrid": 14, "exist": [14, 27, 29], "ln": 14, "sf": 14, "path_to_either_gcc_or_clang": 14, "cc": 14, "These": 14, "stage": [14, 17], "click": 14, "hypothesi": [14, 15], "jinja2": 14, "ncurs": 14, "numpi": [14, 15], "scikit": [14, 15], "offici": 14, "homepag": 14, "authorit": [14, 15, 28], "how": [14, 15, 16, 19, 20, 22, 29], "nightli": [14, 15], "rc": 14, "alwai": 14, "reliabl": 14, "arriv": 14, "hour": 14, "than": [14, 17], "window": 14, "silent": [14, 23], "both": [14, 23, 24, 26, 28], "place": [14, 23], "artifact": 14, "select": 14, "thu": [14, 22, 23], "import": [14, 15, 19, 20, 23, 29, 30], "much": [14, 27], "determinist": 14, "whl": [14, 15], "cu121": [14, 15], "rocm5": [14, 15], "ensur": [14, 15, 25], "properli": 14, "__version__": 14, "cuda_cmake_macro": 14, "gemm": 14, "via": [14, 23], "manual": [14, 15, 27], "sha": 14, "pin": 14, "ci": [14, 15], "ci_commit_pin": 14, "txt": [14, 16, 28, 30], "dedb7bdf33": 14, "tag": [14, 27, 30], "fbgemm_vers": 14, "v0": 14, "fbgemm_": 14, "addit": [14, 16, 17, 18], "flow": [14, 23], "becom": 14, "stale": 14, "problem": 14, "re": [14, 15], "attempt": 14, "failur": [14, 15], "clear": [14, 25], "py": [14, 15, 16, 28, 29], "clean": [14, 28], "must": [14, 15, 16, 17, 21, 22, 23, 26, 30], "package_nam": 14, "fbgemm_gpu_": 14, "convent": 14, "major": 14, "minor": 14, "py312": 14, "python_tag": 14, "determin": [14, 17, 23], "processor": 14, "arch": 14, "python_plat_nam": 14, "manylinux2014_": 14, "maco": 14, "macosx_10_9_": 14, "arm64": 14, "macosx_11_0_": 14, "win_": 14, "cpu_onli": 14, "bdist_wheel": 14, "package_vari": 14, "plat": 14, "cxxprefix": 14, "presum": 14, "made": [14, 28], "debug": [14, 16], "assert": 14, "presenc": 14, "unabl": 14, "cudacxx": 14, "cuda_bin_path": 14, "cub": 14, "cub_dir": 14, "header": [14, 27, 30], "cudnn_include_dir": 14, "cudnn_librari": 14, "filepath": 14, "nvml_lib_path": 14, "nccl": [14, 16], "nccl_lib_path": 14, "sm70": [14, 15], "80": 14, "v100": [14, 15], "a100": [14, 15], "cuda_arch_list": 14, "unset": 14, "torch_cuda_arch_list": 14, "preced": 14, "dtorch_cuda_arch_list": 14, "By": [14, 25], "those": [14, 17, 18, 25, 29], "rocm_path": 14, "pytorch_rocm_arch": 14, "gfx906": 14, "gfx908": 14, "gfx90a": 14, "wiki": 14, "gentoo": 14, "rocminfo": 14, "gfx": 14, "dhip_root_dir": 14, "dtorch_use_hip_dsa": 14, "complet": [14, 20, 22, 25, 28], "lot": 14, "jinja": 14, "instanti": [14, 19], "sure": [14, 25, 27, 29], "accident": 14, "cours": 14, "fbgemm_gpu_lib_path": 14, "fbgemm_gpu_pi": [14, 15], "defin": [14, 17, 27], "nm": 14, "gdcu": 14, "referenc": 14, "certain": 14, "gdc": 14, "merge_pooled_embed": [14, 15, 20], "isol": [15, 28], "build": [15, 16, 27, 29, 31], "sm80": 15, "respect": 15, "guarante": 15, "especi": 15, "displai": [15, 30], "setup": 15, "smi": 15, "515": 15, "76": [15, 22], "persist": 15, "bu": [15, 30], "disp": 15, "volatil": 15, "uncorr": 15, "ecc": 15, "fan": 15, "temp": 15, "perf": 15, "pwr": 15, "usag": [15, 28, 29], "cap": 15, "util": [15, 31], "mig": 15, "a10g": 15, "00000000": 15, "00": 15, "1e": [15, 23], "31c": 15, "p0": 15, "59w": 15, "300w": 15, "0mib": 15, "23028mib": 15, "gi": 15, "pid": 15, "No": [15, 23], "expos": 15, "imag": 15, "launch": 15, "toolkit": 15, "interfac": 15, "concis": 15, "info": [15, 27, 29], "dieedg": 15, "avgpwr": 15, "sclk": 15, "mclk": 15, "pwrcap": 15, "vram": 15, "33": [15, 19, 20, 22], "0c": 15, "37": [15, 19, 20], "0w": 15, "300mhz": 15, "1200mhz": 15, "auto": [15, 28], "290": 15, "32": [15, 19, 20], "39": [15, 19, 20], "log": 15, "difficult": 15, "relev": [15, 27], "genai": 15, "triton_vers": 15, "45fff310c8": 15, "about": [15, 29], "link": [15, 28], "encount": [15, 23], "signatur": [15, 28], "traceback": 15, "last": [15, 22], "root": [15, 25], "miniconda": 15, "mycondaenv": 15, "site": 15, "_op": [15, 28], "line": [15, 29, 30], "565": 15, "__getattr__": 15, "overload_nam": 15, "_c": 15, "_jit_get_oper": 15, "qualified_op_nam": 15, "runtimeerror": 15, "except": [15, 27, 29], "wa": 15, "string": [15, 30], "post47": 15, "py3": 15, "egg": 15, "__init__": [15, 29], "21": [15, 19, 20, 22], "_fbgemm_gpu_doc": 15, "noqa": 15, "f401": 15, "e402": 15, "18": [15, 19, 20, 22], "569": 15, "rais": [15, 29], "attributeerror": [15, 29], "_opnamespac": 15, "object": [15, 17], "attribut": [15, 29], "cli": 15, "main_run": 15, "47": [15, 19, 20, 22], "_zn6fbgemm48floatorhalftofusednbitrowwisequantizedsbhalfavx2itli2eeevpkt_miph": 15, "libtorch": 15, "visibl": 15, "incorrectli": [15, 28], "declar": [15, 27], "were": [15, 18], "pr": [15, 27, 28, 29], "1618": 15, "former": 15, "resolv": 15, "latter": 15, "seriou": 15, "tha": 15, "develop": [15, 28], "bench": 16, "good": [16, 26], "instal": [16, 28, 31], "pip": [16, 28], "pytest": 16, "rsx": 16, "pytestcollectionwarn": 16, "split_table_batched_embeddings_test": 16, "quantize_ops_test": 16, "sparse_ops_test": 16, "split_embedding_inference_converter_test": 16, "cuda_visible_devic": 16, "cuda_launch_block": 16, "involv": [16, 17], "rpath": 16, "fbgemm_test_with_rocm": 16, "hip_launch_block": 16, "split_table_batched_embeddings_benchmark": 16, "consecut": 17, "nestedtensor": 17, "raggedtensor": 17, "tensorflow": 17, "notabl": 17, "sentenc": 17, "maxlength": 17, "numel": [17, 22], "greatest": 17, "divisor": 17, "smallest": 17, "sub": 17, "exclud": 17, "partit": 17, "impli": [17, 26], "denot": [17, 27, 29], "offest": 17, "outer": 17, "would": 17, "begin": 17, "maximum": [17, 18, 29], "densor": 17, "form": [17, 26], "figur": 17, "below": 17, "show": [17, 23, 28], "accomod": 17, "At": [17, 27, 28, 29], "multipl": [17, 18, 23, 29, 31], "hadamard": 17, "product": [17, 26], "bmatrix": 17, "rightarrow": 17, "25": [17, 19, 20, 22], "36": [17, 19, 20], "49": 17, "81": 17, "50": 17, "operand": 17, "word": 17, "ax": 17, "properti": 17, "elementwis": [17, 18], "start": [17, 18, 20, 29, 30], "dim": [17, 19, 20, 22], "onto": 17, "part": 17, "everi": [17, 22, 23], "converson": 17, "could": 17, "lead": 17, "smaller": 17, "expect": 17, "happen": 17, "give": 17, "situat": 17, "like": 17, "dense_tensor": 17, "jagged_tensor": 17, "break": 17, "exact": 17, "usual": 17, "area": 18, "outsid": 18, "coverag": 18, "total": [18, 19, 20, 22, 23], "identit": 18, "add": [18, 25, 27, 28, 29], "structur": 18, "jagged_dense_dense_elementwise_add_jagged_output": 18, "y_0": 18, "y_1": 18, "multipli": [18, 23], "max_n": 18, "matmul": 18, "stacked_jagged_1d_to_dens": 18, "arg": [18, 23, 29], "kwarg": 18, "stacked_jagged_2d_to_dens": 18, "permute_pooled_embedding_modul": 19, "permutepooledembed": 19, "embs_dim": [19, 20], "sourc": [19, 23, 25, 26, 27, 28, 29], "column": [19, 20], "essenti": [19, 20], "second": [19, 20, 22, 27, 29], "suppos": [19, 20], "int64": [19, 20, 22], "perm": 19, "arang": [19, 20], "reshap": [19, 20], "13": [19, 20, 23], "14": [19, 20, 29], "15": [19, 20, 22], "19": [19, 20, 22], "23": [19, 20, 22], "24": [19, 20, 22], "26": [19, 20, 21], "27": [19, 20, 22], "28": [19, 20], "29": [19, 20, 22], "30": [19, 20, 22], "31": [19, 20, 22], "34": [19, 20, 22], "35": [19, 20], "38": [19, 20, 22], "40": [19, 20, 22], "41": [19, 20], "42": [19, 20, 22, 29], "43": [19, 20], "44": [19, 20], "45": [19, 20], "46": [19, 20, 22], "describ": [19, 20, 22, 23, 25], "__call__": 19, "b_local": [19, 20], "total_global_d": [19, 20], "local": [19, 20, 27, 29], "global": [19, 20, 23], "pooled_embed": 20, "uncat_dim_s": 20, "cat_dim": 20, "concaten": 20, "vice": 20, "versa": 20, "aggreg": 20, "permute_pooled_emb": 20, "cumul": [20, 22], "invers": 20, "itertool": 20, "accumul": 20, "inv_embs_dim": 20, "inv_permut": 20, "uint8": 21, "randn": 21, "8247": 21, "0031": 21, "0068": 21, "2081": 21, "5427": 21, "5772": 21, "0291": 21, "7626": 21, "159": 21, "86": 21, "48": [21, 22], "213": 21, "188": 21, "248": 21, "254": 21, "186": 21, "permute_2d_sparse_data": 22, "permuted_lengths_sum": 22, "3d": 22, "jag": [22, 29, 31], "less": 22, "repetit": 22, "sampl": [22, 23], "synchron": 22, "suppli": [22, 30], "int32": 22, "randint": 22, "low": [22, 31], "high": [22, 31], "100": 22, "61": 22, "98": 22, "56": 22, "94": 22, "89": 22, "65": 22, "71": 22, "54": 22, "78": 22, "68": 22, "60": 22, "51": 22, "52": 22, "97": 22, "66": 22, "permute_1d_sparse_data": 22, "referr": 22, "withh": 22, "84": 22, "feature_offset": 22, "asynchronous_complete_cumsum": 22, "t_in": 22, "nonblock": 22, "asynchron": 22, "offsets_rang": 22, "range_s": 22, "split_table_batched_embeddings_ops_train": 23, "splittablebatchedembeddingbagscodegen": 23, "embedding_spec": 23, "embeddingloc": 23, "computedevic": 23, "feature_table_map": 23, "cache_algorithm": 23, "cachealgorithm": 23, "cache_load_factor": 23, "cache_reserved_memori": 23, "cache_precis": 23, "weights_precis": 23, "enforce_hbm": 23, "emboptimtyp": 23, "exact_sgd": 23, "record_cache_metr": 23, "recordcachemetr": 23, "gather_uvm_cache_stat": 23, "gradient_clip": 23, "max_gradi": 23, "max_norm": 23, "learning_r": 23, "01": 23, "ep": 23, "08": 23, "momentum": 23, "weight_decai": 23, "weight_decay_mod": 23, "weightdecaymod": 23, "eta": 23, "001": 23, "beta1": 23, "beta2": 23, "999": 23, "step_ema": 23, "10000": 23, "step_swap": 23, "step_start": 23, "step_mod": 23, "stepmod": 23, "use_it": 23, "counter_based_regular": 23, "counterbasedregularizationdefinit": 23, "cowclip_regular": 23, "cowclipdefinit": 23, "poolingmod": 23, "str": 23, "boundscheckmod": 23, "uvm_non_rowwise_momentum": 23, "use_experimental_tb": 23, "prefetch_pipelin": 23, "stats_reporter_config": 23, "tbestatsreporterconfig": 23, "table_nam": 23, "optimizer_state_dtyp": 23, "dict": 23, "multipass_prefetch_config": 23, "multipassprefetchconfig": 23, "global_weight_decai": 23, "globalweightdecaydefinit": 23, "uvm_host_map": 23, "spec": 23, "physic": 23, "placement": 23, "virtual": 23, "managed_cach": 23, "mtia": 23, "least": 23, "frequent": 23, "factor": 23, "capac": 23, "reserv": [23, 26], "optimtyp": 23, "adam": 23, "exact_adagrad": 23, "adagrad": 23, "exact_rowwise_adagrad": 23, "aadagrad": 23, "sgd": 23, "lamb": 23, "lars_sgd": 23, "lar": 23, "partial_rowwise_adam": 23, "partial": 23, "partial_rowwise_lamb": 23, "ensemble_rowwise_adagrad": 23, "ensembl": 23, "Not": 23, "gradient": 23, "record": 23, "hit": 23, "request": [23, 24, 28], "record_cache_miss_count": 23, "metric": 23, "record_tablewise_cache_miss": 23, "collect": [23, 31], "stochast": 23, "round": 23, "clip": 23, "norm": 23, "learn": 23, "0e": 23, "epsilon": 23, "nn": 23, "decai": 23, "decoupl": 23, "pool": [23, 31], "union": 23, "skip": 23, "fatal": 23, "messag": 23, "adjust": 23, "v2": 23, "polici": 23, "forward_stream": 23, "stat": 23, "multipass": 23, "malloc": 23, "cudahostregist": 23, "cudamallocmanag": 23, "feature_requires_grad": 23, "batch_size_per_feature_per_rank": 23, "total_unique_indic": 23, "vbe": 23, "user": 23, "autograd": 23, "chosen": 23, "conatin": 23, "unweight": 23, "f": 23, "split_table_batched_embeddings_ops_common": 23, "init_embedding_weights_uniform": 23, "split_embedding_weight": 23, "9426": 23, "7046": 23, "4214": 23, "0419": 23, "1331": 23, "7856": 23, "8124": 23, "2021": 23, "5771": 23, "5911": 23, "7792": 23, "1068": 23, "6203": 23, "4813": 23, "1677": 23, "4790": 23, "5587": 23, "0941": 23, "5754": 23, "3475": 23, "8952": 23, "1964": 23, "0810": 23, "4174": 23, "2513": 23, "4039": 23, "3775": 23, "3273": 23, "5399": 23, "0229": 23, "1455": 23, "8770": 23, "9520": 23, "4593": 23, "7169": 23, "6307": 23, "1765": 23, "8757": 23, "8614": 23, "2051": 23, "0603": 23, "9980": 23, "7958": 23, "5826": 23, "long": 23, "5197": 23, "2957": 23, "3578": 23, "1487": 23, "4873": 23, "3044": 23, "9801": 23, "2769": 23, "7164": 23, "8528": 23, "7159": 23, "6719": 23, "0784": 23, "2016": 23, "2176": 23, "1988": 23, "3825": 23, "5008": 23, "8991": 23, "1405": 23, "2637": 23, "9427": 23, "8902": 23, "3754": 23, "5013": 23, "6105": 23, "9968": 23, "3057": 23, "7621": 23, "9821": 23, "7314": 23, "6195": 23, "grad_fn": 23, "cppnode": 23, "splitlookupfunction_sgd_op": 23, "set_learning_r": 23, "lr": 23, "set_optimizer_step": 23, "setp": 23, "view": [23, 28], "split_optimizer_st": 23, "momentum1": 23, "momentum2": 23, "prev_it": 23, "cowclip": 23, "row_count": 23, "update_hyper_paramet": 23, "params_dict": 23, "hyper": 23, "extern": [23, 30], "question": 24, "concern": 24, "discuss": 24, "kick": 24, "regard": 24, "feel": 24, "free": 24, "reach": 24, "easi": 25, "transpar": 25, "activ": 25, "welcom": [25, 31], "your": [25, 28, 29], "branch": 25, "ve": 25, "chang": [25, 27, 29], "api": [25, 27, 28, 29], "suit": 25, "lint": 25, "haven": 25, "submit": [25, 27, 29], "facebook": [25, 26, 31], "open": 25, "track": 25, "bug": 25, "descript": [25, 27, 28, 29, 30], "abl": 25, "bounti": 25, "safe": 25, "disclosur": 25, "secur": 25, "go": 25, "outlin": 25, "agre": 25, "tree": 25, "claus": 26, "bsd": 26, "softwar": 26, "copyright": 26, "inc": 26, "affili": 26, "right": [26, 30], "redistribut": 26, "modif": 26, "permit": 26, "condit": 26, "met": 26, "retain": 26, "notic": 26, "disclaim": 26, "materi": 26, "contributor": 26, "endors": 26, "promot": 26, "written": 26, "permiss": 26, "BY": 26, "THE": 26, "holder": 26, "AND": 26, "AS": 26, "express": [26, 30], "OR": 26, "warranti": 26, "NOT": 26, "limit": [26, 28], "TO": 26, "OF": 26, "merchant": 26, "FOR": 26, "particular": 26, "IN": 26, "NO": 26, "event": 26, "shall": 26, "BE": 26, "liabl": 26, "indirect": 26, "incident": 26, "special": 26, "exemplari": 26, "consequenti": 26, "damag": 26, "procur": 26, "substitut": 26, "profit": 26, "busi": 26, "interrupt": 26, "theori": 26, "liabil": 26, "contract": 26, "strict": 26, "tort": 26, "neglig": 26, "aris": 26, "IF": 26, "advis": 26, "SUCH": 26, "javadoc": 27, "style": [27, 29], "comment": [27, 28, 30], "sphinx": [27, 28, 29], "breath": 27, "kept": 27, "cpp": [27, 29, 30], "cu": 27, "cuh": 27, "everyth": 27, "ifndef": 27, "doxygen_this_will_be_skip": 27, "endif": 27, "hidden": 27, "html": [27, 28, 29], "descriptionss": 27, "publish": [27, 29], "docstr": [27, 28, 29], "method": [27, 28, 29], "organ": 27, "yet": 27, "top": [27, 31], "defgroup": 27, "directli": [27, 29], "behavior": [27, 29], "tparam": 27, "thrown": [27, 29], "ingroup": 27, "brief": 27, "short": 27, "example_method": [27, 29], "def": [27, 29], "foo": [27, 29], "lst": [27, 29], "And": [27, 29], "verbatim": [27, 29], "text": [27, 29, 30], "diagram": [27, 29], "unpars": 27, "prev": [27, 29], "usabl": [27, 29], "space": [27, 28, 29], "endcod": 27, "align": [27, 29], "param1": [27, 29], "param2": 27, "bad_alloc": 27, "logic_error": 27, "href": 27, "www": [27, 29], "nl": 27, "cmdlink": 27, "On": [27, 29], "doxygengroup": 27, "rst": [27, 29, 30], "content": [27, 30, 31], "toctre": [27, 29], "ini": 27, "taken": 27, "care": 27, "doc": [27, 28, 29, 30], "netlifi": [27, 28, 29], "preview": [27, 29], "serv": 28, "yourself": 28, "shoe": 28, "who": 28, "understand": 28, "live": 28, "easier": 28, "leav": 28, "task": 28, "tool": 28, "graphviz": [28, 30], "assembl": 28, "prepend": 28, "sphinx_lint": 28, "technic": 28, "why": 28, "occasion": 28, "unresolv": 28, "might": 28, "opt": 28, "pycapsul": 28, "neg": 28, "silenc": 28, "nitpick": 28, "conf": 28, "domain": 28, "deploi": 28, "app": 28, "googl": 29, "c_size_t": 29, "ret": 29, "emplace_back": 29, "valueerror": 29, "restructuredtext": 29, "en": 29, "master": 29, "__": 29, "pep": 29, "0287": 29, "autofunct": 29, "toc": 29, "c_ulong": 29, "mani": 29, "attach": 29, "fact": 29, "helper": 29, "codebas": 29, "add_doc": 29, "forc": 29, "hoc": 29, "the_new_doc_modul": 29, "remain": 29, "render": [29, 30], "anchor": 30, "_doc": 30, "underscor": 30, "_": 30, "There": 30, "elsewher": 30, "ref": 30, "literalinclud": 30, "rel": 30, "enclos": 30, "bracket": 30, "skiplin": 30, "math": 30, "k_": 30, "k_n": 30, "expressino": 30, "int_a": 30, "frac": 30, "2v": 30, "dx": 30, "left": 30, "dv": 30, "_a": 30, "du": 30, "digraph": 30, "altern": 30, "dot": 30, "examplegraph": 30, "precis": 31, "convolut": 31, "server": 31, "infer": 31, "transform": 31, "contribut": 31, "contact": 31, "licens": 31, "autovector": 31, "ssd": 31}, "objects": {"": [[13, 0, 1, "_CPPv4N16RocksdbWriteMode29BWD_L1_CNFLCT_MISS_WRITE_BACKE", "BWD_L1_CNFLCT_MISS_WRITE_BACK"], [1, 1, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec"], [1, 2, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec::IndexType"], [1, 2, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec::OffsetType"], [1, 2, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec::OutType"], [1, 3, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec::block_size"], [1, 3, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec::data_size"], [1, 3, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec::exponent_bias"], [1, 3, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec::exponent_bits"], [1, 3, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec::index_size"], [1, 3, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec::indices"], [1, 3, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec::input"], [1, 3, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec::input_stride"], [1, 3, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec::is_bf16_out"], [1, 3, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec::is_weight_positional"], [1, 3, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec::normalize_by_lengths"], [1, 3, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec::offsets_or_lengths"], [1, 3, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec::out"], [1, 3, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec::output_size"], [1, 3, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec::output_stride"], [1, 3, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec::use_offsets"], [1, 3, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec::weights"], [1, 1, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec"], [1, 2, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::InType"], [1, 2, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::IndexType"], [1, 2, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::OffsetType"], [1, 2, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::OutType"], [1, 3, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::block_size"], [1, 3, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::data_size"], [1, 3, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::index_size"], [1, 3, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::indices"], [1, 3, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::input"], [1, 3, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::input_stride"], [1, 3, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::is_bf16_in"], [1, 3, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::is_bf16_out"], [1, 3, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::is_weight_positional"], [1, 3, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::no_bag"], [1, 3, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::normalize_by_lengths"], [1, 3, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::offsets_or_lengths"], [1, 3, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::out"], [1, 3, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::output_size"], [1, 3, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::output_stride"], [1, 3, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::scale_bias_last"], [1, 3, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::use_offsets"], [1, 3, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::weights"], [13, 0, 1, "_CPPv4N16RocksdbWriteMode5FLUSHE", "FLUSH"], [10, 1, 1, "_CPPv423FP8QuantizedToFloat_refPCK7uint8_tK6size_tK6size_tPCfKiKi", "FP8QuantizedToFloat_ref"], [10, 3, 1, "_CPPv423FP8QuantizedToFloat_refPCK7uint8_tK6size_tK6size_tPCfKiKi", "FP8QuantizedToFloat_ref::ebits"], [10, 3, 1, "_CPPv423FP8QuantizedToFloat_refPCK7uint8_tK6size_tK6size_tPCfKiKi", "FP8QuantizedToFloat_ref::exponent_bias"], [10, 3, 1, "_CPPv423FP8QuantizedToFloat_refPCK7uint8_tK6size_tK6size_tPCfKiKi", "FP8QuantizedToFloat_ref::input"], [10, 3, 1, "_CPPv423FP8QuantizedToFloat_refPCK7uint8_tK6size_tK6size_tPCfKiKi", "FP8QuantizedToFloat_ref::ncols"], [10, 3, 1, "_CPPv423FP8QuantizedToFloat_refPCK7uint8_tK6size_tK6size_tPCfKiKi", "FP8QuantizedToFloat_ref::nrows"], [10, 3, 1, "_CPPv423FP8QuantizedToFloat_refPCK7uint8_tK6size_tK6size_tPCfKiKi", "FP8QuantizedToFloat_ref::output"], [10, 1, 1, "_CPPv423FP8rowwise_to_float_cpuRK6TensorbK7int64_t", "FP8rowwise_to_float_cpu"], [10, 3, 1, "_CPPv423FP8rowwise_to_float_cpuRK6TensorbK7int64_t", "FP8rowwise_to_float_cpu::forward"], [10, 3, 1, "_CPPv423FP8rowwise_to_float_cpuRK6TensorbK7int64_t", "FP8rowwise_to_float_cpu::input"], [10, 3, 1, "_CPPv423FP8rowwise_to_float_cpuRK6TensorbK7int64_t", "FP8rowwise_to_float_cpu::output_dtype"], [13, 0, 1, "_CPPv4N16RocksdbWriteMode15FWD_L1_EVICTIONE", "FWD_L1_EVICTION"], [13, 0, 1, "_CPPv4N16RocksdbWriteMode16FWD_ROCKSDB_READE", "FWD_ROCKSDB_READ"], [0, 1, 1, "_CPPv410FindMinMaxPKfPfPf7int64_t", "FindMinMax"], [0, 3, 1, "_CPPv410FindMinMaxPKfPfPf7int64_t", "FindMinMax::len"], [0, 3, 1, "_CPPv410FindMinMaxPKfPfPf7int64_t", "FindMinMax::m"], [0, 3, 1, "_CPPv410FindMinMaxPKfPfPf7int64_t", "FindMinMax::max"], [0, 3, 1, "_CPPv410FindMinMaxPKfPfPf7int64_t", "FindMinMax::min"], [0, 1, 1, "_CPPv4I0E44FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfviPK9InputType6size_tiPNSt7uint8_tE", "FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf"], [0, 2, 1, "_CPPv4I0E44FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfviPK9InputType6size_tiPNSt7uint8_tE", "FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf::InputType"], [0, 3, 1, "_CPPv4I0E44FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfviPK9InputType6size_tiPNSt7uint8_tE", "FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf::bit_rate"], [0, 3, 1, "_CPPv4I0E44FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfviPK9InputType6size_tiPNSt7uint8_tE", "FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf::input"], [0, 3, 1, "_CPPv4I0E44FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfviPK9InputType6size_tiPNSt7uint8_tE", "FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf::input_columns"], [0, 3, 1, "_CPPv4I0E44FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfviPK9InputType6size_tiPNSt7uint8_tE", "FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf::input_rows"], [0, 3, 1, "_CPPv4I0E44FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfviPK9InputType6size_tiPNSt7uint8_tE", "FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf::output"], [10, 1, 1, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd", "FloatToFP8Quantized_ref"], [10, 3, 1, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd", "FloatToFP8Quantized_ref::ebits"], [10, 3, 1, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd", "FloatToFP8Quantized_ref::exponent_bias"], [10, 3, 1, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd", "FloatToFP8Quantized_ref::input"], [10, 3, 1, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd", "FloatToFP8Quantized_ref::max_pos"], [10, 3, 1, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd", "FloatToFP8Quantized_ref::ncols"], [10, 3, 1, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd", "FloatToFP8Quantized_ref::nrows"], [10, 3, 1, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd", "FloatToFP8Quantized_ref::output"], [0, 1, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize"], [0, 2, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize::T"], [0, 3, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize::dst"], [0, 3, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize::len"], [0, 3, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize::noise_ratio"], [0, 3, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize::num_threads"], [0, 3, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize::qparams"], [0, 3, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize::src"], [0, 3, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize::thread_id"], [0, 1, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise"], [0, 3, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::C"], [0, 3, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::G"], [0, 3, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::K"], [0, 2, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::LAYOUT"], [0, 2, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::T"], [0, 3, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::X"], [0, 3, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::dst"], [0, 3, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::scales"], [0, 3, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::src"], [0, 3, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::zero_points"], [13, 4, 1, "_CPPv416RocksdbWriteMode", "RocksdbWriteMode"], [13, 0, 1, "_CPPv4N16RocksdbWriteMode29BWD_L1_CNFLCT_MISS_WRITE_BACKE", "RocksdbWriteMode::BWD_L1_CNFLCT_MISS_WRITE_BACK"], [13, 0, 1, "_CPPv4N16RocksdbWriteMode5FLUSHE", "RocksdbWriteMode::FLUSH"], [13, 0, 1, "_CPPv4N16RocksdbWriteMode15FWD_L1_EVICTIONE", "RocksdbWriteMode::FWD_L1_EVICTION"], [13, 0, 1, "_CPPv4N16RocksdbWriteMode16FWD_ROCKSDB_READE", "RocksdbWriteMode::FWD_ROCKSDB_READ"], [0, 1, 1, "_CPPv46Xor128v", "Xor128"], [10, 1, 1, "_CPPv424_FP8rowwise_to_float_gpuRKN2at6TensorEbK7int64_t", "_FP8rowwise_to_float_gpu"], [10, 3, 1, "_CPPv424_FP8rowwise_to_float_gpuRKN2at6TensorEbK7int64_t", "_FP8rowwise_to_float_gpu::forward"], [10, 3, 1, "_CPPv424_FP8rowwise_to_float_gpuRKN2at6TensorEbK7int64_t", "_FP8rowwise_to_float_gpu::input"], [10, 3, 1, "_CPPv424_FP8rowwise_to_float_gpuRKN2at6TensorEbK7int64_t", "_FP8rowwise_to_float_gpu::output_dtype"], [10, 1, 1, "_CPPv422_bfloat16_to_float_gpuRKN2at6TensorE", "_bfloat16_to_float_gpu"], [10, 3, 1, "_CPPv422_bfloat16_to_float_gpuRKN2at6TensorE", "_bfloat16_to_float_gpu::input"], [10, 1, 1, "_CPPv424_float_to_FP8rowwise_gpuRK6TensorKb", "_float_to_FP8rowwise_gpu"], [10, 3, 1, "_CPPv424_float_to_FP8rowwise_gpuRK6TensorKb", "_float_to_FP8rowwise_gpu::forward"], [10, 3, 1, "_CPPv424_float_to_FP8rowwise_gpuRK6TensorKb", "_float_to_FP8rowwise_gpu::input"], [10, 1, 1, "_CPPv422_float_to_bfloat16_gpuRKN2at6TensorE", "_float_to_bfloat16_gpu"], [10, 3, 1, "_CPPv422_float_to_bfloat16_gpuRKN2at6TensorE", "_float_to_bfloat16_gpu::input"], [10, 1, 1, "_CPPv434_float_to_fused8bitrowwise_cpu_outR6TensorRK6Tensor", "_float_to_fused8bitrowwise_cpu_out"], [10, 3, 1, "_CPPv434_float_to_fused8bitrowwise_cpu_outR6TensorRK6Tensor", "_float_to_fused8bitrowwise_cpu_out::input"], [10, 3, 1, "_CPPv434_float_to_fused8bitrowwise_cpu_outR6TensorRK6Tensor", "_float_to_fused8bitrowwise_cpu_out::output"], [10, 1, 1, "_CPPv430_float_to_fused8bitrowwise_gpuRK6Tensor", "_float_to_fused8bitrowwise_gpu"], [10, 3, 1, "_CPPv430_float_to_fused8bitrowwise_gpuRK6Tensor", "_float_to_fused8bitrowwise_gpu::input"], [10, 1, 1, "_CPPv430_float_to_fusednbitrowwise_gpuRK6TensorK7int64_t", "_float_to_fusednbitrowwise_gpu"], [10, 3, 1, "_CPPv430_float_to_fusednbitrowwise_gpuRK6TensorK7int64_t", "_float_to_fusednbitrowwise_gpu::bit_rate"], [10, 3, 1, "_CPPv430_float_to_fusednbitrowwise_gpuRK6TensorK7int64_t", "_float_to_fusednbitrowwise_gpu::input"], [10, 1, 1, "_CPPv418_float_to_hfp8_gpuRKN2at6TensorEK7int64_tK7int64_tKd", "_float_to_hfp8_gpu"], [10, 3, 1, "_CPPv418_float_to_hfp8_gpuRKN2at6TensorEK7int64_tK7int64_tKd", "_float_to_hfp8_gpu::ebits"], [10, 3, 1, "_CPPv418_float_to_hfp8_gpuRKN2at6TensorEK7int64_tK7int64_tKd", "_float_to_hfp8_gpu::exponent_bias"], [10, 3, 1, "_CPPv418_float_to_hfp8_gpuRKN2at6TensorEK7int64_tK7int64_tKd", "_float_to_hfp8_gpu::input"], [10, 3, 1, "_CPPv418_float_to_hfp8_gpuRKN2at6TensorEK7int64_tK7int64_tKd", "_float_to_hfp8_gpu::max_pos"], [10, 1, 1, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd", "_float_to_msfp_gpu"], [10, 3, 1, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd", "_float_to_msfp_gpu::bias"], [10, 3, 1, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd", "_float_to_msfp_gpu::bounding_box_size"], [10, 3, 1, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd", "_float_to_msfp_gpu::ebits"], [10, 3, 1, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd", "_float_to_msfp_gpu::input"], [10, 3, 1, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd", "_float_to_msfp_gpu::max_pos"], [10, 3, 1, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd", "_float_to_msfp_gpu::mbits"], [10, 3, 1, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd", "_float_to_msfp_gpu::min_pos"], [10, 1, 1, "_CPPv430_float_to_paddedFP8rowwise_gpuRK6TensorKbK7int64_t", "_float_to_paddedFP8rowwise_gpu"], [10, 3, 1, "_CPPv430_float_to_paddedFP8rowwise_gpuRK6TensorKbK7int64_t", "_float_to_paddedFP8rowwise_gpu::forward"], [10, 3, 1, "_CPPv430_float_to_paddedFP8rowwise_gpuRK6TensorKbK7int64_t", "_float_to_paddedFP8rowwise_gpu::input"], [10, 3, 1, "_CPPv430_float_to_paddedFP8rowwise_gpuRK6TensorKbK7int64_t", "_float_to_paddedFP8rowwise_gpu::row_dim"], [10, 1, 1, "_CPPv434_fused8bitrowwise_to_float_cpu_outR6TensorRK6Tensor", "_fused8bitrowwise_to_float_cpu_out"], [10, 3, 1, "_CPPv434_fused8bitrowwise_to_float_cpu_outR6TensorRK6Tensor", "_fused8bitrowwise_to_float_cpu_out::input"], [10, 3, 1, "_CPPv434_fused8bitrowwise_to_float_cpu_outR6TensorRK6Tensor", "_fused8bitrowwise_to_float_cpu_out::output"], [10, 1, 1, "_CPPv430_fused8bitrowwise_to_float_gpuRKN2at6TensorE", "_fused8bitrowwise_to_float_gpu"], [10, 3, 1, "_CPPv430_fused8bitrowwise_to_float_gpuRKN2at6TensorE", "_fused8bitrowwise_to_float_gpu::input"], [10, 1, 1, "_CPPv440_fused8bitrowwise_to_float_mixed_dim_gpuRKN2at6TensorERKN2at6TensorEK7int64_t", "_fused8bitrowwise_to_float_mixed_dim_gpu"], [10, 3, 1, "_CPPv440_fused8bitrowwise_to_float_mixed_dim_gpuRKN2at6TensorERKN2at6TensorEK7int64_t", "_fused8bitrowwise_to_float_mixed_dim_gpu::D_offsets"], [10, 3, 1, "_CPPv440_fused8bitrowwise_to_float_mixed_dim_gpuRKN2at6TensorERKN2at6TensorEK7int64_t", "_fused8bitrowwise_to_float_mixed_dim_gpu::input"], [10, 3, 1, "_CPPv440_fused8bitrowwise_to_float_mixed_dim_gpuRKN2at6TensorERKN2at6TensorEK7int64_t", "_fused8bitrowwise_to_float_mixed_dim_gpu::output_dtype"], [10, 1, 1, "_CPPv429_fused8bitrowwise_to_half_gpuRKN2at6TensorE", "_fused8bitrowwise_to_half_gpu"], [10, 3, 1, "_CPPv429_fused8bitrowwise_to_half_gpuRKN2at6TensorE", "_fused8bitrowwise_to_half_gpu::input"], [10, 1, 1, "_CPPv449_fused8bitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tKbKb", "_fused8bitrowwise_to_single_or_half_precision_gpu"], [10, 3, 1, "_CPPv449_fused8bitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tKbKb", "_fused8bitrowwise_to_single_or_half_precision_gpu::input"], [10, 3, 1, "_CPPv449_fused8bitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tKbKb", "_fused8bitrowwise_to_single_or_half_precision_gpu::output_dtype"], [10, 3, 1, "_CPPv449_fused8bitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tKbKb", "_fused8bitrowwise_to_single_or_half_precision_gpu::quant_padding_float_type"], [10, 3, 1, "_CPPv449_fused8bitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tKbKb", "_fused8bitrowwise_to_single_or_half_precision_gpu::scale_bias_last"], [10, 1, 1, "_CPPv430_fusednbitrowwise_to_float_gpuRKN2at6TensorEK7int64_t", "_fusednbitrowwise_to_float_gpu"], [10, 3, 1, "_CPPv430_fusednbitrowwise_to_float_gpuRKN2at6TensorEK7int64_t", "_fusednbitrowwise_to_float_gpu::bit_rate"], [10, 3, 1, "_CPPv430_fusednbitrowwise_to_float_gpuRKN2at6TensorEK7int64_t", "_fusednbitrowwise_to_float_gpu::input"], [10, 1, 1, "_CPPv429_fusednbitrowwise_to_half_gpuRKN2at6TensorEK7int64_t", "_fusednbitrowwise_to_half_gpu"], [10, 3, 1, "_CPPv429_fusednbitrowwise_to_half_gpuRKN2at6TensorEK7int64_t", "_fusednbitrowwise_to_half_gpu::bit_rate"], [10, 3, 1, "_CPPv429_fusednbitrowwise_to_half_gpuRKN2at6TensorEK7int64_t", "_fusednbitrowwise_to_half_gpu::input"], [10, 1, 1, "_CPPv449_fusednbitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tK7int64_t", "_fusednbitrowwise_to_single_or_half_precision_gpu"], [10, 3, 1, "_CPPv449_fusednbitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tK7int64_t", "_fusednbitrowwise_to_single_or_half_precision_gpu::bit_rate"], [10, 3, 1, "_CPPv449_fusednbitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tK7int64_t", "_fusednbitrowwise_to_single_or_half_precision_gpu::input"], [10, 3, 1, "_CPPv449_fusednbitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tK7int64_t", "_fusednbitrowwise_to_single_or_half_precision_gpu::output_dtype"], [10, 1, 1, "_CPPv429_half_to_fused8bitrowwise_gpuRK6Tensor", "_half_to_fused8bitrowwise_gpu"], [10, 3, 1, "_CPPv429_half_to_fused8bitrowwise_gpuRK6Tensor", "_half_to_fused8bitrowwise_gpu::input"], [10, 1, 1, "_CPPv429_half_to_fusednbitrowwise_gpuRKN2at6TensorEK7int64_t", "_half_to_fusednbitrowwise_gpu"], [10, 3, 1, "_CPPv429_half_to_fusednbitrowwise_gpuRKN2at6TensorEK7int64_t", "_half_to_fusednbitrowwise_gpu::bit_rate"], [10, 3, 1, "_CPPv429_half_to_fusednbitrowwise_gpuRKN2at6TensorEK7int64_t", "_half_to_fusednbitrowwise_gpu::input"], [10, 1, 1, "_CPPv418_hfp8_to_float_gpuRKN2at6TensorEK7int64_tK7int64_t", "_hfp8_to_float_gpu"], [10, 3, 1, "_CPPv418_hfp8_to_float_gpuRKN2at6TensorEK7int64_tK7int64_t", "_hfp8_to_float_gpu::ebits"], [10, 3, 1, "_CPPv418_hfp8_to_float_gpuRKN2at6TensorEK7int64_tK7int64_t", "_hfp8_to_float_gpu::exponent_bias"], [10, 3, 1, "_CPPv418_hfp8_to_float_gpuRKN2at6TensorEK7int64_tK7int64_t", "_hfp8_to_float_gpu::input"], [10, 1, 1, "_CPPv418_msfp_to_float_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_t", "_msfp_to_float_gpu"], [10, 3, 1, "_CPPv418_msfp_to_float_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_t", "_msfp_to_float_gpu::bias"], [10, 3, 1, "_CPPv418_msfp_to_float_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_t", "_msfp_to_float_gpu::ebits"], [10, 3, 1, "_CPPv418_msfp_to_float_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_t", "_msfp_to_float_gpu::input"], [10, 3, 1, "_CPPv418_msfp_to_float_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_t", "_msfp_to_float_gpu::mbits"], [10, 1, 1, "_CPPv430_paddedFP8rowwise_to_float_gpuRKN2at6TensorEKbK7int64_tK7int64_tK7int64_t", "_paddedFP8rowwise_to_float_gpu"], [10, 3, 1, "_CPPv430_paddedFP8rowwise_to_float_gpuRKN2at6TensorEKbK7int64_tK7int64_tK7int64_t", "_paddedFP8rowwise_to_float_gpu::forward"], [10, 3, 1, "_CPPv430_paddedFP8rowwise_to_float_gpuRKN2at6TensorEKbK7int64_tK7int64_tK7int64_t", "_paddedFP8rowwise_to_float_gpu::input"], [10, 3, 1, "_CPPv430_paddedFP8rowwise_to_float_gpuRKN2at6TensorEKbK7int64_tK7int64_tK7int64_t", "_paddedFP8rowwise_to_float_gpu::output_dtype"], [10, 3, 1, "_CPPv430_paddedFP8rowwise_to_float_gpuRKN2at6TensorEKbK7int64_tK7int64_tK7int64_t", "_paddedFP8rowwise_to_float_gpu::output_last_dim"], [10, 3, 1, "_CPPv430_paddedFP8rowwise_to_float_gpuRKN2at6TensorEKbK7int64_tK7int64_tK7int64_t", "_paddedFP8rowwise_to_float_gpu::row_dim"], [10, 1, 1, "_CPPv449_single_or_half_precision_to_fused8bitrowwise_gpuRK6Tensor", "_single_or_half_precision_to_fused8bitrowwise_gpu"], [10, 3, 1, "_CPPv449_single_or_half_precision_to_fused8bitrowwise_gpuRK6Tensor", "_single_or_half_precision_to_fused8bitrowwise_gpu::input"], [10, 1, 1, "_CPPv449_single_or_half_precision_to_fusednbitrowwise_gpuRK6TensorK7int64_t", "_single_or_half_precision_to_fusednbitrowwise_gpu"], [10, 3, 1, "_CPPv449_single_or_half_precision_to_fusednbitrowwise_gpuRK6TensorK7int64_t", "_single_or_half_precision_to_fusednbitrowwise_gpu::bit_rate"], [10, 3, 1, "_CPPv449_single_or_half_precision_to_fusednbitrowwise_gpuRK6TensorK7int64_t", "_single_or_half_precision_to_fusednbitrowwise_gpu::input"], [9, 1, 1, "_CPPv417all_to_one_deviceNSt6vectorIN2at6TensorEEEN2at6DeviceE", "all_to_one_device"], [9, 3, 1, "_CPPv417all_to_one_deviceNSt6vectorIN2at6TensorEEEN2at6DeviceE", "all_to_one_device::inputTensors"], [9, 3, 1, "_CPPv417all_to_one_deviceNSt6vectorIN2at6TensorEEEN2at6DeviceE", "all_to_one_device::target_device"], [6, 1, 1, "_CPPv431batched_dense_vec_jagged_2d_mulRK6TensorRK6TensorRK6Tensor", "batched_dense_vec_jagged_2d_mul"], [6, 3, 1, "_CPPv431batched_dense_vec_jagged_2d_mulRK6TensorRK6TensorRK6Tensor", "batched_dense_vec_jagged_2d_mul::a_offsets"], [6, 3, 1, "_CPPv431batched_dense_vec_jagged_2d_mulRK6TensorRK6TensorRK6Tensor", "batched_dense_vec_jagged_2d_mul::a_values"], [6, 3, 1, "_CPPv431batched_dense_vec_jagged_2d_mulRK6TensorRK6TensorRK6Tensor", "batched_dense_vec_jagged_2d_mul::v"], [3, 1, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKNSt8optionalI6TensorEERKNSt8optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda"], [3, 3, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKNSt8optionalI6TensorEERKNSt8optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda::B_ofsets"], [3, 3, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKNSt8optionalI6TensorEERKNSt8optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda::bounds_check_mode"], [3, 3, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKNSt8optionalI6TensorEERKNSt8optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda::indices"], [3, 3, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKNSt8optionalI6TensorEERKNSt8optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda::max_B"], [3, 3, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKNSt8optionalI6TensorEERKNSt8optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda::offsets"], [3, 3, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKNSt8optionalI6TensorEERKNSt8optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda::rows_per_table"], [3, 3, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKNSt8optionalI6TensorEERKNSt8optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda::warning"], [3, 3, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKNSt8optionalI6TensorEERKNSt8optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda::weights"], [13, 1, 1, "_CPPv420compact_indices_cudaNSt6vectorI6TensorEE6TensorNSt6vectorI6TensorEE6Tensor6Tensor", "compact_indices_cuda"], [13, 3, 1, "_CPPv420compact_indices_cudaNSt6vectorI6TensorEE6TensorNSt6vectorI6TensorEE6Tensor6Tensor", "compact_indices_cuda::compact_count"], [13, 3, 1, "_CPPv420compact_indices_cudaNSt6vectorI6TensorEE6TensorNSt6vectorI6TensorEE6Tensor6Tensor", "compact_indices_cuda::compact_indices"], [13, 3, 1, "_CPPv420compact_indices_cudaNSt6vectorI6TensorEE6TensorNSt6vectorI6TensorEE6Tensor6Tensor", "compact_indices_cuda::count"], [13, 3, 1, "_CPPv420compact_indices_cudaNSt6vectorI6TensorEE6TensorNSt6vectorI6TensorEE6Tensor6Tensor", "compact_indices_cuda::indices"], [13, 3, 1, "_CPPv420compact_indices_cudaNSt6vectorI6TensorEE6TensorNSt6vectorI6TensorEE6Tensor6Tensor", "compact_indices_cuda::masks"], [13, 1, 1, "_CPPv418cuda_callback_func12cudaStream_t11cudaError_tPv", "cuda_callback_func"], [13, 3, 1, "_CPPv418cuda_callback_func12cudaStream_t11cudaError_tPv", "cuda_callback_func::functor"], [13, 3, 1, "_CPPv418cuda_callback_func12cudaStream_t11cudaError_tPv", "cuda_callback_func::status"], [13, 3, 1, "_CPPv418cuda_callback_func12cudaStream_t11cudaError_tPv", "cuda_callback_func::stream"], [6, 1, 1, "_CPPv415dense_to_jaggedRK6TensorRKNSt6vectorI6TensorEENSt8optionalIN2at6SymIntEEE", "dense_to_jagged"], [6, 3, 1, "_CPPv415dense_to_jaggedRK6TensorRKNSt6vectorI6TensorEENSt8optionalIN2at6SymIntEEE", "dense_to_jagged::dense"], [6, 3, 1, "_CPPv415dense_to_jaggedRK6TensorRKNSt6vectorI6TensorEENSt8optionalIN2at6SymIntEEE", "dense_to_jagged::offsets"], [6, 3, 1, "_CPPv415dense_to_jaggedRK6TensorRKNSt6vectorI6TensorEENSt8optionalIN2at6SymIntEEE", "dense_to_jagged::total_L"], [12, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda"], [12, 3, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::D_offsets"], [12, 3, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::cache_index_table_map"], [12, 3, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::gather_cache_stats"], [12, 3, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::hash_size_cumsum"], [12, 3, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::linear_cache_indices"], [12, 3, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::lru_state"], [12, 3, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::lxu_cache_miss_timestamp"], [12, 3, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::lxu_cache_state"], [12, 3, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::lxu_cache_weights"], [12, 3, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::row_alignment"], [12, 3, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::time_stamp"], [12, 3, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::total_cache_hash_size"], [12, 3, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::uvm_cache_stats"], [12, 3, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::weights"], [12, 3, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::weights_offsets"], [12, 3, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::weights_tys"], [12, 1, 1, "_CPPv435direct_mapped_lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lxu_cache_lookup_cuda"], [12, 3, 1, "_CPPv435direct_mapped_lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lxu_cache_lookup_cuda::gather_cache_stats"], [12, 3, 1, "_CPPv435direct_mapped_lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lxu_cache_lookup_cuda::invalid_index"], [12, 3, 1, "_CPPv435direct_mapped_lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lxu_cache_lookup_cuda::linear_cache_indices"], [12, 3, 1, "_CPPv435direct_mapped_lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lxu_cache_lookup_cuda::lxu_cache_state"], [12, 3, 1, "_CPPv435direct_mapped_lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lxu_cache_lookup_cuda::uvm_cache_stats"], [27, 1, 1, "_CPPv4I0_NSt6size_tEE14example_method7int32_t1Tf", "example_method"], [27, 2, 1, "_CPPv4I0_NSt6size_tEE14example_method7int32_t1Tf", "example_method::Alignment"], [27, 2, 1, "_CPPv4I0_NSt6size_tEE14example_method7int32_t1Tf", "example_method::T"], [27, 3, 1, "_CPPv4I0_NSt6size_tEE14example_method7int32_t1Tf", "example_method::param1"], [27, 3, 1, "_CPPv4I0_NSt6size_tEE14example_method7int32_t1Tf", "example_method::param2"], [11, 1, 1, "_CPPv431expand_into_jagged_permute_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_t", "expand_into_jagged_permute_cuda"], [11, 3, 1, "_CPPv431expand_into_jagged_permute_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_t", "expand_into_jagged_permute_cuda::input_offsets"], [11, 3, 1, "_CPPv431expand_into_jagged_permute_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_t", "expand_into_jagged_permute_cuda::output_offsets"], [11, 3, 1, "_CPPv431expand_into_jagged_permute_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_t", "expand_into_jagged_permute_cuda::output_size"], [11, 3, 1, "_CPPv431expand_into_jagged_permute_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_t", "expand_into_jagged_permute_cuda::permute"], [10, 1, 1, "_CPPv437float_or_half_to_fused8bitrowwise_cpuRK6Tensor", "float_or_half_to_fused8bitrowwise_cpu"], [10, 3, 1, "_CPPv437float_or_half_to_fused8bitrowwise_cpuRK6Tensor", "float_or_half_to_fused8bitrowwise_cpu::input"], [10, 1, 1, "_CPPv423float_to_FP8rowwise_cpuRK6Tensorb", "float_to_FP8rowwise_cpu"], [10, 3, 1, "_CPPv423float_to_FP8rowwise_cpuRK6Tensorb", "float_to_FP8rowwise_cpu::forward"], [10, 3, 1, "_CPPv423float_to_FP8rowwise_cpuRK6Tensorb", "float_to_FP8rowwise_cpu::input"], [10, 1, 1, "_CPPv429float_to_fused8bitrowwise_cpuRK6Tensor", "float_to_fused8bitrowwise_cpu"], [10, 3, 1, "_CPPv429float_to_fused8bitrowwise_cpuRK6Tensor", "float_to_fused8bitrowwise_cpu::input"], [10, 1, 1, "_CPPv429fused8bitrowwise_to_float_cpuRK6Tensor", "fused8bitrowwise_to_float_cpu"], [10, 3, 1, "_CPPv429fused8bitrowwise_to_float_cpuRK6Tensor", "fused8bitrowwise_to_float_cpu::input"], [10, 1, 1, "_CPPv437fused8bitrowwise_to_float_or_half_cpuRK6TensorK7int64_tKbKb", "fused8bitrowwise_to_float_or_half_cpu"], [10, 3, 1, "_CPPv437fused8bitrowwise_to_float_or_half_cpuRK6TensorK7int64_tKbKb", "fused8bitrowwise_to_float_or_half_cpu::input"], [10, 3, 1, "_CPPv437fused8bitrowwise_to_float_or_half_cpuRK6TensorK7int64_tKbKb", "fused8bitrowwise_to_float_or_half_cpu::output_dtype"], [10, 3, 1, "_CPPv437fused8bitrowwise_to_float_or_half_cpuRK6TensorK7int64_tKbKb", "fused8bitrowwise_to_float_or_half_cpu::quant_padding_float_type"], [10, 3, 1, "_CPPv437fused8bitrowwise_to_float_or_half_cpuRK6TensorK7int64_tKbKb", "fused8bitrowwise_to_float_or_half_cpu::scale_bias_last"], [10, 1, 1, "_CPPv428fused8bitrowwise_to_half_cpuRK6Tensor", "fused8bitrowwise_to_half_cpu"], [10, 3, 1, "_CPPv428fused8bitrowwise_to_half_cpuRK6Tensor", "fused8bitrowwise_to_half_cpu::input"], [10, 1, 1, "_CPPv437fusednbitrowwise_sbfront_to_float_cpuRK6TensorK7int64_t", "fusednbitrowwise_sbfront_to_float_cpu"], [10, 3, 1, "_CPPv437fusednbitrowwise_sbfront_to_float_cpuRK6TensorK7int64_t", "fusednbitrowwise_sbfront_to_float_cpu::bit_rate"], [10, 3, 1, "_CPPv437fusednbitrowwise_sbfront_to_float_cpuRK6TensorK7int64_t", "fusednbitrowwise_sbfront_to_float_cpu::input"], [10, 1, 1, "_CPPv429fusednbitrowwise_to_float_cpuRK6TensorK7int64_t", "fusednbitrowwise_to_float_cpu"], [10, 3, 1, "_CPPv429fusednbitrowwise_to_float_cpuRK6TensorK7int64_t", "fusednbitrowwise_to_float_cpu::bit_rate"], [10, 3, 1, "_CPPv429fusednbitrowwise_to_float_cpuRK6TensorK7int64_t", "fusednbitrowwise_to_float_cpu::input"], [10, 1, 1, "_CPPv437fusednbitrowwise_to_float_or_half_cpuRK6TensorK7int64_tK7int64_t", "fusednbitrowwise_to_float_or_half_cpu"], [10, 3, 1, "_CPPv437fusednbitrowwise_to_float_or_half_cpuRK6TensorK7int64_tK7int64_t", "fusednbitrowwise_to_float_or_half_cpu::bit_rate"], [10, 3, 1, "_CPPv437fusednbitrowwise_to_float_or_half_cpuRK6TensorK7int64_tK7int64_t", "fusednbitrowwise_to_float_or_half_cpu::input"], [10, 3, 1, "_CPPv437fusednbitrowwise_to_float_or_half_cpuRK6TensorK7int64_tK7int64_t", "fusednbitrowwise_to_float_or_half_cpu::output_dtype"], [10, 1, 1, "_CPPv428fusednbitrowwise_to_half_cpuRK6TensorK7int64_t", "fusednbitrowwise_to_half_cpu"], [10, 3, 1, "_CPPv428fusednbitrowwise_to_half_cpuRK6TensorK7int64_t", "fusednbitrowwise_to_half_cpu::bit_rate"], [10, 3, 1, "_CPPv428fusednbitrowwise_to_half_cpuRK6TensorK7int64_t", "fusednbitrowwise_to_half_cpu::input"], [11, 1, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu"], [11, 3, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::bin_boundaries"], [11, 3, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::bin_ctr_in_use_after"], [11, 3, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::bin_ctr_weight_value"], [11, 3, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::bin_num_examples"], [11, 3, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::bin_num_positives"], [11, 3, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::logit"], [11, 3, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::num_segments"], [11, 3, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::positive_weight"], [11, 3, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::segment_lengths"], [11, 3, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::segment_value"], [12, 1, 1, "_CPPv423get_unique_indices_cudaRKN2at6TensorEK7int64_tKb", "get_unique_indices_cuda"], [12, 3, 1, "_CPPv423get_unique_indices_cudaRKN2at6TensorEK7int64_tKb", "get_unique_indices_cuda::compute_count"], [12, 3, 1, "_CPPv423get_unique_indices_cudaRKN2at6TensorEK7int64_tKb", "get_unique_indices_cuda::linear_indices"], [12, 3, 1, "_CPPv423get_unique_indices_cudaRKN2at6TensorEK7int64_tKb", "get_unique_indices_cuda::max_indices"], [12, 1, 1, "_CPPv436get_unique_indices_with_inverse_cudaRKN2at6TensorEK7int64_tKbKb", "get_unique_indices_with_inverse_cuda"], [12, 3, 1, "_CPPv436get_unique_indices_with_inverse_cudaRKN2at6TensorEK7int64_tKbKb", "get_unique_indices_with_inverse_cuda::compute_count"], [12, 3, 1, "_CPPv436get_unique_indices_with_inverse_cudaRKN2at6TensorEK7int64_tKbKb", "get_unique_indices_with_inverse_cuda::compute_inverse_indices"], [12, 3, 1, "_CPPv436get_unique_indices_with_inverse_cudaRKN2at6TensorEK7int64_tKbKb", "get_unique_indices_with_inverse_cuda::linear_indices"], [12, 3, 1, "_CPPv436get_unique_indices_with_inverse_cudaRKN2at6TensorEK7int64_tKbKb", "get_unique_indices_with_inverse_cuda::max_indices"], [4, 1, 1, "_CPPv415gqa_attn_splitkRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorEKdK7int64_tK7int64_tKbK7int64_t", "gqa_attn_splitk"], [4, 3, 1, "_CPPv415gqa_attn_splitkRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorEKdK7int64_tK7int64_tKbK7int64_t", "gqa_attn_splitk::XQ"], [4, 3, 1, "_CPPv415gqa_attn_splitkRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorEKdK7int64_tK7int64_tKbK7int64_t", "gqa_attn_splitk::cache_K"], [4, 3, 1, "_CPPv415gqa_attn_splitkRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorEKdK7int64_tK7int64_tKbK7int64_t", "gqa_attn_splitk::cache_V"], [4, 3, 1, "_CPPv415gqa_attn_splitkRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorEKdK7int64_tK7int64_tKbK7int64_t", "gqa_attn_splitk::cache_logical_dtype_int"], [4, 3, 1, "_CPPv415gqa_attn_splitkRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorEKdK7int64_tK7int64_tKbK7int64_t", "gqa_attn_splitk::kv_cache_quant_num_groups"], [4, 3, 1, "_CPPv415gqa_attn_splitkRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorEKdK7int64_tK7int64_tKbK7int64_t", "gqa_attn_splitk::num_split_ks"], [4, 3, 1, "_CPPv415gqa_attn_splitkRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorEKdK7int64_tK7int64_tKbK7int64_t", "gqa_attn_splitk::qk_scale"], [4, 3, 1, "_CPPv415gqa_attn_splitkRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorEKdK7int64_tK7int64_tKbK7int64_t", "gqa_attn_splitk::seq_positions"], [4, 3, 1, "_CPPv415gqa_attn_splitkRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorEKdK7int64_tK7int64_tKbK7int64_t", "gqa_attn_splitk::use_tensor_cores"], [10, 1, 1, "_CPPv428half_to_fused8bitrowwise_cpuRK6Tensor", "half_to_fused8bitrowwise_cpu"], [10, 3, 1, "_CPPv428half_to_fused8bitrowwise_cpuRK6Tensor", "half_to_fused8bitrowwise_cpu::input"], [13, 1, 1, "_CPPv410hash_shard7int64_t6size_t", "hash_shard"], [13, 3, 1, "_CPPv410hash_shard7int64_t6size_t", "hash_shard::id"], [13, 3, 1, "_CPPv410hash_shard7int64_t6size_t", "hash_shard::num_shards"], [11, 1, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu"], [11, 3, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu::bin_ctr_in_use_after"], [11, 3, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu::bin_ctr_weight_value"], [11, 3, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu::bin_num_examples"], [11, 3, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu::bin_num_positives"], [11, 3, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu::logit"], [11, 3, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu::lower_bound"], [11, 3, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu::positive_weight"], [11, 3, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu::upper_bound"], [12, 1, 1, "_CPPv419host_lxu_cache_slot7int64_t7int64_t", "host_lxu_cache_slot"], [12, 3, 1, "_CPPv419host_lxu_cache_slot7int64_t7int64_t", "host_lxu_cache_slot::C"], [12, 3, 1, "_CPPv419host_lxu_cache_slot7int64_t7int64_t", "host_lxu_cache_slot::h_in"], [3, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::D_offsets"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::dev_weights"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::fp8_exponent_bias"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::fp8_exponent_bits"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::indice_weights"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::indices"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::lxu_cache_locations"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::lxu_cache_weights"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::max_float16_D"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::max_float32_D"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::max_float8_D"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::max_int2_D"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::max_int4_D"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::max_int8_D"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::offsets"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::output_dtype"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::pooling_mode"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::row_alignment"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::total_D"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::uvm_weights"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::weights_offsets"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::weights_placements"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::weights_tys"], [3, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::D_offsets"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::dev_weights"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::fp8_exponent_bias"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::fp8_exponent_bits"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::indice_weights"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::indices"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::lxu_cache_locations"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::lxu_cache_weights"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::max_float16_D"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::max_float32_D"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::max_float8_D"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::max_int2_D"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::max_int4_D"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::max_int8_D"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::offsets"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::output_dtype"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::pooling_mode"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::row_alignment"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::total_D"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::uvm_weights"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::weights_offsets"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::weights_placements"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::weights_tys"], [3, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::D_offsets"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::cache_hash_size_cumsum"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::cache_index_table_map"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::dev_weights"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::fp8_exponent_bias"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::fp8_exponent_bits"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::indice_weights"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::indices"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::lxu_cache_locations"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::lxu_cache_state"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::lxu_cache_weights"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::lxu_state"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::max_float16_D"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::max_float32_D"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::max_float8_D"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::max_int2_D"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::max_int4_D"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::max_int8_D"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::offsets"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::output_dtype"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::pooling_mode"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::row_alignment"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::total_D"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::total_cache_hash_size"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::uvm_weights"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::weights_offsets"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::weights_placements"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::weights_tys"], [3, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::D_offsets"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::cache_hash_size_cumsum"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::cache_index_table_map"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::dev_weights"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::fp8_exponent_bias"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::fp8_exponent_bits"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::indice_weights"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::indices"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::lxu_cache_locations"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::lxu_cache_state"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::lxu_cache_weights"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::lxu_state"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::max_float16_D"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::max_float32_D"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::max_float8_D"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::max_int2_D"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::max_int4_D"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::max_int8_D"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::offsets"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::output_dtype"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::pooling_mode"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::row_alignment"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::total_D"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::total_cache_hash_size"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::uvm_weights"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::weights_offsets"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::weights_placements"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::weights_tys"], [8, 1, 1, "_CPPv413is_uvm_tensorRK6Tensor", "is_uvm_tensor"], [8, 3, 1, "_CPPv413is_uvm_tensorRK6Tensor", "is_uvm_tensor::self"], [6, 1, 1, "_CPPv418jagged_1d_to_dense6Tensor6TensorN3c106SymIntE7int64_t", "jagged_1d_to_dense"], [6, 3, 1, "_CPPv418jagged_1d_to_dense6Tensor6TensorN3c106SymIntE7int64_t", "jagged_1d_to_dense::max_L"], [6, 3, 1, "_CPPv418jagged_1d_to_dense6Tensor6TensorN3c106SymIntE7int64_t", "jagged_1d_to_dense::offsets"], [6, 3, 1, "_CPPv418jagged_1d_to_dense6Tensor6TensorN3c106SymIntE7int64_t", "jagged_1d_to_dense::padding_value"], [6, 3, 1, "_CPPv418jagged_1d_to_dense6Tensor6TensorN3c106SymIntE7int64_t", "jagged_1d_to_dense::values"], [6, 1, 1, "_CPPv418jagged_2d_to_dense6Tensor6TensorN3c106SymIntE", "jagged_2d_to_dense"], [6, 3, 1, "_CPPv418jagged_2d_to_dense6Tensor6TensorN3c106SymIntE", "jagged_2d_to_dense::max_sequence_length"], [6, 3, 1, "_CPPv418jagged_2d_to_dense6Tensor6TensorN3c106SymIntE", "jagged_2d_to_dense::offsets"], [6, 3, 1, "_CPPv418jagged_2d_to_dense6Tensor6TensorN3c106SymIntE", "jagged_2d_to_dense::values"], [6, 1, 1, "_CPPv428jagged_dense_elementwise_addRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add"], [6, 3, 1, "_CPPv428jagged_dense_elementwise_addRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add::x_offsets"], [6, 3, 1, "_CPPv428jagged_dense_elementwise_addRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add::x_values"], [6, 3, 1, "_CPPv428jagged_dense_elementwise_addRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add::y"], [6, 1, 1, "_CPPv442jagged_dense_elementwise_add_jagged_outputRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add_jagged_output"], [6, 3, 1, "_CPPv442jagged_dense_elementwise_add_jagged_outputRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add_jagged_output::x_offsets"], [6, 3, 1, "_CPPv442jagged_dense_elementwise_add_jagged_outputRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add_jagged_output::x_values"], [6, 3, 1, "_CPPv442jagged_dense_elementwise_add_jagged_outputRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add_jagged_output::y"], [6, 1, 1, "_CPPv447jagged_dense_elementwise_add_jagged_output_cudaRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add_jagged_output_cuda"], [6, 3, 1, "_CPPv447jagged_dense_elementwise_add_jagged_output_cudaRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add_jagged_output_cuda::x_offsets"], [6, 3, 1, "_CPPv447jagged_dense_elementwise_add_jagged_output_cudaRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add_jagged_output_cuda::x_values"], [6, 3, 1, "_CPPv447jagged_dense_elementwise_add_jagged_output_cudaRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add_jagged_output_cuda::y"], [6, 1, 1, "_CPPv428jagged_dense_elementwise_mulRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_mul"], [6, 3, 1, "_CPPv428jagged_dense_elementwise_mulRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_mul::x_offsets"], [6, 3, 1, "_CPPv428jagged_dense_elementwise_mulRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_mul::x_values"], [6, 3, 1, "_CPPv428jagged_dense_elementwise_mulRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_mul::y"], [6, 1, 1, "_CPPv422jagged_to_padded_denseRK6TensorRKNSt6vectorI6TensorEEKN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense"], [6, 3, 1, "_CPPv422jagged_to_padded_denseRK6TensorRKNSt6vectorI6TensorEEKN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense::max_lengths"], [6, 3, 1, "_CPPv422jagged_to_padded_denseRK6TensorRKNSt6vectorI6TensorEEKN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense::offsets"], [6, 3, 1, "_CPPv422jagged_to_padded_denseRK6TensorRKNSt6vectorI6TensorEEKN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense::padding_value"], [6, 3, 1, "_CPPv422jagged_to_padded_denseRK6TensorRKNSt6vectorI6TensorEEKN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense::values"], [6, 1, 1, "_CPPv430jagged_to_padded_dense_forwardRK6TensorRKNSt6vectorI6TensorEEN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense_forward"], [6, 3, 1, "_CPPv430jagged_to_padded_dense_forwardRK6TensorRKNSt6vectorI6TensorEEN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense_forward::max_lengths"], [6, 3, 1, "_CPPv430jagged_to_padded_dense_forwardRK6TensorRKNSt6vectorI6TensorEEN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense_forward::offsets"], [6, 3, 1, "_CPPv430jagged_to_padded_dense_forwardRK6TensorRKNSt6vectorI6TensorEEN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense_forward::padding_value"], [6, 3, 1, "_CPPv430jagged_to_padded_dense_forwardRK6TensorRKNSt6vectorI6TensorEEN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense_forward::values"], [13, 5, 1, "_CPPv4N5kv_db12CacheContextE", "kv_db::CacheContext"], [13, 5, 1, "_CPPv4N5kv_db13EmbeddingKVDBE", "kv_db::EmbeddingKVDB"], [13, 5, 1, "_CPPv4N5kv_db9QueueItemE", "kv_db::QueueItem"], [13, 5, 1, "_CPPv4N8l2_cache13CacheLibCacheE", "l2_cache::CacheLibCache"], [12, 1, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda"], [12, 3, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::D_offsets"], [12, 3, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::cache_hash_size_cumsum"], [12, 3, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::cache_index_table_map"], [12, 3, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::lfu_state"], [12, 3, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::linear_cache_indices"], [12, 3, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::lxu_cache_state"], [12, 3, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::lxu_cache_weights"], [12, 3, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::row_alignment"], [12, 3, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::total_cache_hash_size"], [12, 3, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::weights"], [12, 3, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::weights_offsets"], [12, 3, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::weights_tys"], [12, 1, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda"], [12, 3, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::D_offsets"], [12, 3, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::cache_hash_size_cumsum"], [12, 3, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::cache_index_table_map"], [12, 3, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::lfu_state"], [12, 3, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::linear_cache_indices"], [12, 3, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::lxu_cache_state"], [12, 3, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::lxu_cache_weights"], [12, 3, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::stochastic_rounding"], [12, 3, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::total_cache_hash_size"], [12, 3, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::weights"], [12, 3, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::weights_offsets"], [12, 1, 1, "_CPPv428linearize_cache_indices_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorERKNSt8optionalIN2at6TensorEEEK7int64_tK7int64_t", "linearize_cache_indices_cuda"], [12, 3, 1, "_CPPv428linearize_cache_indices_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorERKNSt8optionalIN2at6TensorEEEK7int64_tK7int64_t", "linearize_cache_indices_cuda::B_offsets"], [12, 3, 1, "_CPPv428linearize_cache_indices_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorERKNSt8optionalIN2at6TensorEEEK7int64_tK7int64_t", "linearize_cache_indices_cuda::cache_hash_size_cumsum"], [12, 3, 1, "_CPPv428linearize_cache_indices_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorERKNSt8optionalIN2at6TensorEEEK7int64_tK7int64_t", "linearize_cache_indices_cuda::indices"], [12, 3, 1, "_CPPv428linearize_cache_indices_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorERKNSt8optionalIN2at6TensorEEEK7int64_tK7int64_t", "linearize_cache_indices_cuda::indices_base_offset"], [12, 3, 1, "_CPPv428linearize_cache_indices_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorERKNSt8optionalIN2at6TensorEEEK7int64_tK7int64_t", "linearize_cache_indices_cuda::max_B"], [12, 3, 1, "_CPPv428linearize_cache_indices_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorERKNSt8optionalIN2at6TensorEEEK7int64_tK7int64_t", "linearize_cache_indices_cuda::offsets"], [12, 1, 1, "_CPPv441linearize_cache_indices_from_row_idx_cudaN2at6TensorEN2at6TensorEN2at6TensorE", "linearize_cache_indices_from_row_idx_cuda"], [12, 3, 1, "_CPPv441linearize_cache_indices_from_row_idx_cudaN2at6TensorEN2at6TensorEN2at6TensorE", "linearize_cache_indices_from_row_idx_cuda::cache_hash_size_cumsum"], [12, 3, 1, "_CPPv441linearize_cache_indices_from_row_idx_cudaN2at6TensorEN2at6TensorEN2at6TensorE", "linearize_cache_indices_from_row_idx_cuda::update_row_indices"], [12, 3, 1, "_CPPv441linearize_cache_indices_from_row_idx_cudaN2at6TensorEN2at6TensorEN2at6TensorE", "linearize_cache_indices_from_row_idx_cuda::update_table_indices"], [12, 1, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorEKb", "lru_cache_find_uncached_cuda"], [12, 3, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorEKb", "lru_cache_find_uncached_cuda::compute_inverse_indices"], [12, 3, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorEKb", "lru_cache_find_uncached_cuda::gather_cache_stats"], [12, 3, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorEKb", "lru_cache_find_uncached_cuda::lock_cache_line"], [12, 3, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorEKb", "lru_cache_find_uncached_cuda::lru_state"], [12, 3, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorEKb", "lru_cache_find_uncached_cuda::lxu_cache_locking_counter"], [12, 3, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorEKb", "lru_cache_find_uncached_cuda::lxu_cache_state"], [12, 3, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorEKb", "lru_cache_find_uncached_cuda::max_indices"], [12, 3, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorEKb", "lru_cache_find_uncached_cuda::time_stamp"], [12, 3, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorEKb", "lru_cache_find_uncached_cuda::unique_indices"], [12, 3, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorEKb", "lru_cache_find_uncached_cuda::unique_indices_length"], [12, 3, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorEKb", "lru_cache_find_uncached_cuda::uvm_cache_stats"], [12, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda"], [12, 3, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::D_offsets"], [12, 3, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::cache_index_table_map"], [12, 3, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::gather_cache_stats"], [12, 3, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::hash_size_cumsum"], [12, 3, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::linear_cache_indices"], [12, 3, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::lru_state"], [12, 3, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::lxu_cache_state"], [12, 3, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::lxu_cache_weights"], [12, 3, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::row_alignment"], [12, 3, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::time_stamp"], [12, 3, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::total_cache_hash_size"], [12, 3, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::uvm_cache_stats"], [12, 3, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::weights"], [12, 3, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::weights_offsets"], [12, 3, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::weights_tys"], [12, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbNSt8optionalIN2at6TensorEEEbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_cuda"], [12, 3, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbNSt8optionalIN2at6TensorEEEbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_cuda::D_offsets"], [12, 3, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbNSt8optionalIN2at6TensorEEEbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_cuda::cache_index_table_map"], [12, 3, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbNSt8optionalIN2at6TensorEEEbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_cuda::gather_cache_stats"], [12, 3, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbNSt8optionalIN2at6TensorEEEbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_cuda::hash_size_cumsum"], [12, 3, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbNSt8optionalIN2at6TensorEEEbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_cuda::linear_cache_indices"], [12, 3, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbNSt8optionalIN2at6TensorEEEbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_cuda::lock_cache_line"], [12, 3, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbNSt8optionalIN2at6TensorEEEbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_cuda::lru_state"], [12, 3, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbNSt8optionalIN2at6TensorEEEbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_cuda::lxu_cache_locking_counter"], [12, 3, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbNSt8optionalIN2at6TensorEEEbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_cuda::lxu_cache_state"], [12, 3, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbNSt8optionalIN2at6TensorEEEbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_cuda::lxu_cache_weights"], [12, 3, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbNSt8optionalIN2at6TensorEEEbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_cuda::stochastic_rounding"], [12, 3, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbNSt8optionalIN2at6TensorEEEbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_cuda::time_stamp"], [12, 3, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbNSt8optionalIN2at6TensorEEEbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_cuda::total_cache_hash_size"], [12, 3, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbNSt8optionalIN2at6TensorEEEbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_cuda::uvm_cache_stats"], [12, 3, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbNSt8optionalIN2at6TensorEEEbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_cuda::weights"], [12, 3, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbNSt8optionalIN2at6TensorEEEbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_cuda::weights_offsets"], [12, 1, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda"], [12, 3, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::D_offsets"], [12, 3, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::cache_hash_size_cumsum"], [12, 3, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::cache_index_table_map"], [12, 3, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::lxu_cache_state"], [12, 3, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::lxu_cache_weights"], [12, 3, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::stochastic_rounding"], [12, 3, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::total_D"], [12, 3, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::uvm_weights"], [12, 3, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::weights_offsets"], [12, 1, 1, "_CPPv431lxu_cache_locations_update_cudaN2at6TensorEN2at6TensorENSt8optionalIN2at6TensorEEE", "lxu_cache_locations_update_cuda"], [12, 3, 1, "_CPPv431lxu_cache_locations_update_cudaN2at6TensorEN2at6TensorENSt8optionalIN2at6TensorEEE", "lxu_cache_locations_update_cuda::lxu_cache_locations"], [12, 3, 1, "_CPPv431lxu_cache_locations_update_cudaN2at6TensorEN2at6TensorENSt8optionalIN2at6TensorEEE", "lxu_cache_locations_update_cuda::lxu_cache_locations_new"], [12, 3, 1, "_CPPv431lxu_cache_locations_update_cudaN2at6TensorEN2at6TensorENSt8optionalIN2at6TensorEEE", "lxu_cache_locations_update_cuda::num_uniq_cache_indices"], [12, 1, 1, "_CPPv440lxu_cache_locking_counter_decrement_cudaN2at6TensorEN2at6TensorE", "lxu_cache_locking_counter_decrement_cuda"], [12, 3, 1, "_CPPv440lxu_cache_locking_counter_decrement_cudaN2at6TensorEN2at6TensorE", "lxu_cache_locking_counter_decrement_cuda::lxu_cache_locations"], [12, 3, 1, "_CPPv440lxu_cache_locking_counter_decrement_cudaN2at6TensorEN2at6TensorE", "lxu_cache_locking_counter_decrement_cuda::lxu_cache_locking_counter"], [12, 1, 1, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEENSt8optionalIN2at6TensorEEENSt8optionalIN2at6TensorEEE", "lxu_cache_lookup_cuda"], [12, 3, 1, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEENSt8optionalIN2at6TensorEEENSt8optionalIN2at6TensorEEE", "lxu_cache_lookup_cuda::gather_cache_stats"], [12, 3, 1, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEENSt8optionalIN2at6TensorEEENSt8optionalIN2at6TensorEEE", "lxu_cache_lookup_cuda::invalid_index"], [12, 3, 1, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEENSt8optionalIN2at6TensorEEENSt8optionalIN2at6TensorEEE", "lxu_cache_lookup_cuda::linear_cache_indices"], [12, 3, 1, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEENSt8optionalIN2at6TensorEEENSt8optionalIN2at6TensorEEE", "lxu_cache_lookup_cuda::lxu_cache_locations_output"], [12, 3, 1, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEENSt8optionalIN2at6TensorEEENSt8optionalIN2at6TensorEEE", "lxu_cache_lookup_cuda::lxu_cache_state"], [12, 3, 1, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEENSt8optionalIN2at6TensorEEENSt8optionalIN2at6TensorEEE", "lxu_cache_lookup_cuda::num_uniq_cache_indices"], [12, 3, 1, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEENSt8optionalIN2at6TensorEEENSt8optionalIN2at6TensorEEE", "lxu_cache_lookup_cuda::uvm_cache_stats"], [13, 1, 1, "_CPPv421masked_index_put_cuda6Tensor6Tensor6Tensor6TensorKbK7int64_t", "masked_index_put_cuda"], [13, 3, 1, "_CPPv421masked_index_put_cuda6Tensor6Tensor6Tensor6TensorKbK7int64_t", "masked_index_put_cuda::count"], [13, 3, 1, "_CPPv421masked_index_put_cuda6Tensor6Tensor6Tensor6TensorKbK7int64_t", "masked_index_put_cuda::indices"], [13, 3, 1, "_CPPv421masked_index_put_cuda6Tensor6Tensor6Tensor6TensorKbK7int64_t", "masked_index_put_cuda::preferred_sms"], [13, 3, 1, "_CPPv421masked_index_put_cuda6Tensor6Tensor6Tensor6TensorKbK7int64_t", "masked_index_put_cuda::self"], [13, 3, 1, "_CPPv421masked_index_put_cuda6Tensor6Tensor6Tensor6TensorKbK7int64_t", "masked_index_put_cuda::use_pipeline"], [13, 3, 1, "_CPPv421masked_index_put_cuda6Tensor6Tensor6Tensor6TensorKbK7int64_t", "masked_index_put_cuda::values"], [13, 1, 1, "_CPPv424masked_index_select_cuda6Tensor6Tensor6Tensor6TensorKbK7int64_t", "masked_index_select_cuda"], [13, 3, 1, "_CPPv424masked_index_select_cuda6Tensor6Tensor6Tensor6TensorKbK7int64_t", "masked_index_select_cuda::count"], [13, 3, 1, "_CPPv424masked_index_select_cuda6Tensor6Tensor6Tensor6TensorKbK7int64_t", "masked_index_select_cuda::indices"], [13, 3, 1, "_CPPv424masked_index_select_cuda6Tensor6Tensor6Tensor6TensorKbK7int64_t", "masked_index_select_cuda::preferred_sms"], [13, 3, 1, "_CPPv424masked_index_select_cuda6Tensor6Tensor6Tensor6TensorKbK7int64_t", "masked_index_select_cuda::self"], [13, 3, 1, "_CPPv424masked_index_select_cuda6Tensor6Tensor6Tensor6TensorKbK7int64_t", "masked_index_select_cuda::use_pipeline"], [13, 3, 1, "_CPPv424masked_index_select_cuda6Tensor6Tensor6Tensor6TensorKbK7int64_t", "masked_index_select_cuda::values"], [8, 1, 1, "_CPPv422new_host_mapped_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_host_mapped_tensor"], [8, 3, 1, "_CPPv422new_host_mapped_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_host_mapped_tensor::self"], [8, 3, 1, "_CPPv422new_host_mapped_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_host_mapped_tensor::sizes"], [8, 1, 1, "_CPPv418new_managed_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_managed_tensor"], [8, 3, 1, "_CPPv418new_managed_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_managed_tensor::self"], [8, 3, 1, "_CPPv418new_managed_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_managed_tensor::sizes"], [8, 1, 1, "_CPPv423new_managed_tensor_metaRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_managed_tensor_meta"], [8, 3, 1, "_CPPv423new_managed_tensor_metaRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_managed_tensor_meta::self"], [8, 3, 1, "_CPPv423new_managed_tensor_metaRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_managed_tensor_meta::sizes"], [8, 1, 1, "_CPPv418new_unified_tensorRK6TensorRKNSt6vectorINSt7int64_tEEEb", "new_unified_tensor"], [8, 3, 1, "_CPPv418new_unified_tensorRK6TensorRKNSt6vectorINSt7int64_tEEEb", "new_unified_tensor::is_host_mapped"], [8, 3, 1, "_CPPv418new_unified_tensorRK6TensorRKNSt6vectorINSt7int64_tEEEb", "new_unified_tensor::self"], [8, 3, 1, "_CPPv418new_unified_tensorRK6TensorRKNSt6vectorINSt7int64_tEEEb", "new_unified_tensor::sizes"], [8, 1, 1, "_CPPv423new_unified_tensor_metaRK6TensorRKNSt6vectorINSt7int64_tEEEb", "new_unified_tensor_meta"], [8, 3, 1, "_CPPv423new_unified_tensor_metaRK6TensorRKNSt6vectorINSt7int64_tEEEb", "new_unified_tensor_meta::is_host_mapped"], [8, 3, 1, "_CPPv423new_unified_tensor_metaRK6TensorRKNSt6vectorINSt7int64_tEEEb", "new_unified_tensor_meta::self"], [8, 3, 1, "_CPPv423new_unified_tensor_metaRK6TensorRKNSt6vectorINSt7int64_tEEEb", "new_unified_tensor_meta::sizes"], [8, 1, 1, "_CPPv426new_vanilla_managed_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_vanilla_managed_tensor"], [8, 3, 1, "_CPPv426new_vanilla_managed_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_vanilla_managed_tensor::self"], [8, 3, 1, "_CPPv426new_vanilla_managed_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_vanilla_managed_tensor::sizes"], [5, 1, 1, "_CPPv435padding_fused_tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE7int64_t", "padding_fused_tbe_input_combine_cpu"], [5, 3, 1, "_CPPv435padding_fused_tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE7int64_t", "padding_fused_tbe_input_combine_cpu::batch_size"], [5, 3, 1, "_CPPv435padding_fused_tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE7int64_t", "padding_fused_tbe_input_combine_cpu::include_last_offsets"], [5, 3, 1, "_CPPv435padding_fused_tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE7int64_t", "padding_fused_tbe_input_combine_cpu::indices_list"], [5, 3, 1, "_CPPv435padding_fused_tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE7int64_t", "padding_fused_tbe_input_combine_cpu::offsets_list"], [5, 3, 1, "_CPPv435padding_fused_tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE7int64_t", "padding_fused_tbe_input_combine_cpu::per_sample_weights"], [9, 1, 1, "_CPPv429permute_pooled_embs_auto_gradRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad"], [9, 3, 1, "_CPPv429permute_pooled_embs_auto_gradRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad::inv_offset_dim_list"], [9, 3, 1, "_CPPv429permute_pooled_embs_auto_gradRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad::inv_permute_list"], [9, 3, 1, "_CPPv429permute_pooled_embs_auto_gradRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad::offset_dim_list"], [9, 3, 1, "_CPPv429permute_pooled_embs_auto_gradRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad::permute_list"], [9, 3, 1, "_CPPv429permute_pooled_embs_auto_gradRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad::pooled_embs"], [9, 1, 1, "_CPPv433permute_pooled_embs_auto_grad_cpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_cpu"], [9, 3, 1, "_CPPv433permute_pooled_embs_auto_grad_cpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_cpu::inv_offset_dim_list"], [9, 3, 1, "_CPPv433permute_pooled_embs_auto_grad_cpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_cpu::inv_permute_list"], [9, 3, 1, "_CPPv433permute_pooled_embs_auto_grad_cpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_cpu::offset_dim_list"], [9, 3, 1, "_CPPv433permute_pooled_embs_auto_grad_cpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_cpu::permute_list"], [9, 3, 1, "_CPPv433permute_pooled_embs_auto_grad_cpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_cpu::pooled_embs"], [9, 1, 1, "_CPPv433permute_pooled_embs_auto_grad_gpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_gpu"], [9, 3, 1, "_CPPv433permute_pooled_embs_auto_grad_gpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_gpu::inv_offset_dim_list"], [9, 3, 1, "_CPPv433permute_pooled_embs_auto_grad_gpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_gpu::inv_permute_list"], [9, 3, 1, "_CPPv433permute_pooled_embs_auto_grad_gpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_gpu::offset_dim_list"], [9, 3, 1, "_CPPv433permute_pooled_embs_auto_grad_gpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_gpu::permute_list"], [9, 3, 1, "_CPPv433permute_pooled_embs_auto_grad_gpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_gpu::pooled_embs"], [9, 1, 1, "_CPPv439permute_pooled_embs_auto_grad_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_cpu"], [9, 3, 1, "_CPPv439permute_pooled_embs_auto_grad_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_cpu::inv_offset_dim_list"], [9, 3, 1, "_CPPv439permute_pooled_embs_auto_grad_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_cpu::inv_permute_list"], [9, 3, 1, "_CPPv439permute_pooled_embs_auto_grad_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_cpu::offset_dim_list"], [9, 3, 1, "_CPPv439permute_pooled_embs_auto_grad_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_cpu::permute_list"], [9, 3, 1, "_CPPv439permute_pooled_embs_auto_grad_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_cpu::pooled_embs"], [9, 1, 1, "_CPPv439permute_pooled_embs_auto_grad_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_gpu"], [9, 3, 1, "_CPPv439permute_pooled_embs_auto_grad_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_gpu::inv_offset_dim_list"], [9, 3, 1, "_CPPv439permute_pooled_embs_auto_grad_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_gpu::inv_permute_list"], [9, 3, 1, "_CPPv439permute_pooled_embs_auto_grad_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_gpu::offset_dim_list"], [9, 3, 1, "_CPPv439permute_pooled_embs_auto_grad_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_gpu::permute_list"], [9, 3, 1, "_CPPv439permute_pooled_embs_auto_grad_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_gpu::pooled_embs"], [9, 1, 1, "_CPPv428permute_pooled_embs_cpu_implRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKb", "permute_pooled_embs_cpu_impl"], [9, 3, 1, "_CPPv428permute_pooled_embs_cpu_implRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKb", "permute_pooled_embs_cpu_impl::allow_duplicates"], [9, 3, 1, "_CPPv428permute_pooled_embs_cpu_implRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKb", "permute_pooled_embs_cpu_impl::inv_offset_dim_list"], [9, 3, 1, "_CPPv428permute_pooled_embs_cpu_implRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKb", "permute_pooled_embs_cpu_impl::inv_permute_list"], [9, 3, 1, "_CPPv428permute_pooled_embs_cpu_implRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKb", "permute_pooled_embs_cpu_impl::offset_dim_list"], [9, 3, 1, "_CPPv428permute_pooled_embs_cpu_implRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKb", "permute_pooled_embs_cpu_impl::permute_list"], [9, 3, 1, "_CPPv428permute_pooled_embs_cpu_implRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKb", "permute_pooled_embs_cpu_impl::pooled_embs"], [9, 1, 1, "_CPPv429permute_pooled_embs_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_cpu"], [9, 3, 1, "_CPPv429permute_pooled_embs_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_cpu::inv_offset_dim_list"], [9, 3, 1, "_CPPv429permute_pooled_embs_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_cpu::inv_permute_list"], [9, 3, 1, "_CPPv429permute_pooled_embs_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_cpu::offset_dim_list"], [9, 3, 1, "_CPPv429permute_pooled_embs_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_cpu::permute_list"], [9, 3, 1, "_CPPv429permute_pooled_embs_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_cpu::pooled_embs"], [9, 1, 1, "_CPPv429permute_pooled_embs_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_gpu"], [9, 3, 1, "_CPPv429permute_pooled_embs_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_gpu::inv_offset_dim_list"], [9, 3, 1, "_CPPv429permute_pooled_embs_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_gpu::inv_permute_list"], [9, 3, 1, "_CPPv429permute_pooled_embs_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_gpu::offset_dim_list"], [9, 3, 1, "_CPPv429permute_pooled_embs_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_gpu::permute_list"], [9, 3, 1, "_CPPv429permute_pooled_embs_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_gpu::pooled_embs"], [3, 1, 1, "_CPPv423pruned_array_lookup_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cpu"], [3, 3, 1, "_CPPv423pruned_array_lookup_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cpu::index_remappings"], [3, 3, 1, "_CPPv423pruned_array_lookup_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cpu::index_remappings_offsets"], [3, 3, 1, "_CPPv423pruned_array_lookup_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cpu::indices"], [3, 3, 1, "_CPPv423pruned_array_lookup_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cpu::offsets"], [3, 1, 1, "_CPPv424pruned_array_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cuda"], [3, 3, 1, "_CPPv424pruned_array_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cuda::index_remappings"], [3, 3, 1, "_CPPv424pruned_array_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cuda::index_remappings_offsets"], [3, 3, 1, "_CPPv424pruned_array_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cuda::indices"], [3, 3, 1, "_CPPv424pruned_array_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cuda::offsets"], [3, 1, 1, "_CPPv436pruned_hashmap_insert_unweighted_cpu6Tensor6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_insert_unweighted_cpu"], [3, 3, 1, "_CPPv436pruned_hashmap_insert_unweighted_cpu6Tensor6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_insert_unweighted_cpu::dense_indices"], [3, 3, 1, "_CPPv436pruned_hashmap_insert_unweighted_cpu6Tensor6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_insert_unweighted_cpu::hash_table"], [3, 3, 1, "_CPPv436pruned_hashmap_insert_unweighted_cpu6Tensor6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_insert_unweighted_cpu::hash_table_offsets"], [3, 3, 1, "_CPPv436pruned_hashmap_insert_unweighted_cpu6Tensor6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_insert_unweighted_cpu::indices"], [3, 3, 1, "_CPPv436pruned_hashmap_insert_unweighted_cpu6Tensor6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_insert_unweighted_cpu::offsets"], [3, 1, 1, "_CPPv426pruned_hashmap_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_cuda"], [3, 3, 1, "_CPPv426pruned_hashmap_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_cuda::hash_table"], [3, 3, 1, "_CPPv426pruned_hashmap_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_cuda::hash_table_offsets"], [3, 3, 1, "_CPPv426pruned_hashmap_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_cuda::indices"], [3, 3, 1, "_CPPv426pruned_hashmap_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_cuda::offsets"], [3, 1, 1, "_CPPv436pruned_hashmap_lookup_unweighted_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_unweighted_cpu"], [3, 3, 1, "_CPPv436pruned_hashmap_lookup_unweighted_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_unweighted_cpu::hash_table"], [3, 3, 1, "_CPPv436pruned_hashmap_lookup_unweighted_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_unweighted_cpu::hash_table_offsets"], [3, 3, 1, "_CPPv436pruned_hashmap_lookup_unweighted_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_unweighted_cpu::indices"], [3, 3, 1, "_CPPv436pruned_hashmap_lookup_unweighted_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_unweighted_cpu::offsets"], [13, 5, 1, "_CPPv4N2ps24EmbeddingParameterServerE", "ps::EmbeddingParameterServer"], [7, 1, 1, "_CPPv432recat_embedding_grad_output_cuda6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_cuda"], [7, 3, 1, "_CPPv432recat_embedding_grad_output_cuda6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_cuda::grad_output"], [7, 3, 1, "_CPPv432recat_embedding_grad_output_cuda6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_cuda::num_features_per_rank"], [7, 1, 1, "_CPPv446recat_embedding_grad_output_mixed_D_batch_cudaRK6TensorRK6TensorRK6Tensor", "recat_embedding_grad_output_mixed_D_batch_cuda"], [7, 3, 1, "_CPPv446recat_embedding_grad_output_mixed_D_batch_cudaRK6TensorRK6TensorRK6Tensor", "recat_embedding_grad_output_mixed_D_batch_cuda::cumsum_dim_sum_per_rank"], [7, 3, 1, "_CPPv446recat_embedding_grad_output_mixed_D_batch_cudaRK6TensorRK6TensorRK6Tensor", "recat_embedding_grad_output_mixed_D_batch_cuda::dim_sum_per_rank"], [7, 3, 1, "_CPPv446recat_embedding_grad_output_mixed_D_batch_cudaRK6TensorRK6TensorRK6Tensor", "recat_embedding_grad_output_mixed_D_batch_cuda::grad_output"], [7, 1, 1, "_CPPv439recat_embedding_grad_output_mixed_D_cpuRK6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_mixed_D_cpu"], [7, 3, 1, "_CPPv439recat_embedding_grad_output_mixed_D_cpuRK6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_mixed_D_cpu::dim_sum_per_rank"], [7, 3, 1, "_CPPv439recat_embedding_grad_output_mixed_D_cpuRK6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_mixed_D_cpu::grad_output"], [7, 1, 1, "_CPPv440recat_embedding_grad_output_mixed_D_cudaRK6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_mixed_D_cuda"], [7, 3, 1, "_CPPv440recat_embedding_grad_output_mixed_D_cudaRK6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_mixed_D_cuda::dim_sum_per_rank"], [7, 3, 1, "_CPPv440recat_embedding_grad_output_mixed_D_cudaRK6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_mixed_D_cuda::grad_output"], [0, 1, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::A_SYMMETRIC"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::BIAS_TYPE"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::B_SYMMETRIC"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::DIRECT"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::FUSE_RELU"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::HAS_BIAS"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::Q_GRAN"], [0, 3, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::block"], [0, 3, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::inp"], [0, 3, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::ld_in"], [0, 3, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::ld_out"], [0, 3, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::out"], [0, 3, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::r"], [0, 1, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::A_SYMMETRIC"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::BIAS_TYPE"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::B_SYMMETRIC"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::C_PER_G"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::FUSE_RELU"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::HAS_BIAS"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::Q_GRAN"], [0, 3, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::block"], [0, 3, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::inp"], [0, 3, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::ld_in"], [0, 3, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::ld_out"], [0, 3, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::out"], [0, 3, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::r"], [12, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda"], [12, 3, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::D_offsets"], [12, 3, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::buffer_ids"], [12, 3, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::cache_hash_size_cumsum"], [12, 3, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::dev_weights"], [12, 3, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::logical_table_ids"], [12, 3, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::lxu_cache_state"], [12, 3, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::lxu_cache_weights"], [12, 3, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::momentum1_dev"], [12, 3, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::momentum1_offsets"], [12, 3, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::momentum1_placements"], [12, 3, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::momentum1_uvm"], [12, 3, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::pruned_indices"], [12, 3, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::pruned_indices_offsets"], [12, 3, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::total_cache_hash_size"], [12, 3, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::uvm_weights"], [12, 3, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::weights_offsets"], [12, 3, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::weights_placements"], [13, 5, 1, "_CPPv4N3ssd16EmbeddingRocksDBE", "ssd::EmbeddingRocksDB"], [13, 1, 1, "_CPPv427ssd_generate_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "ssd_generate_row_addrs_cuda"], [13, 3, 1, "_CPPv427ssd_generate_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "ssd_generate_row_addrs_cuda::assigned_cache_slots"], [13, 3, 1, "_CPPv427ssd_generate_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "ssd_generate_row_addrs_cuda::cache_set_inverse_indices"], [13, 3, 1, "_CPPv427ssd_generate_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "ssd_generate_row_addrs_cuda::cache_set_sorted_unique_indices"], [13, 3, 1, "_CPPv427ssd_generate_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "ssd_generate_row_addrs_cuda::inserted_ssd_weights"], [13, 3, 1, "_CPPv427ssd_generate_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "ssd_generate_row_addrs_cuda::linear_index_inverse_indices"], [13, 3, 1, "_CPPv427ssd_generate_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "ssd_generate_row_addrs_cuda::lxu_cache_locations"], [13, 3, 1, "_CPPv427ssd_generate_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "ssd_generate_row_addrs_cuda::lxu_cache_weights"], [13, 3, 1, "_CPPv427ssd_generate_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "ssd_generate_row_addrs_cuda::unique_indices_count_cumsum"], [13, 3, 1, "_CPPv427ssd_generate_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "ssd_generate_row_addrs_cuda::unique_indices_length"], [13, 1, 1, "_CPPv425ssd_update_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "ssd_update_row_addrs_cuda"], [13, 3, 1, "_CPPv425ssd_update_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "ssd_update_row_addrs_cuda::cache_set_inverse_indices_curr"], [13, 3, 1, "_CPPv425ssd_update_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "ssd_update_row_addrs_cuda::inserted_ssd_weights_curr_next_map"], [13, 3, 1, "_CPPv425ssd_update_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "ssd_update_row_addrs_cuda::inserted_ssd_weights_next"], [13, 3, 1, "_CPPv425ssd_update_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "ssd_update_row_addrs_cuda::linear_index_inverse_indices_curr"], [13, 3, 1, "_CPPv425ssd_update_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "ssd_update_row_addrs_cuda::lxu_cache_locations_curr"], [13, 3, 1, "_CPPv425ssd_update_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "ssd_update_row_addrs_cuda::lxu_cache_weights"], [13, 3, 1, "_CPPv425ssd_update_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "ssd_update_row_addrs_cuda::ssd_row_addrs_curr"], [13, 3, 1, "_CPPv425ssd_update_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "ssd_update_row_addrs_cuda::unique_indices_count_cumsum_curr"], [13, 3, 1, "_CPPv425ssd_update_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "ssd_update_row_addrs_cuda::unique_indices_length_curr"], [5, 1, 1, "_CPPv421tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE", "tbe_input_combine_cpu"], [5, 3, 1, "_CPPv421tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE", "tbe_input_combine_cpu::include_last_offsets"], [5, 3, 1, "_CPPv421tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE", "tbe_input_combine_cpu::indices_list"], [5, 3, 1, "_CPPv421tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE", "tbe_input_combine_cpu::offsets_list"], [5, 3, 1, "_CPPv421tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE", "tbe_input_combine_cpu::per_sample_weights"], [8, 1, 1, "_CPPv419uvm_cuda_mem_adviseRK6Tensor7int64_t", "uvm_cuda_mem_advise"], [8, 3, 1, "_CPPv419uvm_cuda_mem_adviseRK6Tensor7int64_t", "uvm_cuda_mem_advise::cuda_memory_advise"], [8, 3, 1, "_CPPv419uvm_cuda_mem_adviseRK6Tensor7int64_t", "uvm_cuda_mem_advise::self"], [8, 1, 1, "_CPPv427uvm_cuda_mem_prefetch_asyncRK6TensorNSt8optionalI6TensorEE", "uvm_cuda_mem_prefetch_async"], [8, 3, 1, "_CPPv427uvm_cuda_mem_prefetch_asyncRK6TensorNSt8optionalI6TensorEE", "uvm_cuda_mem_prefetch_async::device_t"], [8, 3, 1, "_CPPv427uvm_cuda_mem_prefetch_asyncRK6TensorNSt8optionalI6TensorEE", "uvm_cuda_mem_prefetch_async::self"], [8, 1, 1, "_CPPv424uvm_mem_advice_dont_forkRK6Tensor", "uvm_mem_advice_dont_fork"], [8, 3, 1, "_CPPv424uvm_mem_advice_dont_forkRK6Tensor", "uvm_mem_advice_dont_fork::self"], [8, 1, 1, "_CPPv411uvm_storageRK6Tensor", "uvm_storage"], [8, 3, 1, "_CPPv411uvm_storageRK6Tensor", "uvm_storage::self"], [8, 1, 1, "_CPPv410uvm_to_cpuRK6Tensor", "uvm_to_cpu"], [8, 3, 1, "_CPPv410uvm_to_cpuRK6Tensor", "uvm_to_cpu::self"], [8, 1, 1, "_CPPv416uvm_to_cpu_cloneRK6Tensor", "uvm_to_cpu_clone"], [8, 3, 1, "_CPPv416uvm_to_cpu_cloneRK6Tensor", "uvm_to_cpu_clone::self"], [8, 1, 1, "_CPPv413uvm_to_deviceRK6TensorRK6Tensor", "uvm_to_device"], [8, 3, 1, "_CPPv413uvm_to_deviceRK6TensorRK6Tensor", "uvm_to_device::prototype"], [8, 3, 1, "_CPPv413uvm_to_deviceRK6TensorRK6Tensor", "uvm_to_device::self"], [22, 6, 0, "-", "fbgemm_gpu"]], "fbgemm_gpu.docs.examples": [[29, 7, 1, "", "example_method"]], "fbgemm_gpu.permute_pooled_embedding_modules": [[19, 8, 1, "", "PermutePooledEmbeddings"]], "fbgemm_gpu.permute_pooled_embedding_modules.PermutePooledEmbeddings": [[19, 9, 1, "", "__call__"]], "fbgemm_gpu.split_table_batched_embeddings_ops_training": [[23, 8, 1, "", "SplitTableBatchedEmbeddingBagsCodegen"]], "fbgemm_gpu.split_table_batched_embeddings_ops_training.SplitTableBatchedEmbeddingBagsCodegen": [[23, 9, 1, "", "forward"], [23, 9, 1, "", "set_learning_rate"], [23, 9, 1, "", "set_optimizer_step"], [23, 9, 1, "", "split_embedding_weights"], [23, 9, 1, "", "split_optimizer_states"], [23, 9, 1, "", "update_hyper_parameters"]], "torch.ops.fbgemm": [[21, 7, 1, "", "FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf"], [22, 7, 1, "", "asynchronous_complete_cumsum"], [18, 7, 1, "", "batched_dense_vec_jagged_2d_mul"], [18, 7, 1, "", "dense_to_jagged"], [22, 7, 1, "", "expand_into_jagged_permute"], [18, 7, 1, "", "jagged_1d_to_dense"], [18, 7, 1, "", "jagged_2d_to_dense"], [18, 7, 1, "", "jagged_dense_dense_elementwise_add_jagged_output"], [18, 7, 1, "", "jagged_dense_elementwise_add"], [18, 7, 1, "", "jagged_dense_elementwise_add_jagged_output"], [18, 7, 1, "", "jagged_dense_elementwise_mul"], [18, 7, 1, "", "jagged_to_padded_dense"], [20, 7, 1, "", "merge_pooled_embeddings"], [22, 7, 1, "", "offsets_range"], [22, 7, 1, "", "permute_1D_sparse_data"], [22, 7, 1, "", "permute_2D_sparse_data"], [20, 7, 1, "", "permute_pooled_embs"], [18, 7, 1, "", "stacked_jagged_1d_to_dense"], [18, 7, 1, "", "stacked_jagged_2d_to_dense"]]}, "objtypes": {"0": "cpp:enumerator", "1": "cpp:function", "2": "cpp:templateParam", "3": "cpp:functionParam", "4": "cpp:enum", "5": "cpp:class", "6": "py:module", "7": "py:function", "8": "py:class", "9": "py:method"}, "objnames": {"0": ["cpp", "enumerator", "C++ enumerator"], "1": ["cpp", "function", "C++ function"], "2": ["cpp", "templateParam", "C++ template parameter"], "3": ["cpp", "functionParam", "C++ function parameter"], "4": ["cpp", "enum", "C++ enum"], "5": ["cpp", "class", "C++ class"], "6": ["py", "module", "Python module"], "7": ["py", "function", "Python function"], "8": ["py", "class", "Python class"], "9": ["py", "method", "Python method"]}, "titleterms": {"quantiz": [0, 10, 21], "util": 0, "refer": [0, 30], "implement": [0, 1], "method": [0, 1], "avx": 0, "2": 0, "512": 0, "tbe": [1, 23], "cpu": [1, 3, 6, 7, 10, 11, 14, 15], "autovector": 1, "fp8": 1, "16": 1, "32": 1, "autovec": 1, "build": [2, 14, 28], "instruct": [2, 14, 15, 16], "fbgemm": [2, 31], "requir": 2, "hardwar": 2, "softwar": 2, "depend": 2, "asmjit": 2, "cpuinfo": 2, "googletest": 2, "set": [2, 14, 15, 28], "up": [2, 14, 15, 28], "an": [2, 14], "isol": [2, 14], "environ": [2, 14, 15, 16, 28], "instal": [2, 14, 15], "tool": [2, 14], "c": [2, 14, 27, 31], "compil": [2, 14], "other": [2, 14, 30], "librari": [2, 15], "prepar": [2, 14], "linux": 2, "maco": 2, "cmake": 2, "gcc": [2, 14], "issu": [2, 25], "12": 2, "clang": [2, 14], "bazel": 2, "window": 2, "embed": [3, 9, 12, 13, 19, 20, 23], "oper": [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 17, 18, 20, 21, 22, 31], "cuda": [3, 6, 7, 8, 10, 11, 13, 14, 15, 16], "experiment": 4, "attent": 4, "combin": [5, 17], "input": 5, "jag": [6, 17, 18], "tensor": [6, 17, 18], "layout": 7, "transform": 7, "memori": 8, "pool": [9, 19, 20], "merg": 9, "permut": 9, "spars": [11, 22], "data": 11, "tabl": [12, 23], "batch": [12, 23], "ssd": 13, "miniconda": 14, "conda": [14, 15], "onli": [14, 15], "genai": 14, "docker": [14, 15], "imag": 14, "cudnn": 14, "cutlass": 14, "rocm": [14, 15, 16], "miopen": 14, "symlink": 14, "pytorch": [14, 15], "through": [14, 15], "pip": [14, 15], "post": [14, 15], "check": [14, 15], "triton": [14, 15], "pre": 14, "setup": [14, 16], "The": 14, "process": 14, "wheel": 14, "variabl": 14, "For": 14, "develop": [14, 31], "undefin": [14, 15], "symbol": [14, 15], "glibc": 14, "version": 14, "compat": 14, "nvidia": 15, "driver": 15, "contain": 15, "runtim": 15, "amdgpu": 15, "python": [15, 29, 31], "fbgemm_gpu": [15, 16, 28, 31], "packag": 15, "public": 15, "pypi": 15, "test": 16, "run": 16, "variant": 16, "benchmark": 16, "high": 17, "level": 17, "overview": [17, 31], "format": 17, "valu": 17, "offset": 17, "max": 17, "length": 17, "exampl": 17, "arithmet": 17, "convers": 17, "dens": 17, "modul": [19, 23, 31], "train": 23, "contact": 24, "u": 24, "github": 24, "slack": 24, "contribut": 25, "code": [25, 27, 29, 30], "conduct": 25, "pull": 25, "request": 25, "contributor": 25, "licens": [25, 26], "agreement": 25, "cla": 25, "ad": [27, 29, 30], "document": [27, 28, 29, 30, 31], "gener": [28, 29, 31], "guidelin": 28, "specif": 28, "guid": 28, "toolchain": 28, "lint": 28, "deploy": 28, "preview": 28, "todo": 29, "auto": 29, "sphinx": 30, "pointer": 30, "section": 30, "referenc": 30, "sourc": 30, "latex": 30, "graph": 30, "homepag": 31, "info": 31, "api": 31}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "sphinx.ext.todo": 2, "sphinx.ext.viewcode": 1, "sphinx": 57}, "alltitles": {"Adding Documentation to Python Code": [[29, "adding-documentation-to-python-code"]], "Todo": [[29, "id1"]], "Adding Documentation to Auto-Generated Python Code": [[29, "adding-documentation-to-auto-generated-python-code"]], "FBGEMM and FBGEMM_GPU Documentation Homepage": [[31, "fbgemm-and-fbgemm-gpu-documentation-homepage"]], "General Info": [[31, null]], "FBGEMM Development": [[31, null]], "FBGEMM_GPU Development": [[31, null]], "FBGEMM_GPU Overview": [[31, null]], "FBGEMM C++ API": [[31, null]], "FBGEMM_GPU C++ API": [[31, null]], "FBGEMM_GPU Python Operators API": [[31, null]], "FBGEMM_GPU Python Modules API": [[31, null]], "Sphinx Documentation Pointers": [[30, "sphinx-documentation-pointers"]], "References Other Sections of the Documentation": [[30, "references-other-sections-of-the-documentation"]], "Referencing the Source Code": [[30, "referencing-the-source-code"]], "Adding LaTeX": [[30, "adding-latex"]], "Adding Graphs": [[30, "adding-graphs"]], "Documentation": [[28, "documentation"]], "General Documentation Guidelines": [[28, "general-documentation-guidelines"]], "Specific Documentation Guides": [[28, "specific-documentation-guides"]], "Building the Documentation": [[28, "building-the-documentation"]], "Set Up Build Environment": [[28, "set-up-build-environment"]], "Build FBGEMM_GPU": [[28, "build-fbgemm-gpu"]], "Set Up the Documentation Toolchain": [[28, "set-up-the-documentation-toolchain"]], "Build the Documentation": [[28, "build-the-documentation"]], "Linting the Documentation": [[28, "linting-the-documentation"]], "Deployment Preview": [[28, "deployment-preview"]], "Jagged Tensor Operators": [[17, "jagged-tensor-operators"], [6, "jagged-tensor-operators"], [18, "module-fbgemm_gpu"]], "High Level Overview": [[17, "high-level-overview"]], "Jagged Tensor Format": [[17, "jagged-tensor-format"]], "Values": [[17, "values"]], "Offsets": [[17, "offsets"]], "Max Lengths": [[17, "max-lengths"]], "Jagged Tensor Example": [[17, "jagged-tensor-example"]], "Jagged Tensor Operations": [[17, "jagged-tensor-operations"]], "Arithmetic Operations": [[17, "arithmetic-operations"]], "Conversion Operations": [[17, "conversion-operations"]], "Jagged to Dense": [[17, "jagged-to-dense"]], "Dense to Jagged": [[17, "dense-to-jagged"]], "Combined Arithmetic + Conversion Operations": [[17, "combined-arithmetic-conversion-operations"]], "Test Instructions": [[16, "test-instructions"]], "Setup the FBGEMM_GPU Test Environment": [[16, "setup-the-fbgemm-gpu-test-environment"]], "Running FBGEMM_GPU Tests": [[16, "running-fbgemm-gpu-tests"]], "Testing with the CUDA Variant": [[16, "testing-with-the-cuda-variant"]], "Testing with the ROCm Variant": [[16, "testing-with-the-rocm-variant"]], "Running FBGEMM_GPU Benchmarks": [[16, "running-fbgemm-gpu-benchmarks"]], "Pooled Embedding Modules": [[19, "module-fbgemm_gpu"]], "Table Batched Embedding (TBE) Training Module": [[23, "table-batched-embedding-tbe-training-module"]], "Layout Transformation Operators": [[7, "layout-transformation-operators"]], "CUDA Operators": [[7, "cuda-operators"], [6, "cuda-operators"], [10, "cuda-operators"], [11, "cuda-operators"], [13, "cuda-operators"], [3, "cuda-operators"]], "CPU Operators": [[7, "cpu-operators"], [6, "cpu-operators"], [10, "cpu-operators"], [11, "cpu-operators"], [3, "cpu-operators"]], "Combine Input Operators": [[5, "combine-input-operators"]], "Experimental Operators": [[4, "experimental-operators"]], "Attention Operators": [[4, "attention-operators"]], "Pooled Embeddings Operators": [[9, "pooled-embeddings-operators"]], "Merge Operators": [[9, "merge-operators"]], "Permutation Operators": [[9, "permutation-operators"]], "Quantization Operators": [[10, "quantization-operators"], [21, "module-fbgemm_gpu"]], "CUDA Memory Operators": [[8, "cuda-memory-operators"]], "Sparse Data Operators": [[11, "sparse-data-operators"]], "SSD Embedding Operators": [[13, "ssd-embedding-operators"]], "Build Instructions": [[14, "build-instructions"], [2, "build-instructions"]], "Set Up an Isolated Build Environment": [[14, "set-up-an-isolated-build-environment"], [2, "set-up-an-isolated-build-environment"]], "Install Miniconda": [[14, "install-miniconda"]], "Set Up the Conda Environment": [[14, "set-up-the-conda-environment"]], "Set Up for CPU-Only Build": [[14, "set-up-for-cpu-only-build"]], "Set Up for CUDA / GenAI-Only Build": [[14, "set-up-for-cuda-genai-only-build"]], "CUDA Docker Image": [[14, "cuda-docker-image"]], "Install CUDA": [[14, "install-cuda"]], "Install cuDNN": [[14, "install-cudnn"]], "Install CUTLASS": [[14, "install-cutlass"]], "Set Up for ROCm Build": [[14, "set-up-for-rocm-build"]], "ROCm Docker Image": [[14, "rocm-docker-image"]], "Install ROCm": [[14, "install-rocm"]], "Install MIOpen": [[14, "install-miopen"]], "Install the Build Tools": [[14, "install-the-build-tools"], [2, "install-the-build-tools"]], "C/C++ Compiler (GCC)": [[14, "c-c-compiler-gcc"]], "C/C++ Compiler (Clang)": [[14, "c-c-compiler-clang"]], "Compiler Symlinks": [[14, "compiler-symlinks"]], "Other Build Tools": [[14, "other-build-tools"], [2, "other-build-tools"]], "Install PyTorch": [[14, "install-pytorch"], [15, "install-pytorch"]], "Installation Through Conda": [[14, "installation-through-conda"]], "Installation Through PyTorch PIP": [[14, "installation-through-pytorch-pip"]], "Post-Install Checks": [[14, "post-install-checks"]], "Install PyTorch-Triton": [[14, "install-pytorch-triton"]], "Other Pre-Build Setup": [[14, "other-pre-build-setup"]], "Preparing the Build": [[14, "preparing-the-build"], [2, "preparing-the-build"]], "The Build Process": [[14, "the-build-process"]], "Set Wheel Build Variables": [[14, "set-wheel-build-variables"]], "CPU-Only Build": [[14, "cpu-only-build"]], "CUDA Build": [[14, "cuda-build"]], "GenAI-Only Build": [[14, "genai-only-build"]], "ROCm Build": [[14, "rocm-build"]], "Post-Build Checks (For Developers)": [[14, "post-build-checks-for-developers"]], "Undefined Symbols Check": [[14, "undefined-symbols-check"]], "GLIBC Version Compatibility Check": [[14, "glibc-version-compatibility-check"]], "Table Batched Embedding Operators": [[12, "table-batched-embedding-operators"]], "Installation Instructions": [[15, "installation-instructions"]], "Set Up CPU-Only Environment": [[15, "set-up-cpu-only-environment"]], "Set Up CUDA Environment": [[15, "set-up-cuda-environment"]], "Install NVIDIA Drivers": [[15, "install-nvidia-drivers"]], "Set Up the CUDA Docker Container and Conda Environment": [[15, "set-up-the-cuda-docker-container-and-conda-environment"]], "Install the CUDA Runtime": [[15, "install-the-cuda-runtime"]], "Set Up ROCm Environment": [[15, "set-up-rocm-environment"]], "Install AMDGPU Drivers": [[15, "install-amdgpu-drivers"]], "Set Up the ROCm Docker Container and Conda Environment": [[15, "set-up-the-rocm-docker-container-and-conda-environment"]], "Install Python Libraries": [[15, "install-python-libraries"]], "Install Triton": [[15, "install-triton"]], "Install the FBGEMM_GPU Package": [[15, "install-the-fbgemm-gpu-package"]], "Install through PyTorch PIP": [[15, "install-through-pytorch-pip"]], "Install through Public PyPI": [[15, "install-through-public-pypi"]], "Post-Installation Checks": [[15, "post-installation-checks"]], "Undefined Symbols": [[15, "undefined-symbols"]], "Contributing": [[25, "contributing"]], "Code of Conduct": [[25, "code-of-conduct"]], "Pull Requests": [[25, "pull-requests"]], "Contributor License Agreement (\u201cCLA\u201d)": [[25, "contributor-license-agreement-cla"]], "Issues": [[25, "issues"]], "License": [[25, "license"], [26, "license"]], "Contact Us": [[24, "contact-us"]], "GitHub": [[24, "github"]], "Slack": [[24, "slack"]], "Adding Documentation to C++ Code": [[27, "adding-documentation-to-c-code"]], "FBGEMM Requirements": [[2, "fbgemm-requirements"]], "Hardware Requirements": [[2, "hardware-requirements"]], "Software Dependencies": [[2, "software-dependencies"]], "asmjit": [[2, "asmjit"]], "cpuinfo": [[2, "cpuinfo"]], "GoogleTest": [[2, "googletest"]], "C/C++ Compiler": [[2, "c-c-compiler"]], "Build the FBGEMM Library": [[2, "build-the-fbgemm-library"]], "Building on Linux and macOS (CMake + GCC)": [[2, "building-on-linux-and-macos-cmake-gcc"]], "Build Issues with GCC 12+": [[2, "build-issues-with-gcc-12"]], "Building on Linux and macOS (CMake + Clang)": [[2, "building-on-linux-and-macos-cmake-clang"]], "Building on Linux (Bazel)": [[2, "building-on-linux-bazel"]], "Building on Windows": [[2, "building-on-windows"]], "Quantization Utilities": [[0, "quantization-utilities"]], "Reference Implementation Methods": [[0, "reference-implementation-methods"]], "AVX-2 Implementation Methods": [[0, "avx-2-implementation-methods"]], "AVX-512 Implementation Methods": [[0, "avx-512-implementation-methods"]], "Embedding Operators": [[3, "embedding-operators"]], "TBE CPU Autovectorization": [[1, "tbe-cpu-autovectorization"]], "FP8/16/32 Autovec Implementation Methods": [[1, "fp8-16-32-autovec-implementation-methods"]], "Pooled Embedding Operators": [[20, "module-fbgemm_gpu"]], "Sparse Operators": [[22, "module-fbgemm_gpu"]]}, "indexentries": {"findminmax (c++ function)": [[0, "_CPPv410FindMinMaxPKfPfPf7int64_t"]], "floatorhalftofusednbitrowwisequantizedsbhalf (c++ function)": [[0, "_CPPv4I0E44FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfviPK9InputType6size_tiPNSt7uint8_tE"]], "fusedquantizedequantize (c++ function)": [[0, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif"]], "quantizegroupwise (c++ function)": [[0, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T"]], "xor128 (c++ function)": [[0, "_CPPv46Xor128v"]], "requantizeoutputprocessingavx2 (c++ function)": [[0, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE"]], "requantizeoutputprocessinggconvavx512 (c++ function)": [[0, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE"]], "embeddingspmdmfp8_autovec (c++ function)": [[1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib"]], "embeddingspmdm_autovec (c++ function)": [[1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb"]], "bounds_check_indices_cuda (c++ function)": [[3, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKNSt8optionalI6TensorEERKNSt8optionalI6TensorEEK7int64_t"]], "int_nbit_split_embedding_codegen_lookup_function (c++ function)": [[3, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE"]], "int_nbit_split_embedding_codegen_lookup_function_cpu (c++ function)": [[3, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE"]], "int_nbit_split_embedding_uvm_caching_codegen_lookup_function (c++ function)": [[3, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE"]], "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu (c++ function)": [[3, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE"]], "pruned_array_lookup_cpu (c++ function)": [[3, "_CPPv423pruned_array_lookup_cpu6Tensor6Tensor6Tensor6Tensor"]], "pruned_array_lookup_cuda (c++ function)": [[3, "_CPPv424pruned_array_lookup_cuda6Tensor6Tensor6Tensor6Tensor"]], "pruned_hashmap_insert_unweighted_cpu (c++ function)": [[3, "_CPPv436pruned_hashmap_insert_unweighted_cpu6Tensor6Tensor6Tensor6Tensor6Tensor"]], "pruned_hashmap_lookup_cuda (c++ function)": [[3, "_CPPv426pruned_hashmap_lookup_cuda6Tensor6Tensor6Tensor6Tensor"]], "pruned_hashmap_lookup_unweighted_cpu (c++ function)": [[3, "_CPPv436pruned_hashmap_lookup_unweighted_cpu6Tensor6Tensor6Tensor6Tensor"]], "gqa_attn_splitk (c++ function)": [[4, "_CPPv415gqa_attn_splitkRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorEKdK7int64_tK7int64_tKbK7int64_t"]], "padding_fused_tbe_input_combine_cpu (c++ function)": [[5, "_CPPv435padding_fused_tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE7int64_t"]], "tbe_input_combine_cpu (c++ function)": [[5, "_CPPv421tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE"]], "batched_dense_vec_jagged_2d_mul (c++ function)": [[6, "_CPPv431batched_dense_vec_jagged_2d_mulRK6TensorRK6TensorRK6Tensor"]], "dense_to_jagged (c++ function)": [[6, "_CPPv415dense_to_jaggedRK6TensorRKNSt6vectorI6TensorEENSt8optionalIN2at6SymIntEEE"]], "jagged_1d_to_dense (c++ function)": [[6, "_CPPv418jagged_1d_to_dense6Tensor6TensorN3c106SymIntE7int64_t"]], "jagged_2d_to_dense (c++ function)": [[6, "_CPPv418jagged_2d_to_dense6Tensor6TensorN3c106SymIntE"]], "jagged_dense_elementwise_add (c++ function)": [[6, "_CPPv428jagged_dense_elementwise_addRK6TensorRKNSt6vectorI6TensorEERK6Tensor"]], "jagged_dense_elementwise_add_jagged_output (c++ function)": [[6, "_CPPv442jagged_dense_elementwise_add_jagged_outputRK6TensorRKNSt6vectorI6TensorEERK6Tensor"]], "jagged_dense_elementwise_add_jagged_output_cuda (c++ function)": [[6, "_CPPv447jagged_dense_elementwise_add_jagged_output_cudaRK6TensorRKNSt6vectorI6TensorEERK6Tensor"]], "jagged_dense_elementwise_mul (c++ function)": [[6, "_CPPv428jagged_dense_elementwise_mulRK6TensorRKNSt6vectorI6TensorEERK6Tensor"]], "jagged_to_padded_dense (c++ function)": [[6, "_CPPv422jagged_to_padded_denseRK6TensorRKNSt6vectorI6TensorEEKN3c1014SymIntArrayRefEKd"]], "jagged_to_padded_dense_forward (c++ function)": [[6, "_CPPv430jagged_to_padded_dense_forwardRK6TensorRKNSt6vectorI6TensorEEN3c1014SymIntArrayRefEKd"]], "recat_embedding_grad_output_cuda (c++ function)": [[7, "_CPPv432recat_embedding_grad_output_cuda6TensorRKNSt6vectorI7int64_tEE"]], "recat_embedding_grad_output_mixed_d_batch_cuda (c++ function)": [[7, "_CPPv446recat_embedding_grad_output_mixed_D_batch_cudaRK6TensorRK6TensorRK6Tensor"]], "recat_embedding_grad_output_mixed_d_cpu (c++ function)": [[7, "_CPPv439recat_embedding_grad_output_mixed_D_cpuRK6TensorRKNSt6vectorI7int64_tEE"]], "recat_embedding_grad_output_mixed_d_cuda (c++ function)": [[7, "_CPPv440recat_embedding_grad_output_mixed_D_cudaRK6TensorRKNSt6vectorI7int64_tEE"]], "is_uvm_tensor (c++ function)": [[8, "_CPPv413is_uvm_tensorRK6Tensor"]], "new_host_mapped_tensor (c++ function)": [[8, "_CPPv422new_host_mapped_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE"]], "new_managed_tensor (c++ function)": [[8, "_CPPv418new_managed_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE"]], "new_managed_tensor_meta (c++ function)": [[8, "_CPPv423new_managed_tensor_metaRK6TensorRKNSt6vectorINSt7int64_tEEE"]], "new_unified_tensor (c++ function)": [[8, "_CPPv418new_unified_tensorRK6TensorRKNSt6vectorINSt7int64_tEEEb"]], "new_unified_tensor_meta (c++ function)": [[8, "_CPPv423new_unified_tensor_metaRK6TensorRKNSt6vectorINSt7int64_tEEEb"]], "new_vanilla_managed_tensor (c++ function)": [[8, "_CPPv426new_vanilla_managed_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE"]], "uvm_cuda_mem_advise (c++ function)": [[8, "_CPPv419uvm_cuda_mem_adviseRK6Tensor7int64_t"]], "uvm_cuda_mem_prefetch_async (c++ function)": [[8, "_CPPv427uvm_cuda_mem_prefetch_asyncRK6TensorNSt8optionalI6TensorEE"]], "uvm_mem_advice_dont_fork (c++ function)": [[8, "_CPPv424uvm_mem_advice_dont_forkRK6Tensor"]], "uvm_storage (c++ function)": [[8, "_CPPv411uvm_storageRK6Tensor"]], "uvm_to_cpu (c++ function)": [[8, "_CPPv410uvm_to_cpuRK6Tensor"]], "uvm_to_cpu_clone (c++ function)": [[8, "_CPPv416uvm_to_cpu_cloneRK6Tensor"]], "uvm_to_device (c++ function)": [[8, "_CPPv413uvm_to_deviceRK6TensorRK6Tensor"]], "all_to_one_device (c++ function)": [[9, "_CPPv417all_to_one_deviceNSt6vectorIN2at6TensorEEEN2at6DeviceE"]], "permute_pooled_embs_auto_grad (c++ function)": [[9, "_CPPv429permute_pooled_embs_auto_gradRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor"]], "permute_pooled_embs_auto_grad_cpu (c++ function)": [[9, "_CPPv433permute_pooled_embs_auto_grad_cpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor"]], "permute_pooled_embs_auto_grad_gpu (c++ function)": [[9, "_CPPv433permute_pooled_embs_auto_grad_gpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor"]], "permute_pooled_embs_auto_grad_split_cpu (c++ function)": [[9, "_CPPv439permute_pooled_embs_auto_grad_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE"]], "permute_pooled_embs_auto_grad_split_gpu (c++ function)": [[9, "_CPPv439permute_pooled_embs_auto_grad_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE"]], "permute_pooled_embs_cpu_impl (c++ function)": [[9, "_CPPv428permute_pooled_embs_cpu_implRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKb"]], "permute_pooled_embs_split_cpu (c++ function)": [[9, "_CPPv429permute_pooled_embs_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE"]], "permute_pooled_embs_split_gpu (c++ function)": [[9, "_CPPv429permute_pooled_embs_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE"]], "fp8quantizedtofloat_ref (c++ function)": [[10, "_CPPv423FP8QuantizedToFloat_refPCK7uint8_tK6size_tK6size_tPCfKiKi"]], "fp8rowwise_to_float_cpu (c++ function)": [[10, "_CPPv423FP8rowwise_to_float_cpuRK6TensorbK7int64_t"]], "floattofp8quantized_ref (c++ function)": [[10, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd"]], "_fp8rowwise_to_float_gpu (c++ function)": [[10, "_CPPv424_FP8rowwise_to_float_gpuRKN2at6TensorEbK7int64_t"]], "_bfloat16_to_float_gpu (c++ function)": [[10, "_CPPv422_bfloat16_to_float_gpuRKN2at6TensorE"]], "_float_to_fp8rowwise_gpu (c++ function)": [[10, "_CPPv424_float_to_FP8rowwise_gpuRK6TensorKb"]], "_float_to_bfloat16_gpu (c++ function)": [[10, "_CPPv422_float_to_bfloat16_gpuRKN2at6TensorE"]], "_float_to_fused8bitrowwise_cpu_out (c++ function)": [[10, "_CPPv434_float_to_fused8bitrowwise_cpu_outR6TensorRK6Tensor"]], "_float_to_fused8bitrowwise_gpu (c++ function)": [[10, "_CPPv430_float_to_fused8bitrowwise_gpuRK6Tensor"]], "_float_to_fusednbitrowwise_gpu (c++ function)": [[10, "_CPPv430_float_to_fusednbitrowwise_gpuRK6TensorK7int64_t"]], "_float_to_hfp8_gpu (c++ function)": [[10, "_CPPv418_float_to_hfp8_gpuRKN2at6TensorEK7int64_tK7int64_tKd"]], "_float_to_msfp_gpu (c++ function)": [[10, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd"]], "_float_to_paddedfp8rowwise_gpu (c++ function)": [[10, "_CPPv430_float_to_paddedFP8rowwise_gpuRK6TensorKbK7int64_t"]], "_fused8bitrowwise_to_float_cpu_out (c++ function)": [[10, "_CPPv434_fused8bitrowwise_to_float_cpu_outR6TensorRK6Tensor"]], "_fused8bitrowwise_to_float_gpu (c++ function)": [[10, "_CPPv430_fused8bitrowwise_to_float_gpuRKN2at6TensorE"]], "_fused8bitrowwise_to_float_mixed_dim_gpu (c++ function)": [[10, "_CPPv440_fused8bitrowwise_to_float_mixed_dim_gpuRKN2at6TensorERKN2at6TensorEK7int64_t"]], "_fused8bitrowwise_to_half_gpu (c++ function)": [[10, "_CPPv429_fused8bitrowwise_to_half_gpuRKN2at6TensorE"]], "_fused8bitrowwise_to_single_or_half_precision_gpu (c++ function)": [[10, "_CPPv449_fused8bitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tKbKb"]], "_fusednbitrowwise_to_float_gpu (c++ function)": [[10, "_CPPv430_fusednbitrowwise_to_float_gpuRKN2at6TensorEK7int64_t"]], "_fusednbitrowwise_to_half_gpu (c++ function)": [[10, "_CPPv429_fusednbitrowwise_to_half_gpuRKN2at6TensorEK7int64_t"]], "_fusednbitrowwise_to_single_or_half_precision_gpu (c++ function)": [[10, "_CPPv449_fusednbitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tK7int64_t"]], "_half_to_fused8bitrowwise_gpu (c++ function)": [[10, "_CPPv429_half_to_fused8bitrowwise_gpuRK6Tensor"]], "_half_to_fusednbitrowwise_gpu (c++ function)": [[10, "_CPPv429_half_to_fusednbitrowwise_gpuRKN2at6TensorEK7int64_t"]], "_hfp8_to_float_gpu (c++ function)": [[10, "_CPPv418_hfp8_to_float_gpuRKN2at6TensorEK7int64_tK7int64_t"]], "_msfp_to_float_gpu (c++ function)": [[10, "_CPPv418_msfp_to_float_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_t"]], "_paddedfp8rowwise_to_float_gpu (c++ function)": [[10, "_CPPv430_paddedFP8rowwise_to_float_gpuRKN2at6TensorEKbK7int64_tK7int64_tK7int64_t"]], "_single_or_half_precision_to_fused8bitrowwise_gpu (c++ function)": [[10, "_CPPv449_single_or_half_precision_to_fused8bitrowwise_gpuRK6Tensor"]], "_single_or_half_precision_to_fusednbitrowwise_gpu (c++ function)": [[10, "_CPPv449_single_or_half_precision_to_fusednbitrowwise_gpuRK6TensorK7int64_t"]], "float_or_half_to_fused8bitrowwise_cpu (c++ function)": [[10, "_CPPv437float_or_half_to_fused8bitrowwise_cpuRK6Tensor"]], "float_to_fp8rowwise_cpu (c++ function)": [[10, "_CPPv423float_to_FP8rowwise_cpuRK6Tensorb"]], "float_to_fused8bitrowwise_cpu (c++ function)": [[10, "_CPPv429float_to_fused8bitrowwise_cpuRK6Tensor"]], "fused8bitrowwise_to_float_cpu (c++ function)": [[10, "_CPPv429fused8bitrowwise_to_float_cpuRK6Tensor"]], "fused8bitrowwise_to_float_or_half_cpu (c++ function)": [[10, "_CPPv437fused8bitrowwise_to_float_or_half_cpuRK6TensorK7int64_tKbKb"]], "fused8bitrowwise_to_half_cpu (c++ function)": [[10, "_CPPv428fused8bitrowwise_to_half_cpuRK6Tensor"]], "fusednbitrowwise_sbfront_to_float_cpu (c++ function)": [[10, "_CPPv437fusednbitrowwise_sbfront_to_float_cpuRK6TensorK7int64_t"]], "fusednbitrowwise_to_float_cpu (c++ function)": [[10, "_CPPv429fusednbitrowwise_to_float_cpuRK6TensorK7int64_t"]], "fusednbitrowwise_to_float_or_half_cpu (c++ function)": [[10, "_CPPv437fusednbitrowwise_to_float_or_half_cpuRK6TensorK7int64_tK7int64_t"]], "fusednbitrowwise_to_half_cpu (c++ function)": [[10, "_CPPv428fusednbitrowwise_to_half_cpuRK6TensorK7int64_t"]], "half_to_fused8bitrowwise_cpu (c++ function)": [[10, "_CPPv428half_to_fused8bitrowwise_cpuRK6Tensor"]], "expand_into_jagged_permute_cuda (c++ function)": [[11, "_CPPv431expand_into_jagged_permute_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_t"]], "generic_histogram_binning_calibration_by_feature_cpu (c++ function)": [[11, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td"]], "histogram_binning_calibration_cpu (c++ function)": [[11, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td"]], "direct_mapped_lru_cache_populate_byte_cuda (c++ function)": [[12, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE"]], "direct_mapped_lxu_cache_lookup_cuda (c++ function)": [[12, "_CPPv435direct_mapped_lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE"]], "get_unique_indices_cuda (c++ function)": [[12, "_CPPv423get_unique_indices_cudaRKN2at6TensorEK7int64_tKb"]], "get_unique_indices_with_inverse_cuda (c++ function)": [[12, "_CPPv436get_unique_indices_with_inverse_cudaRKN2at6TensorEK7int64_tKbKb"]], "host_lxu_cache_slot (c++ function)": [[12, "_CPPv419host_lxu_cache_slot7int64_t7int64_t"]], "lfu_cache_populate_byte_cuda (c++ function)": [[12, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t"]], "lfu_cache_populate_cuda (c++ function)": [[12, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb"]], "linearize_cache_indices_cuda (c++ function)": [[12, "_CPPv428linearize_cache_indices_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorERKNSt8optionalIN2at6TensorEEEK7int64_tK7int64_t"]], "linearize_cache_indices_from_row_idx_cuda (c++ function)": [[12, "_CPPv441linearize_cache_indices_from_row_idx_cudaN2at6TensorEN2at6TensorEN2at6TensorE"]], "lru_cache_find_uncached_cuda (c++ function)": [[12, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorEKb"]], "lru_cache_populate_byte_cuda (c++ function)": [[12, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE"]], "lru_cache_populate_cuda (c++ function)": [[12, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbNSt8optionalIN2at6TensorEEEbNSt8optionalIN2at6TensorEEE"]], "lxu_cache_flush_cuda (c++ function)": [[12, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb"]], "lxu_cache_locations_update_cuda (c++ function)": [[12, "_CPPv431lxu_cache_locations_update_cudaN2at6TensorEN2at6TensorENSt8optionalIN2at6TensorEEE"]], "lxu_cache_locking_counter_decrement_cuda (c++ function)": [[12, "_CPPv440lxu_cache_locking_counter_decrement_cudaN2at6TensorEN2at6TensorE"]], "lxu_cache_lookup_cuda (c++ function)": [[12, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEENSt8optionalIN2at6TensorEEENSt8optionalIN2at6TensorEEE"]], "reset_weight_momentum_cuda (c++ function)": [[12, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t"]], "rocksdbwritemode (c++ enum)": [[13, "_CPPv416RocksdbWriteMode"]], "rocksdbwritemode::bwd_l1_cnflct_miss_write_back (c++ enumerator)": [[13, "_CPPv4N16RocksdbWriteMode29BWD_L1_CNFLCT_MISS_WRITE_BACKE"]], "rocksdbwritemode::flush (c++ enumerator)": [[13, "_CPPv4N16RocksdbWriteMode5FLUSHE"]], "rocksdbwritemode::fwd_l1_eviction (c++ enumerator)": [[13, "_CPPv4N16RocksdbWriteMode15FWD_L1_EVICTIONE"]], "rocksdbwritemode::fwd_rocksdb_read (c++ enumerator)": [[13, "_CPPv4N16RocksdbWriteMode16FWD_ROCKSDB_READE"]], "compact_indices_cuda (c++ function)": [[13, "_CPPv420compact_indices_cudaNSt6vectorI6TensorEE6TensorNSt6vectorI6TensorEE6Tensor6Tensor"]], "cuda_callback_func (c++ function)": [[13, "_CPPv418cuda_callback_func12cudaStream_t11cudaError_tPv"]], "hash_shard (c++ function)": [[13, "_CPPv410hash_shard7int64_t6size_t"]], "kv_db::cachecontext (c++ class)": [[13, "_CPPv4N5kv_db12CacheContextE"]], "kv_db::embeddingkvdb (c++ class)": [[13, "_CPPv4N5kv_db13EmbeddingKVDBE"]], "kv_db::queueitem (c++ struct)": [[13, "_CPPv4N5kv_db9QueueItemE"]], "l2_cache::cachelibcache (c++ class)": [[13, "_CPPv4N8l2_cache13CacheLibCacheE"]], "masked_index_put_cuda (c++ function)": [[13, "_CPPv421masked_index_put_cuda6Tensor6Tensor6Tensor6TensorKbK7int64_t"]], "masked_index_select_cuda (c++ function)": [[13, "_CPPv424masked_index_select_cuda6Tensor6Tensor6Tensor6TensorKbK7int64_t"]], "ps::embeddingparameterserver (c++ class)": [[13, "_CPPv4N2ps24EmbeddingParameterServerE"]], "ssd::embeddingrocksdb (c++ class)": [[13, "_CPPv4N3ssd16EmbeddingRocksDBE"]], "ssd_generate_row_addrs_cuda (c++ function)": [[13, "_CPPv427ssd_generate_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor"]], "ssd_update_row_addrs_cuda (c++ function)": [[13, "_CPPv425ssd_update_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor"]], "batched_dense_vec_jagged_2d_mul() (in module torch.ops.fbgemm)": [[18, "torch.ops.fbgemm.batched_dense_vec_jagged_2d_mul"]], "dense_to_jagged() (in module torch.ops.fbgemm)": [[18, "torch.ops.fbgemm.dense_to_jagged"]], "fbgemm_gpu": [[18, "module-fbgemm_gpu"], [19, "module-fbgemm_gpu"], [20, "module-fbgemm_gpu"], [21, "module-fbgemm_gpu"], [22, "module-fbgemm_gpu"]], "jagged_1d_to_dense() (in module torch.ops.fbgemm)": [[18, "torch.ops.fbgemm.jagged_1d_to_dense"]], "jagged_2d_to_dense() (in module torch.ops.fbgemm)": [[18, "torch.ops.fbgemm.jagged_2d_to_dense"]], "jagged_dense_dense_elementwise_add_jagged_output() (in module torch.ops.fbgemm)": [[18, "torch.ops.fbgemm.jagged_dense_dense_elementwise_add_jagged_output"]], "jagged_dense_elementwise_add() (in module torch.ops.fbgemm)": [[18, "torch.ops.fbgemm.jagged_dense_elementwise_add"]], "jagged_dense_elementwise_add_jagged_output() (in module torch.ops.fbgemm)": [[18, "torch.ops.fbgemm.jagged_dense_elementwise_add_jagged_output"]], "jagged_dense_elementwise_mul() (in module torch.ops.fbgemm)": [[18, "torch.ops.fbgemm.jagged_dense_elementwise_mul"]], "jagged_to_padded_dense() (in module torch.ops.fbgemm)": [[18, "torch.ops.fbgemm.jagged_to_padded_dense"]], "module": [[18, "module-fbgemm_gpu"], [19, "module-fbgemm_gpu"], [20, "module-fbgemm_gpu"], [21, "module-fbgemm_gpu"], [22, "module-fbgemm_gpu"]], "stacked_jagged_1d_to_dense() (in module torch.ops.fbgemm)": [[18, "torch.ops.fbgemm.stacked_jagged_1d_to_dense"]], "stacked_jagged_2d_to_dense() (in module torch.ops.fbgemm)": [[18, "torch.ops.fbgemm.stacked_jagged_2d_to_dense"]], "permutepooledembeddings (class in fbgemm_gpu.permute_pooled_embedding_modules)": [[19, "fbgemm_gpu.permute_pooled_embedding_modules.PermutePooledEmbeddings"]], "__call__() (fbgemm_gpu.permute_pooled_embedding_modules.permutepooledembeddings method)": [[19, "fbgemm_gpu.permute_pooled_embedding_modules.PermutePooledEmbeddings.__call__"]], "merge_pooled_embeddings() (in module torch.ops.fbgemm)": [[20, "torch.ops.fbgemm.merge_pooled_embeddings"]], "permute_pooled_embs() (in module torch.ops.fbgemm)": [[20, "torch.ops.fbgemm.permute_pooled_embs"]], "floatorhalftofusednbitrowwisequantizedsbhalf() (in module torch.ops.fbgemm)": [[21, "torch.ops.fbgemm.FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf"]], "asynchronous_complete_cumsum() (in module torch.ops.fbgemm)": [[22, "torch.ops.fbgemm.asynchronous_complete_cumsum"]], "expand_into_jagged_permute() (in module torch.ops.fbgemm)": [[22, "torch.ops.fbgemm.expand_into_jagged_permute"]], "offsets_range() (in module torch.ops.fbgemm)": [[22, "torch.ops.fbgemm.offsets_range"]], "permute_1d_sparse_data() (in module torch.ops.fbgemm)": [[22, "torch.ops.fbgemm.permute_1D_sparse_data"]], "permute_2d_sparse_data() (in module torch.ops.fbgemm)": [[22, "torch.ops.fbgemm.permute_2D_sparse_data"]], "splittablebatchedembeddingbagscodegen (class in fbgemm_gpu.split_table_batched_embeddings_ops_training)": [[23, "fbgemm_gpu.split_table_batched_embeddings_ops_training.SplitTableBatchedEmbeddingBagsCodegen"]], "forward() (fbgemm_gpu.split_table_batched_embeddings_ops_training.splittablebatchedembeddingbagscodegen method)": [[23, "fbgemm_gpu.split_table_batched_embeddings_ops_training.SplitTableBatchedEmbeddingBagsCodegen.forward"]], "set_learning_rate() (fbgemm_gpu.split_table_batched_embeddings_ops_training.splittablebatchedembeddingbagscodegen method)": [[23, "fbgemm_gpu.split_table_batched_embeddings_ops_training.SplitTableBatchedEmbeddingBagsCodegen.set_learning_rate"]], "set_optimizer_step() (fbgemm_gpu.split_table_batched_embeddings_ops_training.splittablebatchedembeddingbagscodegen method)": [[23, "fbgemm_gpu.split_table_batched_embeddings_ops_training.SplitTableBatchedEmbeddingBagsCodegen.set_optimizer_step"]], "split_embedding_weights() (fbgemm_gpu.split_table_batched_embeddings_ops_training.splittablebatchedembeddingbagscodegen method)": [[23, "fbgemm_gpu.split_table_batched_embeddings_ops_training.SplitTableBatchedEmbeddingBagsCodegen.split_embedding_weights"]], "split_optimizer_states() (fbgemm_gpu.split_table_batched_embeddings_ops_training.splittablebatchedembeddingbagscodegen method)": [[23, "fbgemm_gpu.split_table_batched_embeddings_ops_training.SplitTableBatchedEmbeddingBagsCodegen.split_optimizer_states"]], "update_hyper_parameters() (fbgemm_gpu.split_table_batched_embeddings_ops_training.splittablebatchedembeddingbagscodegen method)": [[23, "fbgemm_gpu.split_table_batched_embeddings_ops_training.SplitTableBatchedEmbeddingBagsCodegen.update_hyper_parameters"]], "example_method (c++ function)": [[27, "_CPPv4I0_NSt6size_tEE14example_method7int32_t1Tf"]], "example_method() (in module fbgemm_gpu.docs.examples)": [[29, "fbgemm_gpu.docs.examples.example_method"]]}}) \ No newline at end of file +Search.setIndex({"docnames": ["fbgemm-cpp-api/QuantUtils", "fbgemm-cpp-api/tbe_cpu_autovec", "fbgemm-development/BuildInstructions", "fbgemm_gpu-cpp-api/embedding_ops", "fbgemm_gpu-cpp-api/experimental_ops", "fbgemm_gpu-cpp-api/input_combine", "fbgemm_gpu-cpp-api/jagged_tensor_ops", "fbgemm_gpu-cpp-api/layout_transform_ops", "fbgemm_gpu-cpp-api/memory_utils", "fbgemm_gpu-cpp-api/merge_pooled_embeddings", "fbgemm_gpu-cpp-api/quantize_ops", "fbgemm_gpu-cpp-api/sparse_ops", "fbgemm_gpu-cpp-api/split_table_batched_embeddings", "fbgemm_gpu-cpp-api/ssd_embedding_ops", "fbgemm_gpu-development/BuildInstructions", "fbgemm_gpu-development/InstallationInstructions", "fbgemm_gpu-development/TestInstructions", "fbgemm_gpu-overview/jagged-tensor-ops/JaggedTensorOps", "fbgemm_gpu-python-api/jagged_tensor_ops", "fbgemm_gpu-python-api/pooled_embedding_modules", "fbgemm_gpu-python-api/pooled_embedding_ops", "fbgemm_gpu-python-api/quantize_ops", "fbgemm_gpu-python-api/sparse_ops", "fbgemm_gpu-python-api/table_batched_embedding_ops", "general/ContactUs", "general/Contributing", "general/License", "general/documentation/Cpp", "general/documentation/Overview", "general/documentation/Python", "general/documentation/Sphinx", "index"], "filenames": ["fbgemm-cpp-api/QuantUtils.rst", "fbgemm-cpp-api/tbe_cpu_autovec.rst", "fbgemm-development/BuildInstructions.rst", "fbgemm_gpu-cpp-api/embedding_ops.rst", "fbgemm_gpu-cpp-api/experimental_ops.rst", "fbgemm_gpu-cpp-api/input_combine.rst", "fbgemm_gpu-cpp-api/jagged_tensor_ops.rst", "fbgemm_gpu-cpp-api/layout_transform_ops.rst", "fbgemm_gpu-cpp-api/memory_utils.rst", "fbgemm_gpu-cpp-api/merge_pooled_embeddings.rst", "fbgemm_gpu-cpp-api/quantize_ops.rst", "fbgemm_gpu-cpp-api/sparse_ops.rst", "fbgemm_gpu-cpp-api/split_table_batched_embeddings.rst", "fbgemm_gpu-cpp-api/ssd_embedding_ops.rst", "fbgemm_gpu-development/BuildInstructions.rst", "fbgemm_gpu-development/InstallationInstructions.rst", "fbgemm_gpu-development/TestInstructions.rst", "fbgemm_gpu-overview/jagged-tensor-ops/JaggedTensorOps.rst", "fbgemm_gpu-python-api/jagged_tensor_ops.rst", "fbgemm_gpu-python-api/pooled_embedding_modules.rst", "fbgemm_gpu-python-api/pooled_embedding_ops.rst", "fbgemm_gpu-python-api/quantize_ops.rst", "fbgemm_gpu-python-api/sparse_ops.rst", "fbgemm_gpu-python-api/table_batched_embedding_ops.rst", "general/ContactUs.rst", "general/Contributing.rst", "general/License.rst", "general/documentation/Cpp.rst", "general/documentation/Overview.rst", "general/documentation/Python.rst", "general/documentation/Sphinx.rst", "index.rst"], "titles": ["Quantization Utilities", "TBE CPU Autovectorization", "Build Instructions", "Embedding Operators", "Experimental Operators", "Combine Input Operators", "Jagged Tensor Operators", "Layout Transformation Operators", "CUDA Memory Operators", "Pooled Embeddings Operators", "Quantization Operators", "Sparse Data Operators", "Table Batched Embedding Operators", "SSD Embedding Operators", "Build Instructions", "Installation Instructions", "Test Instructions", "Jagged Tensor Operators", "Jagged Tensor Operators", "Pooled Embedding Modules", "Pooled Embedding Operators", "Quantization Operators", "Sparse Operators", "Table Batched Embedding (TBE) Training Module", "Contact Us", "Contributing", "License", "Adding Documentation to C++ Code", "Documentation", "Adding Documentation to Python Code", "Sphinx Documentation Pointers", "FBGEMM and FBGEMM_GPU Documentation Homepage"], "terms": {"templat": [0, 1, 14, 27], "typenam": [0, 1, 27], "t": [0, 2, 4, 8, 11, 14, 20, 22, 23, 25, 27, 28], "layout_t": 0, "layout": [0, 31], "kcx": 0, "void": [0, 3, 8, 10, 12, 13], "quantizegroupwis": 0, "const": [0, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 29], "float": [0, 1, 10, 18, 21, 22, 23, 27, 29], "src": 0, "int": [0, 1, 10, 18, 19, 20, 21, 22, 23, 27, 29], "k": [0, 4], "c": [0, 12, 15, 17, 26, 28, 29, 30], "x": [0, 6, 13, 17, 27, 29], "g": [0, 2, 11, 13, 14, 27, 29], "scale": [0, 1, 4, 10], "std": [0, 1, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 14, 27, 29], "int32_t": [0, 1, 27, 29], "zero_point": 0, "dst": 0, "point": [0, 10, 18, 27, 29], "data": [0, 1, 4, 8, 13, 17, 21, 22, 23, 26, 31], "type": [0, 1, 2, 4, 10, 15, 17, 18, 21, 22, 23, 27], "paramet": [0, 1, 4, 8, 10, 11, 13, 18, 19, 20, 21, 22, 23, 27, 28, 29], "output": [0, 1, 4, 6, 10, 11, 13, 18, 19, 20, 21, 22, 23, 27, 29], "int8_t": 0, "uint8_t": [0, 1, 10, 12], "ar": [0, 2, 6, 12, 13, 14, 15, 17, 18, 19, 20, 22, 23, 26, 27, 28, 29], "support": [0, 2, 4, 13, 14, 15, 17, 23, 29, 31], "input": [0, 1, 4, 6, 8, 10, 11, 13, 17, 18, 19, 20, 21, 22, 23, 27, 31], "tensor": [0, 3, 4, 5, 7, 8, 9, 10, 11, 12, 13, 19, 20, 21, 22, 23, 28, 29, 31], "kxc": 0, "correspond": [0, 11, 12, 13, 17, 22, 23, 27, 29], "kcr": 0, "kctr": 0, "weight": [0, 1, 3, 11, 12, 13, 22, 23], "time": [0, 2, 14, 15, 17], "dimens": [0, 4, 6, 8, 11, 17, 18, 19, 20, 21, 22, 23, 29], "krsc": 0, "ktrsc": 0, "channel": [0, 14, 15, 24], "number": [0, 1, 2, 4, 10, 11, 13, 14, 17, 18, 19, 20, 22, 23, 28], "r": [0, 16, 23, 28], "": [0, 2, 8, 14, 16, 17, 25, 27, 28, 29], "group": [0, 4, 17, 27], "function": [0, 2, 13, 14, 23, 27, 29], "perform": [0, 2, 10, 11, 13, 17, 19, 23, 31], "channelwis": 0, "1": [0, 1, 2, 4, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 28, 29, 30], "groupwis": 0, "per": [0, 17, 23], "size": [0, 2, 4, 8, 10, 11, 17, 18, 19, 20, 22, 23], "should": [0, 10, 11, 12, 14, 15, 17, 25, 27, 28, 29], "equal": [0, 17, 22, 23, 29], "zero": [0, 18, 23, 29], "reprsent": 0, "fusedquantizedequant": 0, "int64_t": [0, 1, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13], "len": [0, 17, 20, 23], "tensorquantizationparam": 0, "qparam": 0, "thread_id": 0, "0": [0, 2, 4, 10, 11, 12, 13, 14, 15, 17, 18, 19, 20, 21, 22, 23, 29], "num_thread": 0, "noise_ratio": 0, "0f": 0, "fuse": [0, 10, 23], "integ": [0, 8, 10, 17, 22, 23], "dequant": [0, 10], "kernel": [0, 2, 8, 10, 13, 16, 31], "acceler": 0, "awar": 0, "train": [0, 13, 31], "fp32": [0, 1, 10, 21, 23], "valu": [0, 6, 8, 10, 11, 12, 13, 18, 22, 23, 27, 28, 29], "u": [0, 14, 30, 31], "int8": [0, 21], "us": [0, 1, 2, 4, 8, 11, 13, 14, 15, 16, 17, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31], "provid": [0, 2, 13, 14, 15, 16, 22, 26, 27, 28, 29, 31], "back": [0, 8, 12, 14, 15], "inputtyp": 0, "floatorhalftofusednbitrowwisequantizedsbhalf": [0, 21], "bit_rat": [0, 10, 21], "size_t": [0, 10, 13, 27], "input_row": 0, "input_column": 0, "convert": [0, 8, 10, 13, 17, 18, 21, 29], "fp16": [0, 10, 21, 23], "rowwis": [0, 10, 21, 23], "bitrat": 0, "specifi": [0, 2, 4, 10, 11, 13, 14, 18, 23], "bit": [0, 1, 10, 21], "bia": [0, 1, 4, 10], "each": [0, 1, 4, 10, 11, 13, 14, 17, 18, 19, 20, 22, 23, 29], "row": [0, 1, 6, 10, 12, 13, 17, 18, 19, 20, 23, 29], "store": [0, 10, 11, 12, 13], "itself": [0, 17, 28], "end": [0, 1, 15, 17, 30], "can": [0, 1, 2, 10, 11, 13, 14, 15, 17, 22, 23, 27, 28, 29, 30], "4": [0, 10, 14, 15, 17, 18, 19, 20, 21, 22, 23, 29], "8": [0, 10, 14, 17, 19, 20, 21, 22, 23], "uint32_t": 0, "xor128": 0, "random": [0, 21], "gener": [0, 2, 11, 13, 14, 15, 19, 20, 22, 23, 27, 30], "9": [0, 13, 14, 17, 19, 20, 22, 23], "base": [0, 2, 11, 12, 13, 14, 17, 23], "thi": [0, 2, 6, 8, 9, 10, 11, 13, 14, 15, 17, 19, 22, 23, 24, 25, 26, 27, 29, 30, 31], "paper": 0, "findminmax": 0, "m": [0, 14, 15, 16], "min": 0, "max": [0, 4, 23], "find": [0, 12, 14], "matrix": [0, 2, 18, 31], "bool": [0, 1, 4, 8, 9, 10, 12, 13, 23], "a_symmetr": 0, "b_symmetr": 0, "quantizationgranular": 0, "q_gran": 0, "has_bia": 0, "fuse_relu": 0, "bias_typ": 0, "direct": [0, 12, 15, 26, 27, 29, 30], "fals": [0, 1, 8, 13, 23, 28], "requantizeoutputprocessingavx2": 0, "out": [0, 1, 14, 24, 26, 28], "inp": 0, "block_type_t": 0, "block": [0, 1, 22, 27, 29, 30], "ld_out": 0, "ld_in": 0, "requantizationparams_t": 0, "requant": 0, "avx2": [0, 2], "i": [0, 1, 2, 4, 6, 8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 25, 26, 27, 28, 29, 30, 31], "c_per_g": 0, "requantizeoutputprocessinggconvavx512": 0, "avx512": 0, "intyp": 1, "indextyp": 1, "offsettyp": 1, "outtyp": 1, "embeddingspmdm_autovec": 1, "block_siz": 1, "output_s": [1, 11, 22], "index_s": 1, "data_s": 1, "indic": [1, 3, 12, 13, 17, 20, 22, 23], "offsets_or_length": 1, "normalize_by_length": 1, "is_weight_posit": 1, "use_offset": 1, "true": [1, 8, 13, 23], "output_strid": 1, "input_strid": 1, "scale_bias_last": [1, 10], "no_bag": 1, "is_bf16_out": 1, "is_bf16_in": 1, "version": [1, 2, 15], "embeddingspmdm_ref": 1, "index": [1, 11, 12, 13, 14, 15, 17, 22, 23, 27, 29], "offset": [1, 3, 6, 11, 12, 18, 20, 22, 23], "element": [1, 10, 12, 13, 17, 22, 23], "address": [1, 2, 13, 14], "sum": [1, 4, 11, 13, 18, 19, 20, 22, 23], "option": [1, 2, 3, 6, 8, 12, 14, 18, 19, 22, 23], "null": 1, "non": [1, 4, 8, 23], "whether": [1, 4, 8, 13, 14, 26], "normal": [1, 17], "length": [1, 4, 6, 11, 13, 18, 19, 22, 23, 29], "If": [1, 2, 13, 14, 15, 22, 23, 25, 27, 28, 29], "posit": [1, 4, 11, 13, 19, 20, 22], "set": [1, 8, 12, 13, 16, 17, 18, 23], "instead": [1, 14, 28], "same": [1, 2, 4, 8, 11, 14, 17, 18, 19, 20, 22, 23, 27, 28, 29], "appear": [1, 15], "embed": [1, 2, 14, 15, 22, 28, 31], "bag": [1, 11, 22, 23, 31], "bfloat16": [1, 10], "embeddingspmdmfp8_autovec": 1, "exponent_bit": 1, "exponent_bia": [1, 10], "expon": 1, "note": [2, 12, 14, 15, 22, 23, 27, 28, 29, 30], "The": [2, 4, 8, 10, 11, 13, 15, 16, 17, 18, 19, 20, 22, 23, 25, 27, 28, 29, 30], "most": [2, 14, 15, 17, 28], "date": [2, 14, 15, 28], "script": [2, 14, 15, 28], "bundl": [2, 14, 15, 28], "repo": [2, 14, 15, 28, 29], "under": [2, 14, 15, 25, 26, 28, 29], "setup_env": [2, 14, 15, 28], "bash": [2, 14, 15, 28], "step": [2, 13, 14, 15, 17, 23, 28, 29], "fbgemm_gpu": [2, 8, 14, 17, 19, 23, 24, 25, 26, 27, 29], "follow": [2, 11, 14, 15, 17, 22, 23, 26, 27, 28, 29], "toolchain": [2, 14, 15], "run": [2, 14, 15, 19, 28], "cpu": [2, 8, 9, 16, 22, 23, 28, 31], "higher": 2, "In": [2, 11, 13, 14, 15, 17, 25, 27, 29], "doe": [2, 3, 15, 27, 28, 29], "have": [2, 10, 11, 12, 14, 17, 22, 23, 28], "ani": [2, 11, 14, 18, 22, 25, 26, 28, 29], "intel": 2, "mkl": 2, "howev": [2, 14, 17, 26], "comparison": 2, "some": [2, 14, 17, 28], "benchmark": 2, "found": [2, 14, 15, 28], "path": [2, 13, 14, 16, 27, 30], "through": [2, 25, 27, 29], "intel_mkl_dir": 2, "variabl": [2, 23], "built": [2, 14, 15, 28, 31], "report": [2, 15, 23], "otherwis": [2, 8, 13, 15, 23, 26], "subset": 2, "all": [2, 11, 12, 13, 14, 15, 17, 19, 20, 23, 26, 28], "three": [2, 17], "git": [2, 14], "submodul": [2, 14], "custom": [2, 30], "desir": [2, 14, 17, 18, 27], "thei": [2, 14, 28, 30], "asmjit_src_dir": 2, "cpuinfo_src_dir": 2, "googletest_source_dir": 2, "With": 2, "inner": [2, 17], "take": [2, 14], "one": [2, 4, 10, 11, 12, 14, 18, 23, 27, 29], "doesn": 2, "fit": [2, 26], "approach": 2, "so": [2, 11, 14, 15, 16, 17, 19, 20], "implement": [2, 4, 10, 13, 14, 17, 23], "dynam": 2, "effici": [2, 31], "shape": [2, 4, 17, 19, 20, 22, 23], "specif": [2, 11, 13, 14, 23, 26], "vector": [2, 5, 6, 7, 8, 9, 13, 18, 29], "code": [2, 13, 14, 26, 28], "third": 2, "parti": 2, "call": [2, 8, 13, 15], "detect": [2, 16], "runtim": [2, 14], "pytorch": [2, 13, 17, 24, 28, 29, 31], "project": [2, 25], "dispatch": [2, 8], "optim": [2, 10, 13, 23], "test": [2, 10, 14, 15, 25, 31], "you": [2, 25, 27, 29], "don": [2, 11, 14, 28], "want": [2, 25], "togeth": [2, 27, 28], "default": [2, 11, 14, 15, 23], "turn": [2, 28], "off": [2, 15, 24], "simpli": [2, 14], "fbgemm_build_test": 2, "conda": [2, 16, 28], "For": [2, 16, 17, 22, 24, 26, 27, 28, 29, 30], "platform": [2, 14, 26], "machin": [2, 14, 15, 16, 31], "microsoft": [2, 10], "visual": 2, "studio": 2, "2019": 2, "newer": [2, 14], "recommend": [2, 6, 10, 14, 15, 17, 22], "here": [2, 8, 14, 15, 25, 27, 28, 29, 30], "necessari": [2, 14, 23], "ninja": [2, 14], "etc": [2, 14, 23], "n": [2, 10, 14, 15, 30], "env_nam": [2, 14, 15], "y": [2, 6, 14, 15, 18, 28], "doxygen": [2, 27, 28], "make": [2, 12, 14, 25, 27, 28, 29], "openbla": 2, "packag": [2, 14, 16, 28], "onli": [2, 4, 10, 11, 12, 13, 16, 17, 23, 25, 27, 28, 30], "clone": [2, 14], "along": [2, 14, 15, 19, 20, 22], "its": [2, 8, 10, 11, 14, 22, 26, 28, 30], "insid": [2, 13, 14, 15, 16, 28, 30], "recurs": [2, 14], "http": [2, 14, 15, 25, 27, 28, 29], "github": [2, 14, 25], "com": [2, 14, 25], "cd": [2, 14, 16, 28], "assum": [2, 11, 23], "process": [2, 6, 13, 15, 17, 25, 29], "straightforward": 2, "creat": [2, 8, 14, 17, 25, 27, 29, 30], "directori": [2, 14, 16, 25, 27, 28], "mkdir": 2, "argument": [2, 11, 27, 28, 29], "build_arg": 2, "duse_sanit": 2, "dfbgemm_library_typ": 2, "share": [2, 8], "dpython_execut": 2, "which": [2, 11, 13, 14, 15, 17, 20, 22, 28], "python3": [2, 15], "document": [2, 8, 25, 26], "dfbgemm_build_doc": 2, "ON": [2, 26], "j": [2, 17], "verbos": 2, "As": [2, 11, 14, 15, 17], "write": [2, 13, 14, 15, 28, 29], "fail": [2, 15, 16, 27], "due": [2, 14], "known": [2, 14, 23], "regress": 2, "To": [2, 13, 14, 16, 30], "work": [2, 14, 15, 17, 25], "around": 2, "append": [2, 14, 27, 29], "export": [2, 14, 16], "prior": [2, 14, 15, 26], "cflag": 2, "wno": 2, "error": [2, 10, 15, 23, 27, 28, 29], "mayb": 2, "uniniti": 2, "restrict": 2, "cxxflag": 2, "pleas": [2, 25, 27, 29], "see": [2, 8, 14, 15, 17, 27, 29, 30], "77939": 2, "1094": 2, "1666": 2, "more": [2, 8, 14, 22, 23, 27, 29, 30], "detail": [2, 13, 15], "exactli": 2, "extra": 2, "need": [2, 13, 14, 15, 16, 17, 25, 27, 29, 30], "ad": [2, 14, 25, 28], "invoc": [2, 14, 28], "llvm": [2, 14], "standard": [2, 14], "libc": [2, 14], "openmp": [2, 14], "libomp": 2, "locat": [2, 8, 12, 13, 14, 17], "cc_path": 2, "cxx_path": 2, "dcmake_c_compil": 2, "dcmake_cxx_compil": 2, "dcmake_c_flag": [2, 14], "fopenmp": 2, "stdlib": [2, 14], "conda_prefix": [2, 14], "includ": [2, 9, 13, 14, 26, 27, 29], "dcmake_cxx_flag": [2, 14], "likewis": 2, "also": [2, 13, 14, 23, 30], "veri": [2, 14, 27, 28, 29], "target": [2, 8, 10, 11, 14, 17, 20, 27, 28, 29, 30], "architectur": [2, 14, 15], "bc": [2, 14], "x64": 2, "program": [2, 25], "file": [2, 14, 15, 24, 25, 27, 28, 29, 30], "x86": [2, 31], "enterpris": 2, "vc": 2, "auxiliari": 2, "vcvarsal": 2, "bat": 2, "build_dir": 2, "dfbgemm_build_benchmark": 2, "dcmake_build_typ": 2, "releas": [2, 15], "cl": 2, "ex": 2, "v": [2, 4, 6, 16, 18], "int_nbit_split_embedding_codegen_lookup_funct": 3, "dev_weight": [3, 12], "uvm_weight": [3, 12], "weights_plac": [3, 12], "weights_offset": [3, 12], "weights_ti": [3, 12], "d_offset": [3, 10, 12], "total_d": [3, 12, 23], "max_int2_d": 3, "max_int4_d": 3, "max_int8_d": 3, "max_float16_d": 3, "max_float32_d": 3, "pooling_mod": [3, 23], "indice_weight": 3, "output_dtyp": [3, 10, 23], "lxu_cache_weight": [3, 12, 13], "lxu_cache_loc": [3, 12, 13], "row_align": [3, 12], "max_float8_d": 3, "fp8_exponent_bit": 3, "fp8_exponent_bia": 3, "int_nbit_split_embedding_uvm_caching_codegen_lookup_funct": 3, "cache_hash_size_cumsum": [3, 12], "total_cache_hash_s": [3, 12], "cache_index_table_map": [3, 12], "lxu_cache_st": [3, 12], "lxu_stat": 3, "simlar": 3, "uvm_cach": 3, "lookup": [3, 12, 13, 23], "pruned_hashmap_lookup_cuda": 3, "hash_tabl": 3, "hash_table_offset": 3, "pruned_array_lookup_cuda": 3, "index_remap": 3, "index_remappings_offset": 3, "bounds_check_indices_cuda": 3, "rows_per_t": 3, "bounds_check_mod": [3, 23], "warn": [3, 23, 27], "b_ofset": 3, "max_b": [3, 12], "int_nbit_split_embedding_codegen_lookup_function_cpu": 3, "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu": 3, "pruned_hashmap_insert_unweighted_cpu": 3, "dense_indic": 3, "pruned_hashmap_lookup_unweighted_cpu": 3, "pruned_array_lookup_cpu": 3, "tupl": [4, 5, 6, 11, 12, 13, 22, 23], "gqa_attn_splitk": 4, "xq": 4, "cache_k": 4, "cache_v": 4, "seq_posit": 4, "doubl": [4, 6, 10, 11], "qk_scale": 4, "num_split_k": 4, "kv_cache_quant_num_group": 4, "use_tensor_cor": 4, "cache_logical_dtype_int": 4, "decod": 4, "queri": 4, "split": [4, 23], "w": [4, 16], "bf16": [4, 10], "int4": [4, 10, 21], "kv": 4, "cuda": [4, 9, 19, 20, 21, 22, 23, 31], "gqa": 4, "cach": [4, 12, 13, 14, 23], "It": [4, 13, 14, 15, 17, 19, 20, 22], "current": [4, 13, 14, 15, 17, 23], "context": 4, "16384": 4, "fix": [4, 11, 23], "head": 4, "128": 4, "an": [4, 8, 11, 13, 15, 16, 17, 19, 20, 21, 22, 23, 27, 28, 29, 30], "arbitrari": [4, 13], "b": [4, 11, 14, 17, 18, 22, 23, 27, 28, 29, 30], "h_q": 4, "d": [4, 17, 18, 30], "where": [4, 6, 8, 11, 13, 17, 18, 19, 20, 22, 23], "batch": [4, 6, 11, 17, 18, 19, 20, 22, 31], "num": [4, 22], "max_t": 4, "h_kv": 4, "sequenc": [4, 22, 23], "contain": [4, 8, 13, 14, 17, 18, 19, 20, 22, 23, 29], "actual": [4, 14, 22], "token": [4, 17], "appli": [4, 11, 14, 17, 23], "after": [4, 11, 13, 14, 15, 16, 17, 22, 28, 29, 30], "qk": 4, "control": [4, 23], "amount": [4, 22, 23], "parallel": [4, 13], "wise": [4, 17, 23], "fp8": [4, 10], "quantiz": [4, 31], "singl": [4, 8, 10, 13], "now": 4, "core": 4, "wmma": 4, "instruct": [4, 25, 27, 28, 29, 31], "fast": 4, "kv_cach": 4, "2": [4, 10, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 27, 29, 30], "return": [4, 8, 10, 11, 13, 18, 19, 20, 21, 22, 23, 27, 28, 29], "A": [4, 8, 10, 13, 14, 15, 17, 18, 19, 20, 22, 23, 26, 27, 28, 29], "combin": [4, 31], "metadata": [4, 13, 23], "softmax": 4, "tbe_input_combine_cpu": 5, "indices_list": 5, "offsets_list": 5, "per_sample_weight": [5, 23], "include_last_offset": 5, "padding_fused_tbe_input_combine_cpu": 5, "batch_siz": [5, 19, 20], "solv": 6, "issu": [6, 8, 14, 15, 24], "when": [6, 11, 13, 14, 16, 17, 23, 27, 28, 30], "differ": [6, 11, 13, 17, 20, 22, 23], "often": 6, "occur": [6, 13, 27], "spars": [6, 17, 23, 31], "featur": [6, 11, 14, 17, 19, 20, 22, 23, 24], "system": [6, 14, 15, 17], "well": [6, 11, 14, 27], "natur": [6, 17], "languag": [6, 17, 30], "jagged_to_padded_dense_forward": 6, "c10": [6, 10], "symintarrayref": 6, "max_length": [6, 18], "padding_valu": [6, 18], "jagged_dense_elementwise_add_jagged_output_cuda": 6, "x_valu": [6, 18], "x_offset": [6, 18, 29], "dens": [6, 18, 22, 29], "jagged_to_padded_dens": [6, 18], "jagged_dense_elementwise_add": [6, 18], "jagged_dense_elementwise_mul": [6, 18], "batched_dense_vec_jagged_2d_mul": [6, 18], "a_valu": [6, 18], "a_offset": [6, 18], "dense_to_jag": [6, 18], "symint": 6, "total_l": [6, 18], "jagged_dense_elementwise_add_jagged_output": [6, 18], "jagged_1d_to_dens": [6, 18], "max_l": 6, "jagged_2d_to_dens": [6, 14, 15, 18, 28, 29], "max_sequence_length": [6, 18, 29], "recat_embedding_grad_output_cuda": 7, "grad_output": 7, "num_features_per_rank": 7, "recat_embedding_grad_output_mixed_d_cuda": 7, "dim_sum_per_rank": 7, "recat_embedding_grad_output_mixed_d_batch_cuda": 7, "cumsum_dim_sum_per_rank": 7, "recat_embedding_grad_output_mixed_d_cpu": 7, "new_managed_tensor": 8, "self": [8, 13], "alloc": [8, 23, 27], "unifi": [8, 23], "manag": [8, 14, 15, 23], "uvm": [8, 16, 23], "Then": 8, "prefer": [8, 13, 15], "storag": [8, 10, 12, 13], "host": [8, 14, 20, 22, 23], "establish": 8, "map": [8, 11, 12, 13, 17, 22, 23], "devic": [8, 9, 14, 19, 20, 21, 22, 23], "new": [8, 10, 12, 27, 28, 29], "new_managed_tensor_meta": 8, "placehold": 8, "meta": [8, 26], "kei": [8, 13, 23], "empti": [8, 17, 18, 30], "new_host_mapped_tensor": 8, "new_unified_tensor": 8, "is_host_map": 8, "either": [8, 10, 11, 13, 14, 15, 21], "depend": [8, 10, 14, 15, 17], "new_unified_tensor_meta": 8, "new_vanilla_managed_tensor": 8, "allow": [8, 14], "automat": [8, 11, 16, 22, 28], "uvm_storag": 8, "check": [8, 23], "gpu": [8, 13, 14, 15, 16, 22, 23, 29, 31], "is_uvm_tensor": 8, "BUT": [8, 26], "uvm_to_cpu": 8, "effect": [8, 17], "move": [8, 13, 19, 20, 22], "from": [8, 10, 11, 12, 13, 14, 15, 16, 17, 19, 20, 22, 23, 25, 26, 27, 28, 29, 30], "uvm_to_devic": 8, "prototyp": 8, "whose": 8, "uvm_cuda_mem_advis": 8, "cuda_memory_advis": 8, "cudamemadvis": 8, "cudamemoryadvis": 8, "enum": [8, 10, 13], "avail": [8, 14, 15, 16, 23, 28], "python": [8, 13, 14, 16, 27, 28, 30], "side": [8, 13, 14, 27, 29, 31], "namespac": 8, "over": [8, 14, 22], "valid": 8, "inform": [8, 15, 17, 23, 29, 30], "uvm_cuda_mem_prefetch_async": 8, "device_t": 8, "cudamemprefetchasync": 8, "prefetch": [8, 13, 23], "destin": 8, "uvm_mem_advice_dont_fork": 8, "madvis": 8, "madv_dontfork": 8, "workaround": 8, "driver": [8, 14], "un": 8, "page": [8, 25, 30, 31], "tabl": [8, 11, 17, 31], "fork": [8, 25], "caus": [8, 14, 15, 22, 26, 28], "slowdown": 8, "next": [8, 13, 17, 27, 29], "access": [8, 13, 23], "uvm_to_cpu_clon": 8, "copi": 8, "contigu": [8, 11, 22], "thread": [8, 13], "memcpi": 8, "section": [9, 14, 15, 29], "variou": 9, "all_to_one_devic": 9, "inputtensor": 9, "target_devic": [9, 20], "permute_pooled_embs_split_gpu": 9, "pooled_emb": [9, 19, 20], "offset_dim_list": [9, 20], "permute_list": [9, 20], "inv_offset_dim_list": [9, 20], "inv_permute_list": [9, 20], "permute_pooled_embs_auto_grad_split_gpu": 9, "permute_pooled_embs_auto_grad_gpu": 9, "permute_pooled_embs_cpu_impl": 9, "allow_dupl": 9, "permute_pooled_embs_split_cpu": 9, "permute_pooled_embs_auto_grad_split_cpu": 9, "permute_pooled_embs_auto_grad": [9, 20], "permute_pooled_embs_auto_grad_cpu": 9, "model": [10, 11], "techniqu": 10, "reduc": [10, 13], "larg": [10, 14], "order": [10, 17, 23, 25], "achiev": [10, 15], "better": [10, 13, 27], "small": 10, "loss": [10, 26], "accuraci": 10, "_float_to_bfloat16_gpu": 10, "brain": 10, "_bfloat16_to_float_gpu": 10, "_float_to_fp8rowwise_gpu": 10, "forward": [10, 23], "dtype": [10, 19, 20, 21, 22, 23], "sparsetyp": [10, 23], "throw": [10, 23, 27], "_fp8rowwise_to_float_gpu": 10, "represent": [10, 17], "_float_to_fused8bitrowwise_gpu": 10, "_half_to_fused8bitrowwise_gpu": 10, "half": [10, 21], "_single_or_half_precision_to_fused8bitrowwise_gpu": 10, "_fused8bitrowwise_to_float_gpu": 10, "_fused8bitrowwise_to_half_gpu": 10, "_fused8bitrowwise_to_single_or_half_precision_gpu": 10, "quant_padding_float_typ": 10, "_fused8bitrowwise_to_float_mixed_dim_gpu": 10, "kfloat": 10, "khalf": 10, "_float_to_fusednbitrowwise_gpu": 10, "_half_to_fusednbitrowwise_gpu": 10, "_single_or_half_precision_to_fusednbitrowwise_gpu": 10, "_fusednbitrowwise_to_float_gpu": 10, "_fusednbitrowwise_to_half_gpu": 10, "_fusednbitrowwise_to_single_or_half_precision_gpu": 10, "_float_to_hfp8_gpu": 10, "ebit": 10, "max_po": 10, "hybrid": 10, "hfp8": 10, "_hfp8_to_float_gpu": 10, "_float_to_msfp_gpu": 10, "bounding_box_s": 10, "mbit": 10, "min_po": 10, "msfp": 10, "_msfp_to_float_gpu": 10, "_float_to_paddedfp8rowwise_gpu": 10, "row_dim": 10, "pad": [10, 13, 17, 18, 29], "_paddedfp8rowwise_to_float_gpu": 10, "output_last_dim": 10, "_fused8bitrowwise_to_float_cpu_out": 10, "_float_to_fused8bitrowwise_cpu_out": 10, "float_to_fused8bitrowwise_cpu": 10, "half_to_fused8bitrowwise_cpu": 10, "float_or_half_to_fused8bitrowwise_cpu": 10, "fused8bitrowwise_to_float_cpu": 10, "fused8bitrowwise_to_half_cpu": 10, "fused8bitrowwise_to_float_or_half_cpu": 10, "float_to_fp8rowwise_cpu": 10, "fp8rowwise_to_float_cpu": 10, "fusednbitrowwise_to_float_cpu": 10, "fusednbitrowwise_sbfront_to_float_cpu": 10, "int2": [10, 21], "front": 10, "float32": [10, 19, 20, 21], "torch": [10, 13, 14, 15, 18, 19, 20, 21, 22, 23, 28, 29], "quint4x2": 10, "quint2x4": 10, "quantizedcpu": 10, "backend": [10, 31], "purpos": [10, 17, 18, 23, 26], "becaus": [10, 14, 17, 23], "refer": [10, 14, 17, 22, 28, 29], "rate": [10, 21, 23], "hold": [10, 13, 17], "fusednbitrowwise_to_half_cpu": 10, "fusednbitrowwise_to_float_or_half_cpu": 10, "floattofp8quantized_ref": 10, "nrow": 10, "ncol": 10, "fp8quantizedtofloat_ref": 10, "expand_into_jagged_permute_cuda": 11, "permut": [11, 19, 20, 22], "input_offset": [11, 22], "output_offset": [11, 22], "expand_into_jagged_permut": [11, 22], "expand": [11, 22], "case": [11, 14, 15, 17, 22, 25], "ha": [11, 13, 15, 17, 20, 22, 23, 25, 27, 28], "across": [11, 14, 19, 20, 22], "rank": [11, 17, 22, 23], "level": [11, 22], "exclus": [11, 13, 22], "op": [11, 15, 18, 20, 21, 22, 29], "sit": [11, 22], "we": [11, 13, 14, 17, 25], "deriv": [11, 17, 22, 26], "arrai": [11, 18, 22, 29], "comput": [11, 14, 15, 20, 22, 23], "formula": [11, 22], "output_permut": [11, 22], "table_offset": 11, "bag_offset": [11, 22], "histogram_binning_calibration_cpu": 11, "logit": 11, "bin_num_exampl": 11, "bin_num_posit": 11, "positive_weight": 11, "lower_bound": 11, "upper_bound": 11, "bin_ctr_in_use_aft": 11, "bin_ctr_weight_valu": 11, "divid": [11, 17], "predict": 11, "rang": [11, 13, 17, 22], "e": [11, 13, 14, 17, 19, 20, 27, 29, 30], "bin": [11, 14], "two": [11, 17, 18, 22, 23, 28], "exampl": [11, 13, 14, 15, 16, 18, 19, 20, 21, 22, 23, 27, 28, 29, 30], "fall": [11, 14, 15], "bucket": [11, 14], "basic": [11, 13, 29], "histogram": 11, "result": [11, 13, 14, 18], "statist": [11, 23], "real": 11, "ctr": 11, "num_po": 11, "num_exampl": 11, "final": 11, "calibr": 11, "pre": [11, 15], "cali": 11, "wai": [11, 22, 26], "within": [11, 23], "suffici": [11, 25, 28], "That": 11, "fine": 11, "grain": 11, "modul": [11, 14, 15, 29], "theoret": 11, "layer": [11, 13], "uncalibr": 11, "befor": [11, 13, 14, 30], "sigmoid": 11, "calibart": 11, "pass": [11, 23, 25, 28], "lower": 11, "bound": [11, 17, 23], "calibration_target": 11, "observ": 11, "statisct": 11, "final_calibrated_predict": 11, "bin_ctr_weight": 11, "bin_ctr": 11, "calibrated_predict": 11, "bin_id": 11, "generic_histogram_binning_calibration_by_feature_cpu": 11, "segment_valu": 11, "segment_length": 11, "num_seg": 11, "bin_boundari": 11, "extens": [11, 27, 28], "ectr": 11, "abov": [11, 13, 15, 17, 26, 27, 29, 30], "accept": [11, 25], "sort": [11, 12, 13, 14], "keyjaggedtensor": 11, "num_bin": 11, "longer": [11, 24, 27], "still": [11, 14], "parambin_ctr_weight_valu": 11, "get_unique_indices_cuda": 12, "linear_indic": 12, "max_indic": 12, "compute_count": 12, "dedupl": 12, "get_unique_indices_with_inverse_cuda": 12, "compute_inverse_indic": 12, "lru_cache_find_uncached_cuda": 12, "unique_indic": 12, "unique_indices_length": [12, 13], "time_stamp": 12, "lru_stat": 12, "gather_cache_stat": 12, "uvm_cache_stat": 12, "lock_cache_lin": 12, "lxu_cache_locking_count": 12, "lru": [12, 13, 23], "uncach": [12, 13], "them": 12, "host_lxu_cache_slot": 12, "h_in": 12, "cache_set": [12, 23], "linearize_cache_indices_cuda": 12, "b_offset": 12, "indices_base_offset": 12, "linear": [12, 13], "uniqu": [12, 13, 23, 30], "linearize_cache_indices_from_row_idx_cuda": 12, "update_table_indic": 12, "update_row_indic": 12, "format": [12, 19, 20, 28, 29], "inplac": 12, "updat": [12, 13, 14, 15, 16, 23, 25], "lru_cache_populate_cuda": 12, "hash_size_cumsum": 12, "linear_cache_indic": 12, "stochastic_round": [12, 23], "fetch": [12, 13], "insert": [12, 13, 30], "timestep": 12, "lru_cache_populate_byte_cuda": 12, "byte": [12, 13, 21], "direct_mapped_lru_cache_populate_byte_cuda": 12, "lxu_cache_miss_timestamp": 12, "assoc": 12, "variant": [12, 14, 15, 28], "lfu_cache_populate_cuda": 12, "lfu_stat": 12, "lfu": [12, 23], "lfu_cache_populate_byte_cuda": 12, "lxu_cache_lookup_cuda": 12, "invalid_index": 12, "num_uniq_cache_indic": 12, "lxu_cache_locations_output": 12, "look": [12, 23], "up": [12, 13, 16, 23], "slot": [12, 13], "sentinel": [12, 13], "miss": [12, 13, 14], "direct_mapped_lxu_cache_lookup_cuda": 12, "lxu_cache_flush_cuda": 12, "flush": [12, 13], "reset_weight_momentum_cuda": 12, "momentum1_dev": 12, "momentum1_uvm": 12, "momentum1_plac": 12, "momentum1_offset": 12, "pruned_indic": 12, "pruned_indices_offset": 12, "logical_table_id": 12, "buffer_id": 12, "lxu_cache_locking_counter_decrement_cuda": 12, "decrement": 12, "counter": [12, 23], "lxu_cache_locations_update_cuda": 12, "lxu_cache_locations_new": 12, "rocksdbwritemod": 13, "rocksdb": 13, "mode": [13, 16, 23], "offload": 13, "3": [13, 14, 15, 17, 18, 19, 20, 22, 23, 26, 29], "iter": 13, "fwd_rocksdb_read": 13, "l2": [13, 23], "fwd": 13, "fwd_l1_evict": 13, "l1": 13, "eviciton": 13, "evict": 13, "bwd_l1_cnflct_miss_write_back": 13, "conflict": 13, "bwd": 13, "fill": 13, "potenti": 13, "trigger": 13, "onc": [13, 15, 25], "full": [13, 14, 15, 30], "addition": 13, "do": [13, 14, 15, 23, 25], "io": 13, "enumer": [13, 20], "inlin": [13, 30], "hash_shard": 13, "id": [13, 15], "num_shard": 13, "hash": 13, "shard": 13, "algorithm": [13, 23], "cuda_callback_func": 13, "cudastream_t": 13, "stream": [13, 14, 23], "cudaerror_t": 13, "statu": 13, "functor": 13, "callback": 13, "cudastreamaddcallback": 13, "common": [13, 14, 15, 17, 29], "cudastreamcallback_t": 13, "cast": 13, "invok": [13, 14, 19, 22, 23, 28], "delet": 13, "anoth": [13, 30], "none": [13, 19, 22, 23], "masked_index_put_cuda": 13, "count": 13, "use_pipelin": 13, "preferred_sm": 13, "similar": [13, 14, 17, 23], "index_put": 13, "ignor": [13, 16, 23, 28], "2d": [13, 17, 18, 19, 20, 22, 23, 29], "put": [13, 28], "equival": [13, 17], "filter_": 13, "indices_": 13, "nonzero": 13, "flatten": 13, "1d": [13, 18, 22, 23, 29], "flag": [13, 14, 28], "overlap": 13, "other": [13, 15, 17, 22, 26, 27, 28, 29], "fraction": 13, "sm": 13, "resourc": 13, "competit": 13, "masked_index_select_cuda": 13, "index_select": 13, "ssd_generate_row_addrs_cuda": 13, "assigned_cache_slot": 13, "linear_index_inverse_indic": 13, "unique_indices_count_cumsum": 13, "cache_set_inverse_indic": 13, "inserted_ssd_weight": 13, "cache_set_sorted_unique_indic": 13, "memori": [13, 15, 23, 31], "tbe": [13, 31], "retriev": 13, "scratch": [13, 15], "hbm": [13, 23], "lxu": 13, "associ": 13, "enabl": [13, 14, 16, 23], "conveni": 13, "first": [13, 14, 22, 27, 29, 30], "pointer": [13, 28], "moreov": 13, "list": [13, 14, 17, 18, 19, 20, 23, 26, 27, 29], "post": 13, "backward": [13, 23], "origin": 13, "being": [13, 14, 28], "prefix": [13, 14, 30], "ssd_update_row_addrs_cuda": 13, "ssd_row_addrs_curr": 13, "inserted_ssd_weights_curr_next_map": 13, "lxu_cache_locations_curr": 13, "linear_index_inverse_indices_curr": 13, "unique_indices_count_cumsum_curr": 13, "cache_set_inverse_indices_curr": 13, "inserted_ssd_weights_next": 13, "unique_indices_length_curr": 13, "pipelin": [13, 23], "dure": [13, 14, 17, 23, 29], "reloc": 13, "correct": [13, 14], "between": [13, 17, 27, 28, 30], "been": [13, 14, 27], "compact_indices_cuda": 13, "compact_indic": 13, "compact_count": 13, "mask": 13, "compact": 13, "given": [13, 14, 17, 18], "operat": 13, "remov": 13, "7": [13, 14, 15, 17, 18, 19, 20, 22, 23], "5": [13, 14, 17, 19, 20, 22, 23], "repres": [13, 17, 19, 20, 22, 23], "keep": [13, 14], "class": [13, 19, 23, 28, 29], "cachelibcach": 13, "cachelib_cach": 13, "h": [13, 14, 18, 27], "cachelib": 13, "wrapper": 13, "cachlib": 13, "interact": 13, "maintain": 13, "relat": [13, 17], "initi": 13, "state": [13, 14, 23], "logic": [13, 17, 27], "caller": 13, "reset": 13, "captur": 13, "delai": 13, "markus": 13, "boost": 13, "get": 13, "handl": [13, 17], "read": [13, 17], "done": [13, 14, 15], "embeddingparameterserv": 13, "public": [13, 25, 28], "embeddingkvdb": 13, "ps_table_batched_embed": 13, "servic": [13, 26], "tp": 13, "client": 13, "cachecontext": 13, "kv_db_table_batched_embed": 13, "l2cach": 13, "num_miss": 13, "cached_addr_list": 13, "prealloc": 13, "invalid": [13, 23], "spot": 13, "stai": 13, "struct": 13, "queueitem": 13, "queue": 13, "item": [13, 19, 20, 22, 29], "background": 13, "param": [13, 27, 29], "read_handl": 13, "abstract": 13, "pair": [13, 30], "later": [13, 14], "separ": [13, 23, 28], "get_cach": 13, "monitor": 13, "checkout": 13, "explan": 13, "enable_shared_from_thi": 13, "execut": [13, 15, 16], "dram": [13, 23], "remot": 13, "scalabl": 13, "without": [13, 14, 22, 26], "blow": 13, "subclass": 13, "embeddingrocksdb": 13, "ssd_table_batched_embed": 13, "fbgemm": [14, 15, 18, 20, 21, 22, 24, 25, 26, 28, 29], "experiment": [14, 15, 31], "reproduc": [14, 15, 25, 26], "platform_nam": 14, "unam": 14, "miniconda_prefix": 14, "home": 14, "download": [14, 15], "wget": 14, "q": 14, "anaconda": 14, "miniconda3": 14, "latest": 14, "sh": 14, "o": [14, 15], "p": [14, 20], "load": [14, 17, 29], "shortcut": 14, "bashrc": 14, "command": [14, 15, 27, 28], "against": [14, 16], "env": [14, 15], "name": [14, 15, 23, 26, 27, 29], "python_vers": 14, "12": [14, 17, 19, 20, 22, 23], "upgrad": 14, "pyopenssl": 14, "22": [14, 17, 19, 20], "requir": [14, 15, 16, 17, 23, 28, 29], "recent": [14, 15, 23], "nvcc": 14, "capabl": [14, 16], "bare": 14, "metal": 14, "neither": [14, 26], "nor": [14, 26], "nvidia": [14, 23], "present": [14, 29], "sinc": [14, 17], "pull": [14, 15, 28], "linux": [14, 15], "distribut": [14, 26], "ubuntu": 14, "04": 14, "11": [14, 15, 17, 19, 20, 21], "entrypoint": 14, "devel": 14, "ubuntu22": 14, "rest": [14, 15], "mai": [14, 15, 17, 22, 26], "construct": [14, 15, 17], "mechan": 14, "nvml": 14, "org": [14, 15, 29], "cuda_vers": 14, "label": 14, "verifi": [14, 15, 27, 29], "cuda_runtim": 14, "libnvidia": [14, 15], "ml": [14, 15], "libnccl": [14, 16], "printenv": 14, "extract": 14, "url": [14, 15], "builder": 14, "blob": 14, "main": [14, 25], "install_cuda": 14, "cudnn_url": 14, "redist": 14, "x86_64": 14, "26_cuda12": 14, "archiv": 14, "tar": 14, "xz": 14, "unpack": 14, "xvf": 14, "applic": [14, 15, 23, 27, 29], "alreadi": [14, 15, 25, 27, 29], "repositori": [14, 25], "cmake": 14, "configur": [14, 27], "amd": [14, 15], "minim": 14, "6": [14, 15, 17, 19, 20, 22], "termin": 14, "while": [14, 28], "come": [14, 15], "reason": [14, 15, 28], "oper": [14, 15, 16, 23], "guid": [14, 29], "disabl": 14, "apt": 14, "prompt": 14, "debian_frontend": 14, "noninteract": 14, "db": 14, "radeon": 14, "amdgpu": 14, "focal": 14, "install_5": 14, "50601": 14, "1_all": 14, "deb": 14, "usecas": 14, "hiplibsdk": 14, "dkm": 14, "hipifi": 14, "hip": 14, "dev": 14, "20": [14, 19, 20, 22], "sysroot": 14, "avoid": [14, 22], "glibcxx": 14, "fbgemm_cpu": 14, "10": [14, 15, 17, 19, 20, 23], "older": [14, 15], "accompani": [14, 28], "appropri": 14, "sysroot_linux": 14, "gcc_version": 14, "forg": [14, 28], "gxx_linux": 14, "64": [14, 17], "17": [14, 19, 20, 22], "binari": [14, 26], "cento": 14, "librari": [14, 28, 31], "libstdc": 14, "what": [14, 28], "libcxx_path": 14, "print": [14, 15, 19, 20, 21, 22, 23, 29], "objdump": 14, "tc": 14, "grep": 14, "glibc_": 14, "sed": 14, "vu": 14, "cat": 14, "glibcxx_": 14, "possibl": [14, 17, 25, 26], "just": 14, "minimum": [14, 27, 28, 29], "llvm_version": 14, "16": [14, 17, 19, 20, 21], "libcxx": 14, "outdat": 14, "aarch64": [14, 15], "cannot": 14, "explicitli": [14, 23], "clangxx": 14, "rt": 14, "lib": [14, 15, 16], "ld_library_path": [14, 15, 16], "config": [14, 23], "var": 14, "nvcc_prepend_flag": 14, "correctli": [14, 15, 16, 27, 28], "xcompil": 14, "ccbin": 14, "clangxx_path": 14, "unsupport": 14, "even": [14, 26], "though": [14, 15], "libstd": 14, "mean": [14, 17, 23], "regardless": 14, "scenario": 14, "binpath": 14, "overrid": 14, "exist": [14, 27, 29], "ln": 14, "sf": 14, "path_to_either_gcc_or_clang": 14, "cc": 14, "These": 14, "stage": [14, 17], "click": 14, "hypothesi": [14, 15], "jinja2": 14, "ncurs": 14, "numpi": [14, 15], "scikit": [14, 15], "offici": 14, "homepag": 14, "authorit": [14, 15, 28], "how": [14, 15, 16, 19, 20, 22, 29], "nightli": [14, 15], "rc": 14, "alwai": 14, "reliabl": 14, "arriv": 14, "hour": 14, "than": [14, 17], "window": 14, "silent": [14, 23], "both": [14, 23, 24, 26, 28], "place": [14, 23], "artifact": 14, "select": 14, "thu": [14, 22, 23], "import": [14, 15, 19, 20, 23, 29, 30], "much": [14, 27], "determinist": 14, "whl": [14, 15], "cu121": [14, 15], "rocm5": [14, 15], "ensur": [14, 15, 25], "properli": 14, "__version__": 14, "cuda_cmake_macro": 14, "gemm": 14, "via": [14, 23], "manual": [14, 15, 27], "sha": 14, "pin": 14, "ci": [14, 15], "ci_commit_pin": 14, "txt": [14, 16, 28, 30], "dedb7bdf33": 14, "tag": [14, 27, 30], "fbgemm_vers": 14, "v0": 14, "fbgemm_": 14, "addit": [14, 16, 17, 18], "flow": [14, 23], "becom": 14, "stale": 14, "problem": 14, "re": [14, 15], "attempt": 14, "failur": [14, 15], "clear": [14, 25], "py": [14, 15, 16, 28, 29], "clean": [14, 28], "must": [14, 15, 16, 17, 21, 22, 23, 26, 30], "package_nam": 14, "fbgemm_gpu_": 14, "convent": 14, "major": 14, "minor": 14, "py312": 14, "python_tag": 14, "determin": [14, 17, 23], "processor": 14, "arch": 14, "python_plat_nam": 14, "manylinux2014_": 14, "maco": 14, "macosx_10_9_": 14, "arm64": 14, "macosx_11_0_": 14, "win_": 14, "cpu_onli": 14, "bdist_wheel": 14, "package_vari": 14, "plat": 14, "cxxprefix": 14, "presum": 14, "made": [14, 28], "debug": [14, 16], "assert": 14, "presenc": 14, "unabl": 14, "cudacxx": 14, "cuda_bin_path": 14, "cub": 14, "cub_dir": 14, "header": [14, 27, 30], "cudnn_include_dir": 14, "cudnn_librari": 14, "filepath": 14, "nvml_lib_path": 14, "nccl": [14, 16], "nccl_lib_path": 14, "sm70": [14, 15], "80": 14, "v100": [14, 15], "a100": [14, 15], "cuda_arch_list": 14, "unset": 14, "torch_cuda_arch_list": 14, "preced": 14, "dtorch_cuda_arch_list": 14, "By": [14, 25], "those": [14, 17, 18, 25, 29], "rocm_path": 14, "pytorch_rocm_arch": 14, "gfx906": 14, "gfx908": 14, "gfx90a": 14, "wiki": 14, "gentoo": 14, "rocminfo": 14, "gfx": 14, "dhip_root_dir": 14, "dtorch_use_hip_dsa": 14, "complet": [14, 20, 22, 25, 28], "lot": 14, "jinja": 14, "instanti": [14, 19], "sure": [14, 25, 27, 29], "accident": 14, "cours": 14, "fbgemm_gpu_lib_path": 14, "fbgemm_gpu_pi": [14, 15], "defin": [14, 17, 27], "nm": 14, "gdcu": 14, "referenc": 14, "certain": 14, "gdc": 14, "merge_pooled_embed": [14, 15, 20], "isol": [15, 28], "build": [15, 16, 27, 29, 31], "sm80": 15, "respect": 15, "guarante": 15, "especi": 15, "displai": [15, 30], "setup": 15, "smi": 15, "515": 15, "76": [15, 22], "persist": 15, "bu": [15, 30], "disp": 15, "volatil": 15, "uncorr": 15, "ecc": 15, "fan": 15, "temp": 15, "perf": 15, "pwr": 15, "usag": [15, 28, 29], "cap": 15, "util": [15, 31], "mig": 15, "a10g": 15, "00000000": 15, "00": 15, "1e": [15, 23], "31c": 15, "p0": 15, "59w": 15, "300w": 15, "0mib": 15, "23028mib": 15, "gi": 15, "pid": 15, "No": [15, 23], "expos": 15, "imag": 15, "launch": 15, "toolkit": 15, "interfac": 15, "concis": 15, "info": [15, 27, 29], "dieedg": 15, "avgpwr": 15, "sclk": 15, "mclk": 15, "pwrcap": 15, "vram": 15, "33": [15, 19, 20, 22], "0c": 15, "37": [15, 19, 20], "0w": 15, "300mhz": 15, "1200mhz": 15, "auto": [15, 28], "290": 15, "32": [15, 19, 20], "39": [15, 19, 20], "log": 15, "difficult": 15, "relev": [15, 27], "genai": 15, "triton_vers": 15, "45fff310c8": 15, "about": [15, 29], "link": [15, 28], "encount": [15, 23], "signatur": [15, 28], "traceback": 15, "last": [15, 22], "root": [15, 25], "miniconda": 15, "mycondaenv": 15, "site": 15, "_op": [15, 28], "line": [15, 29, 30], "565": 15, "__getattr__": 15, "overload_nam": 15, "_c": 15, "_jit_get_oper": 15, "qualified_op_nam": 15, "runtimeerror": 15, "except": [15, 27, 29], "wa": 15, "string": [15, 30], "post47": 15, "py3": 15, "egg": 15, "__init__": [15, 29], "21": [15, 19, 20, 22], "_fbgemm_gpu_doc": 15, "noqa": 15, "f401": 15, "e402": 15, "18": [15, 19, 20, 22], "569": 15, "rais": [15, 29], "attributeerror": [15, 29], "_opnamespac": 15, "object": [15, 17], "attribut": [15, 29], "cli": 15, "main_run": 15, "47": [15, 19, 20, 22], "_zn6fbgemm48floatorhalftofusednbitrowwisequantizedsbhalfavx2itli2eeevpkt_miph": 15, "libtorch": 15, "visibl": 15, "incorrectli": [15, 28], "declar": [15, 27], "were": [15, 18], "pr": [15, 27, 28, 29], "1618": 15, "former": 15, "resolv": 15, "latter": 15, "seriou": 15, "tha": 15, "develop": [15, 28], "bench": 16, "good": [16, 26], "instal": [16, 28, 31], "pip": [16, 28], "pytest": 16, "rsx": 16, "pytestcollectionwarn": 16, "split_table_batched_embeddings_test": 16, "quantize_ops_test": 16, "sparse_ops_test": 16, "split_embedding_inference_converter_test": 16, "cuda_visible_devic": 16, "cuda_launch_block": 16, "involv": [16, 17], "rpath": 16, "fbgemm_test_with_rocm": 16, "hip_launch_block": 16, "split_table_batched_embeddings_benchmark": 16, "consecut": 17, "nestedtensor": 17, "raggedtensor": 17, "tensorflow": 17, "notabl": 17, "sentenc": 17, "maxlength": 17, "numel": [17, 22], "greatest": 17, "divisor": 17, "smallest": 17, "sub": 17, "exclud": 17, "partit": 17, "impli": [17, 26], "denot": [17, 27, 29], "offest": 17, "outer": 17, "would": 17, "begin": 17, "maximum": [17, 18, 29], "densor": 17, "form": [17, 26], "figur": 17, "below": 17, "show": [17, 23, 28], "accomod": 17, "At": [17, 27, 28, 29], "multipl": [17, 18, 23, 29, 31], "hadamard": 17, "product": [17, 26], "bmatrix": 17, "rightarrow": 17, "25": [17, 19, 20, 22], "36": [17, 19, 20], "49": 17, "81": 17, "50": 17, "operand": 17, "word": 17, "ax": 17, "properti": 17, "elementwis": [17, 18], "start": [17, 18, 20, 29, 30], "dim": [17, 19, 20, 22], "onto": 17, "part": 17, "everi": [17, 22, 23], "converson": 17, "could": 17, "lead": 17, "smaller": 17, "expect": 17, "happen": 17, "give": 17, "situat": 17, "like": 17, "dense_tensor": 17, "jagged_tensor": 17, "break": 17, "exact": 17, "usual": 17, "area": 18, "outsid": 18, "coverag": 18, "total": [18, 19, 20, 22, 23], "identit": 18, "add": [18, 25, 27, 28, 29], "structur": 18, "jagged_dense_dense_elementwise_add_jagged_output": 18, "y_0": 18, "y_1": 18, "multipli": [18, 23], "max_n": 18, "matmul": 18, "stacked_jagged_1d_to_dens": 18, "arg": [18, 23, 29], "kwarg": 18, "stacked_jagged_2d_to_dens": 18, "permute_pooled_embedding_modul": 19, "permutepooledembed": 19, "embs_dim": [19, 20], "sourc": [19, 23, 25, 26, 27, 28, 29], "column": [19, 20], "essenti": [19, 20], "second": [19, 20, 22, 27, 29], "suppos": [19, 20], "int64": [19, 20, 22], "perm": 19, "arang": [19, 20], "reshap": [19, 20], "13": [19, 20, 23], "14": [19, 20, 29], "15": [19, 20, 22], "19": [19, 20, 22], "23": [19, 20, 22], "24": [19, 20, 22], "26": [19, 20, 21], "27": [19, 20, 22], "28": [19, 20], "29": [19, 20, 22], "30": [19, 20, 22], "31": [19, 20, 22], "34": [19, 20, 22], "35": [19, 20], "38": [19, 20, 22], "40": [19, 20, 22], "41": [19, 20], "42": [19, 20, 22, 29], "43": [19, 20], "44": [19, 20], "45": [19, 20], "46": [19, 20, 22], "describ": [19, 20, 22, 23, 25], "__call__": 19, "b_local": [19, 20], "total_global_d": [19, 20], "local": [19, 20, 27, 29], "global": [19, 20, 23], "pooled_embed": 20, "uncat_dim_s": 20, "cat_dim": 20, "concaten": 20, "vice": 20, "versa": 20, "aggreg": 20, "permute_pooled_emb": 20, "cumul": [20, 22], "invers": 20, "itertool": 20, "accumul": 20, "inv_embs_dim": 20, "inv_permut": 20, "uint8": 21, "randn": 21, "8247": 21, "0031": 21, "0068": 21, "2081": 21, "5427": 21, "5772": 21, "0291": 21, "7626": 21, "159": 21, "86": 21, "48": [21, 22], "213": 21, "188": 21, "248": 21, "254": 21, "186": 21, "permute_2d_sparse_data": 22, "permuted_lengths_sum": 22, "3d": 22, "jag": [22, 29, 31], "less": 22, "repetit": 22, "sampl": [22, 23], "synchron": 22, "suppli": [22, 30], "int32": 22, "randint": 22, "low": [22, 31], "high": [22, 31], "100": 22, "61": 22, "98": 22, "56": 22, "94": 22, "89": 22, "65": 22, "71": 22, "54": 22, "78": 22, "68": 22, "60": 22, "51": 22, "52": 22, "97": 22, "66": 22, "permute_1d_sparse_data": 22, "referr": 22, "withh": 22, "84": 22, "feature_offset": 22, "asynchronous_complete_cumsum": 22, "t_in": 22, "nonblock": 22, "asynchron": 22, "offsets_rang": 22, "range_s": 22, "split_table_batched_embeddings_ops_train": 23, "splittablebatchedembeddingbagscodegen": 23, "embedding_spec": 23, "embeddingloc": 23, "computedevic": 23, "feature_table_map": 23, "cache_algorithm": 23, "cachealgorithm": 23, "cache_load_factor": 23, "cache_reserved_memori": 23, "cache_precis": 23, "weights_precis": 23, "enforce_hbm": 23, "emboptimtyp": 23, "exact_sgd": 23, "record_cache_metr": 23, "recordcachemetr": 23, "gather_uvm_cache_stat": 23, "gradient_clip": 23, "max_gradi": 23, "max_norm": 23, "learning_r": 23, "01": 23, "ep": 23, "08": 23, "momentum": 23, "weight_decai": 23, "weight_decay_mod": 23, "weightdecaymod": 23, "eta": 23, "001": 23, "beta1": 23, "beta2": 23, "999": 23, "step_ema": 23, "10000": 23, "step_swap": 23, "step_start": 23, "step_mod": 23, "stepmod": 23, "use_it": 23, "counter_based_regular": 23, "counterbasedregularizationdefinit": 23, "cowclip_regular": 23, "cowclipdefinit": 23, "poolingmod": 23, "str": 23, "boundscheckmod": 23, "uvm_non_rowwise_momentum": 23, "use_experimental_tb": 23, "prefetch_pipelin": 23, "stats_reporter_config": 23, "tbestatsreporterconfig": 23, "table_nam": 23, "optimizer_state_dtyp": 23, "dict": 23, "multipass_prefetch_config": 23, "multipassprefetchconfig": 23, "global_weight_decai": 23, "globalweightdecaydefinit": 23, "uvm_host_map": 23, "spec": 23, "physic": 23, "placement": 23, "virtual": 23, "managed_cach": 23, "mtia": 23, "least": 23, "frequent": 23, "factor": 23, "capac": 23, "reserv": [23, 26], "optimtyp": 23, "adam": 23, "exact_adagrad": 23, "adagrad": 23, "exact_rowwise_adagrad": 23, "aadagrad": 23, "sgd": 23, "lamb": 23, "lars_sgd": 23, "lar": 23, "partial_rowwise_adam": 23, "partial": 23, "partial_rowwise_lamb": 23, "ensemble_rowwise_adagrad": 23, "ensembl": 23, "Not": 23, "gradient": 23, "record": 23, "hit": 23, "request": [23, 24, 28], "record_cache_miss_count": 23, "metric": 23, "record_tablewise_cache_miss": 23, "collect": [23, 31], "stochast": 23, "round": 23, "clip": 23, "norm": 23, "learn": 23, "0e": 23, "epsilon": 23, "nn": 23, "decai": 23, "decoupl": 23, "pool": [23, 31], "union": 23, "skip": 23, "fatal": 23, "messag": 23, "adjust": 23, "v2": 23, "polici": 23, "forward_stream": 23, "stat": 23, "multipass": 23, "malloc": 23, "cudahostregist": 23, "cudamallocmanag": 23, "feature_requires_grad": 23, "batch_size_per_feature_per_rank": 23, "total_unique_indic": 23, "vbe": 23, "user": 23, "autograd": 23, "chosen": 23, "conatin": 23, "unweight": 23, "f": 23, "split_table_batched_embeddings_ops_common": 23, "init_embedding_weights_uniform": 23, "split_embedding_weight": 23, "9426": 23, "7046": 23, "4214": 23, "0419": 23, "1331": 23, "7856": 23, "8124": 23, "2021": 23, "5771": 23, "5911": 23, "7792": 23, "1068": 23, "6203": 23, "4813": 23, "1677": 23, "4790": 23, "5587": 23, "0941": 23, "5754": 23, "3475": 23, "8952": 23, "1964": 23, "0810": 23, "4174": 23, "2513": 23, "4039": 23, "3775": 23, "3273": 23, "5399": 23, "0229": 23, "1455": 23, "8770": 23, "9520": 23, "4593": 23, "7169": 23, "6307": 23, "1765": 23, "8757": 23, "8614": 23, "2051": 23, "0603": 23, "9980": 23, "7958": 23, "5826": 23, "long": 23, "5197": 23, "2957": 23, "3578": 23, "1487": 23, "4873": 23, "3044": 23, "9801": 23, "2769": 23, "7164": 23, "8528": 23, "7159": 23, "6719": 23, "0784": 23, "2016": 23, "2176": 23, "1988": 23, "3825": 23, "5008": 23, "8991": 23, "1405": 23, "2637": 23, "9427": 23, "8902": 23, "3754": 23, "5013": 23, "6105": 23, "9968": 23, "3057": 23, "7621": 23, "9821": 23, "7314": 23, "6195": 23, "grad_fn": 23, "cppnode": 23, "splitlookupfunction_sgd_op": 23, "set_learning_r": 23, "lr": 23, "set_optimizer_step": 23, "setp": 23, "view": [23, 28], "split_optimizer_st": 23, "momentum1": 23, "momentum2": 23, "prev_it": 23, "cowclip": 23, "row_count": 23, "update_hyper_paramet": 23, "params_dict": 23, "hyper": 23, "extern": [23, 30], "question": 24, "concern": 24, "discuss": 24, "kick": 24, "regard": 24, "feel": 24, "free": 24, "reach": 24, "easi": 25, "transpar": 25, "activ": 25, "welcom": [25, 31], "your": [25, 28, 29], "branch": 25, "ve": 25, "chang": [25, 27, 29], "api": [25, 27, 28, 29], "suit": 25, "lint": 25, "haven": 25, "submit": [25, 27, 29], "facebook": [25, 26, 31], "open": 25, "track": 25, "bug": 25, "descript": [25, 27, 28, 29, 30], "abl": 25, "bounti": 25, "safe": 25, "disclosur": 25, "secur": 25, "go": 25, "outlin": 25, "agre": 25, "tree": 25, "claus": 26, "bsd": 26, "softwar": 26, "copyright": 26, "inc": 26, "affili": 26, "right": [26, 30], "redistribut": 26, "modif": 26, "permit": 26, "condit": 26, "met": 26, "retain": 26, "notic": 26, "disclaim": 26, "materi": 26, "contributor": 26, "endors": 26, "promot": 26, "written": 26, "permiss": 26, "BY": 26, "THE": 26, "holder": 26, "AND": 26, "AS": 26, "express": [26, 30], "OR": 26, "warranti": 26, "NOT": 26, "limit": [26, 28], "TO": 26, "OF": 26, "merchant": 26, "FOR": 26, "particular": 26, "IN": 26, "NO": 26, "event": 26, "shall": 26, "BE": 26, "liabl": 26, "indirect": 26, "incident": 26, "special": 26, "exemplari": 26, "consequenti": 26, "damag": 26, "procur": 26, "substitut": 26, "profit": 26, "busi": 26, "interrupt": 26, "theori": 26, "liabil": 26, "contract": 26, "strict": 26, "tort": 26, "neglig": 26, "aris": 26, "IF": 26, "advis": 26, "SUCH": 26, "javadoc": 27, "style": [27, 29], "comment": [27, 28, 30], "sphinx": [27, 28, 29], "breath": 27, "kept": 27, "cpp": [27, 29, 30], "cu": 27, "cuh": 27, "everyth": 27, "ifndef": 27, "doxygen_this_will_be_skip": 27, "endif": 27, "hidden": 27, "html": [27, 28, 29], "descriptionss": 27, "publish": [27, 29], "docstr": [27, 28, 29], "method": [27, 28, 29], "organ": 27, "yet": 27, "top": [27, 31], "defgroup": 27, "directli": [27, 29], "behavior": [27, 29], "tparam": 27, "thrown": [27, 29], "ingroup": 27, "brief": 27, "short": 27, "example_method": [27, 29], "def": [27, 29], "foo": [27, 29], "lst": [27, 29], "And": [27, 29], "verbatim": [27, 29], "text": [27, 29, 30], "diagram": [27, 29], "unpars": 27, "prev": [27, 29], "usabl": [27, 29], "space": [27, 28, 29], "endcod": 27, "align": [27, 29], "param1": [27, 29], "param2": 27, "bad_alloc": 27, "logic_error": 27, "href": 27, "www": [27, 29], "nl": 27, "cmdlink": 27, "On": [27, 29], "doxygengroup": 27, "rst": [27, 29, 30], "content": [27, 30, 31], "toctre": [27, 29], "ini": 27, "taken": 27, "care": 27, "doc": [27, 28, 29, 30], "netlifi": [27, 28, 29], "preview": [27, 29], "serv": 28, "yourself": 28, "shoe": 28, "who": 28, "understand": 28, "live": 28, "easier": 28, "leav": 28, "task": 28, "tool": 28, "graphviz": [28, 30], "assembl": 28, "prepend": 28, "sphinx_lint": 28, "technic": 28, "why": 28, "occasion": 28, "unresolv": 28, "might": 28, "opt": 28, "pycapsul": 28, "neg": 28, "silenc": 28, "nitpick": 28, "conf": 28, "domain": 28, "deploi": 28, "app": 28, "googl": 29, "c_size_t": 29, "ret": 29, "emplace_back": 29, "valueerror": 29, "restructuredtext": 29, "en": 29, "master": 29, "__": 29, "pep": 29, "0287": 29, "autofunct": 29, "toc": 29, "c_ulong": 29, "mani": 29, "attach": 29, "fact": 29, "helper": 29, "codebas": 29, "add_doc": 29, "forc": 29, "hoc": 29, "the_new_doc_modul": 29, "remain": 29, "render": [29, 30], "anchor": 30, "_doc": 30, "underscor": 30, "_": 30, "There": 30, "elsewher": 30, "ref": 30, "literalinclud": 30, "rel": 30, "enclos": 30, "bracket": 30, "skiplin": 30, "math": 30, "k_": 30, "k_n": 30, "expressino": 30, "int_a": 30, "frac": 30, "2v": 30, "dx": 30, "left": 30, "dv": 30, "_a": 30, "du": 30, "digraph": 30, "altern": 30, "dot": 30, "examplegraph": 30, "precis": 31, "convolut": 31, "server": 31, "infer": 31, "transform": 31, "contribut": 31, "contact": 31, "licens": 31, "autovector": 31, "ssd": 31}, "objects": {"": [[13, 0, 1, "_CPPv4N16RocksdbWriteMode29BWD_L1_CNFLCT_MISS_WRITE_BACKE", "BWD_L1_CNFLCT_MISS_WRITE_BACK"], [1, 1, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec"], [1, 2, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec::IndexType"], [1, 2, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec::OffsetType"], [1, 2, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec::OutType"], [1, 3, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec::block_size"], [1, 3, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec::data_size"], [1, 3, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec::exponent_bias"], [1, 3, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec::exponent_bits"], [1, 3, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec::index_size"], [1, 3, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec::indices"], [1, 3, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec::input"], [1, 3, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec::input_stride"], [1, 3, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec::is_bf16_out"], [1, 3, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec::is_weight_positional"], [1, 3, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec::normalize_by_lengths"], [1, 3, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec::offsets_or_lengths"], [1, 3, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec::out"], [1, 3, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec::output_size"], [1, 3, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec::output_stride"], [1, 3, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec::use_offsets"], [1, 3, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec::weights"], [1, 1, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec"], [1, 2, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::InType"], [1, 2, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::IndexType"], [1, 2, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::OffsetType"], [1, 2, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::OutType"], [1, 3, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::block_size"], [1, 3, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::data_size"], [1, 3, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::index_size"], [1, 3, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::indices"], [1, 3, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::input"], [1, 3, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::input_stride"], [1, 3, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::is_bf16_in"], [1, 3, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::is_bf16_out"], [1, 3, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::is_weight_positional"], [1, 3, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::no_bag"], [1, 3, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::normalize_by_lengths"], [1, 3, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::offsets_or_lengths"], [1, 3, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::out"], [1, 3, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::output_size"], [1, 3, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::output_stride"], [1, 3, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::scale_bias_last"], [1, 3, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::use_offsets"], [1, 3, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::weights"], [13, 0, 1, "_CPPv4N16RocksdbWriteMode5FLUSHE", "FLUSH"], [10, 1, 1, "_CPPv423FP8QuantizedToFloat_refPCK7uint8_tK6size_tK6size_tPCfKiKi", "FP8QuantizedToFloat_ref"], [10, 3, 1, "_CPPv423FP8QuantizedToFloat_refPCK7uint8_tK6size_tK6size_tPCfKiKi", "FP8QuantizedToFloat_ref::ebits"], [10, 3, 1, "_CPPv423FP8QuantizedToFloat_refPCK7uint8_tK6size_tK6size_tPCfKiKi", "FP8QuantizedToFloat_ref::exponent_bias"], [10, 3, 1, "_CPPv423FP8QuantizedToFloat_refPCK7uint8_tK6size_tK6size_tPCfKiKi", "FP8QuantizedToFloat_ref::input"], [10, 3, 1, "_CPPv423FP8QuantizedToFloat_refPCK7uint8_tK6size_tK6size_tPCfKiKi", "FP8QuantizedToFloat_ref::ncols"], [10, 3, 1, "_CPPv423FP8QuantizedToFloat_refPCK7uint8_tK6size_tK6size_tPCfKiKi", "FP8QuantizedToFloat_ref::nrows"], [10, 3, 1, "_CPPv423FP8QuantizedToFloat_refPCK7uint8_tK6size_tK6size_tPCfKiKi", "FP8QuantizedToFloat_ref::output"], [10, 1, 1, "_CPPv423FP8rowwise_to_float_cpuRK6TensorbK7int64_t", "FP8rowwise_to_float_cpu"], [10, 3, 1, "_CPPv423FP8rowwise_to_float_cpuRK6TensorbK7int64_t", "FP8rowwise_to_float_cpu::forward"], [10, 3, 1, "_CPPv423FP8rowwise_to_float_cpuRK6TensorbK7int64_t", "FP8rowwise_to_float_cpu::input"], [10, 3, 1, "_CPPv423FP8rowwise_to_float_cpuRK6TensorbK7int64_t", "FP8rowwise_to_float_cpu::output_dtype"], [13, 0, 1, "_CPPv4N16RocksdbWriteMode15FWD_L1_EVICTIONE", "FWD_L1_EVICTION"], [13, 0, 1, "_CPPv4N16RocksdbWriteMode16FWD_ROCKSDB_READE", "FWD_ROCKSDB_READ"], [0, 1, 1, "_CPPv410FindMinMaxPKfPfPf7int64_t", "FindMinMax"], [0, 3, 1, "_CPPv410FindMinMaxPKfPfPf7int64_t", "FindMinMax::len"], [0, 3, 1, "_CPPv410FindMinMaxPKfPfPf7int64_t", "FindMinMax::m"], [0, 3, 1, "_CPPv410FindMinMaxPKfPfPf7int64_t", "FindMinMax::max"], [0, 3, 1, "_CPPv410FindMinMaxPKfPfPf7int64_t", "FindMinMax::min"], [0, 1, 1, "_CPPv4I0E44FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfviPK9InputType6size_tiPNSt7uint8_tE", "FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf"], [0, 2, 1, "_CPPv4I0E44FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfviPK9InputType6size_tiPNSt7uint8_tE", "FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf::InputType"], [0, 3, 1, "_CPPv4I0E44FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfviPK9InputType6size_tiPNSt7uint8_tE", "FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf::bit_rate"], [0, 3, 1, "_CPPv4I0E44FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfviPK9InputType6size_tiPNSt7uint8_tE", "FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf::input"], [0, 3, 1, "_CPPv4I0E44FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfviPK9InputType6size_tiPNSt7uint8_tE", "FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf::input_columns"], [0, 3, 1, "_CPPv4I0E44FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfviPK9InputType6size_tiPNSt7uint8_tE", "FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf::input_rows"], [0, 3, 1, "_CPPv4I0E44FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfviPK9InputType6size_tiPNSt7uint8_tE", "FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf::output"], [10, 1, 1, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd", "FloatToFP8Quantized_ref"], [10, 3, 1, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd", "FloatToFP8Quantized_ref::ebits"], [10, 3, 1, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd", "FloatToFP8Quantized_ref::exponent_bias"], [10, 3, 1, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd", "FloatToFP8Quantized_ref::input"], [10, 3, 1, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd", "FloatToFP8Quantized_ref::max_pos"], [10, 3, 1, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd", "FloatToFP8Quantized_ref::ncols"], [10, 3, 1, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd", "FloatToFP8Quantized_ref::nrows"], [10, 3, 1, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd", "FloatToFP8Quantized_ref::output"], [0, 1, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize"], [0, 2, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize::T"], [0, 3, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize::dst"], [0, 3, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize::len"], [0, 3, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize::noise_ratio"], [0, 3, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize::num_threads"], [0, 3, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize::qparams"], [0, 3, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize::src"], [0, 3, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize::thread_id"], [0, 1, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise"], [0, 3, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::C"], [0, 3, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::G"], [0, 3, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::K"], [0, 2, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::LAYOUT"], [0, 2, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::T"], [0, 3, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::X"], [0, 3, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::dst"], [0, 3, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::scales"], [0, 3, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::src"], [0, 3, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::zero_points"], [13, 4, 1, "_CPPv416RocksdbWriteMode", "RocksdbWriteMode"], [13, 0, 1, "_CPPv4N16RocksdbWriteMode29BWD_L1_CNFLCT_MISS_WRITE_BACKE", "RocksdbWriteMode::BWD_L1_CNFLCT_MISS_WRITE_BACK"], [13, 0, 1, "_CPPv4N16RocksdbWriteMode5FLUSHE", "RocksdbWriteMode::FLUSH"], [13, 0, 1, "_CPPv4N16RocksdbWriteMode15FWD_L1_EVICTIONE", "RocksdbWriteMode::FWD_L1_EVICTION"], [13, 0, 1, "_CPPv4N16RocksdbWriteMode16FWD_ROCKSDB_READE", "RocksdbWriteMode::FWD_ROCKSDB_READ"], [0, 1, 1, "_CPPv46Xor128v", "Xor128"], [10, 1, 1, "_CPPv424_FP8rowwise_to_float_gpuRKN2at6TensorEbK7int64_t", "_FP8rowwise_to_float_gpu"], [10, 3, 1, "_CPPv424_FP8rowwise_to_float_gpuRKN2at6TensorEbK7int64_t", "_FP8rowwise_to_float_gpu::forward"], [10, 3, 1, "_CPPv424_FP8rowwise_to_float_gpuRKN2at6TensorEbK7int64_t", "_FP8rowwise_to_float_gpu::input"], [10, 3, 1, "_CPPv424_FP8rowwise_to_float_gpuRKN2at6TensorEbK7int64_t", "_FP8rowwise_to_float_gpu::output_dtype"], [10, 1, 1, "_CPPv422_bfloat16_to_float_gpuRKN2at6TensorE", "_bfloat16_to_float_gpu"], [10, 3, 1, "_CPPv422_bfloat16_to_float_gpuRKN2at6TensorE", "_bfloat16_to_float_gpu::input"], [10, 1, 1, "_CPPv424_float_to_FP8rowwise_gpuRK6TensorKb", "_float_to_FP8rowwise_gpu"], [10, 3, 1, "_CPPv424_float_to_FP8rowwise_gpuRK6TensorKb", "_float_to_FP8rowwise_gpu::forward"], [10, 3, 1, "_CPPv424_float_to_FP8rowwise_gpuRK6TensorKb", "_float_to_FP8rowwise_gpu::input"], [10, 1, 1, "_CPPv422_float_to_bfloat16_gpuRKN2at6TensorE", "_float_to_bfloat16_gpu"], [10, 3, 1, "_CPPv422_float_to_bfloat16_gpuRKN2at6TensorE", "_float_to_bfloat16_gpu::input"], [10, 1, 1, "_CPPv434_float_to_fused8bitrowwise_cpu_outR6TensorRK6Tensor", "_float_to_fused8bitrowwise_cpu_out"], [10, 3, 1, "_CPPv434_float_to_fused8bitrowwise_cpu_outR6TensorRK6Tensor", "_float_to_fused8bitrowwise_cpu_out::input"], [10, 3, 1, "_CPPv434_float_to_fused8bitrowwise_cpu_outR6TensorRK6Tensor", "_float_to_fused8bitrowwise_cpu_out::output"], [10, 1, 1, "_CPPv430_float_to_fused8bitrowwise_gpuRK6Tensor", "_float_to_fused8bitrowwise_gpu"], [10, 3, 1, "_CPPv430_float_to_fused8bitrowwise_gpuRK6Tensor", "_float_to_fused8bitrowwise_gpu::input"], [10, 1, 1, "_CPPv430_float_to_fusednbitrowwise_gpuRK6TensorK7int64_t", "_float_to_fusednbitrowwise_gpu"], [10, 3, 1, "_CPPv430_float_to_fusednbitrowwise_gpuRK6TensorK7int64_t", "_float_to_fusednbitrowwise_gpu::bit_rate"], [10, 3, 1, "_CPPv430_float_to_fusednbitrowwise_gpuRK6TensorK7int64_t", "_float_to_fusednbitrowwise_gpu::input"], [10, 1, 1, "_CPPv418_float_to_hfp8_gpuRKN2at6TensorEK7int64_tK7int64_tKd", "_float_to_hfp8_gpu"], [10, 3, 1, "_CPPv418_float_to_hfp8_gpuRKN2at6TensorEK7int64_tK7int64_tKd", "_float_to_hfp8_gpu::ebits"], [10, 3, 1, "_CPPv418_float_to_hfp8_gpuRKN2at6TensorEK7int64_tK7int64_tKd", "_float_to_hfp8_gpu::exponent_bias"], [10, 3, 1, "_CPPv418_float_to_hfp8_gpuRKN2at6TensorEK7int64_tK7int64_tKd", "_float_to_hfp8_gpu::input"], [10, 3, 1, "_CPPv418_float_to_hfp8_gpuRKN2at6TensorEK7int64_tK7int64_tKd", "_float_to_hfp8_gpu::max_pos"], [10, 1, 1, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd", "_float_to_msfp_gpu"], [10, 3, 1, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd", "_float_to_msfp_gpu::bias"], [10, 3, 1, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd", "_float_to_msfp_gpu::bounding_box_size"], [10, 3, 1, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd", "_float_to_msfp_gpu::ebits"], [10, 3, 1, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd", "_float_to_msfp_gpu::input"], [10, 3, 1, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd", "_float_to_msfp_gpu::max_pos"], [10, 3, 1, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd", "_float_to_msfp_gpu::mbits"], [10, 3, 1, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd", "_float_to_msfp_gpu::min_pos"], [10, 1, 1, "_CPPv430_float_to_paddedFP8rowwise_gpuRK6TensorKbK7int64_t", "_float_to_paddedFP8rowwise_gpu"], [10, 3, 1, "_CPPv430_float_to_paddedFP8rowwise_gpuRK6TensorKbK7int64_t", "_float_to_paddedFP8rowwise_gpu::forward"], [10, 3, 1, "_CPPv430_float_to_paddedFP8rowwise_gpuRK6TensorKbK7int64_t", "_float_to_paddedFP8rowwise_gpu::input"], [10, 3, 1, "_CPPv430_float_to_paddedFP8rowwise_gpuRK6TensorKbK7int64_t", "_float_to_paddedFP8rowwise_gpu::row_dim"], [10, 1, 1, "_CPPv434_fused8bitrowwise_to_float_cpu_outR6TensorRK6Tensor", "_fused8bitrowwise_to_float_cpu_out"], [10, 3, 1, "_CPPv434_fused8bitrowwise_to_float_cpu_outR6TensorRK6Tensor", "_fused8bitrowwise_to_float_cpu_out::input"], [10, 3, 1, "_CPPv434_fused8bitrowwise_to_float_cpu_outR6TensorRK6Tensor", "_fused8bitrowwise_to_float_cpu_out::output"], [10, 1, 1, "_CPPv430_fused8bitrowwise_to_float_gpuRKN2at6TensorE", "_fused8bitrowwise_to_float_gpu"], [10, 3, 1, "_CPPv430_fused8bitrowwise_to_float_gpuRKN2at6TensorE", "_fused8bitrowwise_to_float_gpu::input"], [10, 1, 1, "_CPPv440_fused8bitrowwise_to_float_mixed_dim_gpuRKN2at6TensorERKN2at6TensorEK7int64_t", "_fused8bitrowwise_to_float_mixed_dim_gpu"], [10, 3, 1, "_CPPv440_fused8bitrowwise_to_float_mixed_dim_gpuRKN2at6TensorERKN2at6TensorEK7int64_t", "_fused8bitrowwise_to_float_mixed_dim_gpu::D_offsets"], [10, 3, 1, "_CPPv440_fused8bitrowwise_to_float_mixed_dim_gpuRKN2at6TensorERKN2at6TensorEK7int64_t", "_fused8bitrowwise_to_float_mixed_dim_gpu::input"], [10, 3, 1, "_CPPv440_fused8bitrowwise_to_float_mixed_dim_gpuRKN2at6TensorERKN2at6TensorEK7int64_t", "_fused8bitrowwise_to_float_mixed_dim_gpu::output_dtype"], [10, 1, 1, "_CPPv429_fused8bitrowwise_to_half_gpuRKN2at6TensorE", "_fused8bitrowwise_to_half_gpu"], [10, 3, 1, "_CPPv429_fused8bitrowwise_to_half_gpuRKN2at6TensorE", "_fused8bitrowwise_to_half_gpu::input"], [10, 1, 1, "_CPPv449_fused8bitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tKbKb", "_fused8bitrowwise_to_single_or_half_precision_gpu"], [10, 3, 1, "_CPPv449_fused8bitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tKbKb", "_fused8bitrowwise_to_single_or_half_precision_gpu::input"], [10, 3, 1, "_CPPv449_fused8bitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tKbKb", "_fused8bitrowwise_to_single_or_half_precision_gpu::output_dtype"], [10, 3, 1, "_CPPv449_fused8bitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tKbKb", "_fused8bitrowwise_to_single_or_half_precision_gpu::quant_padding_float_type"], [10, 3, 1, "_CPPv449_fused8bitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tKbKb", "_fused8bitrowwise_to_single_or_half_precision_gpu::scale_bias_last"], [10, 1, 1, "_CPPv430_fusednbitrowwise_to_float_gpuRKN2at6TensorEK7int64_t", "_fusednbitrowwise_to_float_gpu"], [10, 3, 1, "_CPPv430_fusednbitrowwise_to_float_gpuRKN2at6TensorEK7int64_t", "_fusednbitrowwise_to_float_gpu::bit_rate"], [10, 3, 1, "_CPPv430_fusednbitrowwise_to_float_gpuRKN2at6TensorEK7int64_t", "_fusednbitrowwise_to_float_gpu::input"], [10, 1, 1, "_CPPv429_fusednbitrowwise_to_half_gpuRKN2at6TensorEK7int64_t", "_fusednbitrowwise_to_half_gpu"], [10, 3, 1, "_CPPv429_fusednbitrowwise_to_half_gpuRKN2at6TensorEK7int64_t", "_fusednbitrowwise_to_half_gpu::bit_rate"], [10, 3, 1, "_CPPv429_fusednbitrowwise_to_half_gpuRKN2at6TensorEK7int64_t", "_fusednbitrowwise_to_half_gpu::input"], [10, 1, 1, "_CPPv449_fusednbitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tK7int64_t", "_fusednbitrowwise_to_single_or_half_precision_gpu"], [10, 3, 1, "_CPPv449_fusednbitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tK7int64_t", "_fusednbitrowwise_to_single_or_half_precision_gpu::bit_rate"], [10, 3, 1, "_CPPv449_fusednbitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tK7int64_t", "_fusednbitrowwise_to_single_or_half_precision_gpu::input"], [10, 3, 1, "_CPPv449_fusednbitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tK7int64_t", "_fusednbitrowwise_to_single_or_half_precision_gpu::output_dtype"], [10, 1, 1, "_CPPv429_half_to_fused8bitrowwise_gpuRK6Tensor", "_half_to_fused8bitrowwise_gpu"], [10, 3, 1, "_CPPv429_half_to_fused8bitrowwise_gpuRK6Tensor", "_half_to_fused8bitrowwise_gpu::input"], [10, 1, 1, "_CPPv429_half_to_fusednbitrowwise_gpuRKN2at6TensorEK7int64_t", "_half_to_fusednbitrowwise_gpu"], [10, 3, 1, "_CPPv429_half_to_fusednbitrowwise_gpuRKN2at6TensorEK7int64_t", "_half_to_fusednbitrowwise_gpu::bit_rate"], [10, 3, 1, "_CPPv429_half_to_fusednbitrowwise_gpuRKN2at6TensorEK7int64_t", "_half_to_fusednbitrowwise_gpu::input"], [10, 1, 1, "_CPPv418_hfp8_to_float_gpuRKN2at6TensorEK7int64_tK7int64_t", "_hfp8_to_float_gpu"], [10, 3, 1, "_CPPv418_hfp8_to_float_gpuRKN2at6TensorEK7int64_tK7int64_t", "_hfp8_to_float_gpu::ebits"], [10, 3, 1, "_CPPv418_hfp8_to_float_gpuRKN2at6TensorEK7int64_tK7int64_t", "_hfp8_to_float_gpu::exponent_bias"], [10, 3, 1, "_CPPv418_hfp8_to_float_gpuRKN2at6TensorEK7int64_tK7int64_t", "_hfp8_to_float_gpu::input"], [10, 1, 1, "_CPPv418_msfp_to_float_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_t", "_msfp_to_float_gpu"], [10, 3, 1, "_CPPv418_msfp_to_float_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_t", "_msfp_to_float_gpu::bias"], [10, 3, 1, "_CPPv418_msfp_to_float_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_t", "_msfp_to_float_gpu::ebits"], [10, 3, 1, "_CPPv418_msfp_to_float_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_t", "_msfp_to_float_gpu::input"], [10, 3, 1, "_CPPv418_msfp_to_float_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_t", "_msfp_to_float_gpu::mbits"], [10, 1, 1, "_CPPv430_paddedFP8rowwise_to_float_gpuRKN2at6TensorEKbK7int64_tK7int64_tK7int64_t", "_paddedFP8rowwise_to_float_gpu"], [10, 3, 1, "_CPPv430_paddedFP8rowwise_to_float_gpuRKN2at6TensorEKbK7int64_tK7int64_tK7int64_t", "_paddedFP8rowwise_to_float_gpu::forward"], [10, 3, 1, "_CPPv430_paddedFP8rowwise_to_float_gpuRKN2at6TensorEKbK7int64_tK7int64_tK7int64_t", "_paddedFP8rowwise_to_float_gpu::input"], [10, 3, 1, "_CPPv430_paddedFP8rowwise_to_float_gpuRKN2at6TensorEKbK7int64_tK7int64_tK7int64_t", "_paddedFP8rowwise_to_float_gpu::output_dtype"], [10, 3, 1, "_CPPv430_paddedFP8rowwise_to_float_gpuRKN2at6TensorEKbK7int64_tK7int64_tK7int64_t", "_paddedFP8rowwise_to_float_gpu::output_last_dim"], [10, 3, 1, "_CPPv430_paddedFP8rowwise_to_float_gpuRKN2at6TensorEKbK7int64_tK7int64_tK7int64_t", "_paddedFP8rowwise_to_float_gpu::row_dim"], [10, 1, 1, "_CPPv449_single_or_half_precision_to_fused8bitrowwise_gpuRK6Tensor", "_single_or_half_precision_to_fused8bitrowwise_gpu"], [10, 3, 1, "_CPPv449_single_or_half_precision_to_fused8bitrowwise_gpuRK6Tensor", "_single_or_half_precision_to_fused8bitrowwise_gpu::input"], [10, 1, 1, "_CPPv449_single_or_half_precision_to_fusednbitrowwise_gpuRK6TensorK7int64_t", "_single_or_half_precision_to_fusednbitrowwise_gpu"], [10, 3, 1, "_CPPv449_single_or_half_precision_to_fusednbitrowwise_gpuRK6TensorK7int64_t", "_single_or_half_precision_to_fusednbitrowwise_gpu::bit_rate"], [10, 3, 1, "_CPPv449_single_or_half_precision_to_fusednbitrowwise_gpuRK6TensorK7int64_t", "_single_or_half_precision_to_fusednbitrowwise_gpu::input"], [9, 1, 1, "_CPPv417all_to_one_deviceNSt6vectorIN2at6TensorEEEN2at6DeviceE", "all_to_one_device"], [9, 3, 1, "_CPPv417all_to_one_deviceNSt6vectorIN2at6TensorEEEN2at6DeviceE", "all_to_one_device::inputTensors"], [9, 3, 1, "_CPPv417all_to_one_deviceNSt6vectorIN2at6TensorEEEN2at6DeviceE", "all_to_one_device::target_device"], [6, 1, 1, "_CPPv431batched_dense_vec_jagged_2d_mulRK6TensorRK6TensorRK6Tensor", "batched_dense_vec_jagged_2d_mul"], [6, 3, 1, "_CPPv431batched_dense_vec_jagged_2d_mulRK6TensorRK6TensorRK6Tensor", "batched_dense_vec_jagged_2d_mul::a_offsets"], [6, 3, 1, "_CPPv431batched_dense_vec_jagged_2d_mulRK6TensorRK6TensorRK6Tensor", "batched_dense_vec_jagged_2d_mul::a_values"], [6, 3, 1, "_CPPv431batched_dense_vec_jagged_2d_mulRK6TensorRK6TensorRK6Tensor", "batched_dense_vec_jagged_2d_mul::v"], [3, 1, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKNSt8optionalI6TensorEERKNSt8optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda"], [3, 3, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKNSt8optionalI6TensorEERKNSt8optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda::B_ofsets"], [3, 3, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKNSt8optionalI6TensorEERKNSt8optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda::bounds_check_mode"], [3, 3, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKNSt8optionalI6TensorEERKNSt8optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda::indices"], [3, 3, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKNSt8optionalI6TensorEERKNSt8optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda::max_B"], [3, 3, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKNSt8optionalI6TensorEERKNSt8optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda::offsets"], [3, 3, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKNSt8optionalI6TensorEERKNSt8optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda::rows_per_table"], [3, 3, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKNSt8optionalI6TensorEERKNSt8optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda::warning"], [3, 3, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKNSt8optionalI6TensorEERKNSt8optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda::weights"], [13, 1, 1, "_CPPv420compact_indices_cudaNSt6vectorI6TensorEE6TensorNSt6vectorI6TensorEE6Tensor6Tensor", "compact_indices_cuda"], [13, 3, 1, "_CPPv420compact_indices_cudaNSt6vectorI6TensorEE6TensorNSt6vectorI6TensorEE6Tensor6Tensor", "compact_indices_cuda::compact_count"], [13, 3, 1, "_CPPv420compact_indices_cudaNSt6vectorI6TensorEE6TensorNSt6vectorI6TensorEE6Tensor6Tensor", "compact_indices_cuda::compact_indices"], [13, 3, 1, "_CPPv420compact_indices_cudaNSt6vectorI6TensorEE6TensorNSt6vectorI6TensorEE6Tensor6Tensor", "compact_indices_cuda::count"], [13, 3, 1, "_CPPv420compact_indices_cudaNSt6vectorI6TensorEE6TensorNSt6vectorI6TensorEE6Tensor6Tensor", "compact_indices_cuda::indices"], [13, 3, 1, "_CPPv420compact_indices_cudaNSt6vectorI6TensorEE6TensorNSt6vectorI6TensorEE6Tensor6Tensor", "compact_indices_cuda::masks"], [13, 1, 1, "_CPPv418cuda_callback_func12cudaStream_t11cudaError_tPv", "cuda_callback_func"], [13, 3, 1, "_CPPv418cuda_callback_func12cudaStream_t11cudaError_tPv", "cuda_callback_func::functor"], [13, 3, 1, "_CPPv418cuda_callback_func12cudaStream_t11cudaError_tPv", "cuda_callback_func::status"], [13, 3, 1, "_CPPv418cuda_callback_func12cudaStream_t11cudaError_tPv", "cuda_callback_func::stream"], [6, 1, 1, "_CPPv415dense_to_jaggedRK6TensorRKNSt6vectorI6TensorEENSt8optionalIN2at6SymIntEEE", "dense_to_jagged"], [6, 3, 1, "_CPPv415dense_to_jaggedRK6TensorRKNSt6vectorI6TensorEENSt8optionalIN2at6SymIntEEE", "dense_to_jagged::dense"], [6, 3, 1, "_CPPv415dense_to_jaggedRK6TensorRKNSt6vectorI6TensorEENSt8optionalIN2at6SymIntEEE", "dense_to_jagged::offsets"], [6, 3, 1, "_CPPv415dense_to_jaggedRK6TensorRKNSt6vectorI6TensorEENSt8optionalIN2at6SymIntEEE", "dense_to_jagged::total_L"], [12, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda"], [12, 3, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::D_offsets"], [12, 3, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::cache_index_table_map"], [12, 3, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::gather_cache_stats"], [12, 3, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::hash_size_cumsum"], [12, 3, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::linear_cache_indices"], [12, 3, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::lru_state"], [12, 3, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::lxu_cache_miss_timestamp"], [12, 3, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::lxu_cache_state"], [12, 3, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::lxu_cache_weights"], [12, 3, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::row_alignment"], [12, 3, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::time_stamp"], [12, 3, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::total_cache_hash_size"], [12, 3, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::uvm_cache_stats"], [12, 3, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::weights"], [12, 3, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::weights_offsets"], [12, 3, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::weights_tys"], [12, 1, 1, "_CPPv435direct_mapped_lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lxu_cache_lookup_cuda"], [12, 3, 1, "_CPPv435direct_mapped_lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lxu_cache_lookup_cuda::gather_cache_stats"], [12, 3, 1, "_CPPv435direct_mapped_lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lxu_cache_lookup_cuda::invalid_index"], [12, 3, 1, "_CPPv435direct_mapped_lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lxu_cache_lookup_cuda::linear_cache_indices"], [12, 3, 1, "_CPPv435direct_mapped_lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lxu_cache_lookup_cuda::lxu_cache_state"], [12, 3, 1, "_CPPv435direct_mapped_lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lxu_cache_lookup_cuda::uvm_cache_stats"], [27, 1, 1, "_CPPv4I0_NSt6size_tEE14example_method7int32_t1Tf", "example_method"], [27, 2, 1, "_CPPv4I0_NSt6size_tEE14example_method7int32_t1Tf", "example_method::Alignment"], [27, 2, 1, "_CPPv4I0_NSt6size_tEE14example_method7int32_t1Tf", "example_method::T"], [27, 3, 1, "_CPPv4I0_NSt6size_tEE14example_method7int32_t1Tf", "example_method::param1"], [27, 3, 1, "_CPPv4I0_NSt6size_tEE14example_method7int32_t1Tf", "example_method::param2"], [11, 1, 1, "_CPPv431expand_into_jagged_permute_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_t", "expand_into_jagged_permute_cuda"], [11, 3, 1, "_CPPv431expand_into_jagged_permute_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_t", "expand_into_jagged_permute_cuda::input_offsets"], [11, 3, 1, "_CPPv431expand_into_jagged_permute_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_t", "expand_into_jagged_permute_cuda::output_offsets"], [11, 3, 1, "_CPPv431expand_into_jagged_permute_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_t", "expand_into_jagged_permute_cuda::output_size"], [11, 3, 1, "_CPPv431expand_into_jagged_permute_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_t", "expand_into_jagged_permute_cuda::permute"], [10, 1, 1, "_CPPv437float_or_half_to_fused8bitrowwise_cpuRK6Tensor", "float_or_half_to_fused8bitrowwise_cpu"], [10, 3, 1, "_CPPv437float_or_half_to_fused8bitrowwise_cpuRK6Tensor", "float_or_half_to_fused8bitrowwise_cpu::input"], [10, 1, 1, "_CPPv423float_to_FP8rowwise_cpuRK6Tensorb", "float_to_FP8rowwise_cpu"], [10, 3, 1, "_CPPv423float_to_FP8rowwise_cpuRK6Tensorb", "float_to_FP8rowwise_cpu::forward"], [10, 3, 1, "_CPPv423float_to_FP8rowwise_cpuRK6Tensorb", "float_to_FP8rowwise_cpu::input"], [10, 1, 1, "_CPPv429float_to_fused8bitrowwise_cpuRK6Tensor", "float_to_fused8bitrowwise_cpu"], [10, 3, 1, "_CPPv429float_to_fused8bitrowwise_cpuRK6Tensor", "float_to_fused8bitrowwise_cpu::input"], [10, 1, 1, "_CPPv429fused8bitrowwise_to_float_cpuRK6Tensor", "fused8bitrowwise_to_float_cpu"], [10, 3, 1, "_CPPv429fused8bitrowwise_to_float_cpuRK6Tensor", "fused8bitrowwise_to_float_cpu::input"], [10, 1, 1, "_CPPv437fused8bitrowwise_to_float_or_half_cpuRK6TensorK7int64_tKbKb", "fused8bitrowwise_to_float_or_half_cpu"], [10, 3, 1, "_CPPv437fused8bitrowwise_to_float_or_half_cpuRK6TensorK7int64_tKbKb", "fused8bitrowwise_to_float_or_half_cpu::input"], [10, 3, 1, "_CPPv437fused8bitrowwise_to_float_or_half_cpuRK6TensorK7int64_tKbKb", "fused8bitrowwise_to_float_or_half_cpu::output_dtype"], [10, 3, 1, "_CPPv437fused8bitrowwise_to_float_or_half_cpuRK6TensorK7int64_tKbKb", "fused8bitrowwise_to_float_or_half_cpu::quant_padding_float_type"], [10, 3, 1, "_CPPv437fused8bitrowwise_to_float_or_half_cpuRK6TensorK7int64_tKbKb", "fused8bitrowwise_to_float_or_half_cpu::scale_bias_last"], [10, 1, 1, "_CPPv428fused8bitrowwise_to_half_cpuRK6Tensor", "fused8bitrowwise_to_half_cpu"], [10, 3, 1, "_CPPv428fused8bitrowwise_to_half_cpuRK6Tensor", "fused8bitrowwise_to_half_cpu::input"], [10, 1, 1, "_CPPv437fusednbitrowwise_sbfront_to_float_cpuRK6TensorK7int64_t", "fusednbitrowwise_sbfront_to_float_cpu"], [10, 3, 1, "_CPPv437fusednbitrowwise_sbfront_to_float_cpuRK6TensorK7int64_t", "fusednbitrowwise_sbfront_to_float_cpu::bit_rate"], [10, 3, 1, "_CPPv437fusednbitrowwise_sbfront_to_float_cpuRK6TensorK7int64_t", "fusednbitrowwise_sbfront_to_float_cpu::input"], [10, 1, 1, "_CPPv429fusednbitrowwise_to_float_cpuRK6TensorK7int64_t", "fusednbitrowwise_to_float_cpu"], [10, 3, 1, "_CPPv429fusednbitrowwise_to_float_cpuRK6TensorK7int64_t", "fusednbitrowwise_to_float_cpu::bit_rate"], [10, 3, 1, "_CPPv429fusednbitrowwise_to_float_cpuRK6TensorK7int64_t", "fusednbitrowwise_to_float_cpu::input"], [10, 1, 1, "_CPPv437fusednbitrowwise_to_float_or_half_cpuRK6TensorK7int64_tK7int64_t", "fusednbitrowwise_to_float_or_half_cpu"], [10, 3, 1, "_CPPv437fusednbitrowwise_to_float_or_half_cpuRK6TensorK7int64_tK7int64_t", "fusednbitrowwise_to_float_or_half_cpu::bit_rate"], [10, 3, 1, "_CPPv437fusednbitrowwise_to_float_or_half_cpuRK6TensorK7int64_tK7int64_t", "fusednbitrowwise_to_float_or_half_cpu::input"], [10, 3, 1, "_CPPv437fusednbitrowwise_to_float_or_half_cpuRK6TensorK7int64_tK7int64_t", "fusednbitrowwise_to_float_or_half_cpu::output_dtype"], [10, 1, 1, "_CPPv428fusednbitrowwise_to_half_cpuRK6TensorK7int64_t", "fusednbitrowwise_to_half_cpu"], [10, 3, 1, "_CPPv428fusednbitrowwise_to_half_cpuRK6TensorK7int64_t", "fusednbitrowwise_to_half_cpu::bit_rate"], [10, 3, 1, "_CPPv428fusednbitrowwise_to_half_cpuRK6TensorK7int64_t", "fusednbitrowwise_to_half_cpu::input"], [11, 1, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu"], [11, 3, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::bin_boundaries"], [11, 3, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::bin_ctr_in_use_after"], [11, 3, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::bin_ctr_weight_value"], [11, 3, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::bin_num_examples"], [11, 3, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::bin_num_positives"], [11, 3, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::logit"], [11, 3, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::num_segments"], [11, 3, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::positive_weight"], [11, 3, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::segment_lengths"], [11, 3, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::segment_value"], [12, 1, 1, "_CPPv423get_unique_indices_cudaRKN2at6TensorEK7int64_tKb", "get_unique_indices_cuda"], [12, 3, 1, "_CPPv423get_unique_indices_cudaRKN2at6TensorEK7int64_tKb", "get_unique_indices_cuda::compute_count"], [12, 3, 1, "_CPPv423get_unique_indices_cudaRKN2at6TensorEK7int64_tKb", "get_unique_indices_cuda::linear_indices"], [12, 3, 1, "_CPPv423get_unique_indices_cudaRKN2at6TensorEK7int64_tKb", "get_unique_indices_cuda::max_indices"], [12, 1, 1, "_CPPv436get_unique_indices_with_inverse_cudaRKN2at6TensorEK7int64_tKbKb", "get_unique_indices_with_inverse_cuda"], [12, 3, 1, "_CPPv436get_unique_indices_with_inverse_cudaRKN2at6TensorEK7int64_tKbKb", "get_unique_indices_with_inverse_cuda::compute_count"], [12, 3, 1, "_CPPv436get_unique_indices_with_inverse_cudaRKN2at6TensorEK7int64_tKbKb", "get_unique_indices_with_inverse_cuda::compute_inverse_indices"], [12, 3, 1, "_CPPv436get_unique_indices_with_inverse_cudaRKN2at6TensorEK7int64_tKbKb", "get_unique_indices_with_inverse_cuda::linear_indices"], [12, 3, 1, "_CPPv436get_unique_indices_with_inverse_cudaRKN2at6TensorEK7int64_tKbKb", "get_unique_indices_with_inverse_cuda::max_indices"], [4, 1, 1, "_CPPv415gqa_attn_splitkRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorEKdK7int64_tK7int64_tKbK7int64_t", "gqa_attn_splitk"], [4, 3, 1, "_CPPv415gqa_attn_splitkRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorEKdK7int64_tK7int64_tKbK7int64_t", "gqa_attn_splitk::XQ"], [4, 3, 1, "_CPPv415gqa_attn_splitkRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorEKdK7int64_tK7int64_tKbK7int64_t", "gqa_attn_splitk::cache_K"], [4, 3, 1, "_CPPv415gqa_attn_splitkRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorEKdK7int64_tK7int64_tKbK7int64_t", "gqa_attn_splitk::cache_V"], [4, 3, 1, "_CPPv415gqa_attn_splitkRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorEKdK7int64_tK7int64_tKbK7int64_t", "gqa_attn_splitk::cache_logical_dtype_int"], [4, 3, 1, "_CPPv415gqa_attn_splitkRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorEKdK7int64_tK7int64_tKbK7int64_t", "gqa_attn_splitk::kv_cache_quant_num_groups"], [4, 3, 1, "_CPPv415gqa_attn_splitkRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorEKdK7int64_tK7int64_tKbK7int64_t", "gqa_attn_splitk::num_split_ks"], [4, 3, 1, "_CPPv415gqa_attn_splitkRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorEKdK7int64_tK7int64_tKbK7int64_t", "gqa_attn_splitk::qk_scale"], [4, 3, 1, "_CPPv415gqa_attn_splitkRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorEKdK7int64_tK7int64_tKbK7int64_t", "gqa_attn_splitk::seq_positions"], [4, 3, 1, "_CPPv415gqa_attn_splitkRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorEKdK7int64_tK7int64_tKbK7int64_t", "gqa_attn_splitk::use_tensor_cores"], [10, 1, 1, "_CPPv428half_to_fused8bitrowwise_cpuRK6Tensor", "half_to_fused8bitrowwise_cpu"], [10, 3, 1, "_CPPv428half_to_fused8bitrowwise_cpuRK6Tensor", "half_to_fused8bitrowwise_cpu::input"], [13, 1, 1, "_CPPv410hash_shard7int64_t6size_t", "hash_shard"], [13, 3, 1, "_CPPv410hash_shard7int64_t6size_t", "hash_shard::id"], [13, 3, 1, "_CPPv410hash_shard7int64_t6size_t", "hash_shard::num_shards"], [11, 1, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu"], [11, 3, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu::bin_ctr_in_use_after"], [11, 3, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu::bin_ctr_weight_value"], [11, 3, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu::bin_num_examples"], [11, 3, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu::bin_num_positives"], [11, 3, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu::logit"], [11, 3, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu::lower_bound"], [11, 3, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu::positive_weight"], [11, 3, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu::upper_bound"], [12, 1, 1, "_CPPv419host_lxu_cache_slot7int64_t7int64_t", "host_lxu_cache_slot"], [12, 3, 1, "_CPPv419host_lxu_cache_slot7int64_t7int64_t", "host_lxu_cache_slot::C"], [12, 3, 1, "_CPPv419host_lxu_cache_slot7int64_t7int64_t", "host_lxu_cache_slot::h_in"], [3, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::D_offsets"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::dev_weights"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::fp8_exponent_bias"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::fp8_exponent_bits"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::indice_weights"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::indices"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::lxu_cache_locations"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::lxu_cache_weights"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::max_float16_D"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::max_float32_D"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::max_float8_D"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::max_int2_D"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::max_int4_D"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::max_int8_D"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::offsets"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::output_dtype"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::pooling_mode"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::row_alignment"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::total_D"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::uvm_weights"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::weights_offsets"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::weights_placements"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::weights_tys"], [3, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::D_offsets"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::dev_weights"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::fp8_exponent_bias"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::fp8_exponent_bits"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::indice_weights"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::indices"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::lxu_cache_locations"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::lxu_cache_weights"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::max_float16_D"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::max_float32_D"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::max_float8_D"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::max_int2_D"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::max_int4_D"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::max_int8_D"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::offsets"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::output_dtype"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::pooling_mode"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::row_alignment"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::total_D"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::uvm_weights"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::weights_offsets"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::weights_placements"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::weights_tys"], [3, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::D_offsets"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::cache_hash_size_cumsum"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::cache_index_table_map"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::dev_weights"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::fp8_exponent_bias"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::fp8_exponent_bits"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::indice_weights"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::indices"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::lxu_cache_locations"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::lxu_cache_state"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::lxu_cache_weights"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::lxu_state"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::max_float16_D"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::max_float32_D"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::max_float8_D"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::max_int2_D"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::max_int4_D"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::max_int8_D"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::offsets"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::output_dtype"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::pooling_mode"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::row_alignment"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::total_D"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::total_cache_hash_size"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::uvm_weights"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::weights_offsets"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::weights_placements"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::weights_tys"], [3, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::D_offsets"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::cache_hash_size_cumsum"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::cache_index_table_map"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::dev_weights"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::fp8_exponent_bias"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::fp8_exponent_bits"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::indice_weights"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::indices"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::lxu_cache_locations"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::lxu_cache_state"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::lxu_cache_weights"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::lxu_state"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::max_float16_D"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::max_float32_D"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::max_float8_D"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::max_int2_D"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::max_int4_D"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::max_int8_D"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::offsets"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::output_dtype"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::pooling_mode"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::row_alignment"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::total_D"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::total_cache_hash_size"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::uvm_weights"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::weights_offsets"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::weights_placements"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::weights_tys"], [8, 1, 1, "_CPPv413is_uvm_tensorRK6Tensor", "is_uvm_tensor"], [8, 3, 1, "_CPPv413is_uvm_tensorRK6Tensor", "is_uvm_tensor::self"], [6, 1, 1, "_CPPv418jagged_1d_to_dense6Tensor6TensorN3c106SymIntE7int64_t", "jagged_1d_to_dense"], [6, 3, 1, "_CPPv418jagged_1d_to_dense6Tensor6TensorN3c106SymIntE7int64_t", "jagged_1d_to_dense::max_L"], [6, 3, 1, "_CPPv418jagged_1d_to_dense6Tensor6TensorN3c106SymIntE7int64_t", "jagged_1d_to_dense::offsets"], [6, 3, 1, "_CPPv418jagged_1d_to_dense6Tensor6TensorN3c106SymIntE7int64_t", "jagged_1d_to_dense::padding_value"], [6, 3, 1, "_CPPv418jagged_1d_to_dense6Tensor6TensorN3c106SymIntE7int64_t", "jagged_1d_to_dense::values"], [6, 1, 1, "_CPPv418jagged_2d_to_dense6Tensor6TensorN3c106SymIntE", "jagged_2d_to_dense"], [6, 3, 1, "_CPPv418jagged_2d_to_dense6Tensor6TensorN3c106SymIntE", "jagged_2d_to_dense::max_sequence_length"], [6, 3, 1, "_CPPv418jagged_2d_to_dense6Tensor6TensorN3c106SymIntE", "jagged_2d_to_dense::offsets"], [6, 3, 1, "_CPPv418jagged_2d_to_dense6Tensor6TensorN3c106SymIntE", "jagged_2d_to_dense::values"], [6, 1, 1, "_CPPv428jagged_dense_elementwise_addRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add"], [6, 3, 1, "_CPPv428jagged_dense_elementwise_addRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add::x_offsets"], [6, 3, 1, "_CPPv428jagged_dense_elementwise_addRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add::x_values"], [6, 3, 1, "_CPPv428jagged_dense_elementwise_addRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add::y"], [6, 1, 1, "_CPPv442jagged_dense_elementwise_add_jagged_outputRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add_jagged_output"], [6, 3, 1, "_CPPv442jagged_dense_elementwise_add_jagged_outputRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add_jagged_output::x_offsets"], [6, 3, 1, "_CPPv442jagged_dense_elementwise_add_jagged_outputRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add_jagged_output::x_values"], [6, 3, 1, "_CPPv442jagged_dense_elementwise_add_jagged_outputRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add_jagged_output::y"], [6, 1, 1, "_CPPv447jagged_dense_elementwise_add_jagged_output_cudaRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add_jagged_output_cuda"], [6, 3, 1, "_CPPv447jagged_dense_elementwise_add_jagged_output_cudaRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add_jagged_output_cuda::x_offsets"], [6, 3, 1, "_CPPv447jagged_dense_elementwise_add_jagged_output_cudaRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add_jagged_output_cuda::x_values"], [6, 3, 1, "_CPPv447jagged_dense_elementwise_add_jagged_output_cudaRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add_jagged_output_cuda::y"], [6, 1, 1, "_CPPv428jagged_dense_elementwise_mulRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_mul"], [6, 3, 1, "_CPPv428jagged_dense_elementwise_mulRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_mul::x_offsets"], [6, 3, 1, "_CPPv428jagged_dense_elementwise_mulRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_mul::x_values"], [6, 3, 1, "_CPPv428jagged_dense_elementwise_mulRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_mul::y"], [6, 1, 1, "_CPPv422jagged_to_padded_denseRK6TensorRKNSt6vectorI6TensorEEKN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense"], [6, 3, 1, "_CPPv422jagged_to_padded_denseRK6TensorRKNSt6vectorI6TensorEEKN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense::max_lengths"], [6, 3, 1, "_CPPv422jagged_to_padded_denseRK6TensorRKNSt6vectorI6TensorEEKN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense::offsets"], [6, 3, 1, "_CPPv422jagged_to_padded_denseRK6TensorRKNSt6vectorI6TensorEEKN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense::padding_value"], [6, 3, 1, "_CPPv422jagged_to_padded_denseRK6TensorRKNSt6vectorI6TensorEEKN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense::values"], [6, 1, 1, "_CPPv430jagged_to_padded_dense_forwardRK6TensorRKNSt6vectorI6TensorEEN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense_forward"], [6, 3, 1, "_CPPv430jagged_to_padded_dense_forwardRK6TensorRKNSt6vectorI6TensorEEN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense_forward::max_lengths"], [6, 3, 1, "_CPPv430jagged_to_padded_dense_forwardRK6TensorRKNSt6vectorI6TensorEEN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense_forward::offsets"], [6, 3, 1, "_CPPv430jagged_to_padded_dense_forwardRK6TensorRKNSt6vectorI6TensorEEN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense_forward::padding_value"], [6, 3, 1, "_CPPv430jagged_to_padded_dense_forwardRK6TensorRKNSt6vectorI6TensorEEN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense_forward::values"], [13, 5, 1, "_CPPv4N5kv_db12CacheContextE", "kv_db::CacheContext"], [13, 5, 1, "_CPPv4N5kv_db13EmbeddingKVDBE", "kv_db::EmbeddingKVDB"], [13, 5, 1, "_CPPv4N5kv_db9QueueItemE", "kv_db::QueueItem"], [13, 5, 1, "_CPPv4N8l2_cache13CacheLibCacheE", "l2_cache::CacheLibCache"], [12, 1, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda"], [12, 3, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::D_offsets"], [12, 3, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::cache_hash_size_cumsum"], [12, 3, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::cache_index_table_map"], [12, 3, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::lfu_state"], [12, 3, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::linear_cache_indices"], [12, 3, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::lxu_cache_state"], [12, 3, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::lxu_cache_weights"], [12, 3, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::row_alignment"], [12, 3, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::total_cache_hash_size"], [12, 3, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::weights"], [12, 3, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::weights_offsets"], [12, 3, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::weights_tys"], [12, 1, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda"], [12, 3, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::D_offsets"], [12, 3, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::cache_hash_size_cumsum"], [12, 3, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::cache_index_table_map"], [12, 3, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::lfu_state"], [12, 3, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::linear_cache_indices"], [12, 3, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::lxu_cache_state"], [12, 3, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::lxu_cache_weights"], [12, 3, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::stochastic_rounding"], [12, 3, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::total_cache_hash_size"], [12, 3, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::weights"], [12, 3, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::weights_offsets"], [12, 1, 1, "_CPPv428linearize_cache_indices_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorERKNSt8optionalIN2at6TensorEEEK7int64_tK7int64_t", "linearize_cache_indices_cuda"], [12, 3, 1, "_CPPv428linearize_cache_indices_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorERKNSt8optionalIN2at6TensorEEEK7int64_tK7int64_t", "linearize_cache_indices_cuda::B_offsets"], [12, 3, 1, "_CPPv428linearize_cache_indices_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorERKNSt8optionalIN2at6TensorEEEK7int64_tK7int64_t", "linearize_cache_indices_cuda::cache_hash_size_cumsum"], [12, 3, 1, "_CPPv428linearize_cache_indices_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorERKNSt8optionalIN2at6TensorEEEK7int64_tK7int64_t", "linearize_cache_indices_cuda::indices"], [12, 3, 1, "_CPPv428linearize_cache_indices_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorERKNSt8optionalIN2at6TensorEEEK7int64_tK7int64_t", "linearize_cache_indices_cuda::indices_base_offset"], [12, 3, 1, "_CPPv428linearize_cache_indices_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorERKNSt8optionalIN2at6TensorEEEK7int64_tK7int64_t", "linearize_cache_indices_cuda::max_B"], [12, 3, 1, "_CPPv428linearize_cache_indices_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorERKNSt8optionalIN2at6TensorEEEK7int64_tK7int64_t", "linearize_cache_indices_cuda::offsets"], [12, 1, 1, "_CPPv441linearize_cache_indices_from_row_idx_cudaN2at6TensorEN2at6TensorEN2at6TensorE", "linearize_cache_indices_from_row_idx_cuda"], [12, 3, 1, "_CPPv441linearize_cache_indices_from_row_idx_cudaN2at6TensorEN2at6TensorEN2at6TensorE", "linearize_cache_indices_from_row_idx_cuda::cache_hash_size_cumsum"], [12, 3, 1, "_CPPv441linearize_cache_indices_from_row_idx_cudaN2at6TensorEN2at6TensorEN2at6TensorE", "linearize_cache_indices_from_row_idx_cuda::update_row_indices"], [12, 3, 1, "_CPPv441linearize_cache_indices_from_row_idx_cudaN2at6TensorEN2at6TensorEN2at6TensorE", "linearize_cache_indices_from_row_idx_cuda::update_table_indices"], [12, 1, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorEKb", "lru_cache_find_uncached_cuda"], [12, 3, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorEKb", "lru_cache_find_uncached_cuda::compute_inverse_indices"], [12, 3, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorEKb", "lru_cache_find_uncached_cuda::gather_cache_stats"], [12, 3, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorEKb", "lru_cache_find_uncached_cuda::lock_cache_line"], [12, 3, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorEKb", "lru_cache_find_uncached_cuda::lru_state"], [12, 3, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorEKb", "lru_cache_find_uncached_cuda::lxu_cache_locking_counter"], [12, 3, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorEKb", "lru_cache_find_uncached_cuda::lxu_cache_state"], [12, 3, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorEKb", "lru_cache_find_uncached_cuda::max_indices"], [12, 3, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorEKb", "lru_cache_find_uncached_cuda::time_stamp"], [12, 3, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorEKb", "lru_cache_find_uncached_cuda::unique_indices"], [12, 3, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorEKb", "lru_cache_find_uncached_cuda::unique_indices_length"], [12, 3, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorEKb", "lru_cache_find_uncached_cuda::uvm_cache_stats"], [12, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda"], [12, 3, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::D_offsets"], [12, 3, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::cache_index_table_map"], [12, 3, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::gather_cache_stats"], [12, 3, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::hash_size_cumsum"], [12, 3, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::linear_cache_indices"], [12, 3, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::lru_state"], [12, 3, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::lxu_cache_state"], [12, 3, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::lxu_cache_weights"], [12, 3, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::row_alignment"], [12, 3, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::time_stamp"], [12, 3, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::total_cache_hash_size"], [12, 3, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::uvm_cache_stats"], [12, 3, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::weights"], [12, 3, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::weights_offsets"], [12, 3, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::weights_tys"], [12, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbNSt8optionalIN2at6TensorEEEbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_cuda"], [12, 3, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbNSt8optionalIN2at6TensorEEEbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_cuda::D_offsets"], [12, 3, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbNSt8optionalIN2at6TensorEEEbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_cuda::cache_index_table_map"], [12, 3, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbNSt8optionalIN2at6TensorEEEbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_cuda::gather_cache_stats"], [12, 3, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbNSt8optionalIN2at6TensorEEEbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_cuda::hash_size_cumsum"], [12, 3, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbNSt8optionalIN2at6TensorEEEbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_cuda::linear_cache_indices"], [12, 3, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbNSt8optionalIN2at6TensorEEEbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_cuda::lock_cache_line"], [12, 3, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbNSt8optionalIN2at6TensorEEEbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_cuda::lru_state"], [12, 3, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbNSt8optionalIN2at6TensorEEEbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_cuda::lxu_cache_locking_counter"], [12, 3, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbNSt8optionalIN2at6TensorEEEbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_cuda::lxu_cache_state"], [12, 3, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbNSt8optionalIN2at6TensorEEEbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_cuda::lxu_cache_weights"], [12, 3, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbNSt8optionalIN2at6TensorEEEbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_cuda::stochastic_rounding"], [12, 3, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbNSt8optionalIN2at6TensorEEEbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_cuda::time_stamp"], [12, 3, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbNSt8optionalIN2at6TensorEEEbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_cuda::total_cache_hash_size"], [12, 3, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbNSt8optionalIN2at6TensorEEEbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_cuda::uvm_cache_stats"], [12, 3, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbNSt8optionalIN2at6TensorEEEbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_cuda::weights"], [12, 3, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbNSt8optionalIN2at6TensorEEEbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_cuda::weights_offsets"], [12, 1, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda"], [12, 3, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::D_offsets"], [12, 3, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::cache_hash_size_cumsum"], [12, 3, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::cache_index_table_map"], [12, 3, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::lxu_cache_state"], [12, 3, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::lxu_cache_weights"], [12, 3, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::stochastic_rounding"], [12, 3, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::total_D"], [12, 3, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::uvm_weights"], [12, 3, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::weights_offsets"], [12, 1, 1, "_CPPv431lxu_cache_locations_update_cudaN2at6TensorEN2at6TensorENSt8optionalIN2at6TensorEEE", "lxu_cache_locations_update_cuda"], [12, 3, 1, "_CPPv431lxu_cache_locations_update_cudaN2at6TensorEN2at6TensorENSt8optionalIN2at6TensorEEE", "lxu_cache_locations_update_cuda::lxu_cache_locations"], [12, 3, 1, "_CPPv431lxu_cache_locations_update_cudaN2at6TensorEN2at6TensorENSt8optionalIN2at6TensorEEE", "lxu_cache_locations_update_cuda::lxu_cache_locations_new"], [12, 3, 1, "_CPPv431lxu_cache_locations_update_cudaN2at6TensorEN2at6TensorENSt8optionalIN2at6TensorEEE", "lxu_cache_locations_update_cuda::num_uniq_cache_indices"], [12, 1, 1, "_CPPv440lxu_cache_locking_counter_decrement_cudaN2at6TensorEN2at6TensorE", "lxu_cache_locking_counter_decrement_cuda"], [12, 3, 1, "_CPPv440lxu_cache_locking_counter_decrement_cudaN2at6TensorEN2at6TensorE", "lxu_cache_locking_counter_decrement_cuda::lxu_cache_locations"], [12, 3, 1, "_CPPv440lxu_cache_locking_counter_decrement_cudaN2at6TensorEN2at6TensorE", "lxu_cache_locking_counter_decrement_cuda::lxu_cache_locking_counter"], [12, 1, 1, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEENSt8optionalIN2at6TensorEEENSt8optionalIN2at6TensorEEE", "lxu_cache_lookup_cuda"], [12, 3, 1, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEENSt8optionalIN2at6TensorEEENSt8optionalIN2at6TensorEEE", "lxu_cache_lookup_cuda::gather_cache_stats"], [12, 3, 1, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEENSt8optionalIN2at6TensorEEENSt8optionalIN2at6TensorEEE", "lxu_cache_lookup_cuda::invalid_index"], [12, 3, 1, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEENSt8optionalIN2at6TensorEEENSt8optionalIN2at6TensorEEE", "lxu_cache_lookup_cuda::linear_cache_indices"], [12, 3, 1, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEENSt8optionalIN2at6TensorEEENSt8optionalIN2at6TensorEEE", "lxu_cache_lookup_cuda::lxu_cache_locations_output"], [12, 3, 1, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEENSt8optionalIN2at6TensorEEENSt8optionalIN2at6TensorEEE", "lxu_cache_lookup_cuda::lxu_cache_state"], [12, 3, 1, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEENSt8optionalIN2at6TensorEEENSt8optionalIN2at6TensorEEE", "lxu_cache_lookup_cuda::num_uniq_cache_indices"], [12, 3, 1, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEENSt8optionalIN2at6TensorEEENSt8optionalIN2at6TensorEEE", "lxu_cache_lookup_cuda::uvm_cache_stats"], [13, 1, 1, "_CPPv421masked_index_put_cuda6Tensor6Tensor6Tensor6TensorKbK7int64_t", "masked_index_put_cuda"], [13, 3, 1, "_CPPv421masked_index_put_cuda6Tensor6Tensor6Tensor6TensorKbK7int64_t", "masked_index_put_cuda::count"], [13, 3, 1, "_CPPv421masked_index_put_cuda6Tensor6Tensor6Tensor6TensorKbK7int64_t", "masked_index_put_cuda::indices"], [13, 3, 1, "_CPPv421masked_index_put_cuda6Tensor6Tensor6Tensor6TensorKbK7int64_t", "masked_index_put_cuda::preferred_sms"], [13, 3, 1, "_CPPv421masked_index_put_cuda6Tensor6Tensor6Tensor6TensorKbK7int64_t", "masked_index_put_cuda::self"], [13, 3, 1, "_CPPv421masked_index_put_cuda6Tensor6Tensor6Tensor6TensorKbK7int64_t", "masked_index_put_cuda::use_pipeline"], [13, 3, 1, "_CPPv421masked_index_put_cuda6Tensor6Tensor6Tensor6TensorKbK7int64_t", "masked_index_put_cuda::values"], [13, 1, 1, "_CPPv424masked_index_select_cuda6Tensor6Tensor6Tensor6TensorKbK7int64_t", "masked_index_select_cuda"], [13, 3, 1, "_CPPv424masked_index_select_cuda6Tensor6Tensor6Tensor6TensorKbK7int64_t", "masked_index_select_cuda::count"], [13, 3, 1, "_CPPv424masked_index_select_cuda6Tensor6Tensor6Tensor6TensorKbK7int64_t", "masked_index_select_cuda::indices"], [13, 3, 1, "_CPPv424masked_index_select_cuda6Tensor6Tensor6Tensor6TensorKbK7int64_t", "masked_index_select_cuda::preferred_sms"], [13, 3, 1, "_CPPv424masked_index_select_cuda6Tensor6Tensor6Tensor6TensorKbK7int64_t", "masked_index_select_cuda::self"], [13, 3, 1, "_CPPv424masked_index_select_cuda6Tensor6Tensor6Tensor6TensorKbK7int64_t", "masked_index_select_cuda::use_pipeline"], [13, 3, 1, "_CPPv424masked_index_select_cuda6Tensor6Tensor6Tensor6TensorKbK7int64_t", "masked_index_select_cuda::values"], [8, 1, 1, "_CPPv422new_host_mapped_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_host_mapped_tensor"], [8, 3, 1, "_CPPv422new_host_mapped_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_host_mapped_tensor::self"], [8, 3, 1, "_CPPv422new_host_mapped_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_host_mapped_tensor::sizes"], [8, 1, 1, "_CPPv418new_managed_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_managed_tensor"], [8, 3, 1, "_CPPv418new_managed_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_managed_tensor::self"], [8, 3, 1, "_CPPv418new_managed_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_managed_tensor::sizes"], [8, 1, 1, "_CPPv423new_managed_tensor_metaRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_managed_tensor_meta"], [8, 3, 1, "_CPPv423new_managed_tensor_metaRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_managed_tensor_meta::self"], [8, 3, 1, "_CPPv423new_managed_tensor_metaRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_managed_tensor_meta::sizes"], [8, 1, 1, "_CPPv418new_unified_tensorRK6TensorRKNSt6vectorINSt7int64_tEEEb", "new_unified_tensor"], [8, 3, 1, "_CPPv418new_unified_tensorRK6TensorRKNSt6vectorINSt7int64_tEEEb", "new_unified_tensor::is_host_mapped"], [8, 3, 1, "_CPPv418new_unified_tensorRK6TensorRKNSt6vectorINSt7int64_tEEEb", "new_unified_tensor::self"], [8, 3, 1, "_CPPv418new_unified_tensorRK6TensorRKNSt6vectorINSt7int64_tEEEb", "new_unified_tensor::sizes"], [8, 1, 1, "_CPPv423new_unified_tensor_metaRK6TensorRKNSt6vectorINSt7int64_tEEEb", "new_unified_tensor_meta"], [8, 3, 1, "_CPPv423new_unified_tensor_metaRK6TensorRKNSt6vectorINSt7int64_tEEEb", "new_unified_tensor_meta::is_host_mapped"], [8, 3, 1, "_CPPv423new_unified_tensor_metaRK6TensorRKNSt6vectorINSt7int64_tEEEb", "new_unified_tensor_meta::self"], [8, 3, 1, "_CPPv423new_unified_tensor_metaRK6TensorRKNSt6vectorINSt7int64_tEEEb", "new_unified_tensor_meta::sizes"], [8, 1, 1, "_CPPv426new_vanilla_managed_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_vanilla_managed_tensor"], [8, 3, 1, "_CPPv426new_vanilla_managed_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_vanilla_managed_tensor::self"], [8, 3, 1, "_CPPv426new_vanilla_managed_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_vanilla_managed_tensor::sizes"], [5, 1, 1, "_CPPv435padding_fused_tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE7int64_t", "padding_fused_tbe_input_combine_cpu"], [5, 3, 1, "_CPPv435padding_fused_tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE7int64_t", "padding_fused_tbe_input_combine_cpu::batch_size"], [5, 3, 1, "_CPPv435padding_fused_tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE7int64_t", "padding_fused_tbe_input_combine_cpu::include_last_offsets"], [5, 3, 1, "_CPPv435padding_fused_tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE7int64_t", "padding_fused_tbe_input_combine_cpu::indices_list"], [5, 3, 1, "_CPPv435padding_fused_tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE7int64_t", "padding_fused_tbe_input_combine_cpu::offsets_list"], [5, 3, 1, "_CPPv435padding_fused_tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE7int64_t", "padding_fused_tbe_input_combine_cpu::per_sample_weights"], [9, 1, 1, "_CPPv429permute_pooled_embs_auto_gradRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad"], [9, 3, 1, "_CPPv429permute_pooled_embs_auto_gradRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad::inv_offset_dim_list"], [9, 3, 1, "_CPPv429permute_pooled_embs_auto_gradRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad::inv_permute_list"], [9, 3, 1, "_CPPv429permute_pooled_embs_auto_gradRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad::offset_dim_list"], [9, 3, 1, "_CPPv429permute_pooled_embs_auto_gradRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad::permute_list"], [9, 3, 1, "_CPPv429permute_pooled_embs_auto_gradRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad::pooled_embs"], [9, 1, 1, "_CPPv433permute_pooled_embs_auto_grad_cpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_cpu"], [9, 3, 1, "_CPPv433permute_pooled_embs_auto_grad_cpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_cpu::inv_offset_dim_list"], [9, 3, 1, "_CPPv433permute_pooled_embs_auto_grad_cpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_cpu::inv_permute_list"], [9, 3, 1, "_CPPv433permute_pooled_embs_auto_grad_cpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_cpu::offset_dim_list"], [9, 3, 1, "_CPPv433permute_pooled_embs_auto_grad_cpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_cpu::permute_list"], [9, 3, 1, "_CPPv433permute_pooled_embs_auto_grad_cpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_cpu::pooled_embs"], [9, 1, 1, "_CPPv433permute_pooled_embs_auto_grad_gpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_gpu"], [9, 3, 1, "_CPPv433permute_pooled_embs_auto_grad_gpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_gpu::inv_offset_dim_list"], [9, 3, 1, "_CPPv433permute_pooled_embs_auto_grad_gpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_gpu::inv_permute_list"], [9, 3, 1, "_CPPv433permute_pooled_embs_auto_grad_gpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_gpu::offset_dim_list"], [9, 3, 1, "_CPPv433permute_pooled_embs_auto_grad_gpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_gpu::permute_list"], [9, 3, 1, "_CPPv433permute_pooled_embs_auto_grad_gpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_gpu::pooled_embs"], [9, 1, 1, "_CPPv439permute_pooled_embs_auto_grad_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_cpu"], [9, 3, 1, "_CPPv439permute_pooled_embs_auto_grad_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_cpu::inv_offset_dim_list"], [9, 3, 1, "_CPPv439permute_pooled_embs_auto_grad_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_cpu::inv_permute_list"], [9, 3, 1, "_CPPv439permute_pooled_embs_auto_grad_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_cpu::offset_dim_list"], [9, 3, 1, "_CPPv439permute_pooled_embs_auto_grad_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_cpu::permute_list"], [9, 3, 1, "_CPPv439permute_pooled_embs_auto_grad_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_cpu::pooled_embs"], [9, 1, 1, "_CPPv439permute_pooled_embs_auto_grad_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_gpu"], [9, 3, 1, "_CPPv439permute_pooled_embs_auto_grad_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_gpu::inv_offset_dim_list"], [9, 3, 1, "_CPPv439permute_pooled_embs_auto_grad_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_gpu::inv_permute_list"], [9, 3, 1, "_CPPv439permute_pooled_embs_auto_grad_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_gpu::offset_dim_list"], [9, 3, 1, "_CPPv439permute_pooled_embs_auto_grad_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_gpu::permute_list"], [9, 3, 1, "_CPPv439permute_pooled_embs_auto_grad_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_gpu::pooled_embs"], [9, 1, 1, "_CPPv428permute_pooled_embs_cpu_implRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKb", "permute_pooled_embs_cpu_impl"], [9, 3, 1, "_CPPv428permute_pooled_embs_cpu_implRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKb", "permute_pooled_embs_cpu_impl::allow_duplicates"], [9, 3, 1, "_CPPv428permute_pooled_embs_cpu_implRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKb", "permute_pooled_embs_cpu_impl::inv_offset_dim_list"], [9, 3, 1, "_CPPv428permute_pooled_embs_cpu_implRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKb", "permute_pooled_embs_cpu_impl::inv_permute_list"], [9, 3, 1, "_CPPv428permute_pooled_embs_cpu_implRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKb", "permute_pooled_embs_cpu_impl::offset_dim_list"], [9, 3, 1, "_CPPv428permute_pooled_embs_cpu_implRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKb", "permute_pooled_embs_cpu_impl::permute_list"], [9, 3, 1, "_CPPv428permute_pooled_embs_cpu_implRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKb", "permute_pooled_embs_cpu_impl::pooled_embs"], [9, 1, 1, "_CPPv429permute_pooled_embs_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_cpu"], [9, 3, 1, "_CPPv429permute_pooled_embs_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_cpu::inv_offset_dim_list"], [9, 3, 1, "_CPPv429permute_pooled_embs_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_cpu::inv_permute_list"], [9, 3, 1, "_CPPv429permute_pooled_embs_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_cpu::offset_dim_list"], [9, 3, 1, "_CPPv429permute_pooled_embs_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_cpu::permute_list"], [9, 3, 1, "_CPPv429permute_pooled_embs_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_cpu::pooled_embs"], [9, 1, 1, "_CPPv429permute_pooled_embs_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_gpu"], [9, 3, 1, "_CPPv429permute_pooled_embs_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_gpu::inv_offset_dim_list"], [9, 3, 1, "_CPPv429permute_pooled_embs_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_gpu::inv_permute_list"], [9, 3, 1, "_CPPv429permute_pooled_embs_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_gpu::offset_dim_list"], [9, 3, 1, "_CPPv429permute_pooled_embs_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_gpu::permute_list"], [9, 3, 1, "_CPPv429permute_pooled_embs_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_gpu::pooled_embs"], [3, 1, 1, "_CPPv423pruned_array_lookup_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cpu"], [3, 3, 1, "_CPPv423pruned_array_lookup_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cpu::index_remappings"], [3, 3, 1, "_CPPv423pruned_array_lookup_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cpu::index_remappings_offsets"], [3, 3, 1, "_CPPv423pruned_array_lookup_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cpu::indices"], [3, 3, 1, "_CPPv423pruned_array_lookup_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cpu::offsets"], [3, 1, 1, "_CPPv424pruned_array_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cuda"], [3, 3, 1, "_CPPv424pruned_array_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cuda::index_remappings"], [3, 3, 1, "_CPPv424pruned_array_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cuda::index_remappings_offsets"], [3, 3, 1, "_CPPv424pruned_array_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cuda::indices"], [3, 3, 1, "_CPPv424pruned_array_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cuda::offsets"], [3, 1, 1, "_CPPv436pruned_hashmap_insert_unweighted_cpu6Tensor6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_insert_unweighted_cpu"], [3, 3, 1, "_CPPv436pruned_hashmap_insert_unweighted_cpu6Tensor6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_insert_unweighted_cpu::dense_indices"], [3, 3, 1, "_CPPv436pruned_hashmap_insert_unweighted_cpu6Tensor6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_insert_unweighted_cpu::hash_table"], [3, 3, 1, "_CPPv436pruned_hashmap_insert_unweighted_cpu6Tensor6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_insert_unweighted_cpu::hash_table_offsets"], [3, 3, 1, "_CPPv436pruned_hashmap_insert_unweighted_cpu6Tensor6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_insert_unweighted_cpu::indices"], [3, 3, 1, "_CPPv436pruned_hashmap_insert_unweighted_cpu6Tensor6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_insert_unweighted_cpu::offsets"], [3, 1, 1, "_CPPv426pruned_hashmap_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_cuda"], [3, 3, 1, "_CPPv426pruned_hashmap_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_cuda::hash_table"], [3, 3, 1, "_CPPv426pruned_hashmap_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_cuda::hash_table_offsets"], [3, 3, 1, "_CPPv426pruned_hashmap_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_cuda::indices"], [3, 3, 1, "_CPPv426pruned_hashmap_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_cuda::offsets"], [3, 1, 1, "_CPPv436pruned_hashmap_lookup_unweighted_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_unweighted_cpu"], [3, 3, 1, "_CPPv436pruned_hashmap_lookup_unweighted_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_unweighted_cpu::hash_table"], [3, 3, 1, "_CPPv436pruned_hashmap_lookup_unweighted_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_unweighted_cpu::hash_table_offsets"], [3, 3, 1, "_CPPv436pruned_hashmap_lookup_unweighted_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_unweighted_cpu::indices"], [3, 3, 1, "_CPPv436pruned_hashmap_lookup_unweighted_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_unweighted_cpu::offsets"], [13, 5, 1, "_CPPv4N2ps24EmbeddingParameterServerE", "ps::EmbeddingParameterServer"], [7, 1, 1, "_CPPv432recat_embedding_grad_output_cuda6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_cuda"], [7, 3, 1, "_CPPv432recat_embedding_grad_output_cuda6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_cuda::grad_output"], [7, 3, 1, "_CPPv432recat_embedding_grad_output_cuda6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_cuda::num_features_per_rank"], [7, 1, 1, "_CPPv446recat_embedding_grad_output_mixed_D_batch_cudaRK6TensorRK6TensorRK6Tensor", "recat_embedding_grad_output_mixed_D_batch_cuda"], [7, 3, 1, "_CPPv446recat_embedding_grad_output_mixed_D_batch_cudaRK6TensorRK6TensorRK6Tensor", "recat_embedding_grad_output_mixed_D_batch_cuda::cumsum_dim_sum_per_rank"], [7, 3, 1, "_CPPv446recat_embedding_grad_output_mixed_D_batch_cudaRK6TensorRK6TensorRK6Tensor", "recat_embedding_grad_output_mixed_D_batch_cuda::dim_sum_per_rank"], [7, 3, 1, "_CPPv446recat_embedding_grad_output_mixed_D_batch_cudaRK6TensorRK6TensorRK6Tensor", "recat_embedding_grad_output_mixed_D_batch_cuda::grad_output"], [7, 1, 1, "_CPPv439recat_embedding_grad_output_mixed_D_cpuRK6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_mixed_D_cpu"], [7, 3, 1, "_CPPv439recat_embedding_grad_output_mixed_D_cpuRK6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_mixed_D_cpu::dim_sum_per_rank"], [7, 3, 1, "_CPPv439recat_embedding_grad_output_mixed_D_cpuRK6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_mixed_D_cpu::grad_output"], [7, 1, 1, "_CPPv440recat_embedding_grad_output_mixed_D_cudaRK6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_mixed_D_cuda"], [7, 3, 1, "_CPPv440recat_embedding_grad_output_mixed_D_cudaRK6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_mixed_D_cuda::dim_sum_per_rank"], [7, 3, 1, "_CPPv440recat_embedding_grad_output_mixed_D_cudaRK6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_mixed_D_cuda::grad_output"], [0, 1, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::A_SYMMETRIC"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::BIAS_TYPE"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::B_SYMMETRIC"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::DIRECT"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::FUSE_RELU"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::HAS_BIAS"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::Q_GRAN"], [0, 3, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::block"], [0, 3, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::inp"], [0, 3, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::ld_in"], [0, 3, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::ld_out"], [0, 3, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::out"], [0, 3, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::r"], [0, 1, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::A_SYMMETRIC"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::BIAS_TYPE"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::B_SYMMETRIC"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::C_PER_G"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::FUSE_RELU"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::HAS_BIAS"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::Q_GRAN"], [0, 3, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::block"], [0, 3, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::inp"], [0, 3, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::ld_in"], [0, 3, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::ld_out"], [0, 3, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::out"], [0, 3, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::r"], [12, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda"], [12, 3, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::D_offsets"], [12, 3, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::buffer_ids"], [12, 3, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::cache_hash_size_cumsum"], [12, 3, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::dev_weights"], [12, 3, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::logical_table_ids"], [12, 3, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::lxu_cache_state"], [12, 3, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::lxu_cache_weights"], [12, 3, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::momentum1_dev"], [12, 3, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::momentum1_offsets"], [12, 3, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::momentum1_placements"], [12, 3, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::momentum1_uvm"], [12, 3, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::pruned_indices"], [12, 3, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::pruned_indices_offsets"], [12, 3, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::total_cache_hash_size"], [12, 3, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::uvm_weights"], [12, 3, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::weights_offsets"], [12, 3, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::weights_placements"], [13, 5, 1, "_CPPv4N3ssd16EmbeddingRocksDBE", "ssd::EmbeddingRocksDB"], [13, 1, 1, "_CPPv427ssd_generate_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "ssd_generate_row_addrs_cuda"], [13, 3, 1, "_CPPv427ssd_generate_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "ssd_generate_row_addrs_cuda::assigned_cache_slots"], [13, 3, 1, "_CPPv427ssd_generate_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "ssd_generate_row_addrs_cuda::cache_set_inverse_indices"], [13, 3, 1, "_CPPv427ssd_generate_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "ssd_generate_row_addrs_cuda::cache_set_sorted_unique_indices"], [13, 3, 1, "_CPPv427ssd_generate_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "ssd_generate_row_addrs_cuda::inserted_ssd_weights"], [13, 3, 1, "_CPPv427ssd_generate_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "ssd_generate_row_addrs_cuda::linear_index_inverse_indices"], [13, 3, 1, "_CPPv427ssd_generate_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "ssd_generate_row_addrs_cuda::lxu_cache_locations"], [13, 3, 1, "_CPPv427ssd_generate_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "ssd_generate_row_addrs_cuda::lxu_cache_weights"], [13, 3, 1, "_CPPv427ssd_generate_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "ssd_generate_row_addrs_cuda::unique_indices_count_cumsum"], [13, 3, 1, "_CPPv427ssd_generate_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "ssd_generate_row_addrs_cuda::unique_indices_length"], [13, 1, 1, "_CPPv425ssd_update_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "ssd_update_row_addrs_cuda"], [13, 3, 1, "_CPPv425ssd_update_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "ssd_update_row_addrs_cuda::cache_set_inverse_indices_curr"], [13, 3, 1, "_CPPv425ssd_update_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "ssd_update_row_addrs_cuda::inserted_ssd_weights_curr_next_map"], [13, 3, 1, "_CPPv425ssd_update_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "ssd_update_row_addrs_cuda::inserted_ssd_weights_next"], [13, 3, 1, "_CPPv425ssd_update_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "ssd_update_row_addrs_cuda::linear_index_inverse_indices_curr"], [13, 3, 1, "_CPPv425ssd_update_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "ssd_update_row_addrs_cuda::lxu_cache_locations_curr"], [13, 3, 1, "_CPPv425ssd_update_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "ssd_update_row_addrs_cuda::lxu_cache_weights"], [13, 3, 1, "_CPPv425ssd_update_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "ssd_update_row_addrs_cuda::ssd_row_addrs_curr"], [13, 3, 1, "_CPPv425ssd_update_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "ssd_update_row_addrs_cuda::unique_indices_count_cumsum_curr"], [13, 3, 1, "_CPPv425ssd_update_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "ssd_update_row_addrs_cuda::unique_indices_length_curr"], [5, 1, 1, "_CPPv421tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE", "tbe_input_combine_cpu"], [5, 3, 1, "_CPPv421tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE", "tbe_input_combine_cpu::include_last_offsets"], [5, 3, 1, "_CPPv421tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE", "tbe_input_combine_cpu::indices_list"], [5, 3, 1, "_CPPv421tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE", "tbe_input_combine_cpu::offsets_list"], [5, 3, 1, "_CPPv421tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE", "tbe_input_combine_cpu::per_sample_weights"], [8, 1, 1, "_CPPv419uvm_cuda_mem_adviseRK6Tensor7int64_t", "uvm_cuda_mem_advise"], [8, 3, 1, "_CPPv419uvm_cuda_mem_adviseRK6Tensor7int64_t", "uvm_cuda_mem_advise::cuda_memory_advise"], [8, 3, 1, "_CPPv419uvm_cuda_mem_adviseRK6Tensor7int64_t", "uvm_cuda_mem_advise::self"], [8, 1, 1, "_CPPv427uvm_cuda_mem_prefetch_asyncRK6TensorNSt8optionalI6TensorEE", "uvm_cuda_mem_prefetch_async"], [8, 3, 1, "_CPPv427uvm_cuda_mem_prefetch_asyncRK6TensorNSt8optionalI6TensorEE", "uvm_cuda_mem_prefetch_async::device_t"], [8, 3, 1, "_CPPv427uvm_cuda_mem_prefetch_asyncRK6TensorNSt8optionalI6TensorEE", "uvm_cuda_mem_prefetch_async::self"], [8, 1, 1, "_CPPv424uvm_mem_advice_dont_forkRK6Tensor", "uvm_mem_advice_dont_fork"], [8, 3, 1, "_CPPv424uvm_mem_advice_dont_forkRK6Tensor", "uvm_mem_advice_dont_fork::self"], [8, 1, 1, "_CPPv411uvm_storageRK6Tensor", "uvm_storage"], [8, 3, 1, "_CPPv411uvm_storageRK6Tensor", "uvm_storage::self"], [8, 1, 1, "_CPPv410uvm_to_cpuRK6Tensor", "uvm_to_cpu"], [8, 3, 1, "_CPPv410uvm_to_cpuRK6Tensor", "uvm_to_cpu::self"], [8, 1, 1, "_CPPv416uvm_to_cpu_cloneRK6Tensor", "uvm_to_cpu_clone"], [8, 3, 1, "_CPPv416uvm_to_cpu_cloneRK6Tensor", "uvm_to_cpu_clone::self"], [8, 1, 1, "_CPPv413uvm_to_deviceRK6TensorRK6Tensor", "uvm_to_device"], [8, 3, 1, "_CPPv413uvm_to_deviceRK6TensorRK6Tensor", "uvm_to_device::prototype"], [8, 3, 1, "_CPPv413uvm_to_deviceRK6TensorRK6Tensor", "uvm_to_device::self"], [22, 6, 0, "-", "fbgemm_gpu"]], "fbgemm_gpu.docs.examples": [[29, 7, 1, "", "example_method"]], "fbgemm_gpu.permute_pooled_embedding_modules": [[19, 8, 1, "", "PermutePooledEmbeddings"]], "fbgemm_gpu.permute_pooled_embedding_modules.PermutePooledEmbeddings": [[19, 9, 1, "", "__call__"]], "fbgemm_gpu.split_table_batched_embeddings_ops_training": [[23, 8, 1, "", "SplitTableBatchedEmbeddingBagsCodegen"]], "fbgemm_gpu.split_table_batched_embeddings_ops_training.SplitTableBatchedEmbeddingBagsCodegen": [[23, 9, 1, "", "forward"], [23, 9, 1, "", "set_learning_rate"], [23, 9, 1, "", "set_optimizer_step"], [23, 9, 1, "", "split_embedding_weights"], [23, 9, 1, "", "split_optimizer_states"], [23, 9, 1, "", "update_hyper_parameters"]], "torch.ops.fbgemm": [[21, 7, 1, "", "FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf"], [22, 7, 1, "", "asynchronous_complete_cumsum"], [18, 7, 1, "", "batched_dense_vec_jagged_2d_mul"], [18, 7, 1, "", "dense_to_jagged"], [22, 7, 1, "", "expand_into_jagged_permute"], [18, 7, 1, "", "jagged_1d_to_dense"], [18, 7, 1, "", "jagged_2d_to_dense"], [18, 7, 1, "", "jagged_dense_dense_elementwise_add_jagged_output"], [18, 7, 1, "", "jagged_dense_elementwise_add"], [18, 7, 1, "", "jagged_dense_elementwise_add_jagged_output"], [18, 7, 1, "", "jagged_dense_elementwise_mul"], [18, 7, 1, "", "jagged_to_padded_dense"], [20, 7, 1, "", "merge_pooled_embeddings"], [22, 7, 1, "", "offsets_range"], [22, 7, 1, "", "permute_1D_sparse_data"], [22, 7, 1, "", "permute_2D_sparse_data"], [20, 7, 1, "", "permute_pooled_embs"], [18, 7, 1, "", "stacked_jagged_1d_to_dense"], [18, 7, 1, "", "stacked_jagged_2d_to_dense"]]}, "objtypes": {"0": "cpp:enumerator", "1": "cpp:function", "2": "cpp:templateParam", "3": "cpp:functionParam", "4": "cpp:enum", "5": "cpp:class", "6": "py:module", "7": "py:function", "8": "py:class", "9": "py:method"}, "objnames": {"0": ["cpp", "enumerator", "C++ enumerator"], "1": ["cpp", "function", "C++ function"], "2": ["cpp", "templateParam", "C++ template parameter"], "3": ["cpp", "functionParam", "C++ function parameter"], "4": ["cpp", "enum", "C++ enum"], "5": ["cpp", "class", "C++ class"], "6": ["py", "module", "Python module"], "7": ["py", "function", "Python function"], "8": ["py", "class", "Python class"], "9": ["py", "method", "Python method"]}, "titleterms": {"quantiz": [0, 10, 21], "util": 0, "refer": [0, 30], "implement": [0, 1], "method": [0, 1], "avx": 0, "2": 0, "512": 0, "tbe": [1, 23], "cpu": [1, 3, 6, 7, 10, 11, 14, 15], "autovector": 1, "fp8": 1, "16": 1, "32": 1, "autovec": 1, "build": [2, 14, 28], "instruct": [2, 14, 15, 16], "fbgemm": [2, 31], "requir": 2, "hardwar": 2, "softwar": 2, "depend": 2, "asmjit": 2, "cpuinfo": 2, "googletest": 2, "set": [2, 14, 15, 28], "up": [2, 14, 15, 28], "an": [2, 14], "isol": [2, 14], "environ": [2, 14, 15, 16, 28], "instal": [2, 14, 15], "tool": [2, 14], "c": [2, 14, 27, 31], "compil": [2, 14], "other": [2, 14, 30], "librari": [2, 15], "prepar": [2, 14], "linux": 2, "maco": 2, "cmake": 2, "gcc": [2, 14], "issu": [2, 25], "12": 2, "clang": [2, 14], "bazel": 2, "window": 2, "embed": [3, 9, 12, 13, 19, 20, 23], "oper": [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 17, 18, 20, 21, 22, 31], "cuda": [3, 6, 7, 8, 10, 11, 13, 14, 15, 16], "experiment": 4, "attent": 4, "combin": [5, 17], "input": 5, "jag": [6, 17, 18], "tensor": [6, 17, 18], "layout": 7, "transform": 7, "memori": 8, "pool": [9, 19, 20], "merg": 9, "permut": 9, "spars": [11, 22], "data": 11, "tabl": [12, 23], "batch": [12, 23], "ssd": 13, "miniconda": 14, "conda": [14, 15], "onli": [14, 15], "genai": 14, "docker": [14, 15], "imag": 14, "cudnn": 14, "cutlass": 14, "rocm": [14, 15, 16], "miopen": 14, "symlink": 14, "pytorch": [14, 15], "through": [14, 15], "pip": [14, 15], "post": [14, 15], "check": [14, 15], "triton": [14, 15], "pre": 14, "setup": [14, 16], "The": 14, "process": 14, "wheel": 14, "variabl": 14, "For": 14, "develop": [14, 31], "undefin": [14, 15], "symbol": [14, 15], "glibc": 14, "version": 14, "compat": 14, "nvidia": 15, "driver": 15, "contain": 15, "runtim": 15, "amdgpu": 15, "python": [15, 29, 31], "fbgemm_gpu": [15, 16, 28, 31], "packag": 15, "public": 15, "pypi": 15, "test": 16, "run": 16, "variant": 16, "benchmark": 16, "high": 17, "level": 17, "overview": [17, 31], "format": 17, "valu": 17, "offset": 17, "max": 17, "length": 17, "exampl": 17, "arithmet": 17, "convers": 17, "dens": 17, "modul": [19, 23, 31], "train": 23, "contact": 24, "u": 24, "github": 24, "slack": 24, "contribut": 25, "code": [25, 27, 29, 30], "conduct": 25, "pull": 25, "request": 25, "contributor": 25, "licens": [25, 26], "agreement": 25, "cla": 25, "ad": [27, 29, 30], "document": [27, 28, 29, 30, 31], "gener": [28, 29, 31], "guidelin": 28, "specif": 28, "guid": 28, "toolchain": 28, "lint": 28, "deploy": 28, "preview": 28, "todo": 29, "auto": 29, "sphinx": 30, "pointer": 30, "section": 30, "referenc": 30, "sourc": 30, "latex": 30, "graph": 30, "homepag": 31, "info": 31, "api": 31}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "sphinx.ext.todo": 2, "sphinx.ext.viewcode": 1, "sphinx": 57}, "alltitles": {"Adding Documentation to Python Code": [[29, "adding-documentation-to-python-code"]], "Todo": [[29, "id1"]], "Adding Documentation to Auto-Generated Python Code": [[29, "adding-documentation-to-auto-generated-python-code"]], "Sphinx Documentation Pointers": [[30, "sphinx-documentation-pointers"]], "References Other Sections of the Documentation": [[30, "references-other-sections-of-the-documentation"]], "Referencing the Source Code": [[30, "referencing-the-source-code"]], "Adding LaTeX": [[30, "adding-latex"]], "Adding Graphs": [[30, "adding-graphs"]], "Documentation": [[28, "documentation"]], "General Documentation Guidelines": [[28, "general-documentation-guidelines"]], "Specific Documentation Guides": [[28, "specific-documentation-guides"]], "Building the Documentation": [[28, "building-the-documentation"]], "Set Up Build Environment": [[28, "set-up-build-environment"]], "Build FBGEMM_GPU": [[28, "build-fbgemm-gpu"]], "Set Up the Documentation Toolchain": [[28, "set-up-the-documentation-toolchain"]], "Build the Documentation": [[28, "build-the-documentation"]], "Linting the Documentation": [[28, "linting-the-documentation"]], "Deployment Preview": [[28, "deployment-preview"]], "FBGEMM and FBGEMM_GPU Documentation Homepage": [[31, "fbgemm-and-fbgemm-gpu-documentation-homepage"]], "General Info": [[31, null]], "FBGEMM Development": [[31, null]], "FBGEMM_GPU Development": [[31, null]], "FBGEMM_GPU Overview": [[31, null]], "FBGEMM C++ API": [[31, null]], "FBGEMM_GPU C++ API": [[31, null]], "FBGEMM_GPU Python Operators API": [[31, null]], "FBGEMM_GPU Python Modules API": [[31, null]], "Jagged Tensor Operators": [[17, "jagged-tensor-operators"], [6, "jagged-tensor-operators"], [18, "module-fbgemm_gpu"]], "High Level Overview": [[17, "high-level-overview"]], "Jagged Tensor Format": [[17, "jagged-tensor-format"]], "Values": [[17, "values"]], "Offsets": [[17, "offsets"]], "Max Lengths": [[17, "max-lengths"]], "Jagged Tensor Example": [[17, "jagged-tensor-example"]], "Jagged Tensor Operations": [[17, "jagged-tensor-operations"]], "Arithmetic Operations": [[17, "arithmetic-operations"]], "Conversion Operations": [[17, "conversion-operations"]], "Jagged to Dense": [[17, "jagged-to-dense"]], "Dense to Jagged": [[17, "dense-to-jagged"]], "Combined Arithmetic + Conversion Operations": [[17, "combined-arithmetic-conversion-operations"]], "Test Instructions": [[16, "test-instructions"]], "Setup the FBGEMM_GPU Test Environment": [[16, "setup-the-fbgemm-gpu-test-environment"]], "Running FBGEMM_GPU Tests": [[16, "running-fbgemm-gpu-tests"]], "Testing with the CUDA Variant": [[16, "testing-with-the-cuda-variant"]], "Testing with the ROCm Variant": [[16, "testing-with-the-rocm-variant"]], "Running FBGEMM_GPU Benchmarks": [[16, "running-fbgemm-gpu-benchmarks"]], "Pooled Embedding Modules": [[19, "module-fbgemm_gpu"]], "Table Batched Embedding (TBE) Training Module": [[23, "table-batched-embedding-tbe-training-module"]], "Build Instructions": [[14, "build-instructions"], [2, "build-instructions"]], "Set Up an Isolated Build Environment": [[14, "set-up-an-isolated-build-environment"], [2, "set-up-an-isolated-build-environment"]], "Install Miniconda": [[14, "install-miniconda"]], "Set Up the Conda Environment": [[14, "set-up-the-conda-environment"]], "Set Up for CPU-Only Build": [[14, "set-up-for-cpu-only-build"]], "Set Up for CUDA / GenAI-Only Build": [[14, "set-up-for-cuda-genai-only-build"]], "CUDA Docker Image": [[14, "cuda-docker-image"]], "Install CUDA": [[14, "install-cuda"]], "Install cuDNN": [[14, "install-cudnn"]], "Install CUTLASS": [[14, "install-cutlass"]], "Set Up for ROCm Build": [[14, "set-up-for-rocm-build"]], "ROCm Docker Image": [[14, "rocm-docker-image"]], "Install ROCm": [[14, "install-rocm"]], "Install MIOpen": [[14, "install-miopen"]], "Install the Build Tools": [[14, "install-the-build-tools"], [2, "install-the-build-tools"]], "C/C++ Compiler (GCC)": [[14, "c-c-compiler-gcc"]], "C/C++ Compiler (Clang)": [[14, "c-c-compiler-clang"]], "Compiler Symlinks": [[14, "compiler-symlinks"]], "Other Build Tools": [[14, "other-build-tools"], [2, "other-build-tools"]], "Install PyTorch": [[14, "install-pytorch"], [15, "install-pytorch"]], "Installation Through Conda": [[14, "installation-through-conda"]], "Installation Through PyTorch PIP": [[14, "installation-through-pytorch-pip"]], "Post-Install Checks": [[14, "post-install-checks"]], "Install PyTorch-Triton": [[14, "install-pytorch-triton"]], "Other Pre-Build Setup": [[14, "other-pre-build-setup"]], "Preparing the Build": [[14, "preparing-the-build"], [2, "preparing-the-build"]], "The Build Process": [[14, "the-build-process"]], "Set Wheel Build Variables": [[14, "set-wheel-build-variables"]], "CPU-Only Build": [[14, "cpu-only-build"]], "CUDA Build": [[14, "cuda-build"]], "GenAI-Only Build": [[14, "genai-only-build"]], "ROCm Build": [[14, "rocm-build"]], "Post-Build Checks (For Developers)": [[14, "post-build-checks-for-developers"]], "Undefined Symbols Check": [[14, "undefined-symbols-check"]], "GLIBC Version Compatibility Check": [[14, "glibc-version-compatibility-check"]], "Installation Instructions": [[15, "installation-instructions"]], "Set Up CPU-Only Environment": [[15, "set-up-cpu-only-environment"]], "Set Up CUDA Environment": [[15, "set-up-cuda-environment"]], "Install NVIDIA Drivers": [[15, "install-nvidia-drivers"]], "Set Up the CUDA Docker Container and Conda Environment": [[15, "set-up-the-cuda-docker-container-and-conda-environment"]], "Install the CUDA Runtime": [[15, "install-the-cuda-runtime"]], "Set Up ROCm Environment": [[15, "set-up-rocm-environment"]], "Install AMDGPU Drivers": [[15, "install-amdgpu-drivers"]], "Set Up the ROCm Docker Container and Conda Environment": [[15, "set-up-the-rocm-docker-container-and-conda-environment"]], "Install Python Libraries": [[15, "install-python-libraries"]], "Install Triton": [[15, "install-triton"]], "Install the FBGEMM_GPU Package": [[15, "install-the-fbgemm-gpu-package"]], "Install through PyTorch PIP": [[15, "install-through-pytorch-pip"]], "Install through Public PyPI": [[15, "install-through-public-pypi"]], "Post-Installation Checks": [[15, "post-installation-checks"]], "Undefined Symbols": [[15, "undefined-symbols"]], "SSD Embedding Operators": [[13, "ssd-embedding-operators"]], "CUDA Operators": [[13, "cuda-operators"], [7, "cuda-operators"], [6, "cuda-operators"], [11, "cuda-operators"], [10, "cuda-operators"], [3, "cuda-operators"]], "Table Batched Embedding Operators": [[12, "table-batched-embedding-operators"]], "Combine Input Operators": [[5, "combine-input-operators"]], "Layout Transformation Operators": [[7, "layout-transformation-operators"]], "CPU Operators": [[7, "cpu-operators"], [6, "cpu-operators"], [11, "cpu-operators"], [10, "cpu-operators"], [3, "cpu-operators"]], "Experimental Operators": [[4, "experimental-operators"]], "Attention Operators": [[4, "attention-operators"]], "Sparse Data Operators": [[11, "sparse-data-operators"]], "Pooled Embeddings Operators": [[9, "pooled-embeddings-operators"]], "Merge Operators": [[9, "merge-operators"]], "Permutation Operators": [[9, "permutation-operators"]], "CUDA Memory Operators": [[8, "cuda-memory-operators"]], "Quantization Operators": [[10, "quantization-operators"], [21, "module-fbgemm_gpu"]], "Contact Us": [[24, "contact-us"]], "GitHub": [[24, "github"]], "Slack": [[24, "slack"]], "Contributing": [[25, "contributing"]], "Code of Conduct": [[25, "code-of-conduct"]], "Pull Requests": [[25, "pull-requests"]], "Contributor License Agreement (\u201cCLA\u201d)": [[25, "contributor-license-agreement-cla"]], "Issues": [[25, "issues"]], "License": [[25, "license"], [26, "license"]], "Adding Documentation to C++ Code": [[27, "adding-documentation-to-c-code"]], "TBE CPU Autovectorization": [[1, "tbe-cpu-autovectorization"]], "FP8/16/32 Autovec Implementation Methods": [[1, "fp8-16-32-autovec-implementation-methods"]], "Quantization Utilities": [[0, "quantization-utilities"]], "Reference Implementation Methods": [[0, "reference-implementation-methods"]], "AVX-2 Implementation Methods": [[0, "avx-2-implementation-methods"]], "AVX-512 Implementation Methods": [[0, "avx-512-implementation-methods"]], "FBGEMM Requirements": [[2, "fbgemm-requirements"]], "Hardware Requirements": [[2, "hardware-requirements"]], "Software Dependencies": [[2, "software-dependencies"]], "asmjit": [[2, "asmjit"]], "cpuinfo": [[2, "cpuinfo"]], "GoogleTest": [[2, "googletest"]], "C/C++ Compiler": [[2, "c-c-compiler"]], "Build the FBGEMM Library": [[2, "build-the-fbgemm-library"]], "Building on Linux and macOS (CMake + GCC)": [[2, "building-on-linux-and-macos-cmake-gcc"]], "Build Issues with GCC 12+": [[2, "build-issues-with-gcc-12"]], "Building on Linux and macOS (CMake + Clang)": [[2, "building-on-linux-and-macos-cmake-clang"]], "Building on Linux (Bazel)": [[2, "building-on-linux-bazel"]], "Building on Windows": [[2, "building-on-windows"]], "Embedding Operators": [[3, "embedding-operators"]], "Pooled Embedding Operators": [[20, "module-fbgemm_gpu"]], "Sparse Operators": [[22, "module-fbgemm_gpu"]]}, "indexentries": {"findminmax (c++ function)": [[0, "_CPPv410FindMinMaxPKfPfPf7int64_t"]], "floatorhalftofusednbitrowwisequantizedsbhalf (c++ function)": [[0, "_CPPv4I0E44FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfviPK9InputType6size_tiPNSt7uint8_tE"]], "fusedquantizedequantize (c++ function)": [[0, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif"]], "quantizegroupwise (c++ function)": [[0, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T"]], "xor128 (c++ function)": [[0, "_CPPv46Xor128v"]], "requantizeoutputprocessingavx2 (c++ function)": [[0, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE"]], "requantizeoutputprocessinggconvavx512 (c++ function)": [[0, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE"]], "embeddingspmdmfp8_autovec (c++ function)": [[1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib"]], "embeddingspmdm_autovec (c++ function)": [[1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb"]], "bounds_check_indices_cuda (c++ function)": [[3, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKNSt8optionalI6TensorEERKNSt8optionalI6TensorEEK7int64_t"]], "int_nbit_split_embedding_codegen_lookup_function (c++ function)": [[3, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE"]], "int_nbit_split_embedding_codegen_lookup_function_cpu (c++ function)": [[3, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE"]], "int_nbit_split_embedding_uvm_caching_codegen_lookup_function (c++ function)": [[3, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE"]], "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu (c++ function)": [[3, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE"]], "pruned_array_lookup_cpu (c++ function)": [[3, "_CPPv423pruned_array_lookup_cpu6Tensor6Tensor6Tensor6Tensor"]], "pruned_array_lookup_cuda (c++ function)": [[3, "_CPPv424pruned_array_lookup_cuda6Tensor6Tensor6Tensor6Tensor"]], "pruned_hashmap_insert_unweighted_cpu (c++ function)": [[3, "_CPPv436pruned_hashmap_insert_unweighted_cpu6Tensor6Tensor6Tensor6Tensor6Tensor"]], "pruned_hashmap_lookup_cuda (c++ function)": [[3, "_CPPv426pruned_hashmap_lookup_cuda6Tensor6Tensor6Tensor6Tensor"]], "pruned_hashmap_lookup_unweighted_cpu (c++ function)": [[3, "_CPPv436pruned_hashmap_lookup_unweighted_cpu6Tensor6Tensor6Tensor6Tensor"]], "gqa_attn_splitk (c++ function)": [[4, "_CPPv415gqa_attn_splitkRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorEKdK7int64_tK7int64_tKbK7int64_t"]], "padding_fused_tbe_input_combine_cpu (c++ function)": [[5, "_CPPv435padding_fused_tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE7int64_t"]], "tbe_input_combine_cpu (c++ function)": [[5, "_CPPv421tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE"]], "batched_dense_vec_jagged_2d_mul (c++ function)": [[6, "_CPPv431batched_dense_vec_jagged_2d_mulRK6TensorRK6TensorRK6Tensor"]], "dense_to_jagged (c++ function)": [[6, "_CPPv415dense_to_jaggedRK6TensorRKNSt6vectorI6TensorEENSt8optionalIN2at6SymIntEEE"]], "jagged_1d_to_dense (c++ function)": [[6, "_CPPv418jagged_1d_to_dense6Tensor6TensorN3c106SymIntE7int64_t"]], "jagged_2d_to_dense (c++ function)": [[6, "_CPPv418jagged_2d_to_dense6Tensor6TensorN3c106SymIntE"]], "jagged_dense_elementwise_add (c++ function)": [[6, "_CPPv428jagged_dense_elementwise_addRK6TensorRKNSt6vectorI6TensorEERK6Tensor"]], "jagged_dense_elementwise_add_jagged_output (c++ function)": [[6, "_CPPv442jagged_dense_elementwise_add_jagged_outputRK6TensorRKNSt6vectorI6TensorEERK6Tensor"]], "jagged_dense_elementwise_add_jagged_output_cuda (c++ function)": [[6, "_CPPv447jagged_dense_elementwise_add_jagged_output_cudaRK6TensorRKNSt6vectorI6TensorEERK6Tensor"]], "jagged_dense_elementwise_mul (c++ function)": [[6, "_CPPv428jagged_dense_elementwise_mulRK6TensorRKNSt6vectorI6TensorEERK6Tensor"]], "jagged_to_padded_dense (c++ function)": [[6, "_CPPv422jagged_to_padded_denseRK6TensorRKNSt6vectorI6TensorEEKN3c1014SymIntArrayRefEKd"]], "jagged_to_padded_dense_forward (c++ function)": [[6, "_CPPv430jagged_to_padded_dense_forwardRK6TensorRKNSt6vectorI6TensorEEN3c1014SymIntArrayRefEKd"]], "recat_embedding_grad_output_cuda (c++ function)": [[7, "_CPPv432recat_embedding_grad_output_cuda6TensorRKNSt6vectorI7int64_tEE"]], "recat_embedding_grad_output_mixed_d_batch_cuda (c++ function)": [[7, "_CPPv446recat_embedding_grad_output_mixed_D_batch_cudaRK6TensorRK6TensorRK6Tensor"]], "recat_embedding_grad_output_mixed_d_cpu (c++ function)": [[7, "_CPPv439recat_embedding_grad_output_mixed_D_cpuRK6TensorRKNSt6vectorI7int64_tEE"]], "recat_embedding_grad_output_mixed_d_cuda (c++ function)": [[7, "_CPPv440recat_embedding_grad_output_mixed_D_cudaRK6TensorRKNSt6vectorI7int64_tEE"]], "is_uvm_tensor (c++ function)": [[8, "_CPPv413is_uvm_tensorRK6Tensor"]], "new_host_mapped_tensor (c++ function)": [[8, "_CPPv422new_host_mapped_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE"]], "new_managed_tensor (c++ function)": [[8, "_CPPv418new_managed_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE"]], "new_managed_tensor_meta (c++ function)": [[8, "_CPPv423new_managed_tensor_metaRK6TensorRKNSt6vectorINSt7int64_tEEE"]], "new_unified_tensor (c++ function)": [[8, "_CPPv418new_unified_tensorRK6TensorRKNSt6vectorINSt7int64_tEEEb"]], "new_unified_tensor_meta (c++ function)": [[8, "_CPPv423new_unified_tensor_metaRK6TensorRKNSt6vectorINSt7int64_tEEEb"]], "new_vanilla_managed_tensor (c++ function)": [[8, "_CPPv426new_vanilla_managed_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE"]], "uvm_cuda_mem_advise (c++ function)": [[8, "_CPPv419uvm_cuda_mem_adviseRK6Tensor7int64_t"]], "uvm_cuda_mem_prefetch_async (c++ function)": [[8, "_CPPv427uvm_cuda_mem_prefetch_asyncRK6TensorNSt8optionalI6TensorEE"]], "uvm_mem_advice_dont_fork (c++ function)": [[8, "_CPPv424uvm_mem_advice_dont_forkRK6Tensor"]], "uvm_storage (c++ function)": [[8, "_CPPv411uvm_storageRK6Tensor"]], "uvm_to_cpu (c++ function)": [[8, "_CPPv410uvm_to_cpuRK6Tensor"]], "uvm_to_cpu_clone (c++ function)": [[8, "_CPPv416uvm_to_cpu_cloneRK6Tensor"]], "uvm_to_device (c++ function)": [[8, "_CPPv413uvm_to_deviceRK6TensorRK6Tensor"]], "all_to_one_device (c++ function)": [[9, "_CPPv417all_to_one_deviceNSt6vectorIN2at6TensorEEEN2at6DeviceE"]], "permute_pooled_embs_auto_grad (c++ function)": [[9, "_CPPv429permute_pooled_embs_auto_gradRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor"]], "permute_pooled_embs_auto_grad_cpu (c++ function)": [[9, "_CPPv433permute_pooled_embs_auto_grad_cpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor"]], "permute_pooled_embs_auto_grad_gpu (c++ function)": [[9, "_CPPv433permute_pooled_embs_auto_grad_gpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor"]], "permute_pooled_embs_auto_grad_split_cpu (c++ function)": [[9, "_CPPv439permute_pooled_embs_auto_grad_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE"]], "permute_pooled_embs_auto_grad_split_gpu (c++ function)": [[9, "_CPPv439permute_pooled_embs_auto_grad_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE"]], "permute_pooled_embs_cpu_impl (c++ function)": [[9, "_CPPv428permute_pooled_embs_cpu_implRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKb"]], "permute_pooled_embs_split_cpu (c++ function)": [[9, "_CPPv429permute_pooled_embs_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE"]], "permute_pooled_embs_split_gpu (c++ function)": [[9, "_CPPv429permute_pooled_embs_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE"]], "fp8quantizedtofloat_ref (c++ function)": [[10, "_CPPv423FP8QuantizedToFloat_refPCK7uint8_tK6size_tK6size_tPCfKiKi"]], "fp8rowwise_to_float_cpu (c++ function)": [[10, "_CPPv423FP8rowwise_to_float_cpuRK6TensorbK7int64_t"]], "floattofp8quantized_ref (c++ function)": [[10, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd"]], "_fp8rowwise_to_float_gpu (c++ function)": [[10, "_CPPv424_FP8rowwise_to_float_gpuRKN2at6TensorEbK7int64_t"]], "_bfloat16_to_float_gpu (c++ function)": [[10, "_CPPv422_bfloat16_to_float_gpuRKN2at6TensorE"]], "_float_to_fp8rowwise_gpu (c++ function)": [[10, "_CPPv424_float_to_FP8rowwise_gpuRK6TensorKb"]], "_float_to_bfloat16_gpu (c++ function)": [[10, "_CPPv422_float_to_bfloat16_gpuRKN2at6TensorE"]], "_float_to_fused8bitrowwise_cpu_out (c++ function)": [[10, "_CPPv434_float_to_fused8bitrowwise_cpu_outR6TensorRK6Tensor"]], "_float_to_fused8bitrowwise_gpu (c++ function)": [[10, "_CPPv430_float_to_fused8bitrowwise_gpuRK6Tensor"]], "_float_to_fusednbitrowwise_gpu (c++ function)": [[10, "_CPPv430_float_to_fusednbitrowwise_gpuRK6TensorK7int64_t"]], "_float_to_hfp8_gpu (c++ function)": [[10, "_CPPv418_float_to_hfp8_gpuRKN2at6TensorEK7int64_tK7int64_tKd"]], "_float_to_msfp_gpu (c++ function)": [[10, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd"]], "_float_to_paddedfp8rowwise_gpu (c++ function)": [[10, "_CPPv430_float_to_paddedFP8rowwise_gpuRK6TensorKbK7int64_t"]], "_fused8bitrowwise_to_float_cpu_out (c++ function)": [[10, "_CPPv434_fused8bitrowwise_to_float_cpu_outR6TensorRK6Tensor"]], "_fused8bitrowwise_to_float_gpu (c++ function)": [[10, "_CPPv430_fused8bitrowwise_to_float_gpuRKN2at6TensorE"]], "_fused8bitrowwise_to_float_mixed_dim_gpu (c++ function)": [[10, "_CPPv440_fused8bitrowwise_to_float_mixed_dim_gpuRKN2at6TensorERKN2at6TensorEK7int64_t"]], "_fused8bitrowwise_to_half_gpu (c++ function)": [[10, "_CPPv429_fused8bitrowwise_to_half_gpuRKN2at6TensorE"]], "_fused8bitrowwise_to_single_or_half_precision_gpu (c++ function)": [[10, "_CPPv449_fused8bitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tKbKb"]], "_fusednbitrowwise_to_float_gpu (c++ function)": [[10, "_CPPv430_fusednbitrowwise_to_float_gpuRKN2at6TensorEK7int64_t"]], "_fusednbitrowwise_to_half_gpu (c++ function)": [[10, "_CPPv429_fusednbitrowwise_to_half_gpuRKN2at6TensorEK7int64_t"]], "_fusednbitrowwise_to_single_or_half_precision_gpu (c++ function)": [[10, "_CPPv449_fusednbitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tK7int64_t"]], "_half_to_fused8bitrowwise_gpu (c++ function)": [[10, "_CPPv429_half_to_fused8bitrowwise_gpuRK6Tensor"]], "_half_to_fusednbitrowwise_gpu (c++ function)": [[10, "_CPPv429_half_to_fusednbitrowwise_gpuRKN2at6TensorEK7int64_t"]], "_hfp8_to_float_gpu (c++ function)": [[10, "_CPPv418_hfp8_to_float_gpuRKN2at6TensorEK7int64_tK7int64_t"]], "_msfp_to_float_gpu (c++ function)": [[10, "_CPPv418_msfp_to_float_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_t"]], "_paddedfp8rowwise_to_float_gpu (c++ function)": [[10, "_CPPv430_paddedFP8rowwise_to_float_gpuRKN2at6TensorEKbK7int64_tK7int64_tK7int64_t"]], "_single_or_half_precision_to_fused8bitrowwise_gpu (c++ function)": [[10, "_CPPv449_single_or_half_precision_to_fused8bitrowwise_gpuRK6Tensor"]], "_single_or_half_precision_to_fusednbitrowwise_gpu (c++ function)": [[10, "_CPPv449_single_or_half_precision_to_fusednbitrowwise_gpuRK6TensorK7int64_t"]], "float_or_half_to_fused8bitrowwise_cpu (c++ function)": [[10, "_CPPv437float_or_half_to_fused8bitrowwise_cpuRK6Tensor"]], "float_to_fp8rowwise_cpu (c++ function)": [[10, "_CPPv423float_to_FP8rowwise_cpuRK6Tensorb"]], "float_to_fused8bitrowwise_cpu (c++ function)": [[10, "_CPPv429float_to_fused8bitrowwise_cpuRK6Tensor"]], "fused8bitrowwise_to_float_cpu (c++ function)": [[10, "_CPPv429fused8bitrowwise_to_float_cpuRK6Tensor"]], "fused8bitrowwise_to_float_or_half_cpu (c++ function)": [[10, "_CPPv437fused8bitrowwise_to_float_or_half_cpuRK6TensorK7int64_tKbKb"]], "fused8bitrowwise_to_half_cpu (c++ function)": [[10, "_CPPv428fused8bitrowwise_to_half_cpuRK6Tensor"]], "fusednbitrowwise_sbfront_to_float_cpu (c++ function)": [[10, "_CPPv437fusednbitrowwise_sbfront_to_float_cpuRK6TensorK7int64_t"]], "fusednbitrowwise_to_float_cpu (c++ function)": [[10, "_CPPv429fusednbitrowwise_to_float_cpuRK6TensorK7int64_t"]], "fusednbitrowwise_to_float_or_half_cpu (c++ function)": [[10, "_CPPv437fusednbitrowwise_to_float_or_half_cpuRK6TensorK7int64_tK7int64_t"]], "fusednbitrowwise_to_half_cpu (c++ function)": [[10, "_CPPv428fusednbitrowwise_to_half_cpuRK6TensorK7int64_t"]], "half_to_fused8bitrowwise_cpu (c++ function)": [[10, "_CPPv428half_to_fused8bitrowwise_cpuRK6Tensor"]], "expand_into_jagged_permute_cuda (c++ function)": [[11, "_CPPv431expand_into_jagged_permute_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_t"]], "generic_histogram_binning_calibration_by_feature_cpu (c++ function)": [[11, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td"]], "histogram_binning_calibration_cpu (c++ function)": [[11, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td"]], "direct_mapped_lru_cache_populate_byte_cuda (c++ function)": [[12, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE"]], "direct_mapped_lxu_cache_lookup_cuda (c++ function)": [[12, "_CPPv435direct_mapped_lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE"]], "get_unique_indices_cuda (c++ function)": [[12, "_CPPv423get_unique_indices_cudaRKN2at6TensorEK7int64_tKb"]], "get_unique_indices_with_inverse_cuda (c++ function)": [[12, "_CPPv436get_unique_indices_with_inverse_cudaRKN2at6TensorEK7int64_tKbKb"]], "host_lxu_cache_slot (c++ function)": [[12, "_CPPv419host_lxu_cache_slot7int64_t7int64_t"]], "lfu_cache_populate_byte_cuda (c++ function)": [[12, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t"]], "lfu_cache_populate_cuda (c++ function)": [[12, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb"]], "linearize_cache_indices_cuda (c++ function)": [[12, "_CPPv428linearize_cache_indices_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorERKNSt8optionalIN2at6TensorEEEK7int64_tK7int64_t"]], "linearize_cache_indices_from_row_idx_cuda (c++ function)": [[12, "_CPPv441linearize_cache_indices_from_row_idx_cudaN2at6TensorEN2at6TensorEN2at6TensorE"]], "lru_cache_find_uncached_cuda (c++ function)": [[12, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorEKb"]], "lru_cache_populate_byte_cuda (c++ function)": [[12, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE"]], "lru_cache_populate_cuda (c++ function)": [[12, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbNSt8optionalIN2at6TensorEEEbNSt8optionalIN2at6TensorEEE"]], "lxu_cache_flush_cuda (c++ function)": [[12, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb"]], "lxu_cache_locations_update_cuda (c++ function)": [[12, "_CPPv431lxu_cache_locations_update_cudaN2at6TensorEN2at6TensorENSt8optionalIN2at6TensorEEE"]], "lxu_cache_locking_counter_decrement_cuda (c++ function)": [[12, "_CPPv440lxu_cache_locking_counter_decrement_cudaN2at6TensorEN2at6TensorE"]], "lxu_cache_lookup_cuda (c++ function)": [[12, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEENSt8optionalIN2at6TensorEEENSt8optionalIN2at6TensorEEE"]], "reset_weight_momentum_cuda (c++ function)": [[12, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t"]], "rocksdbwritemode (c++ enum)": [[13, "_CPPv416RocksdbWriteMode"]], "rocksdbwritemode::bwd_l1_cnflct_miss_write_back (c++ enumerator)": [[13, "_CPPv4N16RocksdbWriteMode29BWD_L1_CNFLCT_MISS_WRITE_BACKE"]], "rocksdbwritemode::flush (c++ enumerator)": [[13, "_CPPv4N16RocksdbWriteMode5FLUSHE"]], "rocksdbwritemode::fwd_l1_eviction (c++ enumerator)": [[13, "_CPPv4N16RocksdbWriteMode15FWD_L1_EVICTIONE"]], "rocksdbwritemode::fwd_rocksdb_read (c++ enumerator)": [[13, "_CPPv4N16RocksdbWriteMode16FWD_ROCKSDB_READE"]], "compact_indices_cuda (c++ function)": [[13, "_CPPv420compact_indices_cudaNSt6vectorI6TensorEE6TensorNSt6vectorI6TensorEE6Tensor6Tensor"]], "cuda_callback_func (c++ function)": [[13, "_CPPv418cuda_callback_func12cudaStream_t11cudaError_tPv"]], "hash_shard (c++ function)": [[13, "_CPPv410hash_shard7int64_t6size_t"]], "kv_db::cachecontext (c++ class)": [[13, "_CPPv4N5kv_db12CacheContextE"]], "kv_db::embeddingkvdb (c++ class)": [[13, "_CPPv4N5kv_db13EmbeddingKVDBE"]], "kv_db::queueitem (c++ struct)": [[13, "_CPPv4N5kv_db9QueueItemE"]], "l2_cache::cachelibcache (c++ class)": [[13, "_CPPv4N8l2_cache13CacheLibCacheE"]], "masked_index_put_cuda (c++ function)": [[13, "_CPPv421masked_index_put_cuda6Tensor6Tensor6Tensor6TensorKbK7int64_t"]], "masked_index_select_cuda (c++ function)": [[13, "_CPPv424masked_index_select_cuda6Tensor6Tensor6Tensor6TensorKbK7int64_t"]], "ps::embeddingparameterserver (c++ class)": [[13, "_CPPv4N2ps24EmbeddingParameterServerE"]], "ssd::embeddingrocksdb (c++ class)": [[13, "_CPPv4N3ssd16EmbeddingRocksDBE"]], "ssd_generate_row_addrs_cuda (c++ function)": [[13, "_CPPv427ssd_generate_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor"]], "ssd_update_row_addrs_cuda (c++ function)": [[13, "_CPPv425ssd_update_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor"]], "batched_dense_vec_jagged_2d_mul() (in module torch.ops.fbgemm)": [[18, "torch.ops.fbgemm.batched_dense_vec_jagged_2d_mul"]], "dense_to_jagged() (in module torch.ops.fbgemm)": [[18, "torch.ops.fbgemm.dense_to_jagged"]], "fbgemm_gpu": [[18, "module-fbgemm_gpu"], [19, "module-fbgemm_gpu"], [20, "module-fbgemm_gpu"], [21, "module-fbgemm_gpu"], [22, "module-fbgemm_gpu"]], "jagged_1d_to_dense() (in module torch.ops.fbgemm)": [[18, "torch.ops.fbgemm.jagged_1d_to_dense"]], "jagged_2d_to_dense() (in module torch.ops.fbgemm)": [[18, "torch.ops.fbgemm.jagged_2d_to_dense"]], "jagged_dense_dense_elementwise_add_jagged_output() (in module torch.ops.fbgemm)": [[18, "torch.ops.fbgemm.jagged_dense_dense_elementwise_add_jagged_output"]], "jagged_dense_elementwise_add() (in module torch.ops.fbgemm)": [[18, "torch.ops.fbgemm.jagged_dense_elementwise_add"]], "jagged_dense_elementwise_add_jagged_output() (in module torch.ops.fbgemm)": [[18, "torch.ops.fbgemm.jagged_dense_elementwise_add_jagged_output"]], "jagged_dense_elementwise_mul() (in module torch.ops.fbgemm)": [[18, "torch.ops.fbgemm.jagged_dense_elementwise_mul"]], "jagged_to_padded_dense() (in module torch.ops.fbgemm)": [[18, "torch.ops.fbgemm.jagged_to_padded_dense"]], "module": [[18, "module-fbgemm_gpu"], [19, "module-fbgemm_gpu"], [20, "module-fbgemm_gpu"], [21, "module-fbgemm_gpu"], [22, "module-fbgemm_gpu"]], "stacked_jagged_1d_to_dense() (in module torch.ops.fbgemm)": [[18, "torch.ops.fbgemm.stacked_jagged_1d_to_dense"]], "stacked_jagged_2d_to_dense() (in module torch.ops.fbgemm)": [[18, "torch.ops.fbgemm.stacked_jagged_2d_to_dense"]], "permutepooledembeddings (class in fbgemm_gpu.permute_pooled_embedding_modules)": [[19, "fbgemm_gpu.permute_pooled_embedding_modules.PermutePooledEmbeddings"]], "__call__() (fbgemm_gpu.permute_pooled_embedding_modules.permutepooledembeddings method)": [[19, "fbgemm_gpu.permute_pooled_embedding_modules.PermutePooledEmbeddings.__call__"]], "merge_pooled_embeddings() (in module torch.ops.fbgemm)": [[20, "torch.ops.fbgemm.merge_pooled_embeddings"]], "permute_pooled_embs() (in module torch.ops.fbgemm)": [[20, "torch.ops.fbgemm.permute_pooled_embs"]], "floatorhalftofusednbitrowwisequantizedsbhalf() (in module torch.ops.fbgemm)": [[21, "torch.ops.fbgemm.FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf"]], "asynchronous_complete_cumsum() (in module torch.ops.fbgemm)": [[22, "torch.ops.fbgemm.asynchronous_complete_cumsum"]], "expand_into_jagged_permute() (in module torch.ops.fbgemm)": [[22, "torch.ops.fbgemm.expand_into_jagged_permute"]], "offsets_range() (in module torch.ops.fbgemm)": [[22, "torch.ops.fbgemm.offsets_range"]], "permute_1d_sparse_data() (in module torch.ops.fbgemm)": [[22, "torch.ops.fbgemm.permute_1D_sparse_data"]], "permute_2d_sparse_data() (in module torch.ops.fbgemm)": [[22, "torch.ops.fbgemm.permute_2D_sparse_data"]], "splittablebatchedembeddingbagscodegen (class in fbgemm_gpu.split_table_batched_embeddings_ops_training)": [[23, "fbgemm_gpu.split_table_batched_embeddings_ops_training.SplitTableBatchedEmbeddingBagsCodegen"]], "forward() (fbgemm_gpu.split_table_batched_embeddings_ops_training.splittablebatchedembeddingbagscodegen method)": [[23, "fbgemm_gpu.split_table_batched_embeddings_ops_training.SplitTableBatchedEmbeddingBagsCodegen.forward"]], "set_learning_rate() (fbgemm_gpu.split_table_batched_embeddings_ops_training.splittablebatchedembeddingbagscodegen method)": [[23, "fbgemm_gpu.split_table_batched_embeddings_ops_training.SplitTableBatchedEmbeddingBagsCodegen.set_learning_rate"]], "set_optimizer_step() (fbgemm_gpu.split_table_batched_embeddings_ops_training.splittablebatchedembeddingbagscodegen method)": [[23, "fbgemm_gpu.split_table_batched_embeddings_ops_training.SplitTableBatchedEmbeddingBagsCodegen.set_optimizer_step"]], "split_embedding_weights() (fbgemm_gpu.split_table_batched_embeddings_ops_training.splittablebatchedembeddingbagscodegen method)": [[23, "fbgemm_gpu.split_table_batched_embeddings_ops_training.SplitTableBatchedEmbeddingBagsCodegen.split_embedding_weights"]], "split_optimizer_states() (fbgemm_gpu.split_table_batched_embeddings_ops_training.splittablebatchedembeddingbagscodegen method)": [[23, "fbgemm_gpu.split_table_batched_embeddings_ops_training.SplitTableBatchedEmbeddingBagsCodegen.split_optimizer_states"]], "update_hyper_parameters() (fbgemm_gpu.split_table_batched_embeddings_ops_training.splittablebatchedembeddingbagscodegen method)": [[23, "fbgemm_gpu.split_table_batched_embeddings_ops_training.SplitTableBatchedEmbeddingBagsCodegen.update_hyper_parameters"]], "example_method (c++ function)": [[27, "_CPPv4I0_NSt6size_tEE14example_method7int32_t1Tf"]], "example_method() (in module fbgemm_gpu.docs.examples)": [[29, "fbgemm_gpu.docs.examples.example_method"]]}}) \ No newline at end of file