diff --git a/docs/source/installation/linux.md b/docs/source/installation/linux.md index 37527f6cc2f..f2a37297ef1 100644 --- a/docs/source/installation/linux.md +++ b/docs/source/installation/linux.md @@ -56,3 +56,15 @@ There are some known limitations when you pip install pre-built TensorRT LLM whe when OMPI was not configured --with-slurm and we weren't able to discover a SLURM installation in the usual places. ``` + +2. Prevent `pip` from replacing an existing PyTorch installation + + On certain systems, particularly Ubuntu 22.04, installing TensorRT LLM can cause `pip` to uninstall an existing CUDA 13.0-compatible PyTorch installation (e.g., `torch==2.9.0+cu130`) and replace it with a CUDA 12.8 version (`torch==2.9.0`). This leaves the TensorRT LLM installation unusable and leads to runtime errors. + + The solution is to create a `pip` constraints file that locks `torch` to the currently installed version. Here is an example of how this can be done manually: + + ```bash + CURRENT_TORCH_VERSION=$(python3 -c "import torch; print(torch.__version__)") + echo "torch==$CURRENT_TORCH_VERSION" > /tmp/torch-constraint.txt + pip3 install --upgrade pip setuptools && pip3 install tensorrt_llm -c /tmp/torch-constraint.txt + ``` diff --git a/requirements.txt b/requirements.txt index 0dc90b39a7a..d200e1fc1b9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,7 +8,7 @@ diffusers>=0.27.0 lark mpi4py numpy<2 -onnx>=1.18.0 +onnx>=1.18.0,<1.20.0 onnx_graphsurgeon>=0.5.2 openai polygraphy @@ -19,8 +19,6 @@ pandas h5py==3.12.1 StrEnum sentencepiece>=0.1.99 -# WAR for tensorrt depending on the archived nvidia-cuda-runtime-cu13 package -nvidia-cuda-runtime-cu13==0.0.0a0 tensorrt~=10.13.0 # https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-25-10.html#rel-25-10 uses 2.9.0a0. 
torch>=2.9.0a0,<=2.9.0 diff --git a/tests/unittest/test_pip_install.py b/tests/unittest/test_pip_install.py index 87c21d4f634..ca780811ce4 100644 --- a/tests/unittest/test_pip_install.py +++ b/tests/unittest/test_pip_install.py @@ -42,6 +42,51 @@ def download_wheel(args): shell=True) +def install_tensorrt_llm(): + """ + Installs the tensorrt_llm wheel, dynamically creating a torch constraint + if torch is already installed to prevent it from being replaced. + """ + print("########## Install tensorrt_llm package ##########") + + install_command = "pip3 install tensorrt_llm-*.whl" + + # Always check for an existing torch installation, regardless of OS. + try: + print("Checking for existing torch installation...") + torch_version_result = subprocess.run( + ['python3', '-c', 'import torch; print(torch.__version__)'], + capture_output=True, + text=True, + check=True) + torch_version = torch_version_result.stdout.strip() + + if torch_version: + print(f"Found installed torch version: {torch_version}") + constraint_filename = "torch-constraint.txt" + with open(constraint_filename, "w") as f: + f.write(f"torch=={torch_version}\n") + print( + f"Created {constraint_filename} to constrain torch to version {torch_version}." + ) + + # Modify install command to use the constraint + install_command += f" -c {constraint_filename}" + else: + # This case is unlikely if the subprocess call succeeds + print( + "Could not determine installed torch version. Installing without constraint." + ) + + except (subprocess.CalledProcessError, FileNotFoundError): + # This handles cases where python3 fails or 'import torch' raises an error. + print("Torch is not installed. 
Proceeding without constraint.") + + # Execute the final installation command + print(f"Executing command: {install_command}") + subprocess.check_call(install_command, shell=True) + + def test_pip_install(): parser = argparse.ArgumentParser(description="Check Pip Install") parser.add_argument("--wheel_path", @@ -62,8 +107,8 @@ def test_pip_install(): shell=True) download_wheel(args) - print("########## Install tensorrt_llm package ##########") - subprocess.check_call("pip3 install tensorrt_llm-*.whl", shell=True) + install_tensorrt_llm() + print("########## Test import tensorrt_llm ##########") subprocess.check_call( 'python3 -c "import tensorrt_llm; print(tensorrt_llm.__version__)"',