From 55738303df66abe426b342dc625c32820acb97aa Mon Sep 17 00:00:00 2001
From: Cursor Agent <cursoragent@cursor.com>
Date: Tue, 23 Sep 2025 15:35:42 +0000
Subject: [PATCH 01/21] Update dependencies and TensorRT installation

Co-authored-by: victorgelias <victorgelias@gmail.com>
---
 demo/realtime-img2img/requirements.txt        |  4 +++-
 setup.py                                      |  4 ++++
 src/streamdiffusion/tools/install-tensorrt.py | 17 ++++++++++++++---
 3 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/demo/realtime-img2img/requirements.txt b/demo/realtime-img2img/requirements.txt
index a379a58e..9328e589 100644
--- a/demo/realtime-img2img/requirements.txt
+++ b/demo/realtime-img2img/requirements.txt
@@ -7,7 +7,9 @@ fastapi==0.115.0
 uvicorn[standard]==0.32.0
 Pillow==10.5.0
 compel==2.0.2
-controlnet-aux==0.0.7
+controlnet-aux==0.0.10
+mediapipe==0.10.21
+insightface==0.7.3
 xformers; sys_platform != 'darwin' or platform_machine != 'arm64'
 markdown2
 PyYAML
diff --git a/setup.py b/setup.py
index 6338e387..503905dd 100644
--- a/setup.py
+++ b/setup.py
@@ -40,6 +40,10 @@ def deps_list(*pkgs):
     deps["diffusers"],
     deps["transformers"],
     deps["accelerate"],
+    # Required preprocessors/features (pin to known-good versions)
+    "controlnet-aux==0.0.10",
+    "mediapipe==0.10.21",
+    "insightface==0.7.3",
     "diffusers-ipadapter @ git+https://github.com/livepeer/Diffusers_IPAdapter.git@405f87da42932e30bd55ee8dca3ce502d7834a99",
 ]
 
diff --git a/src/streamdiffusion/tools/install-tensorrt.py b/src/streamdiffusion/tools/install-tensorrt.py
index 182871c4..34e6e820 100644
--- a/src/streamdiffusion/tools/install-tensorrt.py
+++ b/src/streamdiffusion/tools/install-tensorrt.py
@@ -23,15 +23,26 @@ def install(cu: Optional[Literal["11", "12"]] = get_cuda_version_from_torch()):
     print("Installing TensorRT requirements...")
 
     if is_installed("tensorrt"):
-        if version("tensorrt") < Version("9.0.0"):
-            run_pip("uninstall -y tensorrt")
+        try:
+            if version("tensorrt") and version("tensorrt") < Version("9.0.0"):
+                run_pip("uninstall -y tensorrt")
+        except Exception:
+            # best-effort cleanup; proceed with install
+            pass
 
     cudnn_name = f"nvidia-cudnn-cu{cu}==8.9.4.25"
 
     if not is_installed("tensorrt"):
+        # Ensure CuDNN for the correct CUDA major is present
         run_pip(f"install {cudnn_name} --no-cache-dir")
+        # Install a stable, known-good TensorRT build from NVIDIA PyPI for CUDA {cu}
+        # Post11 builds are for CUDA 12.x; for CUDA 11 we fallback to the matching 8.x series
+        if cu == "12":
+            trt_spec = "tensorrt==9.1.0.post12"  # stable CUDA 12 build
+        else:
+            trt_spec = "tensorrt==8.6.1"  # last stable for CUDA 11 runtime
         run_pip(
-            "install --pre --extra-index-url https://pypi.nvidia.com tensorrt==9.0.1.post11.dev4 --no-cache-dir"
+            f"install --extra-index-url https://pypi.nvidia.com {trt_spec} --no-cache-dir"
         )
 
     if not is_installed("polygraphy"):

From 822e1c6c35691b422c34b58777a784a4ca06413e Mon Sep 17 00:00:00 2001
From: Cursor Agent <cursoragent@cursor.com>
Date: Tue, 23 Sep 2025 15:53:20 +0000
Subject: [PATCH 02/21] Update dependencies and TensorRT installation

Co-authored-by: victorgelias <victorgelias@gmail.com>
---
 demo/realtime-img2img/requirements.txt        |  3 ---
 setup.py                                      | 20 +++++++++++--------
 src/streamdiffusion/tools/install-tensorrt.py | 19 ++++++++----------
 3 files changed, 20 insertions(+), 22 deletions(-)

diff --git a/demo/realtime-img2img/requirements.txt b/demo/realtime-img2img/requirements.txt
index 9328e589..487d92a6 100644
--- a/demo/realtime-img2img/requirements.txt
+++ b/demo/realtime-img2img/requirements.txt
@@ -7,9 +7,6 @@ fastapi==0.115.0
 uvicorn[standard]==0.32.0
 Pillow==10.5.0
 compel==2.0.2
-controlnet-aux==0.0.10
-mediapipe==0.10.21
-insightface==0.7.3
 xformers; sys_platform != 'darwin' or platform_machine != 'arm64'
 markdown2
 PyYAML
diff --git a/setup.py b/setup.py
index 503905dd..e3450e2c 100644
--- a/setup.py
+++ b/setup.py
@@ -6,15 +6,18 @@
 
 _deps = [
     "torch",
-    "xformers",
-    "diffusers>=0.31.0",
-    "transformers",
-    "accelerate",
+    "xformers==0.0.30",
+    "diffusers==0.35.0",
+    "transformers==4.56.0",
+    "accelerate==1.10.0",
+    "huggingface_hub==0.35.0",
+    "Pillow==10.5.0",
     "fire",
     "omegaconf",
     "cuda-python==12.9.0",
-    "onnx>=1.15.0",
-    "onnxruntime>=1.16.3",
+    "onnx==1.18.0",
+    "onnxruntime==1.22.0",
+    "onnxruntime-gpu==1.22.0",
     "protobuf>=3.20.2",
     "colored",
     "pywin32;sys_platform == 'win32'"
@@ -30,7 +33,7 @@ def deps_list(*pkgs):
 extras = {}
 extras["xformers"] = deps_list("xformers")
 extras["torch"] = deps_list("torch", "accelerate")
-extras["tensorrt"] = deps_list("protobuf", "cuda-python", "onnx", "onnxruntime", "colored")
+extras["tensorrt"] = deps_list("protobuf", "cuda-python", "onnx", "onnxruntime", "onnxruntime-gpu", "colored")
 
 extras["dev"] = extras["xformers"] + extras["torch"] + extras["tensorrt"]
 
@@ -40,7 +43,8 @@ def deps_list(*pkgs):
     deps["diffusers"],
     deps["transformers"],
     deps["accelerate"],
-    # Required preprocessors/features (pin to known-good versions)
+    deps["huggingface_hub"],
+    deps["Pillow"],
     "controlnet-aux==0.0.10",
     "mediapipe==0.10.21",
     "insightface==0.7.3",
diff --git a/src/streamdiffusion/tools/install-tensorrt.py b/src/streamdiffusion/tools/install-tensorrt.py
index 34e6e820..d4685656 100644
--- a/src/streamdiffusion/tools/install-tensorrt.py
+++ b/src/streamdiffusion/tools/install-tensorrt.py
@@ -30,28 +30,25 @@ def install(cu: Optional[Literal["11", "12"]] = get_cuda_version_from_torch()):
             # best-effort cleanup; proceed with install
             pass
 
-    cudnn_name = f"nvidia-cudnn-cu{cu}==8.9.4.25"
+    cudnn_name = f"nvidia-cudnn-cu{cu}==8.9.7.29"
 
     if not is_installed("tensorrt"):
-        # Ensure CuDNN for the correct CUDA major is present
         run_pip(f"install {cudnn_name} --no-cache-dir")
-        # Install a stable, known-good TensorRT build from NVIDIA PyPI for CUDA {cu}
-        # Post11 builds are for CUDA 12.x; for CUDA 11 we fallback to the matching 8.x series
         if cu == "12":
-            trt_spec = "tensorrt==9.1.0.post12"  # stable CUDA 12 build
+            run_pip("install --extra-index-url https://pypi.nvidia.com tensorrt==10.12.0.36 --no-cache-dir")
+            run_pip("install --extra-index-url https://pypi.nvidia.com tensorrt-cu12-bindings==10.12.0.36 --no-cache-dir")
+            run_pip("install --extra-index-url https://pypi.nvidia.com tensorrt-cu12-libs==10.12.0.36 --no-cache-dir")
         else:
-            trt_spec = "tensorrt==8.6.1"  # last stable for CUDA 11 runtime
-        run_pip(
-            f"install --extra-index-url https://pypi.nvidia.com {trt_spec} --no-cache-dir"
-        )
+            # CUDA 11 fallback to last supported TRT 8.x
+            run_pip("install --extra-index-url https://pypi.nvidia.com tensorrt==8.6.1 --no-cache-dir")
 
     if not is_installed("polygraphy"):
         run_pip(
-            "install polygraphy==0.47.1 --extra-index-url https://pypi.ngc.nvidia.com"
+            "install polygraphy==0.49.24 --extra-index-url https://pypi.ngc.nvidia.com"
         )
     if not is_installed("onnx_graphsurgeon"):
         run_pip(
-            "install onnx-graphsurgeon==0.3.26 --extra-index-url https://pypi.ngc.nvidia.com"
+            "install onnx-graphsurgeon==0.5.8 --extra-index-url https://pypi.ngc.nvidia.com"
         )
     if platform.system() == 'Windows' and not is_installed("pywin32"):
         run_pip(

From 04c7c6f897de0c89cd5025a8e1f2011d84bb611d Mon Sep 17 00:00:00 2001
From: Cursor Agent <cursoragent@cursor.com>
Date: Tue, 23 Sep 2025 16:14:30 +0000
Subject: [PATCH 03/21] Update dependencies and TensorRT installation script

Co-authored-by: victorgelias <victorgelias@gmail.com>
---
 setup.py                                      | 18 ++++---
 src/streamdiffusion/tools/install-tensorrt.py | 49 +++++--------------
 2 files changed, 22 insertions(+), 45 deletions(-)

diff --git a/setup.py b/setup.py
index e3450e2c..c8210a5d 100644
--- a/setup.py
+++ b/setup.py
@@ -5,22 +5,24 @@
 
 
 _deps = [
-    "torch",
+    "torch==2.7.1+cu128",
+    "torchvision==0.22.1+cu128",
+    "torchaudio==2.7.1+cu128",
     "xformers==0.0.30",
     "diffusers==0.35.0",
     "transformers==4.56.0",
     "accelerate==1.10.0",
     "huggingface_hub==0.35.0",
     "Pillow==10.5.0",
-    "fire",
-    "omegaconf",
-    "cuda-python==12.9.0",
+    "fire==0.6.0",
+    "omegaconf==2.3.0",
+    "cuda-python==12.8.0",
     "onnx==1.18.0",
     "onnxruntime==1.22.0",
     "onnxruntime-gpu==1.22.0",
-    "protobuf>=3.20.2",
-    "colored",
-    "pywin32;sys_platform == 'win32'"
+    "protobuf==4.25.3",
+    "colored==2.2.4",
+    "pywin32==306;sys_platform == 'win32'"
 ]
 
 deps = {b: a for a, b in (re.findall(r"^(([^!=<>~]+)(?:[!=<>~].*)?$)", x)[0] for x in _deps)}
@@ -32,7 +34,7 @@ def deps_list(*pkgs):
 
 extras = {}
 extras["xformers"] = deps_list("xformers")
-extras["torch"] = deps_list("torch", "accelerate")
+extras["torch"] = deps_list("torch", "torchvision", "torchaudio", "accelerate")
 extras["tensorrt"] = deps_list("protobuf", "cuda-python", "onnx", "onnxruntime", "onnxruntime-gpu", "colored")
 
 extras["dev"] = extras["xformers"] + extras["torch"] + extras["tensorrt"]
diff --git a/src/streamdiffusion/tools/install-tensorrt.py b/src/streamdiffusion/tools/install-tensorrt.py
index d4685656..307b84e1 100644
--- a/src/streamdiffusion/tools/install-tensorrt.py
+++ b/src/streamdiffusion/tools/install-tensorrt.py
@@ -1,46 +1,23 @@
-from typing import Literal, Optional
+from typing import Literal
 
 import fire
-from packaging.version import Version
-
-from ..pip_utils import is_installed, run_pip, version
+from ..pip_utils import is_installed, run_pip
 import platform
 
 
-def get_cuda_version_from_torch() -> Optional[Literal["11", "12"]]:
-    try:
-        import torch
-    except ImportError:
-        return None
-
-    return torch.version.cuda.split(".")[0]
-
-
-def install(cu: Optional[Literal["11", "12"]] = get_cuda_version_from_torch()):
-    if cu is None or cu not in ["11", "12"]:
-        print("Could not detect CUDA version. Please specify manually.")
-        return
+def install(cu: Literal["11", "12"]):
     print("Installing TensorRT requirements...")
 
-    if is_installed("tensorrt"):
-        try:
-            if version("tensorrt") and version("tensorrt") < Version("9.0.0"):
-                run_pip("uninstall -y tensorrt")
-        except Exception:
-            # best-effort cleanup; proceed with install
-            pass
-
     cudnn_name = f"nvidia-cudnn-cu{cu}==8.9.7.29"
 
-    if not is_installed("tensorrt"):
-        run_pip(f"install {cudnn_name} --no-cache-dir")
-        if cu == "12":
-            run_pip("install --extra-index-url https://pypi.nvidia.com tensorrt==10.12.0.36 --no-cache-dir")
-            run_pip("install --extra-index-url https://pypi.nvidia.com tensorrt-cu12-bindings==10.12.0.36 --no-cache-dir")
-            run_pip("install --extra-index-url https://pypi.nvidia.com tensorrt-cu12-libs==10.12.0.36 --no-cache-dir")
-        else:
-            # CUDA 11 fallback to last supported TRT 8.x
-            run_pip("install --extra-index-url https://pypi.nvidia.com tensorrt==8.6.1 --no-cache-dir")
+    run_pip(f"install {cudnn_name} --no-cache-dir")
+
+    if cu == "12":
+        run_pip("install --extra-index-url https://pypi.nvidia.com tensorrt==10.12.0.36 --no-cache-dir")
+        run_pip("install --extra-index-url https://pypi.nvidia.com tensorrt-cu12-bindings==10.12.0.36 --no-cache-dir")
+        run_pip("install --extra-index-url https://pypi.nvidia.com tensorrt-cu12-libs==10.12.0.36 --no-cache-dir")
+    else:
+        run_pip("install --extra-index-url https://pypi.nvidia.com tensorrt==8.6.1 --no-cache-dir")
 
     if not is_installed("polygraphy"):
         run_pip(
@@ -52,11 +29,9 @@ def install(cu: Optional[Literal["11", "12"]] = get_cuda_version_from_torch()):
         )
     if platform.system() == 'Windows' and not is_installed("pywin32"):
         run_pip(
-            "install pywin32"
+            "install pywin32==306"
         )
 
-    pass
-
 
 if __name__ == "__main__":
     fire.Fire(install)

From 42de82121c564bd27cb7d5dd5c883e7272b567fd Mon Sep 17 00:00:00 2001
From: Cursor Agent <cursoragent@cursor.com>
Date: Tue, 23 Sep 2025 16:29:28 +0000
Subject: [PATCH 04/21] Refactor: Remove hardcoded torch dependencies and add
 validation

Co-authored-by: victorgelias <victorgelias@gmail.com>
---
 setup.py                       |  5 +----
 src/streamdiffusion/wrapper.py | 25 ++++++++++++++++++++++++-
 2 files changed, 25 insertions(+), 5 deletions(-)

diff --git a/setup.py b/setup.py
index c8210a5d..55a22de5 100644
--- a/setup.py
+++ b/setup.py
@@ -5,9 +5,6 @@
 
 
 _deps = [
-    "torch==2.7.1+cu128",
-    "torchvision==0.22.1+cu128",
-    "torchaudio==2.7.1+cu128",
     "xformers==0.0.30",
     "diffusers==0.35.0",
     "transformers==4.56.0",
@@ -34,7 +31,7 @@ def deps_list(*pkgs):
 
 extras = {}
 extras["xformers"] = deps_list("xformers")
-extras["torch"] = deps_list("torch", "torchvision", "torchaudio", "accelerate")
+extras["torch"] = []
 extras["tensorrt"] = deps_list("protobuf", "cuda-python", "onnx", "onnxruntime", "onnxruntime-gpu", "colored")
 
 extras["dev"] = extras["xformers"] + extras["torch"] + extras["tensorrt"]
diff --git a/src/streamdiffusion/wrapper.py b/src/streamdiffusion/wrapper.py
index 6a997b12..884eb749 100644
--- a/src/streamdiffusion/wrapper.py
+++ b/src/streamdiffusion/wrapper.py
@@ -2,7 +2,14 @@
 from pathlib import Path
 from typing import Dict, List, Literal, Optional, Union, Any, Tuple
 
-import torch
+try:
+    import torch
+except ImportError as e:
+    raise ImportError(
+        "PyTorch is not installed. Install the CUDA-matched wheels, for example:\n"
+        "  pip install --index-url https://download.pytorch.org/whl/cu128 torch==2.7.1+cu128 torchvision==0.22.1+cu128 torchaudio==2.7.1+cu128\n"
+        "Adjust the CUDA index and versions to match your environment."
+    ) from e
 import numpy as np
 from PIL import Image
 from diffusers import AutoencoderTiny, StableDiffusionPipeline, StableDiffusionXLPipeline, AutoPipelineForText2Image
@@ -252,6 +259,7 @@ def __init__(
         self.safety_checker_fallback_type = safety_checker_fallback_type
         self.safety_checker_threshold = safety_checker_threshold
 
+        self._validate_torch_stack()
         self.stream: StreamDiffusion = self._load_model(
             model_id_or_path=model_id_or_path,
             lora_dict=lora_dict,
@@ -316,6 +324,21 @@ def __init__(
                 similar_image_filter_threshold, similar_image_filter_max_skip_frame
             )
 
+    def _validate_torch_stack(self) -> None:
+        # Validate torch with CUDA and specific minor if available
+        if not torch.cuda.is_available():
+            return
+        torch_version = getattr(torch, "__version__", "")
+        cuda_version = getattr(torch.version, "cuda", "")
+        if not cuda_version:
+            raise RuntimeError("Torch is installed without CUDA. Install CUDA-enabled wheels from the PyTorch CUDA index.")
+        # If targeting cu128, enforce it here
+        target_cuda_minor = "12.8"
+        if not cuda_version.startswith(target_cuda_minor.split(".")[0]):
+            raise RuntimeError(f"CUDA major mismatch: torch CUDA {cuda_version}, expected {target_cuda_minor} series.")
+        if not cuda_version.startswith(target_cuda_minor):
+            raise RuntimeError(f"CUDA minor mismatch: torch CUDA {cuda_version}, expected {target_cuda_minor}.")
+
     def prepare(
         self,
         prompt: Union[str, List[Tuple[str, float]]],

From 52acc8f92a012fa0093668693b8e0c0bdc15f9d2 Mon Sep 17 00:00:00 2001
From: Cursor Agent <cursoragent@cursor.com>
Date: Tue, 23 Sep 2025 16:32:31 +0000
Subject: [PATCH 05/21] Checkpoint before follow-up message

Co-authored-by: victorgelias <victorgelias@gmail.com>
---
 setup.py | 25 +++++++++++++++++++++++--
 1 file changed, 23 insertions(+), 2 deletions(-)

diff --git a/setup.py b/setup.py
index 55a22de5..d2d14849 100644
--- a/setup.py
+++ b/setup.py
@@ -1,5 +1,6 @@
 import os
 import re
+import sys
 
 from setuptools import find_packages, setup
 
@@ -13,7 +14,6 @@
     "Pillow==10.5.0",
     "fire==0.6.0",
     "omegaconf==2.3.0",
-    "cuda-python==12.8.0",
     "onnx==1.18.0",
     "onnxruntime==1.22.0",
     "onnxruntime-gpu==1.22.0",
@@ -32,7 +32,7 @@ def deps_list(*pkgs):
 extras = {}
 extras["xformers"] = deps_list("xformers")
 extras["torch"] = []
-extras["tensorrt"] = deps_list("protobuf", "cuda-python", "onnx", "onnxruntime", "onnxruntime-gpu", "colored")
+extras["tensorrt"] = deps_list("protobuf", "onnx", "onnxruntime", "onnxruntime-gpu", "colored")
 
 extras["dev"] = extras["xformers"] + extras["torch"] + extras["tensorrt"]
 
@@ -50,6 +50,27 @@ def deps_list(*pkgs):
     "diffusers-ipadapter @ git+https://github.com/livepeer/Diffusers_IPAdapter.git@405f87da42932e30bd55ee8dca3ce502d7834a99",
 ]
 
+
+def _require_torch_preinstalled() -> None:
+    missing = []
+    for pkg in ("torch", "torchvision", "torchaudio"):
+        try:
+            __import__(pkg)
+        except Exception:
+            missing.append(pkg)
+    if missing:
+        msg = (
+            "Missing required pre-installed packages: " + ", ".join(missing) + "\n"
+            "Install the PyTorch CUDA wheels from the appropriate index first, e.g.:\n"
+            "  pip install --index-url https://download.pytorch.org/whl/cu12x torch torchvision torchaudio\n"
+            "Replace the index URL and versions to match your CUDA runtime."
+        )
+        raise RuntimeError(msg)
+
+
+if any(cmd in sys.argv for cmd in ("install", "bdist_wheel", "develop")):
+    _require_torch_preinstalled()
+
 setup(
     name="streamdiffusion",
     version="0.1.1",

From 8fe9e8f7ef6be16d685076d3f144f0ca09dc3929 Mon Sep 17 00:00:00 2001
From: Cursor Agent <cursoragent@cursor.com>
Date: Tue, 23 Sep 2025 16:35:17 +0000
Subject: [PATCH 06/21] Refactor torch validation and pin cuda-python

Co-authored-by: victorgelias <victorgelias@gmail.com>
---
 setup.py                       | 12 ++++++++++++
 src/streamdiffusion/wrapper.py | 25 ++-----------------------
 2 files changed, 14 insertions(+), 23 deletions(-)

diff --git a/setup.py b/setup.py
index d2d14849..9eeb8125 100644
--- a/setup.py
+++ b/setup.py
@@ -70,6 +70,18 @@ def _require_torch_preinstalled() -> None:
 
 if any(cmd in sys.argv for cmd in ("install", "bdist_wheel", "develop")):
     _require_torch_preinstalled()
+    # Dynamically pin cuda-python to match the preinstalled torch CUDA series
+    try:
+        import torch  # noqa: F401
+        cuda_str = getattr(torch.version, "cuda", "")
+        if cuda_str:
+            parts = cuda_str.split(".")
+            if len(parts) >= 2:
+                cu_major, cu_minor = parts[0], parts[1]
+                cuda_python_version = f"{cu_major}.{cu_minor}.0"
+                install_requires.append(f"cuda-python=={cuda_python_version}")
+    except Exception:
+        pass
 
 setup(
     name="streamdiffusion",
diff --git a/src/streamdiffusion/wrapper.py b/src/streamdiffusion/wrapper.py
index 884eb749..4d93d924 100644
--- a/src/streamdiffusion/wrapper.py
+++ b/src/streamdiffusion/wrapper.py
@@ -2,14 +2,7 @@
 from pathlib import Path
 from typing import Dict, List, Literal, Optional, Union, Any, Tuple
 
-try:
-    import torch
-except ImportError as e:
-    raise ImportError(
-        "PyTorch is not installed. Install the CUDA-matched wheels, for example:\n"
-        "  pip install --index-url https://download.pytorch.org/whl/cu128 torch==2.7.1+cu128 torchvision==0.22.1+cu128 torchaudio==2.7.1+cu128\n"
-        "Adjust the CUDA index and versions to match your environment."
-    ) from e
+import torch
 import numpy as np
 from PIL import Image
 from diffusers import AutoencoderTiny, StableDiffusionPipeline, StableDiffusionXLPipeline, AutoPipelineForText2Image
@@ -259,7 +252,6 @@ def __init__(
         self.safety_checker_fallback_type = safety_checker_fallback_type
         self.safety_checker_threshold = safety_checker_threshold
 
-        self._validate_torch_stack()
         self.stream: StreamDiffusion = self._load_model(
             model_id_or_path=model_id_or_path,
             lora_dict=lora_dict,
@@ -324,20 +316,7 @@ def __init__(
                 similar_image_filter_threshold, similar_image_filter_max_skip_frame
             )
 
-    def _validate_torch_stack(self) -> None:
-        # Validate torch with CUDA and specific minor if available
-        if not torch.cuda.is_available():
-            return
-        torch_version = getattr(torch, "__version__", "")
-        cuda_version = getattr(torch.version, "cuda", "")
-        if not cuda_version:
-            raise RuntimeError("Torch is installed without CUDA. Install CUDA-enabled wheels from the PyTorch CUDA index.")
-        # If targeting cu128, enforce it here
-        target_cuda_minor = "12.8"
-        if not cuda_version.startswith(target_cuda_minor.split(".")[0]):
-            raise RuntimeError(f"CUDA major mismatch: torch CUDA {cuda_version}, expected {target_cuda_minor} series.")
-        if not cuda_version.startswith(target_cuda_minor):
-            raise RuntimeError(f"CUDA minor mismatch: torch CUDA {cuda_version}, expected {target_cuda_minor}.")
+    
 
     def prepare(
         self,

From 23a340c9b1cda280f97824b768e2863049bc209d Mon Sep 17 00:00:00 2001
From: Cursor Agent <cursoragent@cursor.com>
Date: Tue, 23 Sep 2025 17:06:05 +0000
Subject: [PATCH 07/21] Update dependencies and CUDA version detection

Co-authored-by: victorgelias <victorgelias@gmail.com>
---
 setup.py                                      | 35 ++++++++++---------
 src/streamdiffusion/tools/install-tensorrt.py | 18 ++++++++--
 2 files changed, 33 insertions(+), 20 deletions(-)

diff --git a/setup.py b/setup.py
index 9eeb8125..d128fa1d 100644
--- a/setup.py
+++ b/setup.py
@@ -11,7 +11,7 @@
     "transformers==4.56.0",
     "accelerate==1.10.0",
     "huggingface_hub==0.35.0",
-    "Pillow==10.5.0",
+    "Pillow==11.0.0",
     "fire==0.6.0",
     "omegaconf==2.3.0",
     "onnx==1.18.0",
@@ -19,7 +19,10 @@
     "onnxruntime-gpu==1.22.0",
     "protobuf==4.25.3",
     "colored==2.2.4",
-    "pywin32==306;sys_platform == 'win32'"
+    "pywin32==306;sys_platform == 'win32'",
+    "controlnet-aux==0.0.10",
+    "mediapipe==0.10.21",
+    "insightface==0.7.3",
 ]
 
 deps = {b: a for a, b in (re.findall(r"^(([^!=<>~]+)(?:[!=<>~].*)?$)", x)[0] for x in _deps)}
@@ -44,9 +47,9 @@ def deps_list(*pkgs):
     deps["accelerate"],
     deps["huggingface_hub"],
     deps["Pillow"],
-    "controlnet-aux==0.0.10",
-    "mediapipe==0.10.21",
-    "insightface==0.7.3",
+    deps["controlnet-aux"],
+    deps["mediapipe"],
+    deps["insightface"],
     "diffusers-ipadapter @ git+https://github.com/livepeer/Diffusers_IPAdapter.git@405f87da42932e30bd55ee8dca3ce502d7834a99",
 ]
 
@@ -70,18 +73,16 @@ def _require_torch_preinstalled() -> None:
 
 if any(cmd in sys.argv for cmd in ("install", "bdist_wheel", "develop")):
     _require_torch_preinstalled()
-    # Dynamically pin cuda-python to match the preinstalled torch CUDA series
-    try:
-        import torch  # noqa: F401
-        cuda_str = getattr(torch.version, "cuda", "")
-        if cuda_str:
-            parts = cuda_str.split(".")
-            if len(parts) >= 2:
-                cu_major, cu_minor = parts[0], parts[1]
-                cuda_python_version = f"{cu_major}.{cu_minor}.0"
-                install_requires.append(f"cuda-python=={cuda_python_version}")
-    except Exception:
-        pass
+    import torch  # guaranteed by the preinstall check
+    cuda_str = getattr(torch.version, "cuda", "")
+    if not cuda_str:
+        raise RuntimeError("Detected CPU-only PyTorch. Install CUDA-enabled torch/vision/audio before installing this package.")
+    parts = cuda_str.split(".")
+    if len(parts) < 2:
+        raise RuntimeError(f"Unrecognized CUDA version from torch: '{cuda_str}'")
+    cu_major, cu_minor = parts[0], parts[1]
+    cuda_python_version = f"{cu_major}.{cu_minor}.0"
+    install_requires.append(f"cuda-python=={cuda_python_version}")
 
 setup(
     name="streamdiffusion",
diff --git a/src/streamdiffusion/tools/install-tensorrt.py b/src/streamdiffusion/tools/install-tensorrt.py
index 307b84e1..fb1787c1 100644
--- a/src/streamdiffusion/tools/install-tensorrt.py
+++ b/src/streamdiffusion/tools/install-tensorrt.py
@@ -1,14 +1,26 @@
-from typing import Literal
+from typing import Literal, Optional
 
 import fire
 from ..pip_utils import is_installed, run_pip
 import platform
 
 
-def install(cu: Literal["11", "12"]):
+def _detect_cuda_major() -> Optional[Literal["11", "12"]]:
+    try:
+        import torch
+        return torch.version.cuda.split(".")[0]  # type: ignore
+    except Exception:
+        return None
+
+
+def install(cu: Optional[Literal["11", "12"]] = _detect_cuda_major()):
     print("Installing TensorRT requirements...")
+    if cu not in ("11", "12"):
+        raise RuntimeError("CUDA major version not detected. Pass --cu 11 or --cu 12 explicitly.")
 
-    cudnn_name = f"nvidia-cudnn-cu{cu}==8.9.7.29"
+    cudnn_name = (
+        f"nvidia-cudnn-cu12==9.7.1.26" if cu == "12" else f"nvidia-cudnn-cu11==8.9.7.29"
+    )
 
     run_pip(f"install {cudnn_name} --no-cache-dir")
 

From 02cf5d37a387e944016c946f51c504847f014e30 Mon Sep 17 00:00:00 2001
From: Cursor Agent <cursoragent@cursor.com>
Date: Tue, 23 Sep 2025 17:57:00 +0000
Subject: [PATCH 08/21] Refactor CUDA version check and dependency installation

Co-authored-by: victorgelias <victorgelias@gmail.com>
---
 setup.py | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/setup.py b/setup.py
index d128fa1d..97d34fc5 100644
--- a/setup.py
+++ b/setup.py
@@ -54,7 +54,7 @@ def deps_list(*pkgs):
 ]
 
 
-def _require_torch_preinstalled() -> None:
+def _require_torch_preinstalled() -> str:
     missing = []
     for pkg in ("torch", "torchvision", "torchaudio"):
         try:
@@ -69,20 +69,15 @@ def _require_torch_preinstalled() -> None:
             "Replace the index URL and versions to match your CUDA runtime."
         )
         raise RuntimeError(msg)
+    import torch  # safe here; already verified importable
+    return getattr(torch.version, "cuda", "")
 
 
 if any(cmd in sys.argv for cmd in ("install", "bdist_wheel", "develop")):
-    _require_torch_preinstalled()
-    import torch  # guaranteed by the preinstall check
-    cuda_str = getattr(torch.version, "cuda", "")
+    cuda_str = _require_torch_preinstalled()
     if not cuda_str:
         raise RuntimeError("Detected CPU-only PyTorch. Install CUDA-enabled torch/vision/audio before installing this package.")
-    parts = cuda_str.split(".")
-    if len(parts) < 2:
-        raise RuntimeError(f"Unrecognized CUDA version from torch: '{cuda_str}'")
-    cu_major, cu_minor = parts[0], parts[1]
-    cuda_python_version = f"{cu_major}.{cu_minor}.0"
-    install_requires.append(f"cuda-python=={cuda_python_version}")
+    install_requires.append(f"cuda-python=={cuda_str}.0")
 
 setup(
     name="streamdiffusion",

From 3e268be1bb6c86c638ab5431b5fe554dff44505c Mon Sep 17 00:00:00 2001
From: Cursor Agent <cursoragent@cursor.com>
Date: Tue, 23 Sep 2025 17:57:54 +0000
Subject: [PATCH 09/21] Refactor TensorRT installation for CUDA 12

Co-authored-by: victorgelias <victorgelias@gmail.com>
---
 src/streamdiffusion/tools/install-tensorrt.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/streamdiffusion/tools/install-tensorrt.py b/src/streamdiffusion/tools/install-tensorrt.py
index fb1787c1..f8c04325 100644
--- a/src/streamdiffusion/tools/install-tensorrt.py
+++ b/src/streamdiffusion/tools/install-tensorrt.py
@@ -25,9 +25,10 @@ def install(cu: Optional[Literal["11", "12"]] = _detect_cuda_major()):
     run_pip(f"install {cudnn_name} --no-cache-dir")
 
     if cu == "12":
-        run_pip("install --extra-index-url https://pypi.nvidia.com tensorrt==10.12.0.36 --no-cache-dir")
-        run_pip("install --extra-index-url https://pypi.nvidia.com tensorrt-cu12-bindings==10.12.0.36 --no-cache-dir")
-        run_pip("install --extra-index-url https://pypi.nvidia.com tensorrt-cu12-libs==10.12.0.36 --no-cache-dir")
+        run_pip("install --extra-index-url https://pypi.nvidia.com --no-cache-dir "
+                "tensorrt==10.12.0.36 "
+                "tensorrt-cu12-bindings==10.12.0.36 "
+                "tensorrt-cu12-libs==10.12.0.36")
     else:
         run_pip("install --extra-index-url https://pypi.nvidia.com tensorrt==8.6.1 --no-cache-dir")
 

From 5d54cf3aa5544afca73e54de8fe46a3d10957422 Mon Sep 17 00:00:00 2001
From: Victor Elias <victor@livepeer.org>
Date: Tue, 23 Sep 2025 18:32:50 +0000
Subject: [PATCH 10/21] Undo uncalled-for wrapper change

---
 src/streamdiffusion/wrapper.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/streamdiffusion/wrapper.py b/src/streamdiffusion/wrapper.py
index 4d93d924..6a997b12 100644
--- a/src/streamdiffusion/wrapper.py
+++ b/src/streamdiffusion/wrapper.py
@@ -316,8 +316,6 @@ def __init__(
                 similar_image_filter_threshold, similar_image_filter_max_skip_frame
             )
 
-    
-
     def prepare(
         self,
         prompt: Union[str, List[Tuple[str, float]]],

From 49d2942673899165d15eefa0ff0b6b1fc54bd2c1 Mon Sep 17 00:00:00 2001
From: Victor Elias <victor@livepeer.org>
Date: Tue, 23 Sep 2025 18:45:38 +0000
Subject: [PATCH 11/21] Cleanup setup

---
 setup.py | 54 +++++++++++++++++++++++++++---------------------------
 1 file changed, 27 insertions(+), 27 deletions(-)

diff --git a/setup.py b/setup.py
index 97d34fc5..1f380810 100644
--- a/setup.py
+++ b/setup.py
@@ -5,7 +5,32 @@
 from setuptools import find_packages, setup
 
 
+def _check_torch_installed() -> str:
+    try:
+        import torch
+        import torchvision
+    except Exception:
+        msg = (
+            "Missing required pre-installed packages: torch, torchvision\n"
+            "Install the PyTorch CUDA wheels from the appropriate index first, e.g.:\n"
+            "  pip install --index-url https://download.pytorch.org/whl/cu12x torch torchvision\n"
+            "Replace the index URL and versions to match your CUDA runtime."
+        )
+        raise RuntimeError(msg)
+
+def get_cuda_version() -> str:
+    _check_torch_installed()
+    import torch
+    v = torch.version.cuda
+    if not v:
+        raise RuntimeError("Detected CPU-only PyTorch. Install CUDA-enabled torch/vision/audio before installing this package.")
+    return v
+
+
+_is_install = any(cmd in sys.argv for cmd in ("install", "bdist_wheel", "develop"))
+
 _deps = [
+    f"cuda-python~={get_cuda_version()}" if _is_install else "cuda-python",
     "xformers==0.0.30",
     "diffusers==0.35.0",
     "transformers==4.56.0",
@@ -34,8 +59,8 @@ def deps_list(*pkgs):
 
 extras = {}
 extras["xformers"] = deps_list("xformers")
-extras["torch"] = []
-extras["tensorrt"] = deps_list("protobuf", "onnx", "onnxruntime", "onnxruntime-gpu", "colored")
+extras["torch"] = deps_list("torch", "accelerate")
+extras["tensorrt"] = deps_list("protobuf", "cuda-python", "onnx", "onnxruntime", "onnxruntime-gpu", "colored")
 
 extras["dev"] = extras["xformers"] + extras["torch"] + extras["tensorrt"]
 
@@ -54,31 +79,6 @@ def deps_list(*pkgs):
 ]
 
 
-def _require_torch_preinstalled() -> str:
-    missing = []
-    for pkg in ("torch", "torchvision", "torchaudio"):
-        try:
-            __import__(pkg)
-        except Exception:
-            missing.append(pkg)
-    if missing:
-        msg = (
-            "Missing required pre-installed packages: " + ", ".join(missing) + "\n"
-            "Install the PyTorch CUDA wheels from the appropriate index first, e.g.:\n"
-            "  pip install --index-url https://download.pytorch.org/whl/cu12x torch torchvision torchaudio\n"
-            "Replace the index URL and versions to match your CUDA runtime."
-        )
-        raise RuntimeError(msg)
-    import torch  # safe here; already verified importable
-    return getattr(torch.version, "cuda", "")
-
-
-if any(cmd in sys.argv for cmd in ("install", "bdist_wheel", "develop")):
-    cuda_str = _require_torch_preinstalled()
-    if not cuda_str:
-        raise RuntimeError("Detected CPU-only PyTorch. Install CUDA-enabled torch/vision/audio before installing this package.")
-    install_requires.append(f"cuda-python=={cuda_str}.0")
-
 setup(
     name="streamdiffusion",
     version="0.1.1",

From af0df82318dfcfa72d30c0b56139af68bafaf422 Mon Sep 17 00:00:00 2001
From: Victor Elias <victor@livepeer.org>
Date: Tue, 23 Sep 2025 20:03:36 +0000
Subject: [PATCH 12/21] Cleanup install tensorrt

---
 setup.py                                      | 15 ++++----
 src/streamdiffusion/pip_utils.py              | 37 ++++++++++++++++++-
 src/streamdiffusion/tools/install-tensorrt.py | 32 ++++++++--------
 3 files changed, 60 insertions(+), 24 deletions(-)

diff --git a/setup.py b/setup.py
index 1f380810..4e80e7f0 100644
--- a/setup.py
+++ b/setup.py
@@ -1,11 +1,10 @@
-import os
 import re
 import sys
 
 from setuptools import find_packages, setup
 
-
-def _check_torch_installed() -> str:
+# Same helpers as pip_utils.py but we shouldn't import it from setup.py
+def _check_torch_installed():
     try:
         import torch
         import torchvision
@@ -18,13 +17,15 @@ def _check_torch_installed() -> str:
         )
         raise RuntimeError(msg)
 
+    if not torch.version.cuda:
+        raise RuntimeError("Detected CPU-only PyTorch. Install CUDA-enabled torch/vision/audio before installing this package.")
+
+
 def get_cuda_version() -> str:
     _check_torch_installed()
+
     import torch
-    v = torch.version.cuda
-    if not v:
-        raise RuntimeError("Detected CPU-only PyTorch. Install CUDA-enabled torch/vision/audio before installing this package.")
-    return v
+    return torch.version.cuda
 
 
 _is_install = any(cmd in sys.argv for cmd in ("install", "bdist_wheel", "develop"))
diff --git a/src/streamdiffusion/pip_utils.py b/src/streamdiffusion/pip_utils.py
index 25b024ad..b403355a 100644
--- a/src/streamdiffusion/pip_utils.py
+++ b/src/streamdiffusion/pip_utils.py
@@ -3,7 +3,7 @@
 import os
 import subprocess
 import sys
-from typing import Dict, Optional
+from typing import Dict, Literal, Optional
 
 from packaging.version import Version
 
@@ -12,6 +12,41 @@
 index_url = os.environ.get("INDEX_URL", "")
 
 
+def _check_torch_installed():
+    try:
+        import torch
+        import torchvision
+    except Exception:
+        msg = (
+            "Missing required pre-installed packages: torch, torchvision\n"
+            "Install the PyTorch CUDA wheels from the appropriate index first, e.g.:\n"
+            "  pip install --index-url https://download.pytorch.org/whl/cu12x torch torchvision\n"
+            "Replace the index URL and versions to match your CUDA runtime."
+        )
+        raise RuntimeError(msg)
+
+    if not torch.version.cuda:
+        raise RuntimeError("Detected CPU-only PyTorch. Install CUDA-enabled torch/vision/audio before installing this package.")
+
+
+def get_cuda_version() -> str:
+    _check_torch_installed()
+
+    import torch
+    return torch.version.cuda
+
+
+def get_cuda_major() -> Optional[Literal["11", "12"]]:
+    version = get_cuda_version()
+    if not version:
+        return None
+
+    major = version.split(".")[0]
+    if major not in ("11", "12"):
+        raise RuntimeError("CUDA major version not detected. Must be 11 or 12.")
+    return major
+
+
 def version(package: str) -> Optional[Version]:
     try:
         return Version(importlib.import_module(package).__version__)
diff --git a/src/streamdiffusion/tools/install-tensorrt.py b/src/streamdiffusion/tools/install-tensorrt.py
index f8c04325..785b2edb 100644
--- a/src/streamdiffusion/tools/install-tensorrt.py
+++ b/src/streamdiffusion/tools/install-tensorrt.py
@@ -1,36 +1,36 @@
 from typing import Literal, Optional
 
 import fire
-from ..pip_utils import is_installed, run_pip
-import platform
-
+from packaging.version import Version
 
-def _detect_cuda_major() -> Optional[Literal["11", "12"]]:
-    try:
-        import torch
-        return torch.version.cuda.split(".")[0]  # type: ignore
-    except Exception:
-        return None
+from ..pip_utils import is_installed, run_pip, version, get_cuda_major
+import platform
 
 
-def install(cu: Optional[Literal["11", "12"]] = _detect_cuda_major()):
-    print("Installing TensorRT requirements...")
+def install(cu: Optional[Literal["11", "12"]] = get_cuda_major()):
     if cu not in ("11", "12"):
         raise RuntimeError("CUDA major version not detected. Pass --cu 11 or --cu 12 explicitly.")
 
-    cudnn_name = (
-        f"nvidia-cudnn-cu12==9.7.1.26" if cu == "12" else f"nvidia-cudnn-cu11==8.9.7.29"
-    )
+    print("Installing TensorRT requirements...")
+
+    trt_version = version("tensorrt")
 
-    run_pip(f"install {cudnn_name} --no-cache-dir")
 
     if cu == "12":
+        if trt_version and trt_version < Version("12.0.0"):
+            run_pip("uninstall -y tensorrt")
+
+        run_pip(f"install nvidia-cudnn-cu12==9.7.1.26 --no-cache-dir")
         run_pip("install --extra-index-url https://pypi.nvidia.com --no-cache-dir "
                 "tensorrt==10.12.0.36 "
                 "tensorrt-cu12-bindings==10.12.0.36 "
                 "tensorrt-cu12-libs==10.12.0.36")
     else:
-        run_pip("install --extra-index-url https://pypi.nvidia.com tensorrt==8.6.1 --no-cache-dir")
+        if trt_version and trt_version < Version("9.0.0"):
+            run_pip("uninstall -y tensorrt")
+
+        run_pip(f"install nvidia-cudnn-cu11==8.9.7.29 --no-cache-dir")
+        run_pip("install --extra-index-url https://pypi.nvidia.com tensorrt==9.0.1.post11.dev4 --no-cache-dir")
 
     if not is_installed("polygraphy"):
         run_pip(

From 1788ae6a4b4d2fb30cd08b9e00233922ed2d23a5 Mon Sep 17 00:00:00 2001
From: Victor Elias <victor@livepeer.org>
Date: Tue, 23 Sep 2025 20:06:36 +0000
Subject: [PATCH 13/21] Consistent CLI behavior

---
 src/streamdiffusion/pip_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/streamdiffusion/pip_utils.py b/src/streamdiffusion/pip_utils.py
index b403355a..fe0c5ebd 100644
--- a/src/streamdiffusion/pip_utils.py
+++ b/src/streamdiffusion/pip_utils.py
@@ -43,7 +43,7 @@ def get_cuda_major() -> Optional[Literal["11", "12"]]:
 
     major = version.split(".")[0]
     if major not in ("11", "12"):
-        raise RuntimeError("CUDA major version not detected. Must be 11 or 12.")
+        return None
     return major
 
 

From 16549ab5258e25749085c09fed262af3310e4417 Mon Sep 17 00:00:00 2001
From: Victor Elias <victor@livepeer.org>
Date: Tue, 23 Sep 2025 20:17:01 +0000
Subject: [PATCH 14/21] Simplify tensorrt installs

---
 src/streamdiffusion/tools/install-tensorrt.py | 29 ++++++++-----------
 1 file changed, 12 insertions(+), 17 deletions(-)

diff --git a/src/streamdiffusion/tools/install-tensorrt.py b/src/streamdiffusion/tools/install-tensorrt.py
index 785b2edb..0fbb8d2e 100644
--- a/src/streamdiffusion/tools/install-tensorrt.py
+++ b/src/streamdiffusion/tools/install-tensorrt.py
@@ -13,24 +13,19 @@ def install(cu: Optional[Literal["11", "12"]] = get_cuda_major()):
 
     print("Installing TensorRT requirements...")
 
+    min_trt_version = Version("10.12.0") if cu == "12" else Version("9.0.0")
     trt_version = version("tensorrt")
-
-
-    if cu == "12":
-        if trt_version and trt_version < Version("12.0.0"):
-            run_pip("uninstall -y tensorrt")
-
-        run_pip(f"install nvidia-cudnn-cu12==9.7.1.26 --no-cache-dir")
-        run_pip("install --extra-index-url https://pypi.nvidia.com --no-cache-dir "
-                "tensorrt==10.12.0.36 "
-                "tensorrt-cu12-bindings==10.12.0.36 "
-                "tensorrt-cu12-libs==10.12.0.36")
-    else:
-        if trt_version and trt_version < Version("9.0.0"):
-            run_pip("uninstall -y tensorrt")
-
-        run_pip(f"install nvidia-cudnn-cu11==8.9.7.29 --no-cache-dir")
-        run_pip("install --extra-index-url https://pypi.nvidia.com tensorrt==9.0.1.post11.dev4 --no-cache-dir")
+    if trt_version and trt_version < min_trt_version:
+        run_pip("uninstall -y tensorrt")
+
+    cudnn_package, trt_package = (
+        ("nvidia-cudnn-cu12==9.7.1.26", "tensorrt==10.12.0.36")
+        if cu == "12" else
+        ("nvidia-cudnn-cu11==8.9.7.29", "tensorrt==9.0.1.post11.dev4")
+    )
+    if not is_installed(trt_package):
+        run_pip(f"install {cudnn_package} --no-cache-dir")
+        run_pip(f"install --extra-index-url https://pypi.nvidia.com {trt_package} --no-cache-dir")
 
     if not is_installed("polygraphy"):
         run_pip(

From efc6b7115d345fd32c4805b6b2caf5348fbe1e2e Mon Sep 17 00:00:00 2001
From: Victor Elias <victor@livepeer.org>
Date: Tue, 23 Sep 2025 20:28:38 +0000
Subject: [PATCH 15/21] Add versionless torch

---
 setup.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/setup.py b/setup.py
index 4e80e7f0..c4176537 100644
--- a/setup.py
+++ b/setup.py
@@ -32,6 +32,7 @@ def get_cuda_version() -> str:
 
 _deps = [
     f"cuda-python~={get_cuda_version()}" if _is_install else "cuda-python",
+    "torch", # We can't really pin the torch version as it depends on CUDA
     "xformers==0.0.30",
     "diffusers==0.35.0",
     "transformers==4.56.0",

From d2f437b218eb6d8dd15e6843202219546b7fee95 Mon Sep 17 00:00:00 2001
From: Victor Elias <victor@livepeer.org>
Date: Tue, 23 Sep 2025 20:43:02 +0000
Subject: [PATCH 16/21] Make controlnet and ipadapter extras

---
 setup.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/setup.py b/setup.py
index c4176537..bc94f246 100644
--- a/setup.py
+++ b/setup.py
@@ -47,12 +47,14 @@ def get_cuda_version() -> str:
     "protobuf==4.25.3",
     "colored==2.2.4",
     "pywin32==306;sys_platform == 'win32'",
+    "onnx-graphsurgeon==0.5.8",
     "controlnet-aux==0.0.10",
+    "diffusers-ipadapter @ git+https://github.com/livepeer/Diffusers_IPAdapter.git@405f87da42932e30bd55ee8dca3ce502d7834a99",
     "mediapipe==0.10.21",
     "insightface==0.7.3",
 ]
 
-deps = {b: a for a, b in (re.findall(r"^(([^!=<>~]+)(?:[!=<>~].*)?$)", x)[0] for x in _deps)}
+deps = {b: a for a, b in (re.findall(r"^(([^!=<>~ @]+)(?:[!=<>~ @].*)?$)", x)[0] for x in _deps)}
 
 
 def deps_list(*pkgs):
@@ -63,6 +65,8 @@ def deps_list(*pkgs):
 extras["xformers"] = deps_list("xformers")
 extras["torch"] = deps_list("torch", "accelerate")
 extras["tensorrt"] = deps_list("protobuf", "cuda-python", "onnx", "onnxruntime", "onnxruntime-gpu", "colored")
+extras["controlnet"] = deps_list("onnx-graphsurgeon", "controlnet-aux")
+extras["ipadapter"] = deps_list("diffusers-ipadapter", "mediapipe", "insightface")
 
 extras["dev"] = extras["xformers"] + extras["torch"] + extras["tensorrt"]
 
@@ -74,10 +78,6 @@ def deps_list(*pkgs):
     deps["accelerate"],
     deps["huggingface_hub"],
     deps["Pillow"],
-    deps["controlnet-aux"],
-    deps["mediapipe"],
-    deps["insightface"],
-    "diffusers-ipadapter @ git+https://github.com/livepeer/Diffusers_IPAdapter.git@405f87da42932e30bd55ee8dca3ce502d7834a99",
 ]
 
 

From 1d49a247343945cc275401c37ae886525b491287 Mon Sep 17 00:00:00 2001
From: Victor Elias <victor@livepeer.org>
Date: Tue, 23 Sep 2025 21:16:14 +0000
Subject: [PATCH 17/21] Do not check cuda version on wheel

whatever that means
---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index bc94f246..6f3808d8 100644
--- a/setup.py
+++ b/setup.py
@@ -28,7 +28,7 @@ def get_cuda_version() -> str:
     return torch.version.cuda
 
 
-_is_install = any(cmd in sys.argv for cmd in ("install", "bdist_wheel", "develop"))
+_is_install = any(cmd in sys.argv for cmd in ("install", "develop"))
 
 _deps = [
     f"cuda-python~={get_cuda_version()}" if _is_install else "cuda-python",

From a001f4cba69e780d4436e6e5a4c660d2ae11befc Mon Sep 17 00:00:00 2001
From: Victor Elias <victor@livepeer.org>
Date: Tue, 23 Sep 2025 21:30:16 +0000
Subject: [PATCH 18/21] Fix utils not being a module

---
 src/streamdiffusion/utils/__init__.py | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 src/streamdiffusion/utils/__init__.py

diff --git a/src/streamdiffusion/utils/__init__.py b/src/streamdiffusion/utils/__init__.py
new file mode 100644
index 00000000..e69de29b

From 4fff57971006e614850a255c351db2b7fc809141 Mon Sep 17 00:00:00 2001
From: Victor Elias <victor@livepeer.org>
Date: Tue, 23 Sep 2025 22:57:22 +0000
Subject: [PATCH 19/21] Fix cuda-python install

such rabbit hole
---
 setup.py | 33 +++++++++++++++++++++++++--------
 1 file changed, 25 insertions(+), 8 deletions(-)

diff --git a/setup.py b/setup.py
index 6f3808d8..05da62c3 100644
--- a/setup.py
+++ b/setup.py
@@ -1,9 +1,10 @@
+import os
 import re
 import sys
 
 from setuptools import find_packages, setup
 
-# Same helpers as pip_utils.py but we shouldn't import it from setup.py
+# Copied from pip_utils.py to avoid import
 def _check_torch_installed():
     try:
         import torch
@@ -21,18 +22,32 @@ def _check_torch_installed():
         raise RuntimeError("Detected CPU-only PyTorch. Install CUDA-enabled torch/vision/audio before installing this package.")
 
 
-def get_cuda_version() -> str:
-    _check_torch_installed()
+def get_cuda_constraint():
+    cuda_version = os.environ.get("STREAMDIFFUSION_CUDA_VERSION") or \
+                    os.environ.get("CUDA_VERSION")
+
+    if not cuda_version:
+        try:
+            import torch
+            cuda_version = torch.version.cuda
+        except Exception:
+            # might not be available during wheel build, so we have to ignore
+            pass
 
-    import torch
-    return torch.version.cuda
+    if not cuda_version:
+        return ">=11,<13"
 
+    parts = cuda_version.split(".")
+    if len(parts) < 2:
+        raise RuntimeError(f"Invalid CUDA version: {cuda_version}")
+    return f"=={parts[0]}.{parts[1]}"
 
-_is_install = any(cmd in sys.argv for cmd in ("install", "develop"))
+
+if any(cmd in sys.argv for cmd in ("install", "develop")):
+    _check_torch_installed()
 
 _deps = [
-    f"cuda-python~={get_cuda_version()}" if _is_install else "cuda-python",
-    "torch", # We can't really pin the torch version as it depends on CUDA
+    f"cuda-python~={get_cuda_constraint()}",
     "xformers==0.0.30",
     "diffusers==0.35.0",
     "transformers==4.56.0",
@@ -52,6 +67,8 @@ def get_cuda_version() -> str:
     "diffusers-ipadapter @ git+https://github.com/livepeer/Diffusers_IPAdapter.git@405f87da42932e30bd55ee8dca3ce502d7834a99",
     "mediapipe==0.10.21",
     "insightface==0.7.3",
+    # We can't really pin torch version as it depends on CUDA, but we check if it's pre-installed above
+    "torch",
 ]
 
 deps = {b: a for a, b in (re.findall(r"^(([^!=<>~ @]+)(?:[!=<>~ @].*)?$)", x)[0] for x in _deps)}

From 39e818a3e56c586e94dfd7b417ddf122f62783ed Mon Sep 17 00:00:00 2001
From: Victor Elias <victor@livepeer.org>
Date: Wed, 24 Sep 2025 00:17:32 +0000
Subject: [PATCH 20/21] Fix cuda constraint

---
 setup.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/setup.py b/setup.py
index 05da62c3..7a687bcc 100644
--- a/setup.py
+++ b/setup.py
@@ -40,14 +40,14 @@ def get_cuda_constraint():
     parts = cuda_version.split(".")
     if len(parts) < 2:
         raise RuntimeError(f"Invalid CUDA version: {cuda_version}")
-    return f"=={parts[0]}.{parts[1]}"
+    return f"~={parts[0]}.{parts[1]}"
 
 
 if any(cmd in sys.argv for cmd in ("install", "develop")):
     _check_torch_installed()
 
 _deps = [
-    f"cuda-python~={get_cuda_constraint()}",
+    f"cuda-python{get_cuda_constraint()}",
     "xformers==0.0.30",
     "diffusers==0.35.0",
     "transformers==4.56.0",

From b725e8b79b47aaac208a446b00f37b1445c1bb09 Mon Sep 17 00:00:00 2001
From: BuffMcBigHuge <marco@bymar.co>
Date: Wed, 24 Sep 2025 14:40:32 -0400
Subject: [PATCH 21/21] Buffer allocation test fix.

---
 src/streamdiffusion/acceleration/tensorrt/utilities.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/streamdiffusion/acceleration/tensorrt/utilities.py b/src/streamdiffusion/acceleration/tensorrt/utilities.py
index ce1124df..5d21d21c 100644
--- a/src/streamdiffusion/acceleration/tensorrt/utilities.py
+++ b/src/streamdiffusion/acceleration/tensorrt/utilities.py
@@ -270,6 +270,11 @@ def activate(self, reuse_device_memory=None):
             self.context = self.engine.create_execution_context()
 
     def allocate_buffers(self, shape_dict=None, device="cuda"):
+        # Ensure an execution context exists before allocating buffers
+        if self.context is None:
+            if self.engine is None:
+                raise RuntimeError("TensorRT engine is not loaded; call load() before allocate_buffers().")
+            self.activate()
         # Check if we can reuse existing buffers (OPTIMIZATION)
         if self._can_reuse_buffers(shape_dict, device):
             return