livepeer · BuffMcBigHuge · Aug 8, 2025 · Aug 12, 2025 · Aug 12, 2025 · Aug 12, 2025
diff --git a/configs/sd15_multicontrol.yaml.example b/configs/sd15_multicontrol.yaml.example
@@ -32,11 +32,19 @@ seed: 789
 frame_buffer_size: 1
 delta: 0.7
 use_denoising_batch: true
-use_lcm_lora: true
+# LoRA configuration - use lora_dict to load LCM LoRA and other LoRAs
+lora_dict:
+  "latent-consistency/lcm-lora-sdv1-5": 1.0  # LCM LoRA for faster inference
+  # Add other LoRAs here:
+  # "your_custom_lora": 0.7
+
 use_tiny_vae: true
 acceleration: "tensorrt"   # "xformers" for non-TensorRT setups
 cfg_type: "self"
 
+scheduler: "lcm" # Supports "lcm" or "tcd"
+sampler: "normal"
+
 # Engine directory for TensorRT (engines will be built here if not found)
 engine_dir: "./engines/sd15"
 

diff --git a/configs/sdturbo_multicontrol.yaml.example b/configs/sdturbo_multicontrol.yaml.example
@@ -22,11 +22,19 @@ seed: 789
 frame_buffer_size: 1
 delta: 0.7
 use_denoising_batch: true
-use_lcm_lora: true          # SD-Turbo benefits from LCM LoRA
+# LoRA configuration - SD-Turbo can benefit from LCM LoRA
+lora_dict:
+  "latent-consistency/lcm-lora-sdv1-5": 1.0  # LCM LoRA for faster inference
+  # Add other LoRAs here:
+  # "your_custom_lora": 0.7
+
 use_tiny_vae: true
 acceleration: "tensorrt"    # "xformers" for non-TensorRT setups
 cfg_type: "self"
 
+scheduler: "lcm" # Supports "lcm" or "tcd"
+sampler: "normal"
+
 # Engine directory for TensorRT
 engine_dir: "./engines/sdturbo"
 

diff --git a/configs/sdxl_multicontrol.yaml.example b/configs/sdxl_multicontrol.yaml.example
@@ -31,11 +31,20 @@ seed: 42                    # Base seed (used with seed_blending above)
 frame_buffer_size: 1
 delta: 0.7
 use_denoising_batch: true
-use_lcm_lora: false         # SDXL has built-in optimizations
+# LoRA configuration - SDXL can use LCM LoRA for faster inference
+# lora_dict:
+#   "latent-consistency/lcm-lora-sdxl": 1.0  # Uncomment to enable LCM LoRA for SDXL
+#   # Add other LoRAs here:
+#   # "your_custom_lora": 0.7
+
 use_taesd: true             # Use Tiny AutoEncoder for SDXL
 use_tiny_vae: true
 acceleration: "tensorrt"    # "xformers" for non-TensorRT setups
 cfg_type: "self"
+
+scheduler: "lcm" # Supports "lcm" or "tcd"
+sampler: "normal"
+
 safety_checker: false
 
 # Engine directory for TensorRT

diff --git a/demo/realtime-img2img/config.py b/demo/realtime-img2img/config.py
@@ -20,6 +20,7 @@ class Args(NamedTuple):
     controlnet_config: str
     api_only: bool
     log_level: str
+    quiet: bool
 
     def pretty_print(self):
         print("\n")
@@ -34,6 +35,7 @@ def pretty_print(self):
 ENGINE_DIR = os.environ.get("ENGINE_DIR", "engines")
 ACCELERATION = os.environ.get("ACCELERATION", "xformers")
 LOG_LEVEL = os.environ.get("LOG_LEVEL", "INFO")
+QUIET = os.environ.get("QUIET", "False").lower() in ("true", "1", "yes", "on")
 
 default_host = os.getenv("HOST", "0.0.0.0")
 default_port = int(os.getenv("PORT", "7860"))
@@ -129,5 +131,12 @@ def pretty_print(self):
     choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
     help="Logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL)",
 )
+parser.add_argument(
+    "--quiet",
+    dest="quiet",
+    action="store_true",
+    default=QUIET,
+    help="Suppress uvicorn INFO messages (server access logs, etc.)",
+)
 config = Args(**vars(parser.parse_args()))
 config.pretty_print()
diff --git a/demo/realtime-img2img/frontend/package-lock.json b/demo/realtime-img2img/frontend/package-lock.json
diff --git a/demo/realtime-img2img/frontend/src/lib/components/PreprocessorDocs.svelte b/demo/realtime-img2img/frontend/src/lib/components/PreprocessorDocs.svelte
@@ -40,7 +40,6 @@
       use_denoising_batch: true,
       delta: 0.7,
       frame_buffer_size: 1,
-      use_lcm_lora: true,
       use_tiny_vae: true,
       acceleration: "xformers",
       cfg_type: "self",

diff --git a/demo/realtime-img2img/frontend/src/routes/+page.svelte b/demo/realtime-img2img/frontend/src/routes/+page.svelte
@@ -1026,7 +1026,7 @@
           <ControlNetConfig 
             bind:this={controlNetConfigComponent}
             {controlnetInfo} 
-            {tIndexList} 
+            {tIndexList}
             {guidanceScale}
             {delta}
             {numInferenceSteps}
@@ -1183,5 +1183,8 @@
     cursor: col-resize !important;
   }
 
-  /* Removed unused .resizer:hover selector */
+  /* Highlight the resizer handle while hovered */
+  .resizer:hover {
+    background-color: rgb(59 130 246) !important; /* blue-500 */
+  }
 </style>
diff --git a/demo/realtime-img2img/main.py b/demo/realtime-img2img/main.py
@@ -59,6 +59,13 @@ def setup_logging(log_level: str = "INFO"):
 # Initialize logger
 logger = setup_logging(config.log_level)
 
+# Suppress uvicorn INFO messages
+if config.quiet:
+    uvicorn_logger = logging.getLogger('uvicorn')
+    uvicorn_logger.setLevel(logging.WARNING)
+    uvicorn_access_logger = logging.getLogger('uvicorn.access')
+    uvicorn_access_logger.setLevel(logging.WARNING)
+
 
 class AppState:
     """Centralized application state management - SINGLE SOURCE OF TRUTH"""

diff --git a/demo/realtime-img2img/requirements.txt b/demo/realtime-img2img/requirements.txt
@@ -1,11 +1,11 @@
 diffusers==0.35.0
-transformers==4.56.0
-peft==0.18.0
+transformers==4.55.4
+peft==0.17.1
 accelerate==1.10.0
-huggingface_hub==0.35.0
+huggingface_hub==0.34.4
 fastapi==0.115.0
 uvicorn[standard]==0.32.0
-Pillow==10.5.0
+Pillow==10.4.0
 compel==2.0.2
 controlnet-aux==0.0.7
 xformers; sys_platform != 'darwin' or platform_machine != 'arm64'

diff --git a/demo/realtime-txt2img/config.py b/demo/realtime-txt2img/config.py
@@ -29,8 +29,7 @@ class Config:
     model_id_or_path: str = os.environ.get("MODEL", "KBlueLeaf/kohaku-v2.1")
     # LoRA dictionary write like    field(default_factory=lambda: {'E:/stable-diffusion-webui/models/Lora_1.safetensors' : 1.0 , 'E:/stable-diffusion-webui/models/Lora_2.safetensors' : 0.2})
     lora_dict: dict = None
-    # LCM-LORA model
-    lcm_lora_id: str = os.environ.get("LORA", "latent-consistency/lcm-lora-sdv1-5")
+    # LCM-LoRA is now supplied via lora_dict above (the separate lcm_lora_id field was removed)
     # TinyVAE model
     vae_id: str = os.environ.get("VAE", "madebyollin/taesd")
     # Device to use

diff --git a/demo/realtime-txt2img/main.py b/demo/realtime-txt2img/main.py
@@ -63,7 +63,6 @@ def __init__(self, config: Config) -> None:
             mode=config.mode,
             model_id_or_path=config.model_id_or_path,
             lora_dict=config.lora_dict,
-            lcm_lora_id=config.lcm_lora_id,
             vae_id=config.vae_id,
             device=config.device,
             dtype=config.dtype,

diff --git a/examples/optimal-performance/multi.py b/examples/optimal-performance/multi.py
@@ -74,7 +74,6 @@ def image_generation_process(
         frame_buffer_size=batch_size,
         warmup=10,
         acceleration=acceleration,
-        use_lcm_lora=False,
         mode="txt2img",
         cfg_type="none",
         use_denoising_batch=True,

diff --git a/examples/optimal-performance/single.py b/examples/optimal-performance/single.py
@@ -40,7 +40,6 @@ def image_generation_process(
         frame_buffer_size=1,
         warmup=10,
         acceleration=acceleration,
-        use_lcm_lora=False,
         mode="txt2img",
         cfg_type="none",
         use_denoising_batch=True,

diff --git a/src/streamdiffusion/acceleration/tensorrt/engine_manager.py b/src/streamdiffusion/acceleration/tensorrt/engine_manager.py
@@ -1,3 +1,5 @@
+
+import hashlib
 import logging
 from enum import Enum
 from pathlib import Path
@@ -75,15 +77,30 @@ def __init__(self, engine_dir: str):
                 'loader': lambda path, cuda_stream, **kwargs: str(path)
             }
         }
-
+
+    def _lora_signature(self, lora_dict: Dict[str, float]) -> str:
+        """Create a short, stable signature for a set of LoRAs.
+
+        Uses sorted basename:weight pairs (weights normalized to float),
+        hashed to a short hex to avoid long/invalid paths while keeping
+        cache keys stable across runs and independent of directory layout.
+        """
+        # Sort the canonical pairs themselves (not the raw paths) so the same
+        # LoRA files yield the same key wherever they live; float() makes 1 == 1.0
+        parts = sorted(
+            f"{Path(str(path)).name}:{float(weight)}" for path, weight in lora_dict.items()
+        )
+        h = hashlib.sha1("|".join(parts).encode("utf-8")).hexdigest()[:10]
+        return f"{len(lora_dict)}-{h}"
+
     def get_engine_path(self, 
                        engine_type: EngineType,
                        model_id_or_path: str,
                        max_batch_size: int,
                        min_batch_size: int,
                        mode: str,
-                       use_lcm_lora: bool,
                        use_tiny_vae: bool,
+                       lora_dict: Optional[Dict[str, float]] = None,
                        ipadapter_scale: Optional[float] = None,
                        ipadapter_tokens: Optional[int] = None,
                        controlnet_model_id: Optional[str] = None,
@@ -114,14 +131,18 @@ def get_engine_path(self,
             base_name = maybe_path.stem if maybe_path.exists() else model_id_or_path
 
             # Create prefix (from wrapper.py lines 1005-1013)
-            prefix = f"{base_name}--lcm_lora-{use_lcm_lora}--tiny_vae-{use_tiny_vae}--min_batch-{min_batch_size}--max_batch-{max_batch_size}"
+            prefix = f"{base_name}--tiny_vae-{use_tiny_vae}--min_batch-{min_batch_size}--max_batch-{max_batch_size}"
 
             # IP-Adapter differentiation: add type and (optionally) tokens
             # Keep scale out of identity for runtime control, but include a type flag to separate caches
             if is_faceid is True:
                 prefix += f"--fid"
             if ipadapter_tokens is not None:
                 prefix += f"--tokens{ipadapter_tokens}"
+
+            # Fused Loras - use concise hashed signature to avoid long/invalid paths
+            if lora_dict is not None and len(lora_dict) > 0:
+                prefix += f"--lora-{self._lora_signature(lora_dict)}"
 
             prefix += f"--mode-{mode}"
 
@@ -287,7 +308,6 @@ def get_or_load_controlnet_engine(self,
             max_batch_size=max_batch_size,
             min_batch_size=min_batch_size,
             mode="",  # Not used for ControlNet
-            use_lcm_lora=False,  # Not used for ControlNet
             use_tiny_vae=False,  # Not used for ControlNet
             controlnet_model_id=model_id
         )

diff --git a/src/streamdiffusion/acceleration/tensorrt/utilities.py b/src/streamdiffusion/acceleration/tensorrt/utilities.py
@@ -360,6 +360,29 @@ def reset_cuda_graph(self):
             self.graph = None
 
     def infer(self, feed_dict, stream, use_cuda_graph=False):
+        # Filter inputs to only those the engine actually exposes to avoid binding errors
+        try:
+            allowed_inputs = set()
+            for idx in range(self.engine.num_io_tensors):
+                name = self.engine.get_tensor_name(idx)
+                if self.engine.get_tensor_mode(name) == trt.TensorIOMode.INPUT:
+                    allowed_inputs.add(name)
+
+            # Drop any extra keys (e.g., text_embeds/time_ids) that the engine was not built to accept
+            if allowed_inputs:
+                filtered_feed_dict = {k: v for k, v in feed_dict.items() if k in allowed_inputs}
+                if len(filtered_feed_dict) != len(feed_dict):
+                    missing = [k for k in feed_dict.keys() if k not in allowed_inputs]
+                    if missing:
+                        logger.debug(
+                            "TensorRT Engine: filtering unsupported inputs %s (allowed=%s)",
+                            missing, sorted(list(allowed_inputs))
+                        )
+                feed_dict = filtered_feed_dict
+        except Exception:
+            # Be permissive if engine query fails; proceed with original dict
+            pass
+
         for name, buf in feed_dict.items():
             self.tensors[name].copy_(buf)
 

diff --git a/src/streamdiffusion/config.py b/src/streamdiffusion/config.py
@@ -100,7 +100,6 @@ def _extract_wrapper_params(config: Dict[str, Any]) -> Dict[str, Any]:
         'lora_dict': config.get('lora_dict'),
         'mode': config.get('mode', 'img2img'),
         'output_type': config.get('output_type', 'pil'),
-        'lcm_lora_id': config.get('lcm_lora_id'),
         'vae_id': config.get('vae_id'),
         'device': config.get('device', 'cuda'),
         'dtype': _parse_dtype(config.get('dtype', 'float16')),
@@ -111,7 +110,7 @@ def _extract_wrapper_params(config: Dict[str, Any]) -> Dict[str, Any]:
         'acceleration': config.get('acceleration', 'tensorrt'),
         'do_add_noise': config.get('do_add_noise', True),
         'device_ids': config.get('device_ids'),
-        'use_lcm_lora': config.get('use_lcm_lora', True),
+        'use_lcm_lora': config.get('use_lcm_lora'),  # Backwards compatibility
         'use_tiny_vae': config.get('use_tiny_vae', True),
         'enable_similar_image_filter': config.get('enable_similar_image_filter', False),
         'similar_image_filter_threshold': config.get('similar_image_filter_threshold', 0.98),
@@ -124,6 +123,8 @@ def _extract_wrapper_params(config: Dict[str, Any]) -> Dict[str, Any]:
         'engine_dir': config.get('engine_dir', 'engines'),
         'normalize_prompt_weights': config.get('normalize_prompt_weights', True),
         'normalize_seed_weights': config.get('normalize_seed_weights', True),
+        'scheduler': config.get('scheduler', 'lcm'),
+        'sampler': config.get('sampler', 'normal'),
         'compile_engines_only': config.get('compile_engines_only', False),
     }
     if 'controlnets' in config and config['controlnets']: