Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
e98c099
Testing scheduling and sampling.
BuffMcBigHuge Aug 8, 2025
c404e32
Merge branch 'main' into marco/feat/schedulers-samplers-revert
BuffMcBigHuge Aug 12, 2025
4407724
Added lora signature to engine name.
BuffMcBigHuge Aug 12, 2025
cabf811
Merge branch 'main' into marco/feat/schedulers-samplers-revert
BuffMcBigHuge Aug 12, 2025
9999514
Merge branch 'main' into marco/feat/schedulers-samplers-revert.
BuffMcBigHuge Aug 21, 2025
f79a59c
Clean up of scheduler/samplers that weren't working, fix to controlne…
BuffMcBigHuge Aug 23, 2025
977afb1
Fix to lora engine setup, changed requirements in realtime-img2img fo…
BuffMcBigHuge Aug 25, 2025
0044a9b
ControlNet TCD.
BuffMcBigHuge Aug 25, 2025
b3182d0
Merge branch 'main' into marco/feat/schedulers-samplers-revert.
BuffMcBigHuge Sep 8, 2025
41e5122
Add uvicorn quiet param to help debug issues without unnecessary logging.
BuffMcBigHuge Sep 10, 2025
b04f0e8
Fix to LoRA and IPAdapter conflict.
BuffMcBigHuge Sep 11, 2025
1c0f1f6
Merge branch 'main' into marco/feat/schedulers-samplers-revert.
BuffMcBigHuge Sep 15, 2025
e2778b6
Deprecation of use_lcm_lora.
BuffMcBigHuge Sep 15, 2025
53f7d92
Added backwards compatibility for use_lcm_lora.
BuffMcBigHuge Sep 16, 2025
55a20c9
Reverted single/multi scripts for simplicity.
BuffMcBigHuge Sep 16, 2025
123ba69
Updated descriptive comments, added tcd support, small cleanup/fixes.
BuffMcBigHuge Sep 16, 2025
312811c
Oops.
BuffMcBigHuge Sep 16, 2025
54f0546
Fix for potential xformers issue.
BuffMcBigHuge Sep 16, 2025
7e210ea
Fix to TCD update params.
BuffMcBigHuge Sep 16, 2025
a0779f4
Removal of old fuse method.
BuffMcBigHuge Sep 17, 2025
3854dbe
Merge branch 'main' into marco/feat/schedulers-samplers-revert.
BuffMcBigHuge Sep 23, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion configs/sd15_multicontrol.yaml.example
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,19 @@ seed: 789
frame_buffer_size: 1
delta: 0.7
use_denoising_batch: true
use_lcm_lora: true
# LoRA configuration - use lora_dict to load LCM LoRA and other LoRAs
lora_dict:
"latent-consistency/lcm-lora-sdv1-5": 1.0 # LCM LoRA for faster inference
# Add other LoRAs here:
# "your_custom_lora": 0.7

use_tiny_vae: true
acceleration: "tensorrt" # "xformers" for non-TensorRT setups
cfg_type: "self"

scheduler: "lcm" # Supports "lcm" or "tcd"
sampler: "normal"

# Engine directory for TensorRT (engines will be built here if not found)
engine_dir: "./engines/sd15"

Expand Down
10 changes: 9 additions & 1 deletion configs/sdturbo_multicontrol.yaml.example
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,19 @@ seed: 789
frame_buffer_size: 1
delta: 0.7
use_denoising_batch: true
use_lcm_lora: true # SD-Turbo benefits from LCM LoRA
# LoRA configuration - SD-Turbo can benefit from LCM LoRA
lora_dict:
"latent-consistency/lcm-lora-sdv1-5": 1.0 # LCM LoRA for faster inference
# Add other LoRAs here:
# "your_custom_lora": 0.7

use_tiny_vae: true
acceleration: "tensorrt" # "xformers" for non-TensorRT setups
cfg_type: "self"

scheduler: "lcm" # Supports "lcm" or "tcd"
sampler: "normal"

# Engine directory for TensorRT
engine_dir: "./engines/sdturbo"

Expand Down
11 changes: 10 additions & 1 deletion configs/sdxl_multicontrol.yaml.example
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,20 @@ seed: 42 # Base seed (used with seed_blending above)
frame_buffer_size: 1
delta: 0.7
use_denoising_batch: true
use_lcm_lora: false # SDXL has built-in optimizations
# LoRA configuration - SDXL can use LCM LoRA for faster inference
# lora_dict:
# "latent-consistency/lcm-lora-sdxl": 1.0 # Uncomment to enable LCM LoRA for SDXL
# # Add other LoRAs here:
# # "your_custom_lora": 0.7

use_taesd: true # Use Tiny AutoEncoder for SDXL
use_tiny_vae: true
acceleration: "tensorrt" # "xformers" for non-TensorRT setups
cfg_type: "self"

scheduler: "lcm" # Supports "lcm" or "tcd"
sampler: "normal"

safety_checker: false

# Engine directory for TensorRT
Expand Down
9 changes: 9 additions & 0 deletions demo/realtime-img2img/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ class Args(NamedTuple):
controlnet_config: str
api_only: bool
log_level: str
quiet: bool

def pretty_print(self):
print("\n")
Expand All @@ -34,6 +35,7 @@ def pretty_print(self):
ENGINE_DIR = os.environ.get("ENGINE_DIR", "engines")
ACCELERATION = os.environ.get("ACCELERATION", "xformers")
LOG_LEVEL = os.environ.get("LOG_LEVEL", "INFO")
QUIET = os.environ.get("QUIET", "False").lower() in ("true", "1", "yes", "on")

default_host = os.getenv("HOST", "0.0.0.0")
default_port = int(os.getenv("PORT", "7860"))
Expand Down Expand Up @@ -129,5 +131,12 @@ def pretty_print(self):
choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
help="Logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL)",
)
parser.add_argument(
"--quiet",
dest="quiet",
action="store_true",
default=QUIET,
help="Suppress uvicorn INFO messages (server access logs, etc.)",
)
config = Args(**vars(parser.parse_args()))
config.pretty_print()
20 changes: 3 additions & 17 deletions demo/realtime-img2img/frontend/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@
use_denoising_batch: true,
delta: 0.7,
frame_buffer_size: 1,
use_lcm_lora: true,
use_tiny_vae: true,
acceleration: "xformers",
cfg_type: "self",
Expand Down
7 changes: 5 additions & 2 deletions demo/realtime-img2img/frontend/src/routes/+page.svelte
Original file line number Diff line number Diff line change
Expand Up @@ -1026,7 +1026,7 @@
<ControlNetConfig
bind:this={controlNetConfigComponent}
{controlnetInfo}
{tIndexList}
{tIndexList}
{guidanceScale}
{delta}
{numInferenceSteps}
Expand Down Expand Up @@ -1183,5 +1183,8 @@
cursor: col-resize !important;
}

/* Removed unused .resizer:hover selector */
/* Improved resizer hover effects */
.resizer:hover {
background-color: rgb(59 130 246) !important; /* blue-500 */
}
</style>
7 changes: 7 additions & 0 deletions demo/realtime-img2img/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,13 @@ def setup_logging(log_level: str = "INFO"):
# Initialize logger
logger = setup_logging(config.log_level)

# Suppress uvicorn INFO messages
if config.quiet:
uvicorn_logger = logging.getLogger('uvicorn')
uvicorn_logger.setLevel(logging.WARNING)
uvicorn_access_logger = logging.getLogger('uvicorn.access')
uvicorn_access_logger.setLevel(logging.WARNING)


class AppState:
"""Centralized application state management - SINGLE SOURCE OF TRUTH"""
Expand Down
8 changes: 4 additions & 4 deletions demo/realtime-img2img/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
diffusers==0.35.0
transformers==4.56.0
peft==0.18.0
transformers==4.55.4
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These dep versions changed for Windows support.

peft==0.17.1
accelerate==1.10.0
huggingface_hub==0.35.0
huggingface_hub==0.34.4
fastapi==0.115.0
uvicorn[standard]==0.32.0
Pillow==10.5.0
Pillow==10.4.0
compel==2.0.2
controlnet-aux==0.0.7
xformers; sys_platform != 'darwin' or platform_machine != 'arm64'
Expand Down
3 changes: 1 addition & 2 deletions demo/realtime-txt2img/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,7 @@ class Config:
model_id_or_path: str = os.environ.get("MODEL", "KBlueLeaf/kohaku-v2.1")
# LoRA dictionary write like field(default_factory=lambda: {'E:/stable-diffusion-webui/models/Lora_1.safetensors' : 1.0 , 'E:/stable-diffusion-webui/models/Lora_2.safetensors' : 0.2})
lora_dict: dict = None
# LCM-LORA model
lcm_lora_id: str = os.environ.get("LORA", "latent-consistency/lcm-lora-sdv1-5")
# LCM-LORA model (use lora_dict instead of lcm_lora_id)
# TinyVAE model
vae_id: str = os.environ.get("VAE", "madebyollin/taesd")
# Device to use
Expand Down
1 change: 0 additions & 1 deletion demo/realtime-txt2img/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,6 @@ def __init__(self, config: Config) -> None:
mode=config.mode,
model_id_or_path=config.model_id_or_path,
lora_dict=config.lora_dict,
lcm_lora_id=config.lcm_lora_id,
vae_id=config.vae_id,
device=config.device,
dtype=config.dtype,
Expand Down
1 change: 0 additions & 1 deletion examples/optimal-performance/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,6 @@ def image_generation_process(
frame_buffer_size=batch_size,
warmup=10,
acceleration=acceleration,
use_lcm_lora=False,
mode="txt2img",
cfg_type="none",
use_denoising_batch=True,
Expand Down
1 change: 0 additions & 1 deletion examples/optimal-performance/single.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@ def image_generation_process(
frame_buffer_size=1,
warmup=10,
acceleration=acceleration,
use_lcm_lora=False,
mode="txt2img",
cfg_type="none",
use_denoising_batch=True,
Expand Down
28 changes: 24 additions & 4 deletions src/streamdiffusion/acceleration/tensorrt/engine_manager.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@

import hashlib
import logging
from enum import Enum
from pathlib import Path
Expand Down Expand Up @@ -75,15 +77,30 @@ def __init__(self, engine_dir: str):
'loader': lambda path, cuda_stream, **kwargs: str(path)
}
}


def _lora_signature(self, lora_dict: Dict[str, float]) -> str:
"""Create a short, stable signature for a set of LoRAs.

Uses sorted basenames and weights, hashed to a short hex to avoid
long/invalid paths while keeping cache keys stable across runs.
"""
# Build canonical string of basename:weight pairs
parts = []
for path, weight in sorted(lora_dict.items(), key=lambda x: str(x[0])):
base = Path(str(path)).name # basename only
parts.append(f"{base}:{weight}")
canon = "|".join(parts)
h = hashlib.sha1(canon.encode("utf-8")).hexdigest()[:10]
return f"{len(lora_dict)}-{h}"

def get_engine_path(self,
engine_type: EngineType,
model_id_or_path: str,
max_batch_size: int,
min_batch_size: int,
mode: str,
use_lcm_lora: bool,
use_tiny_vae: bool,
lora_dict: Optional[Dict[str, float]] = None,
ipadapter_scale: Optional[float] = None,
ipadapter_tokens: Optional[int] = None,
controlnet_model_id: Optional[str] = None,
Expand Down Expand Up @@ -114,14 +131,18 @@ def get_engine_path(self,
base_name = maybe_path.stem if maybe_path.exists() else model_id_or_path

# Create prefix (from wrapper.py lines 1005-1013)
prefix = f"{base_name}--lcm_lora-{use_lcm_lora}--tiny_vae-{use_tiny_vae}--min_batch-{min_batch_size}--max_batch-{max_batch_size}"
prefix = f"{base_name}--tiny_vae-{use_tiny_vae}--min_batch-{min_batch_size}--max_batch-{max_batch_size}"
Copy link
Collaborator Author

@BuffMcBigHuge BuffMcBigHuge Sep 16, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This will cause engines to rebuild, so the easiest fix is to remove `lcm_lora-{use_lcm_lora}--` from the names of any engines you've already built.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ty for the heads up on this


# IP-Adapter differentiation: add type and (optionally) tokens
# Keep scale out of identity for runtime control, but include a type flag to separate caches
if is_faceid is True:
prefix += f"--fid"
if ipadapter_tokens is not None:
prefix += f"--tokens{ipadapter_tokens}"

# Fused Loras - use concise hashed signature to avoid long/invalid paths
if lora_dict is not None and len(lora_dict) > 0:
prefix += f"--lora-{self._lora_signature(lora_dict)}"

prefix += f"--mode-{mode}"

Expand Down Expand Up @@ -287,7 +308,6 @@ def get_or_load_controlnet_engine(self,
max_batch_size=max_batch_size,
min_batch_size=min_batch_size,
mode="", # Not used for ControlNet
use_lcm_lora=False, # Not used for ControlNet
use_tiny_vae=False, # Not used for ControlNet
controlnet_model_id=model_id
)
Expand Down
23 changes: 23 additions & 0 deletions src/streamdiffusion/acceleration/tensorrt/utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -360,6 +360,29 @@ def reset_cuda_graph(self):
self.graph = None

def infer(self, feed_dict, stream, use_cuda_graph=False):
# Filter inputs to only those the engine actually exposes to avoid binding errors
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not 100% sure about this

try:
allowed_inputs = set()
for idx in range(self.engine.num_io_tensors):
name = self.engine.get_tensor_name(idx)
if self.engine.get_tensor_mode(name) == trt.TensorIOMode.INPUT:
allowed_inputs.add(name)

# Drop any extra keys (e.g., text_embeds/time_ids) that the engine was not built to accept
if allowed_inputs:
filtered_feed_dict = {k: v for k, v in feed_dict.items() if k in allowed_inputs}
if len(filtered_feed_dict) != len(feed_dict):
missing = [k for k in feed_dict.keys() if k not in allowed_inputs]
if missing:
logger.debug(
"TensorRT Engine: filtering unsupported inputs %s (allowed=%s)",
missing, sorted(list(allowed_inputs))
)
feed_dict = filtered_feed_dict
except Exception:
# Be permissive if engine query fails; proceed with original dict
pass

for name, buf in feed_dict.items():
self.tensors[name].copy_(buf)

Expand Down
5 changes: 3 additions & 2 deletions src/streamdiffusion/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,6 @@ def _extract_wrapper_params(config: Dict[str, Any]) -> Dict[str, Any]:
'lora_dict': config.get('lora_dict'),
'mode': config.get('mode', 'img2img'),
'output_type': config.get('output_type', 'pil'),
'lcm_lora_id': config.get('lcm_lora_id'),
'vae_id': config.get('vae_id'),
'device': config.get('device', 'cuda'),
'dtype': _parse_dtype(config.get('dtype', 'float16')),
Expand All @@ -111,7 +110,7 @@ def _extract_wrapper_params(config: Dict[str, Any]) -> Dict[str, Any]:
'acceleration': config.get('acceleration', 'tensorrt'),
'do_add_noise': config.get('do_add_noise', True),
'device_ids': config.get('device_ids'),
'use_lcm_lora': config.get('use_lcm_lora', True),
'use_lcm_lora': config.get('use_lcm_lora'), # Backwards compatibility
'use_tiny_vae': config.get('use_tiny_vae', True),
'enable_similar_image_filter': config.get('enable_similar_image_filter', False),
'similar_image_filter_threshold': config.get('similar_image_filter_threshold', 0.98),
Expand All @@ -124,6 +123,8 @@ def _extract_wrapper_params(config: Dict[str, Any]) -> Dict[str, Any]:
'engine_dir': config.get('engine_dir', 'engines'),
'normalize_prompt_weights': config.get('normalize_prompt_weights', True),
'normalize_seed_weights': config.get('normalize_seed_weights', True),
'scheduler': config.get('scheduler', 'lcm'),
'sampler': config.get('sampler', 'normal'),
'compile_engines_only': config.get('compile_engines_only', False),
}
if 'controlnets' in config and config['controlnets']:
Expand Down
Loading