livepeer · ryanontheinside · Aug 8, 2025 · Aug 8, 2025 · Aug 8, 2025 · Aug 8, 2025
diff --git a/configs/prompt_blending_demo.yaml.example b/configs/prompt_blending_demo.yaml.example
diff --git a/configs/sd15_canny_depth.yaml.example b/configs/sd15_canny_depth.yaml.example
diff --git a/configs/sd15_depth_trt_example.yaml.example b/configs/sd15_depth_trt_example.yaml.example
diff --git a/configs/sd15_multicontrol.yaml.example b/configs/sd15_multicontrol.yaml.example
@@ -0,0 +1,74 @@
+# StreamDiffusion SD1.5 Multi-ControlNet + IPAdapter Configuration
+# Demonstrates: TensorRT depth processing, tile with feedback, and IPAdapter integration
+
+# Base model configuration (use HuggingFace model or local path)
+model_id: "KBlueLeaf/kohaku-v2.1"
+# model_id: 'C:\_dev\models\your_sd15_model.safetensors'
+
+# StreamDiffusion core parameters
+t_index_list: [16, 32]      # Indices into the num_inference_steps schedule; lower = noisier start = stronger change to the input
+width: 512
+height: 512
+device: "cuda"
+dtype: "float16"
+
+# Generation parameters
+# prompt: "masterpiece, high quality, detailed, cinematic lighting"  # Overridden by prompt_blending below
+
+# Prompt blending configuration - interpolates between multiple prompts
+prompt_blending:
+  prompt_list:
+    - ["masterpiece, studio ghibli style, detailed anime artwork", 1.0]
+    - ["cyberpunk aesthetic, neon lights, futuristic", 0.3]
+  interpolation_method: "slerp"  # or "linear"
+  enable_caching: true
+
+negative_prompt: "blurry, low quality, distorted, 3d render"
+guidance_scale: 1.1
+num_inference_steps: 50
+seed: 789
+
+# Temporal consistency and optimization
+frame_buffer_size: 1
+delta: 0.7
+use_denoising_batch: true
+use_lcm_lora: true
+use_tiny_vae: true
+acceleration: "tensorrt"   # "xformers" for non-TensorRT setups
+cfg_type: "self"
+
+# Engine directory for TensorRT (engines will be built here if not found)
+engine_dir: "./engines/sd15"
+
+# Enable multi-modal conditioning
+use_controlnet: true
+use_ipadapter: true
+
+# IPAdapter configuration for style conditioning
+ipadapters:
+  - ipadapter_model_path: "h94/IP-Adapter/models/ip-adapter_sd15.safetensors"
+    image_encoder_path: "h94/IP-Adapter/models/image_encoder"
+    # style_image: "path/to/your/style/image.jpg"  # Optional: specify style image
+    scale: 0.7
+    enabled: true
+
+# ControlNet configurations
+controlnets:
+  # Depth ControlNet fed by the TensorRT depth preprocessor (requires a prebuilt engine, see engine_path)
+  - model_id: "lllyasviel/control_v11f1p_sd15_depth"
+    conditioning_scale: 0.3
+    preprocessor: "depth_tensorrt"
+    preprocessor_params:
+      engine_path: 'C:\_dev\models\tensorrt\depth_anything_v2_vits-fp16.engine'  # REQUIRED: replace with the path to your TensorRT depth engine
+      detect_resolution: 518    # Must match engine input size
+      image_resolution: 512
+    enabled: true
+
+  # Tile ControlNet with feedback processor for temporal consistency
+  - model_id: "lllyasviel/control_v11f1e_sd15_tile"
+    conditioning_scale: 0.2
+    preprocessor: "feedback"
+    preprocessor_params:
+      image_resolution: 512
+      feedback_strength: 0.15   # Controls temporal feedback intensity
+    enabled: true
diff --git a/configs/sd15_tile.yaml.example b/configs/sd15_tile.yaml.example
diff --git a/configs/sdturbo_color.yaml.example b/configs/sdturbo_color.yaml.example
diff --git a/configs/sdturbo_mediapipe_pose_depth_trt.yaml.example b/configs/sdturbo_mediapipe_pose_depth_trt.yaml.example
diff --git a/configs/sdturbo_multicontrol.yaml.example b/configs/sdturbo_multicontrol.yaml.example
@@ -0,0 +1,57 @@
+# StreamDiffusion SD-Turbo Multi-ControlNet Configuration
+# Demonstrates: Fast inference with multiple ControlNet guidance (no IPAdapter for speed)
+
+# Base model configuration
+model_id: "stabilityai/sd-turbo"
+
+# StreamDiffusion core parameters
+t_index_list: [0, 16]       # Indices into the num_inference_steps schedule; each entry must be < num_inference_steps
+width: 512
+height: 512
+device: "cuda"
+dtype: "float16"
+
+# Generation parameters
+prompt: "masterpiece, high quality, detailed anime character"
+negative_prompt: "blurry, low quality, distorted, 3d render"
+guidance_scale: 1.0         # SD-Turbo typically uses lower guidance
+num_inference_steps: 50     # Schedule length indexed by t_index_list; denoising steps actually run = len(t_index_list)
+seed: 789
+
+# Temporal consistency and optimization
+frame_buffer_size: 1
+delta: 0.7
+use_denoising_batch: true
+use_lcm_lora: false         # SD-Turbo (SD2.1-based) is already few-step distilled; the SD1.5 LCM-LoRA does not match its weights
+use_tiny_vae: true
+acceleration: "tensorrt"    # "xformers" for non-TensorRT setups
+cfg_type: "self"
+
+# Engine directory for TensorRT
+engine_dir: "./engines/sdturbo"
+
+# Enable ControlNet (no IPAdapter for maximum speed)
+use_controlnet: true
+
+# ControlNet configurations
+controlnets:
+  # Canny edge detection for structural guidance
+  - model_id: "thibaud/controlnet-sd21-canny-diffusers"
+    conditioning_scale: 0.5
+    preprocessor: "canny"
+    preprocessor_params:
+      low_threshold: 100
+      high_threshold: 200
+    enabled: true
+
+  # Soft edge detection for artistic guidance
+  - model_id: "thibaud/controlnet-sd21-hed-diffusers"
+    conditioning_scale: 0.3
+    preprocessor: "soft_edge"
+    preprocessor_params:
+      image_resolution: 512
+      strength: 1.0
+      soft_threshold: 0.5
+      multi_scale: true
+      gaussian_sigma: 1.0
+    enabled: true