From 6de7d5af51177209dbfb13c2febea372e8739c09 Mon Sep 17 00:00:00 2001
From: Giuseppe Franco
Date: Fri, 12 Apr 2024 12:44:15 +0200
Subject: [PATCH] Fix (examples/stable_diffusion): README formatting and clarification (#932)

---
 .../stable_diffusion/README.md | 23 +++++++++----------
 .../stable_diffusion/main.py   | 14 +++++------
 2 files changed, 17 insertions(+), 20 deletions(-)

diff --git a/src/brevitas_examples/stable_diffusion/README.md b/src/brevitas_examples/stable_diffusion/README.md
index 8c0d1dae8..30754b3d8 100644
--- a/src/brevitas_examples/stable_diffusion/README.md
+++ b/src/brevitas_examples/stable_diffusion/README.md
@@ -1,6 +1,6 @@
 # Stable Diffusion Quantization
 
-It currently supports Stable Diffusion 2.1 and Stable Diffusion XL.
+It supports Stable Diffusion 2.1 and Stable Diffusion XL.
 
 The following PTQ techniques are currently supported:
 - Activation Equalization (e.g., SmoothQuant), layerwise (with the addition of Mul ops)
@@ -8,16 +8,18 @@ The following PTQ techniques are currently supported:
 - GPTQ
 - Bias Correction
 
-These techniques can be applied for both integer and floating point quantization
+These techniques can be applied for both integer and floating point quantization.
+Activation quantization is optional, and disabled by default. To enable it, set both `conv-input-bit-width` and `linear-input-bit-width`.
 
 We support ONNX integer export, and we are planning to release soon export for floating point quantization (e.g., FP8).
 
-To export the model in fp16, enable `export-cuda-float16`. This will performing the tracing necessary for export on GPU, leaving the model in fp16.
-If the flag is not enabled, the model will be moved to CPU and cast to float32 before export because of missing CPU kernels in fp16.
-NB: when exporting Stable Diffusion XL, make sure to enable `is-sd-xl` flag. The flag is not needed when export is not executed.
+To export the model with fp16 scale factors, enable `export-cuda-float16`. This will perform the tracing necessary for export on GPU, leaving the model in fp16.
+If the flag is not enabled, the model will be moved to CPU and cast to float32 before export because of missing CPU kernels in fp16.
 
 
 ## Run
+
+```bash
 usage: main.py [-h] [-m MODEL] [-d DEVICE] [-b BATCH_SIZE] [--prompt PROMPT]
                [--resolution RESOLUTION]
                [--output-path OUTPUT_PATH | --no-output-path]
@@ -25,7 +27,7 @@ usage: main.py [-h] [-m MODEL] [-d DEVICE] [-b BATCH_SIZE] [--prompt PROMPT]
                [--activation-equalization | --no-activation-equalization]
                [--gptq | --no-gptq] [--float16 | --no-float16]
                [--attention-slicing | --no-attention-slicing]
-               [--is-sd-xl | --no-is-sd-xl] [--export-target {,onnx}]
+               [--export-target {,onnx}]
                [--export-weight-q-node | --no-export-weight-q-node]
                [--conv-weight-bit-width CONV_WEIGHT_BIT_WIDTH]
                [--linear-weight-bit-width LINEAR_WEIGHT_BIT_WIDTH]
@@ -79,10 +81,6 @@ options:
   --attention-slicing   Enable Enable attention slicing. Default: Disabled
   --no-attention-slicing
                         Disable Enable attention slicing. Default: Disabled
-  --is-sd-xl            Enable Enable this flag to correctly export SDXL.
-                        Default: Disabled
-  --no-is-sd-xl         Disable Enable this flag to correctly export SDXL.
-                        Default: Disabled
   --export-target {,onnx}
                         Target export flow.
   --export-weight-q-node
@@ -117,8 +115,8 @@ options:
                         Weight quantization type. Either int or eXmY, with
                         X+Y==weight_bit_width-1. Default: int.
   --input-quant-format INPUT_QUANT_FORMAT
-                        Weight quantization type. Either int or eXmY, with
-                        X+Y==weight_bit_width-1. Default: int.
+                        Input quantization type. Either int or eXmY, with
+                        X+Y==input_bit_width-1. Default: int.
   --weight-quant-granularity {per_channel,per_tensor,per_group}
                         Granularity for scales/zero-point of weights. Default:
                         per_channel.
@@ -139,3 +137,4 @@ options:
                         Enable Export FP16 on CUDA. Default: Disabled
   --no-export-cuda-float16
                         Disable Export FP16 on CUDA. Default: Disabled
+```
diff --git a/src/brevitas_examples/stable_diffusion/main.py b/src/brevitas_examples/stable_diffusion/main.py
index 3fa5e9ca1..5d626accb 100644
--- a/src/brevitas_examples/stable_diffusion/main.py
+++ b/src/brevitas_examples/stable_diffusion/main.py
@@ -11,12 +11,12 @@
 
 from dependencies import value
 from diffusers import DiffusionPipeline
+from diffusers import StableDiffusionXLPipeline
 import torch
 from torch import nn
 from tqdm import tqdm
 
 from brevitas.export.onnx.standard.qcdq.manager import StdQCDQONNXManager
-from brevitas.export.torch.qcdq.manager import TorchQCDQManager
 from brevitas.graph.calibrate import bias_correction_mode
 from brevitas.graph.calibrate import calibration_mode
 from brevitas.graph.equalize import activation_equalization_mode
@@ -97,6 +97,9 @@ def main(args):
     pipe = DiffusionPipeline.from_pretrained(args.model, torch_dtype=dtype)
     print(f"Model loaded from {args.model}.")
 
+    # Detect Stable Diffusion XL pipeline
+    is_sd_xl = isinstance(pipe, StableDiffusionXLPipeline)
+
     # Enable attention slicing
     if args.attention_slicing:
         pipe.enable_attention_slicing()
@@ -226,7 +229,7 @@ def input_bit_width(module):
     dtype = next(iter(pipe.unet.parameters())).dtype
 
     # Define tracing input
-    if args.is_sd_xl:
+    if is_sd_xl:
         generate_fn = generate_unet_xl_rand_inputs
         shape = SD_XL_EMBEDDINGS_SHAPE
     else:
@@ -287,11 +290,6 @@ def input_bit_width(module):
         'attention-slicing',
         default=False,
         help='Enable attention slicing. Default: Disabled')
-    add_bool_arg(
-        parser,
-        'is-sd-xl',
-        default=False,
-        help='Enable this flag to correctly export SDXL. Default: Disabled')
     parser.add_argument(
         '--export-target', type=str, default='', choices=['', 'onnx'], help='Target export flow.')
     add_bool_arg(
@@ -362,7 +360,7 @@ def input_bit_width(module):
         type=quant_format_validator,
         default='int',
         help=
-        'Weight quantization type. Either int or eXmY, with X+Y==weight_bit_width-1. Default: int.')
+        'Input quantization type. Either int or eXmY, with X+Y==input_bit_width-1. Default: int.')
     parser.add_argument(
         '--weight-quant-granularity',
         type=str,
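
Note for reviewers: the main.py hunks above replace the manual `--is-sd-xl` flag with auto-detection of the loaded pipeline class. Below is a minimal, self-contained sketch of that idea, assuming only `diffusers` and `torch` are installed; the helper name `load_pipeline` and the fp16 dtype are illustrative and not part of the patch.

```python
import torch
from diffusers import DiffusionPipeline, StableDiffusionXLPipeline


def load_pipeline(model_id: str):
    # DiffusionPipeline.from_pretrained dispatches to the pipeline class declared
    # in the checkpoint's model_index.json, so SDXL checkpoints come back as
    # StableDiffusionXLPipeline instances.
    pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
    # This isinstance check is what lets main.py drop the --is-sd-xl flag while
    # still picking the SDXL tracing inputs at export time.
    is_sd_xl = isinstance(pipe, StableDiffusionXLPipeline)
    return pipe, is_sd_xl


# Example usage (downloads the checkpoint, so left commented out):
# pipe, is_sd_xl = load_pipeline("stabilityai/stable-diffusion-xl-base-1.0")
```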