Add 2 WebUI Sample apps

quic · Feb 6, 2025 · commit 7b186f7
1 parent: 5bc8341
Show file tree

Hide file tree

Showing 6 changed files with 635 additions and 52 deletions.
diff --git a/samples/python/real_esrgan_x4plus/real_esrgan_x4plus.py b/samples/python/real_esrgan_x4plus/real_esrgan_x4plus.py
@@ -14,9 +14,15 @@
 import torchvision.transforms as transforms
 from PIL import Image
 from PIL.Image import fromarray as ImageFromArray
-
+from utils.image_processing import (
+    preprocess_PIL_image,
+    torch_tensor_to_PIL_image,
+    pil_resize_pad,
+    pil_undo_resize_pad
+)
 from qai_appbuilder import (QNNContext, Runtime, LogLevel, ProfilingLevel, PerfProfile, QNNConfig)
 
+
 ####################################################################
 
 MODEL_ID = "mnz1l2exq"
@@ -27,6 +33,10 @@
 ####################################################################
 
 execution_ws = os.getcwd()
+
+if not "python" in execution_ws:
+    execution_ws = execution_ws + "\\..\\" + "python"
+
 qnn_dir = execution_ws + "\\qai_libs"
 
 if not MODEL_NAME in execution_ws:
@@ -35,27 +45,12 @@
 model_dir = execution_ws + "\\models"
 madel_path = model_dir + "\\" + MODEL_NAME + ".bin"
 
+
 ####################################################################
 
 image_buffer = None
 realesrgan = None
 
-def preprocess_PIL_image(image: Image) -> torch.Tensor:
-    """Convert a PIL image into a pyTorch tensor with range [0, 1] and shape NCHW."""
-    transform = transforms.Compose([transforms.Resize(IMAGE_SIZE),      # bgr image
-                                    transforms.CenterCrop(IMAGE_SIZE),
-                                    transforms.PILToTensor()])
-    img: torch.Tensor = transform(image)  # type: ignore
-    img = img.float() / 255.0  # int 0 - 255 to float 0.0 - 1.0
-    return img
-
-def torch_tensor_to_PIL_image(data: torch.Tensor) -> Image:
-    """
-    Convert a Torch tensor (dtype float32) with range [0, 1] and shape CHW into PIL image CHW
-    """
-    out = torch.clip(data, min=0.0, max=1.0)
-    np_out = (out.detach().numpy() * 255).astype(np.uint8)
-    return ImageFromArray(np_out)
 
 # RealESRGan class which inherited from the class QNNContext.
 class RealESRGan(QNNContext):
@@ -85,13 +80,15 @@ def Init():
     # Instance for RealESRGan objects.
     realesrgan = RealESRGan("realesrgan", madel_path)
 
-def Inference(input_image_path, output_image_path):
+def Inference(input_image_path, output_image_path, show_image = True):
     global image_buffer
 
     # Read and preprocess the image.
-    image = Image.open(input_image_path)
-    image = preprocess_PIL_image(image).numpy()
-    image = np.transpose(image, (1, 2, 0))  # CHW -> HWC
+    orig_image = Image.open(input_image_path)
+    image, scale, padding = pil_resize_pad(orig_image, (IMAGE_SIZE, IMAGE_SIZE))
+
+    image = np.array(image)
+    image = np.clip(image, 0, 255) / 255.0  # normalization
 
     # Burst the HTP.
     PerfProfile.SetPerfProfileGlobal(PerfProfile.BURST)
@@ -102,25 +99,31 @@ def Inference(input_image_path, output_image_path):
     # Reset the HTP.
     PerfProfile.RelPerfProfileGlobal()
 
-    # show & save the result
-    output_image = torch.from_numpy(output_image)
     output_image = output_image.reshape(IMAGE_SIZE * 4, IMAGE_SIZE * 4, 3)
-    output_image = torch.unsqueeze(output_image, 0)
-    output_image = [torch_tensor_to_PIL_image(img) for img in output_image]
-    image_buffer = output_image[0]
+
+    output_image = np.clip(output_image, 0.0, 1.0)
+    output_image = (output_image * 255).astype(np.uint8) # un-normalization
+    output_image = ImageFromArray(output_image)
+
+    image_size = (orig_image.size[0] * 4, orig_image.size[1] * 4)
+    image_padding = (padding[0] * 4, padding[1] * 4)
+    image_buffer = pil_undo_resize_pad(output_image, image_size, scale, image_padding)
+
+    # show & save the result
     image_buffer.save(output_image_path)
-    image_buffer.show()
+
+    if show_image:
+        image_buffer.show()
 
 def Release():
     global realesrgan
 
     # Release the resources.
     del(realesrgan)
 
+if __name__ == '__main__':
+    Init()
 
-Init()
-
-Inference(execution_ws + "\\input.jpg", execution_ws + "\\output.jpg")
-
-Release()
+    Inference(execution_ws + "\\input.jpg", execution_ws + "\\output.jpg")
 
+    Release()
diff --git a/samples/python/stable_diffusion_v1_5/stable_diffusion_v1_5.py b/samples/python/stable_diffusion_v1_5/stable_diffusion_v1_5.py
@@ -45,6 +45,10 @@
 ####################################################################
 
 execution_ws = os.getcwd()
+
+if not "python" in execution_ws:
+    execution_ws = execution_ws + "\\..\\" + "python"
+
 qnn_dir = execution_ws + "\\qai_libs"
 
 if not MODEL_NAME in execution_ws:
@@ -117,6 +121,10 @@ def model_initialize():
 
     result = True
 
+    SetQNNConfig()
+
+    model_download()
+
     # model names
     model_text_encoder  = "text_encoder"
     model_unet          = "model_unet"
@@ -166,13 +174,15 @@ def setup_parameters(prompt, un_prompt, seed, step, text_guidance):
 
     user_prompt = prompt
     uncond_prompt = un_prompt
-    user_seed = seed
+    user_seed = np.int64(seed)
     user_step = step
     user_text_guidance = text_guidance
+
+    if user_seed == -1:
+        user_seed = np.random.randint(low=0, high=9999999999, size=None, dtype=np.int64)
 
     assert isinstance(user_seed, np.int64) == True, "user_seed should be of type int64"
     assert isinstance(user_step, int) == True, "user_step should be of type int"
-    assert user_step == 20 or user_step == 30 or user_step == 50, "user_step should be either 20, 30 or 50"
     assert isinstance(user_text_guidance, float) == True, "user_text_guidance should be of type float"
     assert user_text_guidance >= 5.0 and user_text_guidance <= 15.0, "user_text_guidance should be a float from [5.0, 15.0]"
 
@@ -209,7 +219,7 @@ def get_time_embedding(timestep, time_embeddings):
     return emb
 
 # Execute the Stable Diffusion pipeline
-def model_execute(callback):
+def model_execute(callback, image_path, show_image = True):
     PerfProfile.SetPerfProfileGlobal(PerfProfile.BURST)
 
     scheduler.set_timesteps(user_step)  # Setting up user provided time steps for Scheduler
@@ -259,7 +269,7 @@ def model_execute(callback):
         callback(None)
     else:
         image_size = 512
-        image_path = execution_ws + "\\images"
+
         if not os.path.exists(image_path):
             os.makedirs(image_path, exist_ok=True)
         image_path = image_path + "\\%s_%s_%s.jpg"%(formatted_time, str(user_seed), str(image_size))
@@ -268,12 +278,16 @@ def model_execute(callback):
         output_image = output_image.reshape(image_size, image_size, -1)
         output_image = Image.fromarray(output_image, mode="RGB")
         output_image.save(image_path)
-        output_image.show()
+
+        if show_image:
+            output_image.show()
 
         callback(image_path)
 
     PerfProfile.RelPerfProfileGlobal()
 
+    return image_path
+
 # Release all the models.
 def model_destroy():
     global text_encoder
@@ -332,10 +346,6 @@ def model_download():
     parser.add_argument("--prompt", default=DEFAULT_PROMPT, type=str)
     args = parser.parse_args()
 
-    SetQNNConfig()
-
-    model_download()
-
     model_initialize()
 
     time_start = time.time()
@@ -347,7 +357,7 @@ def model_download():
     user_text_guidance = 7.5
 
     setup_parameters(user_prompt, uncond_prompt, user_seed, user_step, user_text_guidance)
-    model_execute(modelExecuteCallback)
+    model_execute(modelExecuteCallback, execution_ws + "\\images")
 
     time_end = time.time()
     print("time consumes for inference {}(s)".format(str(time_end - time_start)))

diff --git a/samples/python/stable_diffusion_v2_1/stable_diffusion_v2_1.py b/samples/python/stable_diffusion_v2_1/stable_diffusion_v2_1.py
@@ -45,6 +45,10 @@
 ####################################################################
 
 execution_ws = os.getcwd()
+
+if not "python" in execution_ws:
+    execution_ws = execution_ws + "\\..\\" + "python"
+
 qnn_dir = execution_ws + "\\qai_libs"
 
 if not MODEL_NAME in execution_ws:
@@ -117,6 +121,10 @@ def model_initialize():
 
     result = True
 
+    SetQNNConfig()
+
+    model_download()
+
     # model names
     model_text_encoder  = "text_encoder"
     model_unet          = "model_unet"
@@ -166,13 +174,15 @@ def setup_parameters(prompt, un_prompt, seed, step, text_guidance):
 
     user_prompt = prompt
     uncond_prompt = un_prompt
-    user_seed = seed
+    user_seed = np.int64(seed)
     user_step = step
     user_text_guidance = text_guidance
+
+    if user_seed == -1:
+        user_seed = np.random.randint(low=0, high=9999999999, size=None, dtype=np.int64)
 
     assert isinstance(user_seed, np.int64) == True, "user_seed should be of type int64"
     assert isinstance(user_step, int) == True, "user_step should be of type int"
-    assert user_step == 20 or user_step == 30 or user_step == 50, "user_step should be either 20, 30 or 50"
     assert isinstance(user_text_guidance, float) == True, "user_text_guidance should be of type float"
     assert user_text_guidance >= 5.0 and user_text_guidance <= 15.0, "user_text_guidance should be a float from [5.0, 15.0]"
 
@@ -201,7 +211,7 @@ def get_time_embedding(timestep, time_embeddings):
     return emb
 
 # Execute the Stable Diffusion pipeline
-def model_execute(callback):
+def model_execute(callback, image_path, show_image = True):
     PerfProfile.SetPerfProfileGlobal(PerfProfile.BURST)
 
     scheduler.set_timesteps(user_step)  # Setting up user provided time steps for Scheduler
@@ -250,7 +260,7 @@ def model_execute(callback):
         callback(None)
     else:
         image_size = 512
-        image_path = execution_ws + "\\images"
+
         if not os.path.exists(image_path):
             os.makedirs(image_path, exist_ok=True)
         image_path = image_path + "\\%s_%s_%s.jpg"%(formatted_time, str(user_seed), str(image_size))
@@ -259,12 +269,16 @@ def model_execute(callback):
         output_image = output_image.reshape(image_size, image_size, -1)
         output_image = Image.fromarray(output_image, mode="RGB")
         output_image.save(image_path)
-        output_image.show()
+
+        if show_image:
+            output_image.show()
 
         callback(image_path)
 
     PerfProfile.RelPerfProfileGlobal()
 
+    return image_path
+
 # Release all the models.
 def model_destroy():
     global text_encoder
@@ -323,10 +337,6 @@ def model_download():
     parser.add_argument("--prompt", default=DEFAULT_PROMPT, type=str)
     args = parser.parse_args()
 
-    SetQNNConfig()
-
-    model_download()
-
     model_initialize()
 
     time_start = time.time()
@@ -338,7 +348,7 @@ def model_download():
     user_text_guidance = 7.5
 
     setup_parameters(user_prompt, uncond_prompt, user_seed, user_step, user_text_guidance)
-    model_execute(modelExecuteCallback)
+    model_execute(modelExecuteCallback, execution_ws + "\\images")
 
     time_end = time.time()
     print("time consumes for inference {}(s)".format(str(time_end - time_start)))