
Commit 151d42a

Update sample code
1 parent 159b7db commit 151d42a

8 files changed: +293, -248 lines


samples/python/README.md

Lines changed: 4 additions & 1 deletion

@@ -18,7 +18,7 @@ Download and install [git](https://github.com/dennisameling/git/releases/downloa
 ### Step 2: Install basic Python dependencies:
 Run below commands in Windows terminal:
 ```
-pip install requests wget tqdm importlib-metadata qai-hub qai_hub_models huggingface_hub Pillow numpy opencv-python torch torchvision torchaudio transformers diffusers
+pip install requests wget tqdm importlib-metadata qai-hub qai_hub_models huggingface_hub Pillow numpy opencv-python torch torchvision torchaudio transformers diffusers ultralytics==8.0.193
 ```

 ### Step 3: Download QAI AppBuilder repository:
@@ -60,5 +60,8 @@ python stable_diffusion_v2_1\stable_diffusion_v2_1.py --prompt "spectacular view
 | inception_v3 | 2.28 | python inception_v3\inception_v3.py |
 | yolov8_det | 2.28 | python yolov8_det\yolov8_det.py |
 | unet_segmentation | 2.28 | python unet_segmentation\unet_segmentation.py |
+| openpose | 2.28 | python openpose\openpose.py |
+| lama_dilated | 2.28 | python lama_dilated\lama_dilated.py |
+| aotgan | 2.28 | python aotgan\aotgan.py |

 *More models will be supported soon!*
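Step 2 now also pins `ultralytics==8.0.193`. A small, hypothetical sanity check (not part of the repository) to confirm the Step 2 packages are importable before running the samples:

```python
# Hypothetical helper: confirm the packages from the Step 2 pip command are
# installed, including the newly pinned ultralytics==8.0.193.
from importlib.metadata import version, PackageNotFoundError

for pkg in ("qai-hub", "qai_hub_models", "ultralytics", "torch", "transformers", "diffusers"):
    try:
        print(f"{pkg}: {version(pkg)}")
    except PackageNotFoundError:
        print(f"{pkg}: MISSING - re-run the pip install command above")
```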

samples/python/aotgan/aotgan.py

Lines changed: 58 additions & 46 deletions

@@ -3,28 +3,49 @@
 # SPDX-License-Identifier: BSD-3-Clause
 # ---------------------------------------------------------------------

+import sys
 import os
+sys.path.append(".")
+sys.path.append("..")
+import utils.install as install
 import numpy as np
 import torch
 import torchvision.transforms as transforms

 from PIL import Image
 from PIL.Image import fromarray as ImageFromArray
-from torch.nn.functional import interpolate, pad
-from torchvision import transforms
-from typing import Callable, Dict, List, Tuple
-
+from utils.image_processing import (
+    preprocess_inputs
+)
 from qai_appbuilder import (QNNContext, Runtime, LogLevel, ProfilingLevel, PerfProfile, QNNConfig)

-image_size = 512
-aotgan = None
-image_buffer = None
+####################################################################
+
+MODEL_ID = "mn1w65o8m"
+MODEL_NAME = "aotgan"
+MODEL_HELP_URL = "https://github.com/quic/ai-engine-direct-helper/tree/main/samples/python/" + MODEL_NAME + "#" + MODEL_NAME + "-qnn-models"
+IMAGE_SIZE = 512
+
+####################################################################
+
+execution_ws = os.getcwd()
+qnn_dir = execution_ws + "\\qai_libs"

+if not MODEL_NAME in execution_ws:
+    execution_ws = execution_ws + "\\" + MODEL_NAME
+
+model_dir = execution_ws + "\\models"
+madel_path = model_dir + "\\" + MODEL_NAME + ".bin"
+
+####################################################################
+
+image_buffer = None
+aotgan = None

 def preprocess_PIL_image(image: Image) -> torch.Tensor:
     """Convert a PIL image into a pyTorch tensor with range [0, 1] and shape NCHW."""
-    transform = transforms.Compose([transforms.Resize(image_size), # bgr image
-                                    transforms.CenterCrop(image_size),
+    transform = transforms.Compose([transforms.Resize(IMAGE_SIZE), # bgr image
+                                    transforms.CenterCrop(IMAGE_SIZE),
                                     transforms.PILToTensor()])
     img: torch.Tensor = transform(image) # type: ignore
     img = img.float().unsqueeze(0) / 255.0 # int 0 - 255 to float 0.0 - 1.0
@@ -37,49 +58,39 @@ def torch_tensor_to_PIL_image(data: torch.Tensor) -> Image:
     out = torch.clip(data, min=0.0, max=1.0)
     np_out = (out.detach().numpy() * 255).astype(np.uint8)
     return ImageFromArray(np_out)
-
-def preprocess_inputs(
-    pixel_values_or_image: Image,
-    mask_pixel_values_or_image: Image,
-) -> Dict[str, torch.Tensor]:
-
-    NCHW_fp32_torch_frames = preprocess_PIL_image(pixel_values_or_image)
-    NCHW_fp32_torch_masks = preprocess_PIL_image(mask_pixel_values_or_image)
-
-    # The number of input images should equal the number of input masks.
-    if NCHW_fp32_torch_masks.shape[0] != 1:
-        NCHW_fp32_torch_masks = NCHW_fp32_torch_masks.tile(
-            (NCHW_fp32_torch_frames.shape[0], 1, 1, 1)
-        )
-
-    # Mask input image
-    image_masked = (
-        NCHW_fp32_torch_frames * (1 - NCHW_fp32_torch_masks) + NCHW_fp32_torch_masks
-    )
-
-    return {"image": image_masked, "mask": NCHW_fp32_torch_masks}
-
-# AotGan class which inherited from the class QNNContext.
+
+# LamaDilated class which inherited from the class QNNContext.
 class AotGan(QNNContext):
     def Inference(self, input_data, input_mask):
         input_datas=[input_data, input_mask]
         output_data = super().Inference(input_datas)[0]
         return output_data
-
+
+def model_download():
+    ret = True
+
+    desc = f"Downloading {MODEL_NAME} model... "
+    fail = f"\nFailed to download {MODEL_NAME} model. Please prepare the model according to the steps in below link:\n{MODEL_HELP_URL}"
+    ret = install.download_qai_hubmodel(MODEL_ID, madel_path, desc=desc, fail=fail)
+
+    if not ret:
+        exit()
+
 def Init():
     global aotgan

+    model_download()
+
     # Config AppBuilder environment.
     QNNConfig.Config(os.getcwd() + "\\qai_libs", Runtime.HTP, LogLevel.WARN, ProfilingLevel.BASIC)

     # Instance for AotGan objects.
-    aotgan_model = "models\\aotgan.bin"
-    aotgan = AotGan("aotgan", aotgan_model)
+    aotgan = AotGan("aotgan", madel_path)

 def Inference(input_image_path, input_mask_path, output_image_path):
     global image_buffer

-    # Read and preprocess the image&mask.
+    # Read and preprocess the image & mask.
     image = Image.open(input_image_path)
     mask = Image.open(input_mask_path)
     inputs = preprocess_inputs(image, mask)
@@ -89,26 +100,26 @@ def Inference(input_image_path, input_mask_path, output_image_path):

     image_masked = np.transpose(image_masked, (0, 2, 3, 1))
     mask_torch = np.transpose(mask_torch, (0, 2, 3, 1))
-
+
     # Burst the HTP.
     PerfProfile.SetPerfProfileGlobal(PerfProfile.BURST)

     # Run the inference.
     output_image = aotgan.Inference([image_masked], [mask_torch])
-
+
     # Reset the HTP.
     PerfProfile.RelPerfProfileGlobal()
-
-    # show%save the result
-    output_image = torch.from_numpy(output_image)
-    output_image = output_image.reshape(image_size, image_size, 3)
-    output_image = torch.unsqueeze(output_image, 0)
+
+    # show & save the result
+    output_image = torch.from_numpy(output_image)
+    output_image = output_image.reshape(IMAGE_SIZE, IMAGE_SIZE, 3)
+    output_image = torch.unsqueeze(output_image, 0)
     output_image = [torch_tensor_to_PIL_image(img) for img in output_image]
     image_buffer = output_image[0]
     image_buffer.save(output_image_path)
-    image_buffer.show()
+    image_buffer.show()
+    image.show()

-
 def Release():
     global aotgan

@@ -118,6 +129,7 @@ def Release():

 Init()

-Inference("input.png", "mask.png", "output.png")
+Inference(execution_ws + "\\input.png", execution_ws + "\\mask.png", execution_ws + "\\output.png")

 Release()
+
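The local `preprocess_inputs` helper deleted above is now imported from `utils.image_processing`. Based purely on the removed code, the shared helper presumably behaves like the sketch below (names such as `_to_nchw_tensor` are illustrative; the real utils module may differ, e.g. in how it parameterizes the resize size):

```python
# Sketch of the image/mask preprocessing now expected from utils.image_processing,
# reconstructed from the code deleted in aotgan.py above.
from typing import Dict

import torch
import torchvision.transforms as transforms
from PIL import Image


def _to_nchw_tensor(image: Image.Image, size: int = 512) -> torch.Tensor:
    """Resize/center-crop a PIL image and return an NCHW float tensor in [0, 1]."""
    transform = transforms.Compose([
        transforms.Resize(size),
        transforms.CenterCrop(size),
        transforms.PILToTensor(),
    ])
    return transform(image).float().unsqueeze(0) / 255.0


def preprocess_inputs(
    pixel_values_or_image: Image.Image,
    mask_pixel_values_or_image: Image.Image,
) -> Dict[str, torch.Tensor]:
    frames = _to_nchw_tensor(pixel_values_or_image)
    masks = _to_nchw_tensor(mask_pixel_values_or_image)

    # The number of masks should match the number of input frames.
    if masks.shape[0] != 1:
        masks = masks.tile((frames.shape[0], 1, 1, 1))

    # Blank out the masked region of the image before inpainting.
    image_masked = frames * (1 - masks) + masks
    return {"image": image_masked, "mask": masks}
```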

samples/python/fastsam_x/fastsam_x.py

Lines changed: 48 additions & 11 deletions

@@ -4,13 +4,15 @@
 # ---------------------------------------------------------------------

 from __future__ import annotations
-
+import sys
 import os
+sys.path.append(".")
+sys.path.append("..")
+import utils.install as install
 import numpy as np
 import math
 import torch
 import torchvision.transforms as transforms
-
 from typing import Callable, Dict, List, Tuple
 from PIL import Image
 from PIL.Image import fromarray as ImageFromArray
@@ -23,7 +25,27 @@

 from qai_appbuilder import (QNNContext, Runtime, LogLevel, ProfilingLevel, PerfProfile, QNNConfig)

+####################################################################
+
+MODEL_ID = "mn7x79pvq"
+MODEL_NAME = "fastsam_x"
+MODEL_HELP_URL = "https://github.com/quic/ai-engine-direct-helper/tree/main/samples/python/" + MODEL_NAME + "#" + MODEL_NAME + "-qnn-models"
+
+####################################################################
+
+execution_ws = os.getcwd()
+qnn_dir = execution_ws + "\\qai_libs"
+
+if not MODEL_NAME in execution_ws:
+    execution_ws = execution_ws + "\\" + MODEL_NAME
+
+model_dir = execution_ws + "\\models"
+madel_path = model_dir + "\\" + MODEL_NAME + ".bin"
+
+####################################################################
+
 fastsam = None
+
 confidence: float = 0.4,
 iou_threshold: float = 0.9,
 retina_masks: bool = True,
@@ -146,16 +168,27 @@ def Inference(self, input_data):
         input_datas=[input_data]
         output_data = super().Inference(input_datas)
         return output_data
-
+
+def model_download():
+    ret = True
+
+    desc = f"Downloading {MODEL_NAME} model... "
+    fail = f"\nFailed to download {MODEL_NAME} model. Please prepare the model according to the steps in below link:\n{MODEL_HELP_URL}"
+    ret = install.download_qai_hubmodel(MODEL_ID, madel_path, desc=desc, fail=fail)
+
+    if not ret:
+        exit()
+
 def Init():
     global fastsam

+    model_download()
+
     # Config AppBuilder environment.
     QNNConfig.Config(os.getcwd() + "\\qai_libs", Runtime.HTP, LogLevel.WARN, ProfilingLevel.BASIC)

     # Instance for FastSam_x objects.
-    fastsam_model = "models\\fastsam_x.bin"
-    fastsam = FastSam("fastsam", fastsam_model)
+    fastsam = FastSam("fastsam", madel_path)

 def Inference(input_image_path, output_image_path):
     global confidence, iou_threshold, retina_masks, model_image_input_shape
@@ -188,11 +221,11 @@ def Inference(input_image_path, output_image_path):
         torch.tensor(preds[4]).reshape(1, 105, 20, 20),
         torch.tensor(preds[5]).reshape(1, 37, 8400)
     ]
-
+
     preds = tuple(
         (preds[5], tuple(([preds[2], preds[3], preds[4]], preds[1], preds[0])))
     )
-
+
     p = ops.non_max_suppression(
         preds[0],
         0.4,
@@ -202,7 +235,7 @@ def Inference(input_image_path, output_image_path):
         nc=1, # set to 1 class since SAM has no class predictions
         classes=None,
     )
-
+
     full_box = torch.zeros(p[0].shape[1], device=p[0].device)
     full_box[2], full_box[3], full_box[4], full_box[6:] = (
         Img.shape[3],
@@ -266,9 +299,12 @@ def Inference(input_image_path, output_image_path):
     binary_mask = segmented_result[0].masks.data.squeeze().cpu().numpy().astype(np.uint8)
     binary_mask = binary_mask * 255
    mask_image = Image.fromarray(binary_mask)
-    mask_image.show()
+
+    #save and display the output_image
     mask_image.save(output_image_path)
-
+    mask_image.show()
+
+
 def Release():
     global fastsam

@@ -278,6 +314,7 @@ def Release():

 Init()

-Inference("input.jpg", "output.jpg")
+Inference(execution_ws + "\\input.jpg", execution_ws + "\\output.jpg")

 Release()
+
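Both updated samples now share the same bootstrap flow: resolve paths relative to the sample folder, fetch the `.bin` model from AI Hub via `utils.install.download_qai_hubmodel`, then configure the AppBuilder runtime. A condensed sketch of that flow, using only names shown in the diffs above (so it is only meaningful inside the repository's samples/python tree, on Windows paths):

```python
# Condensed view of the bootstrap pattern shared by aotgan.py and fastsam_x.py
# in this commit. utils.install and qai_appbuilder are repository/SDK modules;
# the values below are the fastsam_x ones taken from the diff.
import os
import sys

sys.path.append(".")
sys.path.append("..")
import utils.install as install
from qai_appbuilder import QNNConfig, Runtime, LogLevel, ProfilingLevel

MODEL_ID = "mn7x79pvq"
MODEL_NAME = "fastsam_x"
MODEL_HELP_URL = ("https://github.com/quic/ai-engine-direct-helper/tree/main/"
                  "samples/python/" + MODEL_NAME + "#" + MODEL_NAME + "-qnn-models")

# Run either from samples/python or from inside the model's own folder.
execution_ws = os.getcwd()
if MODEL_NAME not in execution_ws:
    execution_ws = execution_ws + "\\" + MODEL_NAME
madel_path = execution_ws + "\\models\\" + MODEL_NAME + ".bin"  # spelling follows the diff

# Download the QNN model, then point the runtime at the QNN libraries and the
# HTP backend before constructing the model's QNNContext subclass.
if not install.download_qai_hubmodel(
        MODEL_ID, madel_path,
        desc=f"Downloading {MODEL_NAME} model... ",
        fail=f"\nFailed to download {MODEL_NAME} model. See:\n{MODEL_HELP_URL}"):
    raise SystemExit(1)

QNNConfig.Config(os.getcwd() + "\\qai_libs", Runtime.HTP, LogLevel.WARN, ProfilingLevel.BASIC)
```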
