Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize Multi-Model Configuration Retrieval Using Parallel Execution #1062

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions ads/aqua/extension/deployment_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def get(self, id: Union[str, List[str]] = None):
return self.get_deployment_config(id)
elif paths.startswith("aqua/deployments/modelconfig"):
if isinstance(id, list):
return self.get_multimodel_compatible_shapes(id)
return self.get_multimodel_deployment_config(id)
elif isinstance(id, str):
return self.get_deployment_config(id)
else:
Expand Down Expand Up @@ -132,11 +132,11 @@ def get_deployment_config(self, model_id):
"""Gets the deployment config for Aqua model."""
return self.finish(AquaDeploymentApp().get_deployment_config(model_id=model_id))

def get_multimodel_deployment_config(self, model_ids: List[str]):
    """Gets the multi-model deployment config and optimal GPU allocations for Aqua models.

    Reads the optional ``primary_model_id`` query argument; when present, GPU
    allocation in the returned summary is prioritized for that model.

    Parameters
    ----------
    model_ids: List[str]
        A list of OCIDs of the Aqua models.

    Returns
    -------
    None
        Writes the ModelDeploymentConfigSummary payload to the response via ``self.finish``.
    """
    # NOTE(review): get_argument/finish come from the Tornado handler base class — confirm against deployment_handler.py
    primary_model_id = self.get_argument("primary_model_id", default=None)
    return self.finish(
        AquaDeploymentApp().get_multimodel_deployment_config(
            model_ids=model_ids, primary_model_id=primary_model_id
        )
    )
Expand Down
177 changes: 16 additions & 161 deletions ads/aqua/modeldeployment/deployment.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,14 @@
# Copyright (c) 2024, 2025 Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/

import copy
import shlex
from typing import Dict, List, Union
from typing import Dict, List, Optional, Union

from pydantic import ValidationError

from ads.aqua.app import AquaApp, logger
from ads.aqua.common.entities import ContainerSpec
from ads.aqua.common.enums import (
InferenceContainerTypeFamily,
Tags,
)
from ads.aqua.common.enums import InferenceContainerTypeFamily, Tags
from ads.aqua.common.errors import AquaRuntimeError, AquaValueError
from ads.aqua.common.utils import (
build_pydantic_error_message,
Expand Down Expand Up @@ -42,14 +38,11 @@
from ads.aqua.model import AquaModelApp
from ads.aqua.modeldeployment.entities import (
AquaDeployment,
AquaDeploymentConfig,
AquaDeploymentDetail,
CreateModelDeploymentDetails,
GPUModelAllocation,
GPUShapeAllocation,
ModelDeploymentConfigSummary,
)
from ads.aqua.modeldeployment.utils import get_combinations
from ads.aqua.modeldeployment.utils import MultiModelDeploymentConfigLoader
from ads.aqua.ui import ModelFormat
from ads.common.object_storage_details import ObjectStorageDetails
from ads.common.utils import get_log_links
Expand Down Expand Up @@ -628,107 +621,16 @@ def get_deployment_config(self, model_id: str) -> Dict:
return config

@telemetry(
    entry_point="plugin=deployment&action=get_multimodel_deployment_config",
    name="aqua",
)
def get_multimodel_deployment_config(
    self, model_ids: List[str], primary_model_id: Optional[str] = None
) -> ModelDeploymentConfigSummary:
    """
    Retrieves the deployment configuration for multiple Aqua models and calculates
    the GPU allocations for all compatible shapes.

    If no primary Aqua model id is provided, the GPU count for each compatible
    shape is evenly allocated across the models; if provided, the GPU count is
    prioritized for the primary model.

    Parameters
    ----------
    model_ids : List[str]
        A list of OCIDs for the Aqua models.
    primary_model_id : Optional[str]
        The OCID of the primary Aqua model. If provided, GPU allocation will prioritize
        this model. Otherwise, GPUs will be evenly allocated.

    Returns
    -------
    ModelDeploymentConfigSummary
        A summary of the model deployment configurations and GPU allocations.
    """
    # Config retrieval and GPU-allocation logic is delegated to the loader,
    # which fetches the per-model deployment configs in parallel.
    return MultiModelDeploymentConfigLoader(self).load(model_ids, primary_model_id)

def get_deployment_default_params(
self,
Expand Down
Loading