Skip to content

Commit e533c14

Browse files
authored
Optimize Multi-Model Configuration Retrieval Using Parallel Execution (#1062)
2 parents 0f08a64 + fbd77d5 commit e533c14

File tree

5 files changed

+303
-213
lines changed

5 files changed

+303
-213
lines changed

ads/aqua/extension/deployment_handler.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ def get(self, id: Union[str, List[str]] = None):
5050
return self.get_deployment_config(id)
5151
elif paths.startswith("aqua/deployments/modelconfig"):
5252
if isinstance(id, list):
53-
return self.get_multimodel_compatible_shapes(id)
53+
return self.get_multimodel_deployment_config(id)
5454
elif isinstance(id, str):
5555
return self.get_deployment_config(id)
5656
else:
@@ -132,11 +132,11 @@ def get_deployment_config(self, model_id):
132132
"""Gets the deployment config for Aqua model."""
133133
return self.finish(AquaDeploymentApp().get_deployment_config(model_id=model_id))
134134

135-
def get_multimodel_compatible_shapes(self, model_ids: List[str]):
135+
def get_multimodel_deployment_config(self, model_ids: List[str]):
136136
"""Gets the multi model deployment config and optimal GPU allocations for Aqua models."""
137137
primary_model_id = self.get_argument("primary_model_id", default=None)
138138
return self.finish(
139-
AquaDeploymentApp().get_multimodel_compatible_shapes(
139+
AquaDeploymentApp().get_multimodel_deployment_config(
140140
model_ids=model_ids, primary_model_id=primary_model_id
141141
)
142142
)

ads/aqua/modeldeployment/deployment.py

Lines changed: 16 additions & 161 deletions
Original file line numberDiff line numberDiff line change
@@ -2,18 +2,14 @@
22
# Copyright (c) 2024, 2025 Oracle and/or its affiliates.
33
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
44

5-
import copy
65
import shlex
7-
from typing import Dict, List, Union
6+
from typing import Dict, List, Optional, Union
87

98
from pydantic import ValidationError
109

1110
from ads.aqua.app import AquaApp, logger
1211
from ads.aqua.common.entities import ContainerSpec
13-
from ads.aqua.common.enums import (
14-
InferenceContainerTypeFamily,
15-
Tags,
16-
)
12+
from ads.aqua.common.enums import InferenceContainerTypeFamily, Tags
1713
from ads.aqua.common.errors import AquaRuntimeError, AquaValueError
1814
from ads.aqua.common.utils import (
1915
build_pydantic_error_message,
@@ -42,14 +38,11 @@
4238
from ads.aqua.model import AquaModelApp
4339
from ads.aqua.modeldeployment.entities import (
4440
AquaDeployment,
45-
AquaDeploymentConfig,
4641
AquaDeploymentDetail,
4742
CreateModelDeploymentDetails,
48-
GPUModelAllocation,
49-
GPUShapeAllocation,
5043
ModelDeploymentConfigSummary,
5144
)
52-
from ads.aqua.modeldeployment.utils import get_combinations
45+
from ads.aqua.modeldeployment.utils import MultiModelDeploymentConfigLoader
5346
from ads.aqua.ui import ModelFormat
5447
from ads.common.object_storage_details import ObjectStorageDetails
5548
from ads.common.utils import get_log_links
@@ -628,107 +621,16 @@ def get_deployment_config(self, model_id: str) -> Dict:
628621
return config
629622

630623
@telemetry(
631-
entry_point="plugin=deployment&action=get_multimodel_compatible_shapes",
624+
entry_point="plugin=deployment&action=get_multimodel_deployment_config",
632625
name="aqua",
633626
)
634-
def get_multimodel_compatible_shapes(
635-
self, model_ids: List[str], primary_model_id: str = None
627+
def get_multimodel_deployment_config(
628+
self, model_ids: List[str], primary_model_id: Optional[str] = None
636629
) -> ModelDeploymentConfigSummary:
637-
"""Gets the deployment config of multiple Aqua models and calculate the gpu allocations for all compatible shapes.
638-
If no primary Aqua model id provided, gpu count for each compatible shape will be evenly allocated.
639-
If provided, gpu count for each compatible shape will be prioritized for primary model.
640-
641-
For example, there is one compatible shape "BM.GPU.H100.8" for three models A, B, C, and each model has a gpu count as below:
642-
643-
A - BM.GPU.H100.8 - 1, 2, 4, 8
644-
B - BM.GPU.H100.8 - 1, 2, 4, 8
645-
C - BM.GPU.H100.8 - 1, 2, 4, 8
646-
647-
If no primary model is provided, the gpu allocation for A, B, C could be [2, 4, 2], [2, 2, 4] or [4, 2, 2]
648-
If B is the primary model, the gpu allocation is [2, 4, 2] as B always gets the maximum gpu count.
649-
650-
Parameters
651-
----------
652-
model_ids: List[str]
653-
A list of OCID of the Aqua model.
654-
primary_model_id: str
655-
The OCID of the primary Aqua model
656-
657-
Returns
658-
-------
659-
ModelDeploymentConfigSummary:
660-
An instance of ModelDeploymentConfigSummary.
661630
"""
662-
deployment = {}
663-
model_shape_gpu = {}
664-
for model_id in model_ids:
665-
deployment_config = AquaDeploymentConfig(
666-
**self.get_deployment_config(model_id=model_id)
667-
)
668-
model_shape_gpu[model_id] = {
669-
shape: [
670-
item.gpu_count
671-
for item in deployment_config.configuration[
672-
shape
673-
].multi_model_deployment
674-
]
675-
for shape in deployment_config.shape
676-
}
677-
678-
deployment.update(
679-
{
680-
model_id: {
681-
"shape": deployment_config.shape,
682-
"configuration": {
683-
shape: deployment_config.configuration[shape]
684-
for shape in deployment_config.shape
685-
},
686-
}
687-
}
688-
)
689-
690-
common_shapes = []
691-
for shape_gpu in model_shape_gpu.values():
692-
if not common_shapes:
693-
common_shapes = list(shape_gpu.keys())
694-
else:
695-
common_shapes = [
696-
shape for shape in common_shapes if shape in list(shape_gpu.keys())
697-
]
698-
699-
if not common_shapes:
700-
raise AquaValueError(
701-
"There are no available shapes for models selected at this moment, please select different model to deploy."
702-
)
631+
Retrieves the deployment configuration for multiple Aqua models and calculates
632+
the GPU allocations for all compatible shapes.
703633
704-
gpu_allocation = {}
705-
for common_shape in common_shapes:
706-
model_gpu = {
707-
model: shape_gpu[common_shape]
708-
for model, shape_gpu in model_shape_gpu.items()
709-
}
710-
is_compatible, maximum_gpu_count, combination = self._verify_compatibility(
711-
model_gpu, primary_model_id
712-
)
713-
if is_compatible:
714-
gpu_allocation[common_shape] = GPUShapeAllocation(
715-
models=combination, total_gpus_available=maximum_gpu_count
716-
)
717-
718-
if not gpu_allocation:
719-
raise AquaValueError(
720-
"There are no available gpu allocations for models selected at this moment, please select different model to deploy."
721-
)
722-
723-
return ModelDeploymentConfigSummary(
724-
deployment_config=deployment, gpu_allocation=gpu_allocation
725-
)
726-
727-
@staticmethod
728-
def _verify_compatibility(
729-
model_gpu_dict: Dict, primary_model_id: str = None
730-
) -> tuple:
731-
"""Calculates the gpu allocations for all compatible shapes.
732634
If no primary Aqua model id provided, gpu count for each compatible shape will be evenly allocated.
733635
If provided, gpu count for each compatible shape will be prioritized for primary model.
734636
@@ -743,66 +645,19 @@ def _verify_compatibility(
743645
744646
Parameters
745647
----------
746-
model_gpu_dict: Dict
747-
A dict of Aqua model and its gpu counts.
748-
primary_model_id: str
749-
The OCID of the primary Aqua model
648+
model_ids : List[str]
649+
A list of OCIDs for the Aqua models.
650+
primary_model_id : Optional[str]
651+
The OCID of the primary Aqua model. If provided, GPU allocation will prioritize
652+
this model. Otherwise, GPUs will be evenly allocated.
750653
751654
Returns
752655
-------
753-
tuple:
754-
A tuple of gpu count allocation result.
656+
ModelDeploymentConfigSummary
657+
A summary of the model deployment configurations and GPU allocations.
755658
"""
756-
maximum_gpu_count = max([sorted(gpus)[-1] for gpus in model_gpu_dict.values()])
757-
model_gpu_dict_copy = copy.deepcopy(model_gpu_dict)
758-
if primary_model_id:
759-
primary_model_gpu_list = sorted(model_gpu_dict_copy.pop(primary_model_id))
760-
for gpu_count in reversed(primary_model_gpu_list):
761-
combinations = get_combinations(model_gpu_dict_copy)
762-
for combination in combinations:
763-
if (
764-
len(combination) == len(model_gpu_dict_copy)
765-
and sum(combination.values()) == maximum_gpu_count - gpu_count
766-
):
767-
combination[primary_model_id] = gpu_count
768-
return (
769-
True,
770-
maximum_gpu_count,
771-
[
772-
GPUModelAllocation(ocid=ocid, gpu_count=gpu_count)
773-
for ocid, gpu_count in combination.items()
774-
],
775-
)
776-
777-
else:
778-
combinations = get_combinations(model_gpu_dict_copy)
779-
minimal_difference = float("inf") # gets the positive infinity
780-
optimal_combination = []
781-
for combination in combinations:
782-
if (
783-
len(combination) == len(model_gpu_dict_copy)
784-
and sum(combination.values()) == maximum_gpu_count
785-
):
786-
difference = max(combination.values()) - min(combination.values())
787-
if difference < minimal_difference:
788-
minimal_difference = difference
789-
optimal_combination = combination
790-
791-
# find the optimal combination, no need to continue
792-
if minimal_difference == 0:
793-
break
794-
795-
if optimal_combination:
796-
return (
797-
True,
798-
maximum_gpu_count,
799-
[
800-
GPUModelAllocation(ocid=ocid, gpu_count=gpu_count)
801-
for ocid, gpu_count in optimal_combination.items()
802-
],
803-
)
804659

805-
return (False, 0, [])
660+
return MultiModelDeploymentConfigLoader(self).load(model_ids, primary_model_id)
806661

807662
def get_deployment_default_params(
808663
self,

0 commit comments

Comments (0)