Skip to content

Commit 0f08a64

Browse files
Added API to get multi model deployment config (#1055)
2 parents bc2e0b7 + 1e418db commit 0f08a64

File tree

7 files changed

+651
-23
lines changed

7 files changed

+651
-23
lines changed

ads/aqua/extension/deployment_handler.py

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
#!/usr/bin/env python
22
# Copyright (c) 2024, 2025 Oracle and/or its affiliates.
33
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
4-
import logging
4+
5+
from typing import List, Union
56
from urllib.parse import urlparse
67

78
from tornado.web import HTTPError
@@ -20,7 +21,7 @@ class AquaDeploymentHandler(AquaAPIhandler):
2021
2122
Methods
2223
-------
23-
get(self, id="")
24+
get(self, id: Union[str, List[str]])
2425
Retrieves a list of AQUA deployments or model info or logs by ID.
2526
post(self, *args, **kwargs)
2627
Creates a new AQUA deployment.
@@ -37,7 +38,7 @@ class AquaDeploymentHandler(AquaAPIhandler):
3738
"""
3839

3940
@handle_exceptions
40-
def get(self, id=""):
41+
def get(self, id: Union[str, List[str]] = None):
4142
"""Handle GET request."""
4243
url_parse = urlparse(self.request.path)
4344
paths = url_parse.path.strip("/")
@@ -47,6 +48,16 @@ def get(self, id=""):
4748
400, f"The request {self.request.path} requires model id."
4849
)
4950
return self.get_deployment_config(id)
51+
elif paths.startswith("aqua/deployments/modelconfig"):
52+
if isinstance(id, list):
53+
return self.get_multimodel_compatible_shapes(id)
54+
elif isinstance(id, str):
55+
return self.get_deployment_config(id)
56+
else:
57+
raise HTTPError(
58+
400,
59+
f"The request {self.request.path} requires either a model id or a list of model ids.",
60+
)
5061
elif paths.startswith("aqua/deployments"):
5162
if not id:
5263
return self.list()
@@ -121,6 +132,15 @@ def get_deployment_config(self, model_id):
121132
"""Gets the deployment config for Aqua model."""
122133
return self.finish(AquaDeploymentApp().get_deployment_config(model_id=model_id))
123134

135+
def get_multimodel_compatible_shapes(self, model_ids: List[str]):
136+
"""Gets the multi model deployment config and optimal GPU allocations for Aqua models."""
137+
primary_model_id = self.get_argument("primary_model_id", default=None)
138+
return self.finish(
139+
AquaDeploymentApp().get_multimodel_compatible_shapes(
140+
model_ids=model_ids, primary_model_id=primary_model_id
141+
)
142+
)
143+
124144

125145
class AquaDeploymentInferenceHandler(AquaAPIhandler):
126146
@staticmethod
@@ -237,6 +257,7 @@ def post(self, *args, **kwargs): # noqa: ARG002
237257
__handlers__ = [
238258
("deployments/?([^/]*)/params", AquaDeploymentParamsHandler),
239259
("deployments/config/?([^/]*)", AquaDeploymentHandler),
260+
("deployments/modelconfig/?([^/]*)", AquaDeploymentHandler),
240261
("deployments/?([^/]*)", AquaDeploymentHandler),
241262
("deployments/?([^/]*)/activate", AquaDeploymentHandler),
242263
("deployments/?([^/]*)/deactivate", AquaDeploymentHandler),

ads/aqua/modeldeployment/deployment.py

Lines changed: 188 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
# Copyright (c) 2024, 2025 Oracle and/or its affiliates.
33
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
44

5+
import copy
56
import shlex
67
from typing import Dict, List, Union
78

@@ -41,9 +42,14 @@
4142
from ads.aqua.model import AquaModelApp
4243
from ads.aqua.modeldeployment.entities import (
4344
AquaDeployment,
45+
AquaDeploymentConfig,
4446
AquaDeploymentDetail,
4547
CreateModelDeploymentDetails,
48+
GPUModelAllocation,
49+
GPUShapeAllocation,
50+
ModelDeploymentConfigSummary,
4651
)
52+
from ads.aqua.modeldeployment.utils import get_combinations
4753
from ads.aqua.ui import ModelFormat
4854
from ads.common.object_storage_details import ObjectStorageDetails
4955
from ads.common.utils import get_log_links
@@ -621,6 +627,183 @@ def get_deployment_config(self, model_id: str) -> Dict:
621627
)
622628
return config
623629

630+
@telemetry(
631+
entry_point="plugin=deployment&action=get_multimodel_compatible_shapes",
632+
name="aqua",
633+
)
634+
def get_multimodel_compatible_shapes(
635+
self, model_ids: List[str], primary_model_id: str = None
636+
) -> ModelDeploymentConfigSummary:
637+
"""Gets the deployment config of multiple Aqua models and calculate the gpu allocations for all compatible shapes.
638+
If no primary Aqua model id provided, gpu count for each compatible shape will be evenly allocated.
639+
If provided, gpu count for each compatible shape will be prioritized for primary model.
640+
641+
For example, there is one compatible shape "BM.GPU.H100.8" for three models A, B, C, and each model has a gpu count as below:
642+
643+
A - BM.GPU.H100.8 - 1, 2, 4, 8
644+
B - BM.GPU.H100.8 - 1, 2, 4, 8
645+
C - BM.GPU.H100.8 - 1, 2, 4, 8
646+
647+
If no primary model is provided, the gpu allocation for A, B, C could be [2, 4, 2], [2, 2, 4] or [4, 2, 2]
648+
If B is the primary model, the gpu allocation is [2, 4, 2] as B always gets the maximum gpu count.
649+
650+
Parameters
651+
----------
652+
model_ids: List[str]
653+
A list of OCID of the Aqua model.
654+
primary_model_id: str
655+
The OCID of the primary Aqua model
656+
657+
Returns
658+
-------
659+
ModelDeploymentConfigSummary:
660+
An instance of ModelDeploymentConfigSummary.
661+
"""
662+
deployment = {}
663+
model_shape_gpu = {}
664+
for model_id in model_ids:
665+
deployment_config = AquaDeploymentConfig(
666+
**self.get_deployment_config(model_id=model_id)
667+
)
668+
model_shape_gpu[model_id] = {
669+
shape: [
670+
item.gpu_count
671+
for item in deployment_config.configuration[
672+
shape
673+
].multi_model_deployment
674+
]
675+
for shape in deployment_config.shape
676+
}
677+
678+
deployment.update(
679+
{
680+
model_id: {
681+
"shape": deployment_config.shape,
682+
"configuration": {
683+
shape: deployment_config.configuration[shape]
684+
for shape in deployment_config.shape
685+
},
686+
}
687+
}
688+
)
689+
690+
common_shapes = []
691+
for shape_gpu in model_shape_gpu.values():
692+
if not common_shapes:
693+
common_shapes = list(shape_gpu.keys())
694+
else:
695+
common_shapes = [
696+
shape for shape in common_shapes if shape in list(shape_gpu.keys())
697+
]
698+
699+
if not common_shapes:
700+
raise AquaValueError(
701+
"There are no available shapes for models selected at this moment, please select different model to deploy."
702+
)
703+
704+
gpu_allocation = {}
705+
for common_shape in common_shapes:
706+
model_gpu = {
707+
model: shape_gpu[common_shape]
708+
for model, shape_gpu in model_shape_gpu.items()
709+
}
710+
is_compatible, maximum_gpu_count, combination = self._verify_compatibility(
711+
model_gpu, primary_model_id
712+
)
713+
if is_compatible:
714+
gpu_allocation[common_shape] = GPUShapeAllocation(
715+
models=combination, total_gpus_available=maximum_gpu_count
716+
)
717+
718+
if not gpu_allocation:
719+
raise AquaValueError(
720+
"There are no available gpu allocations for models selected at this moment, please select different model to deploy."
721+
)
722+
723+
return ModelDeploymentConfigSummary(
724+
deployment_config=deployment, gpu_allocation=gpu_allocation
725+
)
726+
727+
@staticmethod
728+
def _verify_compatibility(
729+
model_gpu_dict: Dict, primary_model_id: str = None
730+
) -> tuple:
731+
"""Calculates the gpu allocations for all compatible shapes.
732+
If no primary Aqua model id provided, gpu count for each compatible shape will be evenly allocated.
733+
If provided, gpu count for each compatible shape will be prioritized for primary model.
734+
735+
For example, there is one compatible shape "BM.GPU.H100.8" for three models A, B, C, and each model has a gpu count as below:
736+
737+
A - BM.GPU.H100.8 - 1, 2, 4, 8
738+
B - BM.GPU.H100.8 - 1, 2, 4, 8
739+
C - BM.GPU.H100.8 - 1, 2, 4, 8
740+
741+
If no primary model is provided, the gpu allocation for A, B, C could be [2, 4, 2], [2, 2, 4] or [4, 2, 2]
742+
If B is the primary model, the gpu allocation is [2, 4, 2] as B always gets the maximum gpu count.
743+
744+
Parameters
745+
----------
746+
model_gpu_dict: Dict
747+
A dict of Aqua model and its gpu counts.
748+
primary_model_id: str
749+
The OCID of the primary Aqua model
750+
751+
Returns
752+
-------
753+
tuple:
754+
A tuple of gpu count allocation result.
755+
"""
756+
maximum_gpu_count = max([sorted(gpus)[-1] for gpus in model_gpu_dict.values()])
757+
model_gpu_dict_copy = copy.deepcopy(model_gpu_dict)
758+
if primary_model_id:
759+
primary_model_gpu_list = sorted(model_gpu_dict_copy.pop(primary_model_id))
760+
for gpu_count in reversed(primary_model_gpu_list):
761+
combinations = get_combinations(model_gpu_dict_copy)
762+
for combination in combinations:
763+
if (
764+
len(combination) == len(model_gpu_dict_copy)
765+
and sum(combination.values()) == maximum_gpu_count - gpu_count
766+
):
767+
combination[primary_model_id] = gpu_count
768+
return (
769+
True,
770+
maximum_gpu_count,
771+
[
772+
GPUModelAllocation(ocid=ocid, gpu_count=gpu_count)
773+
for ocid, gpu_count in combination.items()
774+
],
775+
)
776+
777+
else:
778+
combinations = get_combinations(model_gpu_dict_copy)
779+
minimal_difference = float("inf") # gets the positive infinity
780+
optimal_combination = []
781+
for combination in combinations:
782+
if (
783+
len(combination) == len(model_gpu_dict_copy)
784+
and sum(combination.values()) == maximum_gpu_count
785+
):
786+
difference = max(combination.values()) - min(combination.values())
787+
if difference < minimal_difference:
788+
minimal_difference = difference
789+
optimal_combination = combination
790+
791+
# find the optimal combination, no need to continue
792+
if minimal_difference == 0:
793+
break
794+
795+
if optimal_combination:
796+
return (
797+
True,
798+
maximum_gpu_count,
799+
[
800+
GPUModelAllocation(ocid=ocid, gpu_count=gpu_count)
801+
for ocid, gpu_count in optimal_combination.items()
802+
],
803+
)
804+
805+
return (False, 0, [])
806+
624807
def get_deployment_default_params(
625808
self,
626809
model_id: str,
@@ -671,9 +854,8 @@ def get_deployment_default_params(
671854
).get(instance_shape, UNKNOWN_DICT)
672855

673856
if "multi_model_deployment" in instance_shape_config and gpu_count:
674-
gpu_params = (
675-
instance_shape_config
676-
.get("multi_model_deployment", UNKNOWN_DICT)
857+
gpu_params = instance_shape_config.get(
858+
"multi_model_deployment", UNKNOWN_DICT
677859
)
678860

679861
for gpu_config in gpu_params:
@@ -684,11 +866,9 @@ def get_deployment_default_params(
684866
break
685867

686868
else:
687-
config_params = (
688-
instance_shape_config
689-
.get("parameters", UNKNOWN_DICT)
690-
.get(get_container_params_type(container_type_key), UNKNOWN)
691-
)
869+
config_params = instance_shape_config.get(
870+
"parameters", UNKNOWN_DICT
871+
).get(get_container_params_type(container_type_key), UNKNOWN)
692872

693873
if config_params:
694874
params_list = get_params_list(config_params)

0 commit comments

Comments (0)