Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize Multi-Model Configuration Retrieval Using Parallel Execution #1062

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions ads/aqua/extension/deployment_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def get(self, id: Union[str, List[str]] = None):
return self.get_deployment_config(id)
elif paths.startswith("aqua/deployments/modelconfig"):
if isinstance(id, list):
return self.get_multimodel_compatible_shapes(id)
return self.get_multimodel_deployment_config(id)
elif isinstance(id, str):
return self.get_deployment_config(id)
else:
Expand Down Expand Up @@ -132,11 +132,11 @@ def get_deployment_config(self, model_id):
"""Gets the deployment config for Aqua model."""
return self.finish(AquaDeploymentApp().get_deployment_config(model_id=model_id))

def get_multimodel_deployment_config(self, model_ids: List[str]):
    """Gets the multi-model deployment config and optimal GPU allocations for Aqua models.

    Reads the optional ``primary_model_id`` query argument; when present, GPU
    allocation in the returned summary is prioritized for that model.

    Parameters
    ----------
    model_ids: List[str]
        A list of OCIDs of the Aqua models.

    Returns
    -------
    None
        Writes the ModelDeploymentConfigSummary payload to the response via ``self.finish``.
    """
    # NOTE(review): get_argument/finish come from the Tornado handler base class — confirm against deployment_handler.py
    primary_model_id = self.get_argument("primary_model_id", default=None)
    return self.finish(
        AquaDeploymentApp().get_multimodel_deployment_config(
            model_ids=model_ids, primary_model_id=primary_model_id
        )
    )
Expand Down
177 changes: 16 additions & 161 deletions ads/aqua/modeldeployment/deployment.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,14 @@
# Copyright (c) 2024, 2025 Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/

import copy
import shlex
from typing import Dict, List, Union
from typing import Dict, List, Optional, Union

from pydantic import ValidationError

from ads.aqua.app import AquaApp, logger
from ads.aqua.common.entities import ContainerSpec
from ads.aqua.common.enums import (
InferenceContainerTypeFamily,
Tags,
)
from ads.aqua.common.enums import InferenceContainerTypeFamily, Tags
from ads.aqua.common.errors import AquaRuntimeError, AquaValueError
from ads.aqua.common.utils import (
build_pydantic_error_message,
Expand Down Expand Up @@ -42,14 +38,11 @@
from ads.aqua.model import AquaModelApp
from ads.aqua.modeldeployment.entities import (
AquaDeployment,
AquaDeploymentConfig,
AquaDeploymentDetail,
CreateModelDeploymentDetails,
GPUModelAllocation,
GPUShapeAllocation,
ModelDeploymentConfigSummary,
)
from ads.aqua.modeldeployment.utils import get_combinations
from ads.aqua.modeldeployment.utils import MultiModelDeploymentConfigLoader
from ads.aqua.ui import ModelFormat
from ads.common.object_storage_details import ObjectStorageDetails
from ads.common.utils import get_log_links
Expand Down Expand Up @@ -628,107 +621,16 @@ def get_deployment_config(self, model_id: str) -> Dict:
return config

@telemetry(
    entry_point="plugin=deployment&action=get_multimodel_deployment_config",
    name="aqua",
)
def get_multimodel_deployment_config(
    self, model_ids: List[str], primary_model_id: Optional[str] = None
) -> ModelDeploymentConfigSummary:
    """
    Retrieves the deployment configuration for multiple Aqua models and calculates
    the GPU allocations for all compatible shapes.

    If no primary Aqua model id is provided, the GPU count for each compatible
    shape is evenly allocated across the models; if provided, the GPU count is
    prioritized for the primary model.

    Parameters
    ----------
    model_ids : List[str]
        A list of OCIDs for the Aqua models.
    primary_model_id : Optional[str]
        The OCID of the primary Aqua model. If provided, GPU allocation will prioritize
        this model. Otherwise, GPUs will be evenly allocated.

    Returns
    -------
    ModelDeploymentConfigSummary
        A summary of the model deployment configurations and GPU allocations.
    """
    # Config retrieval and GPU-allocation logic is delegated to the loader,
    # which fetches the per-model deployment configs in parallel.
    return MultiModelDeploymentConfigLoader(self).load(model_ids, primary_model_id)

def get_deployment_default_params(
self,
Expand Down
Loading