2
2
# Copyright (c) 2024, 2025 Oracle and/or its affiliates.
3
3
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
4
4
5
- import copy
6
5
import shlex
7
- from typing import Dict , List , Union
6
+ from typing import Dict , List , Optional , Union
8
7
9
8
from pydantic import ValidationError
10
9
11
10
from ads .aqua .app import AquaApp , logger
12
11
from ads .aqua .common .entities import ContainerSpec
13
- from ads .aqua .common .enums import (
14
- InferenceContainerTypeFamily ,
15
- Tags ,
16
- )
12
+ from ads .aqua .common .enums import InferenceContainerTypeFamily , Tags
17
13
from ads .aqua .common .errors import AquaRuntimeError , AquaValueError
18
14
from ads .aqua .common .utils import (
19
15
build_pydantic_error_message ,
42
38
from ads .aqua .model import AquaModelApp
43
39
from ads .aqua .modeldeployment .entities import (
44
40
AquaDeployment ,
45
- AquaDeploymentConfig ,
46
41
AquaDeploymentDetail ,
47
42
CreateModelDeploymentDetails ,
48
- GPUModelAllocation ,
49
- GPUShapeAllocation ,
50
43
ModelDeploymentConfigSummary ,
51
44
)
52
- from ads .aqua .modeldeployment .utils import get_combinations
45
+ from ads .aqua .modeldeployment .utils import MultiModelDeploymentConfigLoader
53
46
from ads .aqua .ui import ModelFormat
54
47
from ads .common .object_storage_details import ObjectStorageDetails
55
48
from ads .common .utils import get_log_links
@@ -628,107 +621,16 @@ def get_deployment_config(self, model_id: str) -> Dict:
628
621
return config
629
622
630
623
@telemetry (
631
- entry_point = "plugin=deployment&action=get_multimodel_compatible_shapes " ,
624
+ entry_point = "plugin=deployment&action=get_multimodel_deployment_config " ,
632
625
name = "aqua" ,
633
626
)
634
- def get_multimodel_compatible_shapes (
635
- self , model_ids : List [str ], primary_model_id : str = None
627
+ def get_multimodel_deployment_config (
628
+ self , model_ids : List [str ], primary_model_id : Optional [ str ] = None
636
629
) -> ModelDeploymentConfigSummary :
637
- """Gets the deployment config of multiple Aqua models and calculate the gpu allocations for all compatible shapes.
638
- If no primary Aqua model id provided, gpu count for each compatible shape will be evenly allocated.
639
- If provided, gpu count for each compatible shape will be prioritized for primary model.
640
-
641
- For example, there is one compatible shape "BM.GPU.H100.8" for three models A, B, C, and each model has a gpu count as below:
642
-
643
- A - BM.GPU.H100.8 - 1, 2, 4, 8
644
- B - BM.GPU.H100.8 - 1, 2, 4, 8
645
- C - BM.GPU.H100.8 - 1, 2, 4, 8
646
-
647
- If no primary model is provided, the gpu allocation for A, B, C could be [2, 4, 2], [2, 2, 4] or [4, 2, 2]
648
- If B is the primary model, the gpu allocation is [2, 4, 2] as B always gets the maximum gpu count.
649
-
650
- Parameters
651
- ----------
652
- model_ids: List[str]
653
- A list of OCID of the Aqua model.
654
- primary_model_id: str
655
- The OCID of the primary Aqua model
656
-
657
- Returns
658
- -------
659
- ModelDeploymentConfigSummary:
660
- An instance of ModelDeploymentConfigSummary.
661
630
"""
662
- deployment = {}
663
- model_shape_gpu = {}
664
- for model_id in model_ids :
665
- deployment_config = AquaDeploymentConfig (
666
- ** self .get_deployment_config (model_id = model_id )
667
- )
668
- model_shape_gpu [model_id ] = {
669
- shape : [
670
- item .gpu_count
671
- for item in deployment_config .configuration [
672
- shape
673
- ].multi_model_deployment
674
- ]
675
- for shape in deployment_config .shape
676
- }
677
-
678
- deployment .update (
679
- {
680
- model_id : {
681
- "shape" : deployment_config .shape ,
682
- "configuration" : {
683
- shape : deployment_config .configuration [shape ]
684
- for shape in deployment_config .shape
685
- },
686
- }
687
- }
688
- )
689
-
690
- common_shapes = []
691
- for shape_gpu in model_shape_gpu .values ():
692
- if not common_shapes :
693
- common_shapes = list (shape_gpu .keys ())
694
- else :
695
- common_shapes = [
696
- shape for shape in common_shapes if shape in list (shape_gpu .keys ())
697
- ]
698
-
699
- if not common_shapes :
700
- raise AquaValueError (
701
- "There are no available shapes for models selected at this moment, please select different model to deploy."
702
- )
631
+ Retrieves the deployment configuration for multiple Aqua models and calculates
632
+ the GPU allocations for all compatible shapes.
703
633
704
- gpu_allocation = {}
705
- for common_shape in common_shapes :
706
- model_gpu = {
707
- model : shape_gpu [common_shape ]
708
- for model , shape_gpu in model_shape_gpu .items ()
709
- }
710
- is_compatible , maximum_gpu_count , combination = self ._verify_compatibility (
711
- model_gpu , primary_model_id
712
- )
713
- if is_compatible :
714
- gpu_allocation [common_shape ] = GPUShapeAllocation (
715
- models = combination , total_gpus_available = maximum_gpu_count
716
- )
717
-
718
- if not gpu_allocation :
719
- raise AquaValueError (
720
- "There are no available gpu allocations for models selected at this moment, please select different model to deploy."
721
- )
722
-
723
- return ModelDeploymentConfigSummary (
724
- deployment_config = deployment , gpu_allocation = gpu_allocation
725
- )
726
-
727
- @staticmethod
728
- def _verify_compatibility (
729
- model_gpu_dict : Dict , primary_model_id : str = None
730
- ) -> tuple :
731
- """Calculates the gpu allocations for all compatible shapes.
732
634
If no primary Aqua model id provided, gpu count for each compatible shape will be evenly allocated.
733
635
If provided, gpu count for each compatible shape will be prioritized for primary model.
734
636
@@ -743,66 +645,19 @@ def _verify_compatibility(
743
645
744
646
Parameters
745
647
----------
746
- model_gpu_dict: Dict
747
- A dict of Aqua model and its gpu counts.
748
- primary_model_id: str
749
- The OCID of the primary Aqua model
648
+ model_ids : List[str]
649
+ A list of OCIDs for the Aqua models.
650
+ primary_model_id : Optional[str]
651
+ The OCID of the primary Aqua model. If provided, GPU allocation will prioritize
652
+ this model. Otherwise, GPUs will be evenly allocated.
750
653
751
654
Returns
752
655
-------
753
- tuple:
754
- A tuple of gpu count allocation result .
656
+ ModelDeploymentConfigSummary
657
+ A summary of the model deployment configurations and GPU allocations .
755
658
"""
756
- maximum_gpu_count = max ([sorted (gpus )[- 1 ] for gpus in model_gpu_dict .values ()])
757
- model_gpu_dict_copy = copy .deepcopy (model_gpu_dict )
758
- if primary_model_id :
759
- primary_model_gpu_list = sorted (model_gpu_dict_copy .pop (primary_model_id ))
760
- for gpu_count in reversed (primary_model_gpu_list ):
761
- combinations = get_combinations (model_gpu_dict_copy )
762
- for combination in combinations :
763
- if (
764
- len (combination ) == len (model_gpu_dict_copy )
765
- and sum (combination .values ()) == maximum_gpu_count - gpu_count
766
- ):
767
- combination [primary_model_id ] = gpu_count
768
- return (
769
- True ,
770
- maximum_gpu_count ,
771
- [
772
- GPUModelAllocation (ocid = ocid , gpu_count = gpu_count )
773
- for ocid , gpu_count in combination .items ()
774
- ],
775
- )
776
-
777
- else :
778
- combinations = get_combinations (model_gpu_dict_copy )
779
- minimal_difference = float ("inf" ) # gets the positive infinity
780
- optimal_combination = []
781
- for combination in combinations :
782
- if (
783
- len (combination ) == len (model_gpu_dict_copy )
784
- and sum (combination .values ()) == maximum_gpu_count
785
- ):
786
- difference = max (combination .values ()) - min (combination .values ())
787
- if difference < minimal_difference :
788
- minimal_difference = difference
789
- optimal_combination = combination
790
-
791
- # find the optimal combination, no need to continue
792
- if minimal_difference == 0 :
793
- break
794
-
795
- if optimal_combination :
796
- return (
797
- True ,
798
- maximum_gpu_count ,
799
- [
800
- GPUModelAllocation (ocid = ocid , gpu_count = gpu_count )
801
- for ocid , gpu_count in optimal_combination .items ()
802
- ],
803
- )
804
659
805
- return ( False , 0 , [] )
660
+ return MultiModelDeploymentConfigLoader ( self ). load ( model_ids , primary_model_id )
806
661
807
662
def get_deployment_default_params (
808
663
self ,
0 commit comments