2
2
# Copyright (c) 2024, 2025 Oracle and/or its affiliates.
3
3
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
4
4
5
+ import copy
5
6
import shlex
6
7
from typing import Dict , List , Union
7
8
41
42
from ads .aqua .model import AquaModelApp
42
43
from ads .aqua .modeldeployment .entities import (
43
44
AquaDeployment ,
45
+ AquaDeploymentConfig ,
44
46
AquaDeploymentDetail ,
45
47
CreateModelDeploymentDetails ,
48
+ GPUModelAllocation ,
49
+ GPUShapeAllocation ,
50
+ ModelDeploymentConfigSummary ,
46
51
)
52
+ from ads .aqua .modeldeployment .utils import get_combinations
47
53
from ads .aqua .ui import ModelFormat
48
54
from ads .common .object_storage_details import ObjectStorageDetails
49
55
from ads .common .utils import get_log_links
@@ -621,6 +627,183 @@ def get_deployment_config(self, model_id: str) -> Dict:
621
627
)
622
628
return config
623
629
630
@telemetry(
    entry_point="plugin=deployment&action=get_multimodel_compatible_shapes",
    name="aqua",
)
def get_multimodel_compatible_shapes(
    self, model_ids: List[str], primary_model_id: str = None
) -> ModelDeploymentConfigSummary:
    """Gets the deployment config of multiple Aqua models and calculates the gpu allocations for all compatible shapes.

    If no primary Aqua model id is provided, the gpu count for each compatible shape is allocated
    as evenly as possible. If provided, the primary model is prioritized and receives the largest
    gpu count that still allows the remaining models to fit.

    For example, there is one compatible shape "BM.GPU.H100.8" for three models A, B, C, and each
    model has a gpu count as below:

    A - BM.GPU.H100.8 - 1, 2, 4, 8
    B - BM.GPU.H100.8 - 1, 2, 4, 8
    C - BM.GPU.H100.8 - 1, 2, 4, 8

    If no primary model is provided, the gpu allocation for A, B, C could be [2, 4, 2], [2, 2, 4] or [4, 2, 2]
    If B is the primary model, the gpu allocation is [2, 4, 2] as B always gets the maximum gpu count.

    Parameters
    ----------
    model_ids: List[str]
        A list of OCID of the Aqua model.
    primary_model_id: str
        The OCID of the primary Aqua model

    Returns
    -------
    ModelDeploymentConfigSummary:
        An instance of ModelDeploymentConfigSummary holding the per-model deployment
        configuration and the gpu allocation for every compatible shape.

    Raises
    ------
    AquaValueError
        If the models share no common shape, or if no gpu allocation can be
        found for any of the common shapes.
    """
    deployment = {}
    model_shape_gpu = {}
    for model_id in model_ids:
        deployment_config = AquaDeploymentConfig(
            **self.get_deployment_config(model_id=model_id)
        )
        # Per shape, the gpu counts this model supports in a multi-model deployment.
        model_shape_gpu[model_id] = {
            shape: [
                item.gpu_count
                for item in deployment_config.configuration[
                    shape
                ].multi_model_deployment
            ]
            for shape in deployment_config.shape
        }

        deployment[model_id] = {
            "shape": deployment_config.shape,
            "configuration": {
                shape: deployment_config.configuration[shape]
                for shape in deployment_config.shape
            },
        }

    # Intersect the shapes of all models, keeping the order of the first model.
    # ``None`` marks "nothing seen yet"; an empty list must stay empty rather
    # than being re-seeded from the next model, otherwise a shape that is not
    # shared by every model could slip through.
    common_shapes = None
    for shape_gpu in model_shape_gpu.values():
        if common_shapes is None:
            common_shapes = list(shape_gpu)
        else:
            common_shapes = [shape for shape in common_shapes if shape in shape_gpu]

    if not common_shapes:
        raise AquaValueError(
            "There are no available shapes for models selected at this moment, please select different model to deploy."
        )

    gpu_allocation = {}
    for common_shape in common_shapes:
        model_gpu = {
            model: shape_gpu[common_shape]
            for model, shape_gpu in model_shape_gpu.items()
        }
        # A model without any gpu option on this shape cannot be co-deployed on it.
        if not all(model_gpu.values()):
            continue

        is_compatible, maximum_gpu_count, combination = self._verify_compatibility(
            model_gpu, primary_model_id
        )
        if is_compatible:
            gpu_allocation[common_shape] = GPUShapeAllocation(
                models=combination, total_gpus_available=maximum_gpu_count
            )

    if not gpu_allocation:
        raise AquaValueError(
            "There are no available gpu allocations for models selected at this moment, please select different model to deploy."
        )

    return ModelDeploymentConfigSummary(
        deployment_config=deployment, gpu_allocation=gpu_allocation
    )
726
+
727
@staticmethod
def _verify_compatibility(
    model_gpu_dict: Dict, primary_model_id: str = None
) -> tuple:
    """Calculates the gpu allocation of the given models for a single shape.

    The total to distribute is the largest gpu count found across all models.
    If no primary Aqua model id is provided, the allocation whose largest and
    smallest gpu counts differ the least is chosen. If provided, the primary
    model receives the largest of its gpu counts for which the remaining
    models can use up exactly the rest of the total.

    For example, there is one compatible shape "BM.GPU.H100.8" for three models A, B, C, and each
    model has a gpu count as below:

    A - BM.GPU.H100.8 - 1, 2, 4, 8
    B - BM.GPU.H100.8 - 1, 2, 4, 8
    C - BM.GPU.H100.8 - 1, 2, 4, 8

    If no primary model is provided, the gpu allocation for A, B, C could be [2, 4, 2], [2, 2, 4] or [4, 2, 2]
    If B is the primary model, the gpu allocation is [2, 4, 2] as B always gets the maximum gpu count.

    Parameters
    ----------
    model_gpu_dict: Dict
        A dict of Aqua model and its gpu counts.
    primary_model_id: str
        The OCID of the primary Aqua model

    Returns
    -------
    tuple:
        ``(is_compatible, maximum_gpu_count, allocations)``. On success
        ``allocations`` is a list of ``GPUModelAllocation``; otherwise the
        result is ``(False, 0, [])``.

    Raises
    ------
    KeyError
        If ``primary_model_id`` is given but is not a key of ``model_gpu_dict``.
    """
    # No models, or a model without any gpu option, means nothing can be
    # allocated; report "incompatible" instead of failing inside max().
    if not model_gpu_dict or not all(model_gpu_dict.values()):
        return (False, 0, [])

    maximum_gpu_count = max(max(gpus) for gpus in model_gpu_dict.values())
    # Work on a copy so that popping the primary model never alters the caller's dict.
    model_gpu_dict_copy = copy.deepcopy(model_gpu_dict)

    if primary_model_id:
        primary_model_gpu_list = sorted(
            model_gpu_dict_copy.pop(primary_model_id), reverse=True
        )
        # The combinations of the remaining models do not depend on the gpu
        # count tried for the primary model, so build them only once.
        # NOTE(review): presumably each combination maps a model id to one
        # chosen gpu count -- confirm against get_combinations.
        combinations = list(get_combinations(model_gpu_dict_copy))
        for gpu_count in primary_model_gpu_list:
            remaining_gpu_count = maximum_gpu_count - gpu_count
            for combination in combinations:
                if (
                    len(combination) == len(model_gpu_dict_copy)
                    and sum(combination.values()) == remaining_gpu_count
                ):
                    combination[primary_model_id] = gpu_count
                    return (
                        True,
                        maximum_gpu_count,
                        [
                            GPUModelAllocation(ocid=ocid, gpu_count=count)
                            for ocid, count in combination.items()
                        ],
                    )

    else:
        combinations = get_combinations(model_gpu_dict_copy)
        minimal_difference = float("inf")  # gets the positive infinity
        optimal_combination = []
        for combination in combinations:
            if (
                len(combination) == len(model_gpu_dict_copy)
                and sum(combination.values()) == maximum_gpu_count
            ):
                difference = max(combination.values()) - min(combination.values())
                if difference < minimal_difference:
                    minimal_difference = difference
                    optimal_combination = combination

                    # find the optimal combination, no need to continue
                    if minimal_difference == 0:
                        break

        if optimal_combination:
            return (
                True,
                maximum_gpu_count,
                [
                    GPUModelAllocation(ocid=ocid, gpu_count=count)
                    for ocid, count in optimal_combination.items()
                ],
            )

    return (False, 0, [])
806
+
624
807
def get_deployment_default_params (
625
808
self ,
626
809
model_id : str ,
@@ -671,9 +854,8 @@ def get_deployment_default_params(
671
854
).get (instance_shape , UNKNOWN_DICT )
672
855
673
856
if "multi_model_deployment" in instance_shape_config and gpu_count :
674
- gpu_params = (
675
- instance_shape_config
676
- .get ("multi_model_deployment" , UNKNOWN_DICT )
857
+ gpu_params = instance_shape_config .get (
858
+ "multi_model_deployment" , UNKNOWN_DICT
677
859
)
678
860
679
861
for gpu_config in gpu_params :
@@ -684,11 +866,9 @@ def get_deployment_default_params(
684
866
break
685
867
686
868
else :
687
- config_params = (
688
- instance_shape_config
689
- .get ("parameters" , UNKNOWN_DICT )
690
- .get (get_container_params_type (container_type_key ), UNKNOWN )
691
- )
869
+ config_params = instance_shape_config .get (
870
+ "parameters" , UNKNOWN_DICT
871
+ ).get (get_container_params_type (container_type_key ), UNKNOWN )
692
872
693
873
if config_params :
694
874
params_list = get_params_list (config_params )
0 commit comments