diff --git a/python/ray/autoscaler/v2/event_logger.py b/python/ray/autoscaler/v2/event_logger.py index ba8a00498a99..961dc37bb0ad 100644 --- a/python/ray/autoscaler/v2/event_logger.py +++ b/python/ray/autoscaler/v2/event_logger.py @@ -142,13 +142,13 @@ def log_cluster_scheduling_update( for infeasible_constraint in infeasible_cluster_resource_constraints: log_str = "No available node types can fulfill cluster constraint: " for i, requests_by_count in enumerate( - infeasible_constraint.min_bundles + infeasible_constraint.resource_requests ): resource_map = ResourceRequestUtil.to_resource_map( requests_by_count.request ) log_str += f"{resource_map}*{requests_by_count.count}" - if i < len(infeasible_constraint.min_bundles) - 1: + if i < len(infeasible_constraint.resource_requests) - 1: log_str += ", " log_str += ( diff --git a/python/ray/autoscaler/v2/scheduler.py b/python/ray/autoscaler/v2/scheduler.py index 2d5a70065066..dac774c9446d 100644 --- a/python/ray/autoscaler/v2/scheduler.py +++ b/python/ray/autoscaler/v2/scheduler.py @@ -1194,9 +1194,8 @@ def _enforce_resource_constraints( return [] constraint = constraints[0] - min_bundles = constraint.min_bundles # Flatten the requests for iterating through. - requests = ResourceRequestUtil.ungroup_by_count(min_bundles) + requests = ResourceRequestUtil.ungroup_by_count(constraint.resource_requests) # Pass the empty nodes to schedule. 
scheduled_nodes, infeasible = ResourceDemandScheduler._try_schedule( diff --git a/python/ray/autoscaler/v2/tests/test_event_logger.py b/python/ray/autoscaler/v2/tests/test_event_logger.py index 18bf269f1bf9..da127b1a2be0 100644 --- a/python/ray/autoscaler/v2/tests/test_event_logger.py +++ b/python/ray/autoscaler/v2/tests/test_event_logger.py @@ -78,7 +78,7 @@ def test_log_scheduling_updates(): ], infeasible_cluster_resource_constraints=[ ClusterResourceConstraint( - min_bundles=ResourceRequestUtil.group_by_count( + resource_requests=ResourceRequestUtil.group_by_count( cluster_resource_constraints ) ) diff --git a/python/ray/autoscaler/v2/tests/test_scheduler.py b/python/ray/autoscaler/v2/tests/test_scheduler.py index 3a188bdaf2ce..53e4cdf34de7 100644 --- a/python/ray/autoscaler/v2/tests/test_scheduler.py +++ b/python/ray/autoscaler/v2/tests/test_scheduler.py @@ -70,7 +70,7 @@ def sched_request( cluster_resource_constraints=( [ ClusterResourceConstraint( - min_bundles=ResourceRequestUtil.group_by_count( + resource_requests=ResourceRequestUtil.group_by_count( cluster_resource_constraints ) ) diff --git a/python/ray/autoscaler/v2/tests/test_sdk.py b/python/ray/autoscaler/v2/tests/test_sdk.py index f8f157a6a0b5..aa88c1c7a7a7 100644 --- a/python/ray/autoscaler/v2/tests/test_sdk.py +++ b/python/ray/autoscaler/v2/tests/test_sdk.py @@ -61,12 +61,12 @@ def assert_cluster_resource_constraints( # We only have 1 constraint for now. 
assert len(state.cluster_resource_constraints) == 1 - min_bundles = state.cluster_resource_constraints[0].min_bundles - assert len(min_bundles) == len(expected_bundles) == len(expected_count) + resource_requests = state.cluster_resource_constraints[0].resource_requests + assert len(resource_requests) == len(expected_bundles) == len(expected_count) # Sort all the bundles by bundle's resource names - min_bundles = sorted( - min_bundles, + resource_requests = sorted( + resource_requests, key=lambda bundle_by_count: "".join( bundle_by_count.request.resources_bundle.keys() ), @@ -76,7 +76,7 @@ def assert_cluster_resource_constraints( expected, key=lambda bundle_count: "".join(bundle_count[0].keys()) ) - for actual_bundle_count, expected_bundle_count in zip(min_bundles, expected): + for actual_bundle_count, expected_bundle_count in zip(resource_requests, expected): assert ( dict(actual_bundle_count.request.resources_bundle) == expected_bundle_count[0] diff --git a/python/ray/autoscaler/v2/tests/test_utils.py b/python/ray/autoscaler/v2/tests/test_utils.py index 3efebcdf2747..a6bb4ee43c9a 100644 --- a/python/ray/autoscaler/v2/tests/test_utils.py +++ b/python/ray/autoscaler/v2/tests/test_utils.py @@ -189,7 +189,7 @@ def test_cluster_status_parser_cluster_resource_state(): ], "cluster_resource_constraints": [ { - "min_bundles": [ + "resource_requests": [ { "request": { "resources_bundle": {"GPU": 2, "CPU": 100}, diff --git a/python/ray/autoscaler/v2/utils.py b/python/ray/autoscaler/v2/utils.py index d9eaaf606b29..687a23420764 100644 --- a/python/ray/autoscaler/v2/utils.py +++ b/python/ray/autoscaler/v2/utils.py @@ -559,7 +559,7 @@ def _parse_resource_demands( ResourceRequestByCount( bundle=dict(r.request.resources_bundle.items()), count=r.count ) - for r in constraint_request.min_bundles + for r in constraint_request.resource_requests ] ) constraint_demand.append(demand) diff --git a/src/ray/gcs/gcs_client/accessor.cc b/src/ray/gcs/gcs_client/accessor.cc index 
1463caccfaa2..c8c7023ef1ef 100644 --- a/src/ray/gcs/gcs_client/accessor.cc +++ b/src/ray/gcs/gcs_client/accessor.cc @@ -1440,7 +1440,7 @@ Status AutoscalerStateAccessor::RequestClusterResourceConstraint( auto count = count_array[i]; auto new_resource_requests_by_count = - request.mutable_cluster_resource_constraint()->add_min_bundles(); + request.mutable_cluster_resource_constraint()->add_resource_requests(); new_resource_requests_by_count->mutable_request()->mutable_resources_bundle()->insert( bundle.begin(), bundle.end()); diff --git a/src/ray/gcs/gcs_server/test/gcs_autoscaler_state_manager_test.cc b/src/ray/gcs/gcs_server/test/gcs_autoscaler_state_manager_test.cc index 2f281fa31844..85451d5a3e20 100644 --- a/src/ray/gcs/gcs_server/test/gcs_autoscaler_state_manager_test.cc +++ b/src/ray/gcs/gcs_server/test/gcs_autoscaler_state_manager_test.cc @@ -636,9 +636,10 @@ TEST_F(GcsAutoscalerStateManagerTest, TestClusterResourcesConstraint) { Mocker::GenClusterResourcesConstraint({{{"CPU", 2}, {"GPU", 1}}}, {1})); const auto &state = GetClusterResourceStateSync(); ASSERT_EQ(state.cluster_resource_constraints_size(), 1); - ASSERT_EQ(state.cluster_resource_constraints(0).min_bundles_size(), 1); - CheckResourceRequest(state.cluster_resource_constraints(0).min_bundles(0).request(), - {{"CPU", 2}, {"GPU", 1}}); + ASSERT_EQ(state.cluster_resource_constraints(0).resource_requests_size(), 1); + CheckResourceRequest( + state.cluster_resource_constraints(0).resource_requests(0).request(), + {{"CPU", 2}, {"GPU", 1}}); } // Override it @@ -647,9 +648,10 @@ TEST_F(GcsAutoscalerStateManagerTest, TestClusterResourcesConstraint) { {{{"CPU", 4}, {"GPU", 5}, {"TPU", 1}}}, {1})); const auto &state = GetClusterResourceStateSync(); ASSERT_EQ(state.cluster_resource_constraints_size(), 1); - ASSERT_EQ(state.cluster_resource_constraints(0).min_bundles_size(), 1); - CheckResourceRequest(state.cluster_resource_constraints(0).min_bundles(0).request(), - {{"CPU", 4}, {"GPU", 5}, {"TPU", 1}}); + 
ASSERT_EQ(state.cluster_resource_constraints(0).resource_requests_size(), 1); + CheckResourceRequest( + state.cluster_resource_constraints(0).resource_requests(0).request(), + {{"CPU", 4}, {"GPU", 5}, {"TPU", 1}}); } } diff --git a/src/ray/gcs/test/gcs_test_util.h b/src/ray/gcs/test/gcs_test_util.h index 72565ba07564..ce3827c16b28 100644 --- a/src/ray/gcs/test/gcs_test_util.h +++ b/src/ray/gcs/test/gcs_test_util.h @@ -421,7 +421,7 @@ struct Mocker { for (size_t i = 0; i < request_resources.size(); i++) { auto &resource = request_resources[i]; auto count = count_array[i]; - auto bundle = constraint.add_min_bundles(); + auto bundle = constraint.add_resource_requests(); bundle->set_count(count); bundle->mutable_request()->mutable_resources_bundle()->insert(resource.begin(), resource.end()); diff --git a/src/ray/protobuf/autoscaler.proto b/src/ray/protobuf/autoscaler.proto index 5172c889542f..cea8e6a38a33 100644 --- a/src/ray/protobuf/autoscaler.proto +++ b/src/ray/protobuf/autoscaler.proto @@ -77,9 +77,15 @@ message GangResourceRequest { // Cluster resource constraint represents minimal cluster size requirement, // this is issued through ray.autoscaler.sdk.request_resources. message ClusterResourceConstraint { - // If not emtpy, the cluster should have the capacity (total resource) to fit - // the min_bundles. - repeated ResourceRequestByCount min_bundles = 1; + // Commands the autoscaler to accommodate the specified requests. + // + // For more context, please check out the py-doc for the `ray.autoscaler.sdk.request_resources` + // method. + // + // NOTE: This call is only a hint to the autoscaler. The actual resulting cluster + // size may be slightly larger or smaller than expected depending on the + // internal bin packing algorithm and max worker count restrictions. + repeated ResourceRequestByCount resource_requests = 1; } // Node status for a ray node.