From 83622157bafed1aad33f397ce9b8086443b7eaad Mon Sep 17 00:00:00 2001 From: ryanaoleary Date: Sat, 21 Dec 2024 00:40:45 +0000 Subject: [PATCH 1/3] Refactor UpgradeStrategy to UpgradeSpec.Type Signed-off-by: ryanaoleary --- docs/reference/api.md | 20 ++++++++++-- .../crds/ray.io_rayservices.yaml | 7 +++-- ray-operator/apis/ray/v1/rayservice_types.go | 9 ++++-- .../apis/ray/v1/zz_generated.deepcopy.go | 28 ++++++++++++++--- .../config/crd/bases/ray.io_rayservices.yaml | 7 +++-- .../controllers/ray/rayservice_controller.go | 31 +++++++++++-------- .../ray/rayservice_controller_unit_test.go | 9 ++++-- .../ray/v1/rayservicespec.go | 23 +++++++------- .../ray/v1/rayserviceupgradespec.go | 27 ++++++++++++++++ .../pkg/client/applyconfiguration/utils.go | 2 ++ 10 files changed, 123 insertions(+), 40 deletions(-) create mode 100644 ray-operator/pkg/client/applyconfiguration/ray/v1/rayserviceupgradespec.go diff --git a/docs/reference/api.md b/docs/reference/api.md index 5d0a2ed062..2593554202 100644 --- a/docs/reference/api.md +++ b/docs/reference/api.md @@ -209,7 +209,7 @@ _Appears in:_ | `serviceUnhealthySecondThreshold` _integer_ | Deprecated: This field is not used anymore. ref: https://github.com/ray-project/kuberay/issues/1685 | | | | `deploymentUnhealthySecondThreshold` _integer_ | Deprecated: This field is not used anymore. ref: https://github.com/ray-project/kuberay/issues/1685 | | | | `serveService` _[Service](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#service-v1-core)_ | ServeService is the Kubernetes service for head node and worker nodes who have healthy http proxy to serve traffics. | | | -| `upgradeStrategy` _[RayServiceUpgradeStrategy](#rayserviceupgradestrategy)_ | UpgradeStrategy represents the strategy used when upgrading the RayService. Currently supports `NewCluster` and `None` | | | +| `upgradeSpec` _[RayServiceUpgradeSpec](#rayserviceupgradespec)_ | UpgradeSpec defines the scaling policy used when upgrading the RayService. | | | | `serveConfigV2` _string_ | Important: Run "make" to regenerate code after modifying this file
Defines the applications and deployments to deploy, should be a YAML multi-line scalar string. | | | | `rayClusterConfig` _[RayClusterSpec](#rayclusterspec)_ | | | | | `excludeHeadPodFromServeSvc` _boolean_ | If the field is set to true, the value of the label `ray.io/serve` on the head Pod should always be false.
Therefore, the head Pod's endpoint will not be added to the Kubernetes Serve service. | | | @@ -217,6 +217,22 @@ _Appears in:_ +#### RayServiceUpgradeSpec + + + + + + + +_Appears in:_ +- [RayServiceSpec](#rayservicespec) + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `type` _[RayServiceUpgradeStrategy](#rayserviceupgradestrategy)_ | Type represents the strategy used when upgrading the RayService. Currently supports `NewCluster` and `None`. | | | + + #### RayServiceUpgradeStrategy _Underlying type:_ _string_ @@ -226,7 +242,7 @@ _Underlying type:_ _string_ _Appears in:_ -- [RayServiceSpec](#rayservicespec) +- [RayServiceUpgradeSpec](#rayserviceupgradespec) diff --git a/helm-chart/kuberay-operator/crds/ray.io_rayservices.yaml b/helm-chart/kuberay-operator/crds/ray.io_rayservices.yaml index 1c11cc038e..f4f9687425 100644 --- a/helm-chart/kuberay-operator/crds/ray.io_rayservices.yaml +++ b/helm-chart/kuberay-operator/crds/ray.io_rayservices.yaml @@ -7943,8 +7943,11 @@ spec: serviceUnhealthySecondThreshold: format: int32 type: integer - upgradeStrategy: - type: string + upgradeSpec: + properties: + type: + type: string + type: object type: object status: properties: diff --git a/ray-operator/apis/ray/v1/rayservice_types.go b/ray-operator/apis/ray/v1/rayservice_types.go index b81d9cbc8d..aca7a46f3e 100644 --- a/ray-operator/apis/ray/v1/rayservice_types.go +++ b/ray-operator/apis/ray/v1/rayservice_types.go @@ -58,6 +58,11 @@ var DeploymentStatusEnum = struct { UNHEALTHY: "UNHEALTHY", } +type RayServiceUpgradeSpec struct { + // Type represents the strategy used when upgrading the RayService. Currently supports `NewCluster` and `None`. + Type *RayServiceUpgradeStrategy `json:"type,omitempty"` +} + // RayServiceSpec defines the desired state of RayService type RayServiceSpec struct { // Deprecated: This field is not used anymore. ref: https://github.com/ray-project/kuberay/issues/1685 @@ -66,8 +71,8 @@ type RayServiceSpec struct { DeploymentUnhealthySecondThreshold *int32 `json:"deploymentUnhealthySecondThreshold,omitempty"` // ServeService is the Kubernetes service for head node and worker nodes who have healthy http proxy to serve traffics. ServeService *corev1.Service `json:"serveService,omitempty"` - // UpgradeStrategy represents the strategy used when upgrading the RayService. Currently supports `NewCluster` and `None` - UpgradeStrategy *RayServiceUpgradeStrategy `json:"upgradeStrategy,omitempty"` + // UpgradeSpec defines the scaling policy used when upgrading the RayService. + UpgradeSpec *RayServiceUpgradeSpec `json:"upgradeSpec,omitempty"` // Important: Run "make" to regenerate code after modifying this file // Defines the applications and deployments to deploy, should be a YAML multi-line scalar string. ServeConfigV2 string `json:"serveConfigV2,omitempty"` diff --git a/ray-operator/apis/ray/v1/zz_generated.deepcopy.go b/ray-operator/apis/ray/v1/zz_generated.deepcopy.go index 586c39ac5a..722a7d2837 100644 --- a/ray-operator/apis/ray/v1/zz_generated.deepcopy.go +++ b/ray-operator/apis/ray/v1/zz_generated.deepcopy.go @@ -533,10 +533,10 @@ func (in *RayServiceSpec) DeepCopyInto(out *RayServiceSpec) { *out = new(corev1.Service) (*in).DeepCopyInto(*out) } - if in.UpgradeStrategy != nil { - in, out := &in.UpgradeStrategy, &out.UpgradeStrategy - *out = new(RayServiceUpgradeStrategy) - **out = **in + if in.UpgradeSpec != nil { + in, out := &in.UpgradeSpec, &out.UpgradeSpec + *out = new(RayServiceUpgradeSpec) + (*in).DeepCopyInto(*out) } in.RayClusterSpec.DeepCopyInto(&out.RayClusterSpec) } @@ -595,6 +595,26 @@ func (in *RayServiceStatuses) DeepCopy() *RayServiceStatuses { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *RayServiceUpgradeSpec) DeepCopyInto(out *RayServiceUpgradeSpec) { + *out = *in + if in.Type != nil { + in, out := &in.Type, &out.Type + *out = new(RayServiceUpgradeStrategy) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RayServiceUpgradeSpec. +func (in *RayServiceUpgradeSpec) DeepCopy() *RayServiceUpgradeSpec { + if in == nil { + return nil + } + out := new(RayServiceUpgradeSpec) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *ScaleStrategy) DeepCopyInto(out *ScaleStrategy) { *out = *in diff --git a/ray-operator/config/crd/bases/ray.io_rayservices.yaml b/ray-operator/config/crd/bases/ray.io_rayservices.yaml index 1c11cc038e..f4f9687425 100644 --- a/ray-operator/config/crd/bases/ray.io_rayservices.yaml +++ b/ray-operator/config/crd/bases/ray.io_rayservices.yaml @@ -7943,8 +7943,11 @@ spec: serviceUnhealthySecondThreshold: format: int32 type: integer - upgradeStrategy: - type: string + upgradeSpec: + properties: + type: + type: string + type: object type: object status: properties: diff --git a/ray-operator/controllers/ray/rayservice_controller.go b/ray-operator/controllers/ray/rayservice_controller.go index 52aad6c304..36eebcc7b2 100644 --- a/ray-operator/controllers/ray/rayservice_controller.go +++ b/ray-operator/controllers/ray/rayservice_controller.go @@ -247,10 +247,12 @@ func validateRayServiceSpec(rayService *rayv1.RayService) error { if headSvc := rayService.Spec.RayClusterSpec.HeadGroupSpec.HeadService; headSvc != nil && headSvc.Name != "" { return fmt.Errorf("spec.rayClusterConfig.headGroupSpec.headService.metadata.name should not be set") } - - if upgradeStrategy := rayService.Spec.UpgradeStrategy; upgradeStrategy != nil { - if *upgradeStrategy != rayv1.NewCluster && *upgradeStrategy != rayv1.None { - return fmt.Errorf("spec.UpgradeStrategy value %s is invalid, valid options are %s or %s", *upgradeStrategy, rayv1.NewCluster, rayv1.None) + if rayService.Spec.UpgradeSpec == nil { + return nil + } + if upgradeType := rayService.Spec.UpgradeSpec.Type; upgradeType != nil { + if *upgradeType != rayv1.NewCluster && *upgradeType != rayv1.None { + return fmt.Errorf("Spec.UpgradeSpec.Type value %s is invalid, valid options are %s or %s", *upgradeType, rayv1.NewCluster, rayv1.None) } } return nil @@ -430,24 +432,27 @@ func (r *RayServiceReconciler) reconcileRayCluster(ctx context.Context, rayServi // For LLM serving, some users might not have sufficient GPU resources to run two RayClusters simultaneously. // Therefore, KubeRay offers ENABLE_ZERO_DOWNTIME as a feature flag for zero-downtime upgrades. zeroDowntimeEnvVar := os.Getenv(ENABLE_ZERO_DOWNTIME) - rayServiceSpecUpgradeStrategy := rayServiceInstance.Spec.UpgradeStrategy - // There are two ways to enable zero downtime upgrade. Through ENABLE_ZERO_DOWNTIME env var or setting Spec.UpgradeStrategy. + var rayServiceSpecUpgradeType *rayv1.RayServiceUpgradeStrategy + if rayServiceInstance.Spec.UpgradeSpec != nil { + rayServiceSpecUpgradeType = rayServiceInstance.Spec.UpgradeSpec.Type + } + // There are two ways to enable zero downtime upgrade. Through ENABLE_ZERO_DOWNTIME env var or setting Spec.UpgradeSpec.Type. // If no fields are set, zero downtime upgrade by default is enabled. - // Spec.UpgradeStrategy takes precedence over ENABLE_ZERO_DOWNTIME. + // Spec.UpgradeSpec.Type takes precedence over ENABLE_ZERO_DOWNTIME. enableZeroDowntime := true - strategyMessage := "" + typeMessage := "" if zeroDowntimeEnvVar != "" { enableZeroDowntime = strings.ToLower(zeroDowntimeEnvVar) != "false" - strategyMessage = fmt.Sprintf("ENABLE_ZERO_DOWNTIME environmental variable is set to %q", strings.ToLower(zeroDowntimeEnvVar)) + typeMessage = fmt.Sprintf("ENABLE_ZERO_DOWNTIME environmental variable is set to %q", strings.ToLower(zeroDowntimeEnvVar)) } - if rayServiceSpecUpgradeStrategy != nil { - enableZeroDowntime = *rayServiceSpecUpgradeStrategy == rayv1.NewCluster - strategyMessage = fmt.Sprintf("Upgrade Strategy is set to %q", *rayServiceSpecUpgradeStrategy) + if rayServiceSpecUpgradeType != nil { + enableZeroDowntime = *rayServiceSpecUpgradeType == rayv1.NewCluster + typeMessage = fmt.Sprintf("Upgrade Type is set to %q", *rayServiceSpecUpgradeType) } if enableZeroDowntime || !enableZeroDowntime && activeRayCluster == nil { if enableZeroDowntime && activeRayCluster != nil { - r.Recorder.Event(rayServiceInstance, "Normal", "UpgradeStrategy", strategyMessage) + r.Recorder.Event(rayServiceInstance, "Normal", "UpgradeSpec.Type", typeMessage) } // Add a pending cluster name. In the next reconcile loop, shouldPrepareNewRayCluster will return DoNothing and we will // actually create the pending RayCluster instance. diff --git a/ray-operator/controllers/ray/rayservice_controller_unit_test.go b/ray-operator/controllers/ray/rayservice_controller_unit_test.go index 7fd2ccf297..1f870d239f 100644 --- a/ray-operator/controllers/ray/rayservice_controller_unit_test.go +++ b/ray-operator/controllers/ray/rayservice_controller_unit_test.go @@ -50,10 +50,12 @@ func TestValidateRayServiceSpec(t *testing.T) { var upgradeStrat rayv1.RayServiceUpgradeStrategy = "invalidStrategy" err = validateRayServiceSpec(&rayv1.RayService{ Spec: rayv1.RayServiceSpec{ - UpgradeStrategy: &upgradeStrat, + UpgradeSpec: &rayv1.RayServiceUpgradeSpec{ + Type: &upgradeStrat, + }, }, }) - assert.Error(t, err, "spec.UpgradeStrategy is invalid") + assert.Error(t, err, "spec.UpgradeSpec.Type is invalid") } func TestGenerateHashWithoutReplicasAndWorkersToDelete(t *testing.T) { @@ -888,8 +890,9 @@ func TestReconcileRayCluster(t *testing.T) { Recorder: record.NewFakeRecorder(1), } service := rayService.DeepCopy() + service.Spec.UpgradeSpec = &rayv1.RayServiceUpgradeSpec{} if tc.rayServiceUpgradeStrategy != "" { - service.Spec.UpgradeStrategy = &tc.rayServiceUpgradeStrategy + service.Spec.UpgradeSpec.Type = &tc.rayServiceUpgradeStrategy } if tc.updateRayClusterSpec { service.Spec.RayClusterSpec.RayVersion = "new-version" diff --git a/ray-operator/pkg/client/applyconfiguration/ray/v1/rayservicespec.go b/ray-operator/pkg/client/applyconfiguration/ray/v1/rayservicespec.go index e98ee45ca3..962b3c9825 100644 --- a/ray-operator/pkg/client/applyconfiguration/ray/v1/rayservicespec.go +++ b/ray-operator/pkg/client/applyconfiguration/ray/v1/rayservicespec.go @@ -3,20 +3,19 @@ package v1 import ( - rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" v1 "k8s.io/api/core/v1" ) // RayServiceSpecApplyConfiguration represents an declarative configuration of the RayServiceSpec type for use // with apply. type RayServiceSpecApplyConfiguration struct { - ServiceUnhealthySecondThreshold *int32 `json:"serviceUnhealthySecondThreshold,omitempty"` - DeploymentUnhealthySecondThreshold *int32 `json:"deploymentUnhealthySecondThreshold,omitempty"` - ServeService *v1.Service `json:"serveService,omitempty"` - UpgradeStrategy *rayv1.RayServiceUpgradeStrategy `json:"upgradeStrategy,omitempty"` - ServeConfigV2 *string `json:"serveConfigV2,omitempty"` - RayClusterSpec *RayClusterSpecApplyConfiguration `json:"rayClusterConfig,omitempty"` - ExcludeHeadPodFromServeSvc *bool `json:"excludeHeadPodFromServeSvc,omitempty"` + ServiceUnhealthySecondThreshold *int32 `json:"serviceUnhealthySecondThreshold,omitempty"` + DeploymentUnhealthySecondThreshold *int32 `json:"deploymentUnhealthySecondThreshold,omitempty"` + ServeService *v1.Service `json:"serveService,omitempty"` + UpgradeSpec *RayServiceUpgradeSpecApplyConfiguration `json:"upgradeSpec,omitempty"` + ServeConfigV2 *string `json:"serveConfigV2,omitempty"` + RayClusterSpec *RayClusterSpecApplyConfiguration `json:"rayClusterConfig,omitempty"` + ExcludeHeadPodFromServeSvc *bool `json:"excludeHeadPodFromServeSvc,omitempty"` } // RayServiceSpecApplyConfiguration constructs an declarative configuration of the RayServiceSpec type for use with @@ -49,11 +48,11 @@ func (b *RayServiceSpecApplyConfiguration) WithServeService(value v1.Service) *R return b } -// WithUpgradeStrategy sets the UpgradeStrategy field in the declarative configuration to the given value +// WithUpgradeSpec sets the UpgradeSpec field in the declarative configuration to the given value // and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the UpgradeStrategy field is set to the value of the last call. -func (b *RayServiceSpecApplyConfiguration) WithUpgradeStrategy(value rayv1.RayServiceUpgradeStrategy) *RayServiceSpecApplyConfiguration { - b.UpgradeStrategy = &value +// If called multiple times, the UpgradeSpec field is set to the value of the last call. +func (b *RayServiceSpecApplyConfiguration) WithUpgradeSpec(value *RayServiceUpgradeSpecApplyConfiguration) *RayServiceSpecApplyConfiguration { + b.UpgradeSpec = value return b } diff --git a/ray-operator/pkg/client/applyconfiguration/ray/v1/rayserviceupgradespec.go b/ray-operator/pkg/client/applyconfiguration/ray/v1/rayserviceupgradespec.go new file mode 100644 index 0000000000..758e49dd2a --- /dev/null +++ b/ray-operator/pkg/client/applyconfiguration/ray/v1/rayserviceupgradespec.go @@ -0,0 +1,27 @@ +// Code generated by applyconfiguration-gen. DO NOT EDIT. + +package v1 + +import ( + v1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" +) + +// RayServiceUpgradeSpecApplyConfiguration represents an declarative configuration of the RayServiceUpgradeSpec type for use +// with apply. +type RayServiceUpgradeSpecApplyConfiguration struct { + Type *v1.RayServiceUpgradeStrategy `json:"type,omitempty"` +} + +// RayServiceUpgradeSpecApplyConfiguration constructs an declarative configuration of the RayServiceUpgradeSpec type for use with +// apply. +func RayServiceUpgradeSpec() *RayServiceUpgradeSpecApplyConfiguration { + return &RayServiceUpgradeSpecApplyConfiguration{} +} + +// WithType sets the Type field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the Type field is set to the value of the last call. +func (b *RayServiceUpgradeSpecApplyConfiguration) WithType(value v1.RayServiceUpgradeStrategy) *RayServiceUpgradeSpecApplyConfiguration { + b.Type = &value + return b +} diff --git a/ray-operator/pkg/client/applyconfiguration/utils.go b/ray-operator/pkg/client/applyconfiguration/utils.go index 79e0a8cc1f..4f4e4ca51e 100644 --- a/ray-operator/pkg/client/applyconfiguration/utils.go +++ b/ray-operator/pkg/client/applyconfiguration/utils.go @@ -41,6 +41,8 @@ func ForKind(kind schema.GroupVersionKind) interface{} { return &rayv1.RayServiceStatusApplyConfiguration{} case v1.SchemeGroupVersion.WithKind("RayServiceStatuses"): return &rayv1.RayServiceStatusesApplyConfiguration{} + case v1.SchemeGroupVersion.WithKind("RayServiceUpgradeSpec"): + return &rayv1.RayServiceUpgradeSpecApplyConfiguration{} case v1.SchemeGroupVersion.WithKind("ScaleStrategy"): return &rayv1.ScaleStrategyApplyConfiguration{} case v1.SchemeGroupVersion.WithKind("ServeDeploymentStatus"): From 97816e810b58207f645525ef2f9e543ff8b6ac62 Mon Sep 17 00:00:00 2001 From: ryanaoleary Date: Mon, 30 Dec 2024 21:25:13 +0000 Subject: [PATCH 2/3] Change naming Signed-off-by: ryanaoleary --- docs/reference/api.md | 10 +++---- .../crds/ray.io_rayservices.yaml | 2 +- ray-operator/apis/ray/v1/rayservice_types.go | 14 +++++----- .../apis/ray/v1/zz_generated.deepcopy.go | 16 +++++------ .../config/crd/bases/ray.io_rayservices.yaml | 2 +- .../ray/v1/rayservicespec.go | 22 +++++++-------- .../ray/v1/rayserviceupgradestrategy.go | 27 +++++++++++++++++++ .../pkg/client/applyconfiguration/utils.go | 4 +-- 8 files changed, 62 insertions(+), 35 deletions(-) create mode 100644 ray-operator/pkg/client/applyconfiguration/ray/v1/rayserviceupgradestrategy.go diff --git a/docs/reference/api.md b/docs/reference/api.md index 2593554202..eb72b80c8b 100644 --- a/docs/reference/api.md +++ b/docs/reference/api.md @@ -209,7 +209,7 @@ _Appears in:_ | `serviceUnhealthySecondThreshold` _integer_ | Deprecated: This field is not used anymore. ref: https://github.com/ray-project/kuberay/issues/1685 | | | | `deploymentUnhealthySecondThreshold` _integer_ | Deprecated: This field is not used anymore. ref: https://github.com/ray-project/kuberay/issues/1685 | | | | `serveService` _[Service](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#service-v1-core)_ | ServeService is the Kubernetes service for head node and worker nodes who have healthy http proxy to serve traffics. | | | -| `upgradeSpec` _[RayServiceUpgradeSpec](#rayserviceupgradespec)_ | UpgradeSpec defines the scaling policy used when upgrading the RayService. | | | +| `upgradeStrategy` _[RayServiceUpgradeStrategy](#rayserviceupgradestrategy)_ | UpgradeStrategy defines the scaling policy used when upgrading the RayService. | | | | `serveConfigV2` _string_ | Important: Run "make" to regenerate code after modifying this file
Defines the applications and deployments to deploy, should be a YAML multi-line scalar string. | | | | `rayClusterConfig` _[RayClusterSpec](#rayclusterspec)_ | | | | | `excludeHeadPodFromServeSvc` _boolean_ | If the field is set to true, the value of the label `ray.io/serve` on the head Pod should always be false.
Therefore, the head Pod's endpoint will not be added to the Kubernetes Serve service. | | | @@ -217,7 +217,7 @@ _Appears in:_ -#### RayServiceUpgradeSpec +#### RayServiceUpgradeStrategy @@ -230,10 +230,10 @@ _Appears in:_ | Field | Description | Default | Validation | | --- | --- | --- | --- | -| `type` _[RayServiceUpgradeStrategy](#rayserviceupgradestrategy)_ | Type represents the strategy used when upgrading the RayService. Currently supports `NewCluster` and `None`. | | | +| `type` _[RayServiceUpgradeType](#rayserviceupgradetype)_ | Type represents the strategy used when upgrading the RayService. Currently supports `NewCluster` and `None`. | | | -#### RayServiceUpgradeStrategy +#### RayServiceUpgradeType _Underlying type:_ _string_ @@ -242,7 +242,7 @@ _Underlying type:_ _string_ _Appears in:_ -- [RayServiceUpgradeSpec](#rayserviceupgradespec) +- [RayServiceUpgradeStrategy](#rayserviceupgradestrategy) diff --git a/helm-chart/kuberay-operator/crds/ray.io_rayservices.yaml b/helm-chart/kuberay-operator/crds/ray.io_rayservices.yaml index f4f9687425..9d91430966 100644 --- a/helm-chart/kuberay-operator/crds/ray.io_rayservices.yaml +++ b/helm-chart/kuberay-operator/crds/ray.io_rayservices.yaml @@ -7943,7 +7943,7 @@ spec: serviceUnhealthySecondThreshold: format: int32 type: integer - upgradeSpec: + upgradeStrategy: properties: type: type: string diff --git a/ray-operator/apis/ray/v1/rayservice_types.go b/ray-operator/apis/ray/v1/rayservice_types.go index aca7a46f3e..2ff77aaebf 100644 --- a/ray-operator/apis/ray/v1/rayservice_types.go +++ b/ray-operator/apis/ray/v1/rayservice_types.go @@ -20,13 +20,13 @@ const ( FailedToUpdateService ServiceStatus = "FailedToUpdateService" ) -type RayServiceUpgradeStrategy string +type RayServiceUpgradeType string const ( // During upgrade, NewCluster strategy will create new upgraded cluster and switch to it when it becomes ready - NewCluster RayServiceUpgradeStrategy = "NewCluster" + NewCluster RayServiceUpgradeType = "NewCluster" // No new cluster will be created while the strategy is set to None - None RayServiceUpgradeStrategy = "None" + None RayServiceUpgradeType = "None" ) // These statuses should match Ray Serve's application statuses @@ -58,9 +58,9 @@ var DeploymentStatusEnum = struct { UNHEALTHY: "UNHEALTHY", } -type RayServiceUpgradeSpec struct { +type RayServiceUpgradeStrategy struct { // Type represents the strategy used when upgrading the RayService. Currently supports `NewCluster` and `None`. - Type *RayServiceUpgradeStrategy `json:"type,omitempty"` + Type *RayServiceUpgradeType `json:"type,omitempty"` } // RayServiceSpec defines the desired state of RayService @@ -71,8 +71,8 @@ type RayServiceSpec struct { DeploymentUnhealthySecondThreshold *int32 `json:"deploymentUnhealthySecondThreshold,omitempty"` // ServeService is the Kubernetes service for head node and worker nodes who have healthy http proxy to serve traffics. ServeService *corev1.Service `json:"serveService,omitempty"` - // UpgradeSpec defines the scaling policy used when upgrading the RayService. - UpgradeSpec *RayServiceUpgradeSpec `json:"upgradeSpec,omitempty"` + // UpgradeStrategy defines the scaling policy used when upgrading the RayService. + UpgradeStrategy *RayServiceUpgradeStrategy `json:"upgradeStrategy,omitempty"` // Important: Run "make" to regenerate code after modifying this file // Defines the applications and deployments to deploy, should be a YAML multi-line scalar string. ServeConfigV2 string `json:"serveConfigV2,omitempty"` diff --git a/ray-operator/apis/ray/v1/zz_generated.deepcopy.go b/ray-operator/apis/ray/v1/zz_generated.deepcopy.go index 722a7d2837..9c864364cd 100644 --- a/ray-operator/apis/ray/v1/zz_generated.deepcopy.go +++ b/ray-operator/apis/ray/v1/zz_generated.deepcopy.go @@ -533,9 +533,9 @@ func (in *RayServiceSpec) DeepCopyInto(out *RayServiceSpec) { *out = new(corev1.Service) (*in).DeepCopyInto(*out) } - if in.UpgradeSpec != nil { - in, out := &in.UpgradeSpec, &out.UpgradeSpec - *out = new(RayServiceUpgradeSpec) + if in.UpgradeStrategy != nil { + in, out := &in.UpgradeStrategy, &out.UpgradeStrategy + *out = new(RayServiceUpgradeStrategy) (*in).DeepCopyInto(*out) } in.RayClusterSpec.DeepCopyInto(&out.RayClusterSpec) @@ -596,21 +596,21 @@ func (in *RayServiceStatuses) DeepCopy() *RayServiceStatuses { } // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *RayServiceUpgradeSpec) DeepCopyInto(out *RayServiceUpgradeSpec) { +func (in *RayServiceUpgradeStrategy) DeepCopyInto(out *RayServiceUpgradeStrategy) { *out = *in if in.Type != nil { in, out := &in.Type, &out.Type - *out = new(RayServiceUpgradeStrategy) + *out = new(RayServiceUpgradeType) **out = **in } } -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RayServiceUpgradeSpec. -func (in *RayServiceUpgradeSpec) DeepCopy() *RayServiceUpgradeSpec { +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RayServiceUpgradeStrategy. +func (in *RayServiceUpgradeStrategy) DeepCopy() *RayServiceUpgradeStrategy { if in == nil { return nil } - out := new(RayServiceUpgradeSpec) + out := new(RayServiceUpgradeStrategy) in.DeepCopyInto(out) return out } diff --git a/ray-operator/config/crd/bases/ray.io_rayservices.yaml b/ray-operator/config/crd/bases/ray.io_rayservices.yaml index f4f9687425..9d91430966 100644 --- a/ray-operator/config/crd/bases/ray.io_rayservices.yaml +++ b/ray-operator/config/crd/bases/ray.io_rayservices.yaml @@ -7943,7 +7943,7 @@ spec: serviceUnhealthySecondThreshold: format: int32 type: integer - upgradeSpec: + upgradeStrategy: properties: type: type: string diff --git a/ray-operator/pkg/client/applyconfiguration/ray/v1/rayservicespec.go b/ray-operator/pkg/client/applyconfiguration/ray/v1/rayservicespec.go index 962b3c9825..f51872bdf7 100644 --- a/ray-operator/pkg/client/applyconfiguration/ray/v1/rayservicespec.go +++ b/ray-operator/pkg/client/applyconfiguration/ray/v1/rayservicespec.go @@ -9,13 +9,13 @@ import ( // RayServiceSpecApplyConfiguration represents an declarative configuration of the RayServiceSpec type for use // with apply. type RayServiceSpecApplyConfiguration struct { - ServiceUnhealthySecondThreshold *int32 `json:"serviceUnhealthySecondThreshold,omitempty"` - DeploymentUnhealthySecondThreshold *int32 `json:"deploymentUnhealthySecondThreshold,omitempty"` - ServeService *v1.Service `json:"serveService,omitempty"` - UpgradeSpec *RayServiceUpgradeSpecApplyConfiguration `json:"upgradeSpec,omitempty"` - ServeConfigV2 *string `json:"serveConfigV2,omitempty"` - RayClusterSpec *RayClusterSpecApplyConfiguration `json:"rayClusterConfig,omitempty"` - ExcludeHeadPodFromServeSvc *bool `json:"excludeHeadPodFromServeSvc,omitempty"` + ServiceUnhealthySecondThreshold *int32 `json:"serviceUnhealthySecondThreshold,omitempty"` + DeploymentUnhealthySecondThreshold *int32 `json:"deploymentUnhealthySecondThreshold,omitempty"` + ServeService *v1.Service `json:"serveService,omitempty"` + UpgradeStrategy *RayServiceUpgradeStrategyApplyConfiguration `json:"upgradeStrategy,omitempty"` + ServeConfigV2 *string `json:"serveConfigV2,omitempty"` + RayClusterSpec *RayClusterSpecApplyConfiguration `json:"rayClusterConfig,omitempty"` + ExcludeHeadPodFromServeSvc *bool `json:"excludeHeadPodFromServeSvc,omitempty"` } // RayServiceSpecApplyConfiguration constructs an declarative configuration of the RayServiceSpec type for use with @@ -48,11 +48,11 @@ func (b *RayServiceSpecApplyConfiguration) WithServeService(value v1.Service) *R return b } -// WithUpgradeSpec sets the UpgradeSpec field in the declarative configuration to the given value +// WithUpgradeStrategy sets the UpgradeStrategy field in the declarative configuration to the given value // and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the UpgradeSpec field is set to the value of the last call. -func (b *RayServiceSpecApplyConfiguration) WithUpgradeSpec(value *RayServiceUpgradeSpecApplyConfiguration) *RayServiceSpecApplyConfiguration { - b.UpgradeSpec = value +// If called multiple times, the UpgradeStrategy field is set to the value of the last call. +func (b *RayServiceSpecApplyConfiguration) WithUpgradeStrategy(value *RayServiceUpgradeStrategyApplyConfiguration) *RayServiceSpecApplyConfiguration { + b.UpgradeStrategy = value return b } diff --git a/ray-operator/pkg/client/applyconfiguration/ray/v1/rayserviceupgradestrategy.go b/ray-operator/pkg/client/applyconfiguration/ray/v1/rayserviceupgradestrategy.go new file mode 100644 index 0000000000..ecf111103e --- /dev/null +++ b/ray-operator/pkg/client/applyconfiguration/ray/v1/rayserviceupgradestrategy.go @@ -0,0 +1,27 @@ +// Code generated by applyconfiguration-gen. DO NOT EDIT. + +package v1 + +import ( + v1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" +) + +// RayServiceUpgradeStrategyApplyConfiguration represents an declarative configuration of the RayServiceUpgradeStrategy type for use +// with apply. +type RayServiceUpgradeStrategyApplyConfiguration struct { + Type *v1.RayServiceUpgradeType `json:"type,omitempty"` +} + +// RayServiceUpgradeStrategyApplyConfiguration constructs an declarative configuration of the RayServiceUpgradeStrategy type for use with +// apply. +func RayServiceUpgradeStrategy() *RayServiceUpgradeStrategyApplyConfiguration { + return &RayServiceUpgradeStrategyApplyConfiguration{} +} + +// WithType sets the Type field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the Type field is set to the value of the last call. +func (b *RayServiceUpgradeStrategyApplyConfiguration) WithType(value v1.RayServiceUpgradeType) *RayServiceUpgradeStrategyApplyConfiguration { + b.Type = &value + return b +} diff --git a/ray-operator/pkg/client/applyconfiguration/utils.go b/ray-operator/pkg/client/applyconfiguration/utils.go index 4f4e4ca51e..8be558f040 100644 --- a/ray-operator/pkg/client/applyconfiguration/utils.go +++ b/ray-operator/pkg/client/applyconfiguration/utils.go @@ -41,8 +41,8 @@ func ForKind(kind schema.GroupVersionKind) interface{} { return &rayv1.RayServiceStatusApplyConfiguration{} case v1.SchemeGroupVersion.WithKind("RayServiceStatuses"): return &rayv1.RayServiceStatusesApplyConfiguration{} - case v1.SchemeGroupVersion.WithKind("RayServiceUpgradeSpec"): - return &rayv1.RayServiceUpgradeSpecApplyConfiguration{} + case v1.SchemeGroupVersion.WithKind("RayServiceUpgradeStrategy"): + return &rayv1.RayServiceUpgradeStrategyApplyConfiguration{} case v1.SchemeGroupVersion.WithKind("ScaleStrategy"): return &rayv1.ScaleStrategyApplyConfiguration{} case v1.SchemeGroupVersion.WithKind("ServeDeploymentStatus"): From 2a0cf6e1b2e63fa97f309e049686aea16445a30b Mon Sep 17 00:00:00 2001 From: ryanaoleary Date: Thu, 2 Jan 2025 18:52:49 +0000 Subject: [PATCH 3/3] Fix CI error Signed-off-by: ryanaoleary --- .../controllers/ray/rayservice_controller.go | 18 ++--- .../ray/rayservice_controller_unit_test.go | 72 +++++++++---------- 2 files changed, 45 insertions(+), 45 deletions(-) diff --git a/ray-operator/controllers/ray/rayservice_controller.go b/ray-operator/controllers/ray/rayservice_controller.go index 36eebcc7b2..3583f65e17 100644 --- a/ray-operator/controllers/ray/rayservice_controller.go +++ b/ray-operator/controllers/ray/rayservice_controller.go @@ -247,12 +247,12 @@ func validateRayServiceSpec(rayService *rayv1.RayService) error { if headSvc := rayService.Spec.RayClusterSpec.HeadGroupSpec.HeadService; headSvc != nil && headSvc.Name != "" { return fmt.Errorf("spec.rayClusterConfig.headGroupSpec.headService.metadata.name should not be set") } - if rayService.Spec.UpgradeSpec == nil { + if rayService.Spec.UpgradeStrategy == nil { return nil } - if upgradeType := rayService.Spec.UpgradeSpec.Type; upgradeType != nil { + if upgradeType := rayService.Spec.UpgradeStrategy.Type; upgradeType != nil { if *upgradeType != rayv1.NewCluster && *upgradeType != rayv1.None { - return fmt.Errorf("Spec.UpgradeSpec.Type value %s is invalid, valid options are %s or %s", *upgradeType, rayv1.NewCluster, rayv1.None) + return fmt.Errorf("Spec.UpgradeStrategy.Type value %s is invalid, valid options are %s or %s", *upgradeType, rayv1.NewCluster, rayv1.None) } } return nil @@ -432,13 +432,13 @@ func (r *RayServiceReconciler) reconcileRayCluster(ctx context.Context, rayServi // For LLM serving, some users might not have sufficient GPU resources to run two RayClusters simultaneously. // Therefore, KubeRay offers ENABLE_ZERO_DOWNTIME as a feature flag for zero-downtime upgrades. zeroDowntimeEnvVar := os.Getenv(ENABLE_ZERO_DOWNTIME) - var rayServiceSpecUpgradeType *rayv1.RayServiceUpgradeStrategy - if rayServiceInstance.Spec.UpgradeSpec != nil { - rayServiceSpecUpgradeType = rayServiceInstance.Spec.UpgradeSpec.Type + var rayServiceSpecUpgradeType *rayv1.RayServiceUpgradeType + if rayServiceInstance.Spec.UpgradeStrategy != nil { + rayServiceSpecUpgradeType = rayServiceInstance.Spec.UpgradeStrategy.Type } - // There are two ways to enable zero downtime upgrade. Through ENABLE_ZERO_DOWNTIME env var or setting Spec.UpgradeSpec.Type. + // There are two ways to enable zero downtime upgrade. Through ENABLE_ZERO_DOWNTIME env var or setting Spec.UpgradeStrategy.Type. // If no fields are set, zero downtime upgrade by default is enabled. - // Spec.UpgradeSpec.Type takes precedence over ENABLE_ZERO_DOWNTIME. + // Spec.UpgradeStrategy.Type takes precedence over ENABLE_ZERO_DOWNTIME. enableZeroDowntime := true typeMessage := "" if zeroDowntimeEnvVar != "" { @@ -452,7 +452,7 @@ func (r *RayServiceReconciler) reconcileRayCluster(ctx context.Context, rayServi if enableZeroDowntime || !enableZeroDowntime && activeRayCluster == nil { if enableZeroDowntime && activeRayCluster != nil { - r.Recorder.Event(rayServiceInstance, "Normal", "UpgradeSpec.Type", typeMessage) + r.Recorder.Event(rayServiceInstance, "Normal", "UpgradeStrategy.Type", typeMessage) } // Add a pending cluster name. In the next reconcile loop, shouldPrepareNewRayCluster will return DoNothing and we will // actually create the pending RayCluster instance. diff --git a/ray-operator/controllers/ray/rayservice_controller_unit_test.go b/ray-operator/controllers/ray/rayservice_controller_unit_test.go index 1f870d239f..c97f96f247 100644 --- a/ray-operator/controllers/ray/rayservice_controller_unit_test.go +++ b/ray-operator/controllers/ray/rayservice_controller_unit_test.go @@ -47,10 +47,10 @@ func TestValidateRayServiceSpec(t *testing.T) { }) assert.NoError(t, err, "The RayService spec is valid.") - var upgradeStrat rayv1.RayServiceUpgradeStrategy = "invalidStrategy" + var upgradeStrat rayv1.RayServiceUpgradeType = "invalidStrategy" err = validateRayServiceSpec(&rayv1.RayService{ Spec: rayv1.RayServiceSpec{ - UpgradeSpec: &rayv1.RayServiceUpgradeSpec{ + UpgradeStrategy: &rayv1.RayServiceUpgradeStrategy{ Type: &upgradeStrat, }, }, @@ -773,13 +773,13 @@ func TestReconcileRayCluster(t *testing.T) { } tests := map[string]struct { - activeCluster *rayv1.RayCluster - rayServiceUpgradeStrategy rayv1.RayServiceUpgradeStrategy - kubeRayVersion string - updateRayClusterSpec bool - enableZeroDowntime bool - shouldPrepareNewCluster bool - updateKubeRayVersion bool + activeCluster *rayv1.RayCluster + rayServiceUpgradeType rayv1.RayServiceUpgradeType + kubeRayVersion string + updateRayClusterSpec bool + enableZeroDowntime bool + shouldPrepareNewCluster bool + updateKubeRayVersion bool }{ // Test 1: Neither active nor pending clusters exist. The `markRestart` function will be called, so the `PendingServiceStatus.RayClusterName` should be set. "Zero-downtime upgrade is enabled. Neither active nor pending clusters exist.": { @@ -829,42 +829,42 @@ func TestReconcileRayCluster(t *testing.T) { }, // Test 7: Zero downtime upgrade is enabled, but is enabled through the RayServiceSpec "Zero-downtime upgrade enabled. The active cluster exist. Zero-downtime upgrade is triggered through RayServiceSpec.": { - activeCluster: activeCluster.DeepCopy(), - updateRayClusterSpec: true, - enableZeroDowntime: true, - shouldPrepareNewCluster: true, - rayServiceUpgradeStrategy: rayv1.NewCluster, + activeCluster: activeCluster.DeepCopy(), + updateRayClusterSpec: true, + enableZeroDowntime: true, + shouldPrepareNewCluster: true, + rayServiceUpgradeType: rayv1.NewCluster, }, // Test 8: Zero downtime upgrade is enabled. Env var is set to false but RayServiceSpec is set to NewCluster. Trigger the zero-downtime upgrade. "Zero-downtime upgrade is enabled through RayServiceSpec and not through env var. Active cluster exist. Trigger the zero-downtime upgrade.": { - activeCluster: activeCluster.DeepCopy(), - updateRayClusterSpec: true, - enableZeroDowntime: false, - shouldPrepareNewCluster: true, - rayServiceUpgradeStrategy: rayv1.NewCluster, + activeCluster: activeCluster.DeepCopy(), + updateRayClusterSpec: true, + enableZeroDowntime: false, + shouldPrepareNewCluster: true, + rayServiceUpgradeType: rayv1.NewCluster, }, // Test 9: Zero downtime upgrade is disabled. Env var is set to true but RayServiceSpec is set to None. "Zero-downtime upgrade is disabled. Env var is set to true but RayServiceSpec is set to None.": { - activeCluster: activeCluster.DeepCopy(), - updateRayClusterSpec: true, - enableZeroDowntime: true, - shouldPrepareNewCluster: false, - rayServiceUpgradeStrategy: rayv1.None, + activeCluster: activeCluster.DeepCopy(), + updateRayClusterSpec: true, + enableZeroDowntime: true, + shouldPrepareNewCluster: false, + rayServiceUpgradeType: rayv1.None, }, // Test 10: Zero downtime upgrade is enabled. Neither the env var nor the RayServiceSpec is set. Trigger the zero-downtime upgrade. "Zero-downtime upgrade is enabled. Neither the env var nor the RayServiceSpec is set.": { - activeCluster: nil, - updateRayClusterSpec: true, - shouldPrepareNewCluster: true, - rayServiceUpgradeStrategy: "", + activeCluster: nil, + updateRayClusterSpec: true, + shouldPrepareNewCluster: true, + rayServiceUpgradeType: "", }, // Test 11: Zero downtime upgrade is disabled. Both the env var and the RayServiceSpec is set to disable zero-downtime upgrade. "Zero-downtime upgrade is disabled by both env var and RayServiceSpec.": { - activeCluster: activeCluster.DeepCopy(), - updateRayClusterSpec: true, - enableZeroDowntime: false, - shouldPrepareNewCluster: false, - rayServiceUpgradeStrategy: rayv1.None, + activeCluster: activeCluster.DeepCopy(), + updateRayClusterSpec: true, + enableZeroDowntime: false, + shouldPrepareNewCluster: false, + rayServiceUpgradeType: rayv1.None, }, } @@ -890,9 +890,9 @@ func TestReconcileRayCluster(t *testing.T) { Recorder: record.NewFakeRecorder(1), } service := rayService.DeepCopy() - service.Spec.UpgradeSpec = &rayv1.RayServiceUpgradeSpec{} - if tc.rayServiceUpgradeStrategy != "" { - service.Spec.UpgradeSpec.Type = &tc.rayServiceUpgradeStrategy + service.Spec.UpgradeStrategy = &rayv1.RayServiceUpgradeStrategy{} + if tc.rayServiceUpgradeType != "" { + service.Spec.UpgradeStrategy.Type = &tc.rayServiceUpgradeType } if tc.updateRayClusterSpec { service.Spec.RayClusterSpec.RayVersion = "new-version"