Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
  • Loading branch information
kevin85421 committed Oct 11, 2023
1 parent 18cbc7e commit 6123133
Show file tree
Hide file tree
Showing 14 changed files with 25,582 additions and 0 deletions.
20 changes: 20 additions & 0 deletions ray-operator/apis/ray/v1/groupversion_info.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
// Package v1 contains API Schema definitions for the ray v1 API group
// +kubebuilder:object:generate=true
// +groupName=ray.io
package v1

import (
"k8s.io/apimachinery/pkg/runtime/schema"
"sigs.k8s.io/controller-runtime/pkg/scheme"
)

var (
// GroupVersion is group version used to register these objects
GroupVersion = schema.GroupVersion{Group: "ray.io", Version: "v1"}

// SchemeBuilder is used to add go types to the GroupVersionKind scheme
SchemeBuilder = &scheme.Builder{GroupVersion: GroupVersion}

// AddToScheme adds the types in this group-version to the given scheme.
AddToScheme = SchemeBuilder.AddToScheme
)
197 changes: 197 additions & 0 deletions ray-operator/apis/ray/v1/raycluster_types.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@
package v1

import (
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// EDIT THIS FILE! THIS IS SCAFFOLDING FOR YOU TO OWN!
// NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized.

// RayClusterSpec defines the desired state of RayCluster
type RayClusterSpec struct {
// INSERT ADDITIONAL SPEC FIELDS - desired state of cluster
// Important: Run "make" to regenerate code after modifying this file
// HeadGroupSpecs are the spec for the head pod
HeadGroupSpec HeadGroupSpec `json:"headGroupSpec"`
// WorkerGroupSpecs are the specs for the worker pods
WorkerGroupSpecs []WorkerGroupSpec `json:"workerGroupSpecs,omitempty"`
// RayVersion is used to determine the command for the Kubernetes Job managed by RayJob
RayVersion string `json:"rayVersion,omitempty"`
// EnableInTreeAutoscaling indicates whether operator should create in tree autoscaling configs
EnableInTreeAutoscaling *bool `json:"enableInTreeAutoscaling,omitempty"`
// AutoscalerOptions specifies optional configuration for the Ray autoscaler.
AutoscalerOptions *AutoscalerOptions `json:"autoscalerOptions,omitempty"`
HeadServiceAnnotations map[string]string `json:"headServiceAnnotations,omitempty"`
}

// HeadGroupSpec are the spec for the head pod
type HeadGroupSpec struct {
// ServiceType is Kubernetes service type of the head service. it will be used by the workers to connect to the head pod
ServiceType v1.ServiceType `json:"serviceType,omitempty"`
// HeadService is the Kubernetes service of the head pod.
HeadService *v1.Service `json:"headService,omitempty"`
// EnableIngress indicates whether operator should create ingress object for head service or not.
EnableIngress *bool `json:"enableIngress,omitempty"`
// HeadGroupSpec.Replicas is deprecated and ignored; there can only be one head pod per Ray cluster.
Replicas *int32 `json:"replicas,omitempty"`
// RayStartParams are the params of the start command: node-manager-port, object-store-memory, ...
RayStartParams map[string]string `json:"rayStartParams"`
// Template is the exact pod template used in K8s depoyments, statefulsets, etc.
Template v1.PodTemplateSpec `json:"template"`
}

// WorkerGroupSpec are the specs for the worker pods
type WorkerGroupSpec struct {
// we can have multiple worker groups, we distinguish them by name
GroupName string `json:"groupName"`
// Replicas is the number of desired Pods for this worker group. See https://github.com/ray-project/kuberay/pull/1443 for more details about the reason for making this field optional.
// +kubebuilder:default:=0
Replicas *int32 `json:"replicas,omitempty"`
// MinReplicas denotes the minimum number of desired Pods for this worker group.
// +kubebuilder:default:=0
MinReplicas *int32 `json:"minReplicas"`
// MaxReplicas denotes the maximum number of desired Pods for this worker group, and the default value is maxInt32.
// +kubebuilder:default:=2147483647
MaxReplicas *int32 `json:"maxReplicas"`
// RayStartParams are the params of the start command: address, object-store-memory, ...
RayStartParams map[string]string `json:"rayStartParams"`
// Template is a pod template for the worker
Template v1.PodTemplateSpec `json:"template"`
// ScaleStrategy defines which pods to remove
ScaleStrategy ScaleStrategy `json:"scaleStrategy,omitempty"`
}

// ScaleStrategy to remove workers
type ScaleStrategy struct {
// WorkersToDelete workers to be deleted
WorkersToDelete []string `json:"workersToDelete,omitempty"`
}

// AutoscalerOptions specifies optional configuration for the Ray autoscaler.
type AutoscalerOptions struct {
// Resources specifies optional resource request and limit overrides for the autoscaler container.
// Default values: 500m CPU request and limit. 512Mi memory request and limit.
Resources *v1.ResourceRequirements `json:"resources,omitempty"`
// Image optionally overrides the autoscaler's container image. This override is for provided for autoscaler testing and development.
Image *string `json:"image,omitempty"`
// ImagePullPolicy optionally overrides the autoscaler container's image pull policy. This override is for provided for autoscaler testing and development.
ImagePullPolicy *v1.PullPolicy `json:"imagePullPolicy,omitempty"`
// Optional list of environment variables to set in the autoscaler container.
Env []v1.EnvVar `json:"env,omitempty"`
// Optional list of sources to populate environment variables in the autoscaler container.
EnvFrom []v1.EnvFromSource `json:"envFrom,omitempty"`
// Optional list of volumeMounts. This is needed for enabling TLS for the autoscaler container.
VolumeMounts []v1.VolumeMount `json:"volumeMounts,omitempty"`
// SecurityContext defines the security options the container should be run with.
// If set, the fields of SecurityContext override the equivalent fields of PodSecurityContext.
// More info: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/
SecurityContext *v1.SecurityContext `json:"securityContext,omitempty"`
// IdleTimeoutSeconds is the number of seconds to wait before scaling down a worker pod which is not using Ray resources.
// Defaults to 60 (one minute). It is not read by the KubeRay operator but by the Ray autoscaler.
IdleTimeoutSeconds *int32 `json:"idleTimeoutSeconds,omitempty"`
// UpscalingMode is "Conservative", "Default", or "Aggressive."
// Conservative: Upscaling is rate-limited; the number of pending worker pods is at most the size of the Ray cluster.
// Default: Upscaling is not rate-limited.
// Aggressive: An alias for Default; upscaling is not rate-limited.
// It is not read by the KubeRay operator but by the Ray autoscaler.
UpscalingMode *UpscalingMode `json:"upscalingMode,omitempty"`
}

// +kubebuilder:validation:Enum=Default;Aggressive;Conservative
type UpscalingMode string

// The overall state of the Ray cluster.
type ClusterState string

const (
Ready ClusterState = "ready"
Unhealthy ClusterState = "unhealthy"
Failed ClusterState = "failed"
)

// RayClusterStatus defines the observed state of RayCluster
type RayClusterStatus struct {
// INSERT ADDITIONAL STATUS FIELD - define observed state of cluster
// Important: Run "make" to regenerate code after modifying this file
// Status reflects the status of the cluster
State ClusterState `json:"state,omitempty"`
// AvailableWorkerReplicas indicates how many replicas are available in the cluster
AvailableWorkerReplicas int32 `json:"availableWorkerReplicas,omitempty"`
// DesiredWorkerReplicas indicates overall desired replicas claimed by the user at the cluster level.
DesiredWorkerReplicas int32 `json:"desiredWorkerReplicas,omitempty"`
// MinWorkerReplicas indicates sum of minimum replicas of each node group.
MinWorkerReplicas int32 `json:"minWorkerReplicas,omitempty"`
// MaxWorkerReplicas indicates sum of maximum replicas of each node group.
MaxWorkerReplicas int32 `json:"maxWorkerReplicas,omitempty"`
// LastUpdateTime indicates last update timestamp for this cluster status.
// +nullable
LastUpdateTime *metav1.Time `json:"lastUpdateTime,omitempty"`
// Service Endpoints
Endpoints map[string]string `json:"endpoints,omitempty"`
// Head info
Head HeadInfo `json:"head,omitempty"`
// Reason provides more information about current State
Reason string `json:"reason,omitempty"`
// observedGeneration is the most recent generation observed for this RayCluster. It corresponds to the
// RayCluster's generation, which is updated on mutation by the API Server.
// +optional
ObservedGeneration int64 `json:"observedGeneration,omitempty"`
}

// HeadInfo gives info about head
type HeadInfo struct {
PodIP string `json:"podIP,omitempty"`
ServiceIP string `json:"serviceIP,omitempty"`
}

// RayNodeType the type of a ray node: head/worker
type RayNodeType string

const (
HeadNode RayNodeType = "head"
WorkerNode RayNodeType = "worker"
// RedisCleanupNode is a Pod managed by a Kubernetes Job that cleans up Redis data after
// a RayCluster with GCS fault tolerance enabled is deleted.
RedisCleanupNode RayNodeType = "redis-cleanup"
)

// RayCluster is the Schema for the RayClusters API
// +kubebuilder:object:root=true
// +kubebuilder:subresource:status
// +kubebuilder:printcolumn:name="desired workers",type=integer,JSONPath=".status.desiredWorkerReplicas",priority=0
// +kubebuilder:printcolumn:name="available workers",type=integer,JSONPath=".status.availableWorkerReplicas",priority=0
// +kubebuilder:printcolumn:name="status",type="string",JSONPath=".status.state",priority=0
// +kubebuilder:printcolumn:name="age",type="date",JSONPath=".metadata.creationTimestamp",priority=0
// +kubebuilder:printcolumn:name="head pod IP",type="string",JSONPath=".status.head.podIP",priority=1
// +kubebuilder:printcolumn:name="head service IP",type="string",JSONPath=".status.head.serviceIP",priority=1
// +genclient
type RayCluster struct {
// Standard object metadata.
metav1.TypeMeta `json:",inline"`
metav1.ObjectMeta `json:"metadata,omitempty"`

// Specification of the desired behavior of the RayCluster.
Spec RayClusterSpec `json:"spec,omitempty"`
Status RayClusterStatus `json:"status,omitempty"`
}

//+kubebuilder:object:root=true

// RayClusterList contains a list of RayCluster
type RayClusterList struct {
metav1.TypeMeta `json:",inline"`
metav1.ListMeta `json:"metadata,omitempty"`
Items []RayCluster `json:"items"`
}

func init() {
SchemeBuilder.Register(&RayCluster{}, &RayClusterList{})
}

type EventReason string

const (
RayConfigError EventReason = "RayConfigError"
PodReconciliationError EventReason = "PodReconciliationError"
)
107 changes: 107 additions & 0 deletions ray-operator/apis/ray/v1/raycluster_types_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
package v1

import (
"encoding/json"
"testing"

corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/utils/pointer"
)

var myRayCluster = &RayCluster{
ObjectMeta: metav1.ObjectMeta{
Name: "raycluster-sample",
Namespace: "default",
},
Spec: RayClusterSpec{
HeadGroupSpec: HeadGroupSpec{
Replicas: pointer.Int32Ptr(1),
RayStartParams: map[string]string{
"port": "6379",
"object-manager-port": "12345",
"node-manager-port": "12346",
"object-store-memory": "100000000",
"num-cpus": "1",
"dashboard-agent-listen-port": "52365",
},
Template: corev1.PodTemplateSpec{
ObjectMeta: metav1.ObjectMeta{
Namespace: "default",
Labels: map[string]string{
"groupName": "headgroup",
},
},
Spec: corev1.PodSpec{
Containers: []corev1.Container{
{
Name: "ray-head",
Image: "rayproject/autoscaler",
Command: []string{"python"},
Args: []string{"/opt/code.py"},
Env: []corev1.EnvVar{
{
Name: "MY_POD_IP",
ValueFrom: &corev1.EnvVarSource{
FieldRef: &corev1.ObjectFieldSelector{
FieldPath: "status.podIP",
},
},
},
},
},
},
},
},
},
WorkerGroupSpecs: []WorkerGroupSpec{
{
Replicas: pointer.Int32Ptr(3),
MinReplicas: pointer.Int32Ptr(0),
MaxReplicas: pointer.Int32Ptr(10000),
GroupName: "small-group",
RayStartParams: map[string]string{
"port": "6379",
"num-cpus": "1",
"dashboard-agent-listen-port": "52365",
},
Template: corev1.PodTemplateSpec{
ObjectMeta: metav1.ObjectMeta{
Namespace: "default",
Labels: map[string]string{
"groupName": "small-group",
},
},
Spec: corev1.PodSpec{
Containers: []corev1.Container{
{
Name: "ray-worker",
Image: "rayproject/autoscaler",
Command: []string{"echo"},
Args: []string{"Hello Ray"},
Env: []corev1.EnvVar{
{
Name: "MY_POD_IP",
ValueFrom: &corev1.EnvVarSource{
FieldRef: &corev1.ObjectFieldSelector{
FieldPath: "status.podIP",
},
},
},
},
},
},
},
},
},
},
},
}

func TestMarshalling(t *testing.T) {
// marshal successfully
_, err := json.Marshal(&myRayCluster)
if err != nil {
t.Fatalf("Expected `%v` but got `%v`", nil, err)
}
}
Loading

0 comments on commit 6123133

Please sign in to comment.