-
Notifications
You must be signed in to change notification settings - Fork 442
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
18cbc7e
commit 6123133
Showing
14 changed files
with
25,582 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
// Package v1 contains API Schema definitions for the ray v1 API group | ||
// +kubebuilder:object:generate=true | ||
// +groupName=ray.io | ||
package v1 | ||
|
||
import ( | ||
"k8s.io/apimachinery/pkg/runtime/schema" | ||
"sigs.k8s.io/controller-runtime/pkg/scheme" | ||
) | ||
|
||
var ( | ||
// GroupVersion is group version used to register these objects | ||
GroupVersion = schema.GroupVersion{Group: "ray.io", Version: "v1"} | ||
|
||
// SchemeBuilder is used to add go types to the GroupVersionKind scheme | ||
SchemeBuilder = &scheme.Builder{GroupVersion: GroupVersion} | ||
|
||
// AddToScheme adds the types in this group-version to the given scheme. | ||
AddToScheme = SchemeBuilder.AddToScheme | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,197 @@ | ||
package v1 | ||
|
||
import ( | ||
v1 "k8s.io/api/core/v1" | ||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | ||
) | ||
|
||
// EDIT THIS FILE! THIS IS SCAFFOLDING FOR YOU TO OWN! | ||
// NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized. | ||
|
||
// RayClusterSpec defines the desired state of RayCluster | ||
type RayClusterSpec struct { | ||
// INSERT ADDITIONAL SPEC FIELDS - desired state of cluster | ||
// Important: Run "make" to regenerate code after modifying this file | ||
// HeadGroupSpecs are the spec for the head pod | ||
HeadGroupSpec HeadGroupSpec `json:"headGroupSpec"` | ||
// WorkerGroupSpecs are the specs for the worker pods | ||
WorkerGroupSpecs []WorkerGroupSpec `json:"workerGroupSpecs,omitempty"` | ||
// RayVersion is used to determine the command for the Kubernetes Job managed by RayJob | ||
RayVersion string `json:"rayVersion,omitempty"` | ||
// EnableInTreeAutoscaling indicates whether operator should create in tree autoscaling configs | ||
EnableInTreeAutoscaling *bool `json:"enableInTreeAutoscaling,omitempty"` | ||
// AutoscalerOptions specifies optional configuration for the Ray autoscaler. | ||
AutoscalerOptions *AutoscalerOptions `json:"autoscalerOptions,omitempty"` | ||
HeadServiceAnnotations map[string]string `json:"headServiceAnnotations,omitempty"` | ||
} | ||
|
||
// HeadGroupSpec are the spec for the head pod | ||
type HeadGroupSpec struct { | ||
// ServiceType is Kubernetes service type of the head service. it will be used by the workers to connect to the head pod | ||
ServiceType v1.ServiceType `json:"serviceType,omitempty"` | ||
// HeadService is the Kubernetes service of the head pod. | ||
HeadService *v1.Service `json:"headService,omitempty"` | ||
// EnableIngress indicates whether operator should create ingress object for head service or not. | ||
EnableIngress *bool `json:"enableIngress,omitempty"` | ||
// HeadGroupSpec.Replicas is deprecated and ignored; there can only be one head pod per Ray cluster. | ||
Replicas *int32 `json:"replicas,omitempty"` | ||
// RayStartParams are the params of the start command: node-manager-port, object-store-memory, ... | ||
RayStartParams map[string]string `json:"rayStartParams"` | ||
// Template is the exact pod template used in K8s depoyments, statefulsets, etc. | ||
Template v1.PodTemplateSpec `json:"template"` | ||
} | ||
|
||
// WorkerGroupSpec are the specs for the worker pods | ||
type WorkerGroupSpec struct { | ||
// we can have multiple worker groups, we distinguish them by name | ||
GroupName string `json:"groupName"` | ||
// Replicas is the number of desired Pods for this worker group. See https://github.com/ray-project/kuberay/pull/1443 for more details about the reason for making this field optional. | ||
// +kubebuilder:default:=0 | ||
Replicas *int32 `json:"replicas,omitempty"` | ||
// MinReplicas denotes the minimum number of desired Pods for this worker group. | ||
// +kubebuilder:default:=0 | ||
MinReplicas *int32 `json:"minReplicas"` | ||
// MaxReplicas denotes the maximum number of desired Pods for this worker group, and the default value is maxInt32. | ||
// +kubebuilder:default:=2147483647 | ||
MaxReplicas *int32 `json:"maxReplicas"` | ||
// RayStartParams are the params of the start command: address, object-store-memory, ... | ||
RayStartParams map[string]string `json:"rayStartParams"` | ||
// Template is a pod template for the worker | ||
Template v1.PodTemplateSpec `json:"template"` | ||
// ScaleStrategy defines which pods to remove | ||
ScaleStrategy ScaleStrategy `json:"scaleStrategy,omitempty"` | ||
} | ||
|
||
// ScaleStrategy to remove workers | ||
type ScaleStrategy struct { | ||
// WorkersToDelete workers to be deleted | ||
WorkersToDelete []string `json:"workersToDelete,omitempty"` | ||
} | ||
|
||
// AutoscalerOptions specifies optional configuration for the Ray autoscaler. | ||
type AutoscalerOptions struct { | ||
// Resources specifies optional resource request and limit overrides for the autoscaler container. | ||
// Default values: 500m CPU request and limit. 512Mi memory request and limit. | ||
Resources *v1.ResourceRequirements `json:"resources,omitempty"` | ||
// Image optionally overrides the autoscaler's container image. This override is for provided for autoscaler testing and development. | ||
Image *string `json:"image,omitempty"` | ||
// ImagePullPolicy optionally overrides the autoscaler container's image pull policy. This override is for provided for autoscaler testing and development. | ||
ImagePullPolicy *v1.PullPolicy `json:"imagePullPolicy,omitempty"` | ||
// Optional list of environment variables to set in the autoscaler container. | ||
Env []v1.EnvVar `json:"env,omitempty"` | ||
// Optional list of sources to populate environment variables in the autoscaler container. | ||
EnvFrom []v1.EnvFromSource `json:"envFrom,omitempty"` | ||
// Optional list of volumeMounts. This is needed for enabling TLS for the autoscaler container. | ||
VolumeMounts []v1.VolumeMount `json:"volumeMounts,omitempty"` | ||
// SecurityContext defines the security options the container should be run with. | ||
// If set, the fields of SecurityContext override the equivalent fields of PodSecurityContext. | ||
// More info: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/ | ||
SecurityContext *v1.SecurityContext `json:"securityContext,omitempty"` | ||
// IdleTimeoutSeconds is the number of seconds to wait before scaling down a worker pod which is not using Ray resources. | ||
// Defaults to 60 (one minute). It is not read by the KubeRay operator but by the Ray autoscaler. | ||
IdleTimeoutSeconds *int32 `json:"idleTimeoutSeconds,omitempty"` | ||
// UpscalingMode is "Conservative", "Default", or "Aggressive." | ||
// Conservative: Upscaling is rate-limited; the number of pending worker pods is at most the size of the Ray cluster. | ||
// Default: Upscaling is not rate-limited. | ||
// Aggressive: An alias for Default; upscaling is not rate-limited. | ||
// It is not read by the KubeRay operator but by the Ray autoscaler. | ||
UpscalingMode *UpscalingMode `json:"upscalingMode,omitempty"` | ||
} | ||
|
||
// +kubebuilder:validation:Enum=Default;Aggressive;Conservative | ||
type UpscalingMode string | ||
|
||
// The overall state of the Ray cluster. | ||
type ClusterState string | ||
|
||
const ( | ||
Ready ClusterState = "ready" | ||
Unhealthy ClusterState = "unhealthy" | ||
Failed ClusterState = "failed" | ||
) | ||
|
||
// RayClusterStatus defines the observed state of RayCluster | ||
type RayClusterStatus struct { | ||
// INSERT ADDITIONAL STATUS FIELD - define observed state of cluster | ||
// Important: Run "make" to regenerate code after modifying this file | ||
// Status reflects the status of the cluster | ||
State ClusterState `json:"state,omitempty"` | ||
// AvailableWorkerReplicas indicates how many replicas are available in the cluster | ||
AvailableWorkerReplicas int32 `json:"availableWorkerReplicas,omitempty"` | ||
// DesiredWorkerReplicas indicates overall desired replicas claimed by the user at the cluster level. | ||
DesiredWorkerReplicas int32 `json:"desiredWorkerReplicas,omitempty"` | ||
// MinWorkerReplicas indicates sum of minimum replicas of each node group. | ||
MinWorkerReplicas int32 `json:"minWorkerReplicas,omitempty"` | ||
// MaxWorkerReplicas indicates sum of maximum replicas of each node group. | ||
MaxWorkerReplicas int32 `json:"maxWorkerReplicas,omitempty"` | ||
// LastUpdateTime indicates last update timestamp for this cluster status. | ||
// +nullable | ||
LastUpdateTime *metav1.Time `json:"lastUpdateTime,omitempty"` | ||
// Service Endpoints | ||
Endpoints map[string]string `json:"endpoints,omitempty"` | ||
// Head info | ||
Head HeadInfo `json:"head,omitempty"` | ||
// Reason provides more information about current State | ||
Reason string `json:"reason,omitempty"` | ||
// observedGeneration is the most recent generation observed for this RayCluster. It corresponds to the | ||
// RayCluster's generation, which is updated on mutation by the API Server. | ||
// +optional | ||
ObservedGeneration int64 `json:"observedGeneration,omitempty"` | ||
} | ||
|
||
// HeadInfo gives info about head | ||
type HeadInfo struct { | ||
PodIP string `json:"podIP,omitempty"` | ||
ServiceIP string `json:"serviceIP,omitempty"` | ||
} | ||
|
||
// RayNodeType the type of a ray node: head/worker | ||
type RayNodeType string | ||
|
||
const ( | ||
HeadNode RayNodeType = "head" | ||
WorkerNode RayNodeType = "worker" | ||
// RedisCleanupNode is a Pod managed by a Kubernetes Job that cleans up Redis data after | ||
// a RayCluster with GCS fault tolerance enabled is deleted. | ||
RedisCleanupNode RayNodeType = "redis-cleanup" | ||
) | ||
|
||
// RayCluster is the Schema for the RayClusters API | ||
// +kubebuilder:object:root=true | ||
// +kubebuilder:subresource:status | ||
// +kubebuilder:printcolumn:name="desired workers",type=integer,JSONPath=".status.desiredWorkerReplicas",priority=0 | ||
// +kubebuilder:printcolumn:name="available workers",type=integer,JSONPath=".status.availableWorkerReplicas",priority=0 | ||
// +kubebuilder:printcolumn:name="status",type="string",JSONPath=".status.state",priority=0 | ||
// +kubebuilder:printcolumn:name="age",type="date",JSONPath=".metadata.creationTimestamp",priority=0 | ||
// +kubebuilder:printcolumn:name="head pod IP",type="string",JSONPath=".status.head.podIP",priority=1 | ||
// +kubebuilder:printcolumn:name="head service IP",type="string",JSONPath=".status.head.serviceIP",priority=1 | ||
// +genclient | ||
type RayCluster struct { | ||
// Standard object metadata. | ||
metav1.TypeMeta `json:",inline"` | ||
metav1.ObjectMeta `json:"metadata,omitempty"` | ||
|
||
// Specification of the desired behavior of the RayCluster. | ||
Spec RayClusterSpec `json:"spec,omitempty"` | ||
Status RayClusterStatus `json:"status,omitempty"` | ||
} | ||
|
||
//+kubebuilder:object:root=true | ||
|
||
// RayClusterList contains a list of RayCluster | ||
type RayClusterList struct { | ||
metav1.TypeMeta `json:",inline"` | ||
metav1.ListMeta `json:"metadata,omitempty"` | ||
Items []RayCluster `json:"items"` | ||
} | ||
|
||
func init() { | ||
SchemeBuilder.Register(&RayCluster{}, &RayClusterList{}) | ||
} | ||
|
||
type EventReason string | ||
|
||
const ( | ||
RayConfigError EventReason = "RayConfigError" | ||
PodReconciliationError EventReason = "PodReconciliationError" | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
package v1 | ||
|
||
import ( | ||
"encoding/json" | ||
"testing" | ||
|
||
corev1 "k8s.io/api/core/v1" | ||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" | ||
"k8s.io/utils/pointer" | ||
) | ||
|
||
var myRayCluster = &RayCluster{ | ||
ObjectMeta: metav1.ObjectMeta{ | ||
Name: "raycluster-sample", | ||
Namespace: "default", | ||
}, | ||
Spec: RayClusterSpec{ | ||
HeadGroupSpec: HeadGroupSpec{ | ||
Replicas: pointer.Int32Ptr(1), | ||
RayStartParams: map[string]string{ | ||
"port": "6379", | ||
"object-manager-port": "12345", | ||
"node-manager-port": "12346", | ||
"object-store-memory": "100000000", | ||
"num-cpus": "1", | ||
"dashboard-agent-listen-port": "52365", | ||
}, | ||
Template: corev1.PodTemplateSpec{ | ||
ObjectMeta: metav1.ObjectMeta{ | ||
Namespace: "default", | ||
Labels: map[string]string{ | ||
"groupName": "headgroup", | ||
}, | ||
}, | ||
Spec: corev1.PodSpec{ | ||
Containers: []corev1.Container{ | ||
{ | ||
Name: "ray-head", | ||
Image: "rayproject/autoscaler", | ||
Command: []string{"python"}, | ||
Args: []string{"/opt/code.py"}, | ||
Env: []corev1.EnvVar{ | ||
{ | ||
Name: "MY_POD_IP", | ||
ValueFrom: &corev1.EnvVarSource{ | ||
FieldRef: &corev1.ObjectFieldSelector{ | ||
FieldPath: "status.podIP", | ||
}, | ||
}, | ||
}, | ||
}, | ||
}, | ||
}, | ||
}, | ||
}, | ||
}, | ||
WorkerGroupSpecs: []WorkerGroupSpec{ | ||
{ | ||
Replicas: pointer.Int32Ptr(3), | ||
MinReplicas: pointer.Int32Ptr(0), | ||
MaxReplicas: pointer.Int32Ptr(10000), | ||
GroupName: "small-group", | ||
RayStartParams: map[string]string{ | ||
"port": "6379", | ||
"num-cpus": "1", | ||
"dashboard-agent-listen-port": "52365", | ||
}, | ||
Template: corev1.PodTemplateSpec{ | ||
ObjectMeta: metav1.ObjectMeta{ | ||
Namespace: "default", | ||
Labels: map[string]string{ | ||
"groupName": "small-group", | ||
}, | ||
}, | ||
Spec: corev1.PodSpec{ | ||
Containers: []corev1.Container{ | ||
{ | ||
Name: "ray-worker", | ||
Image: "rayproject/autoscaler", | ||
Command: []string{"echo"}, | ||
Args: []string{"Hello Ray"}, | ||
Env: []corev1.EnvVar{ | ||
{ | ||
Name: "MY_POD_IP", | ||
ValueFrom: &corev1.EnvVarSource{ | ||
FieldRef: &corev1.ObjectFieldSelector{ | ||
FieldPath: "status.podIP", | ||
}, | ||
}, | ||
}, | ||
}, | ||
}, | ||
}, | ||
}, | ||
}, | ||
}, | ||
}, | ||
}, | ||
} | ||
|
||
func TestMarshalling(t *testing.T) { | ||
// marshal successfully | ||
_, err := json.Marshal(&myRayCluster) | ||
if err != nil { | ||
t.Fatalf("Expected `%v` but got `%v`", nil, err) | ||
} | ||
} |
Oops, something went wrong.