From 72d84ef694f7a52db06146c3a67bef89da8336db Mon Sep 17 00:00:00 2001
From: Chris Sivanich
Date: Wed, 30 Oct 2024 15:17:15 -0700
Subject: [PATCH] Release: 0.1.0

---
 .../anyscale-kubernetes-operator/Chart.yaml   |   3 +
 charts/anyscale-kubernetes-operator/README.md |   3 +
 .../templates/anyscale_cli_token_secret.yaml  |  10 ++
 .../templates/cluster_role.yaml               |  14 ++
 .../templates/cluster_role_binding.yaml       |  18 ++
 .../templates/configmap_instance_types.yaml   |  18 ++
 .../templates/configmap_patches.yaml          | 162 ++++++++++++++++++
 .../templates/configmap_vector.yaml           | 123 +++++++++++++
 .../templates/deployment.yaml                 | 105 ++++++++++++
 .../templates/role.yaml                       |  12 ++
 .../templates/role_binding.yaml               |  13 ++
 .../templates/service_account.yaml            |  14 ++
 .../templates/validating_webhook.yaml         |  26 +++
 .../anyscale-kubernetes-operator/values.yaml  | 114 ++++++++++++
 14 files changed, 635 insertions(+)
 create mode 100755 charts/anyscale-kubernetes-operator/Chart.yaml
 create mode 100755 charts/anyscale-kubernetes-operator/README.md
 create mode 100755 charts/anyscale-kubernetes-operator/templates/anyscale_cli_token_secret.yaml
 create mode 100755 charts/anyscale-kubernetes-operator/templates/cluster_role.yaml
 create mode 100755 charts/anyscale-kubernetes-operator/templates/cluster_role_binding.yaml
 create mode 100755 charts/anyscale-kubernetes-operator/templates/configmap_instance_types.yaml
 create mode 100755 charts/anyscale-kubernetes-operator/templates/configmap_patches.yaml
 create mode 100755 charts/anyscale-kubernetes-operator/templates/configmap_vector.yaml
 create mode 100755 charts/anyscale-kubernetes-operator/templates/deployment.yaml
 create mode 100755 charts/anyscale-kubernetes-operator/templates/role.yaml
 create mode 100755 charts/anyscale-kubernetes-operator/templates/role_binding.yaml
 create mode 100755 charts/anyscale-kubernetes-operator/templates/service_account.yaml
 create mode 100755 charts/anyscale-kubernetes-operator/templates/validating_webhook.yaml
 create mode 100755 charts/anyscale-kubernetes-operator/values.yaml

diff --git a/charts/anyscale-kubernetes-operator/Chart.yaml b/charts/anyscale-kubernetes-operator/Chart.yaml
new file mode 100755
index 0000000..eadf38f
--- /dev/null
+++ b/charts/anyscale-kubernetes-operator/Chart.yaml
@@ -0,0 +1,3 @@
+apiVersion: v2
+name: anyscale-kubernetes-operator
+version: 0.1.0
diff --git a/charts/anyscale-kubernetes-operator/README.md b/charts/anyscale-kubernetes-operator/README.md
new file mode 100755
index 0000000..3f87721
--- /dev/null
+++ b/charts/anyscale-kubernetes-operator/README.md
@@ -0,0 +1,3 @@
+# Anyscale Kubernetes Operator Helm Chart
+
+Refer to [Anyscale documentation](https://docs.anyscale.com/administration/cloud-deployment/kubernetes/) for latest installation instructions.
diff --git a/charts/anyscale-kubernetes-operator/templates/anyscale_cli_token_secret.yaml b/charts/anyscale-kubernetes-operator/templates/anyscale_cli_token_secret.yaml
new file mode 100755
index 0000000..53630c3
--- /dev/null
+++ b/charts/anyscale-kubernetes-operator/templates/anyscale_cli_token_secret.yaml
@@ -0,0 +1,10 @@
+{{if .Values.anyscaleCliToken}}
+apiVersion: v1
+kind: Secret
+metadata:
+  name: anyscale-cli-token
+  namespace: {{ .Release.Namespace }}
+type: Opaque
+data:
+  ANYSCALE_CLI_TOKEN: {{ .Values.anyscaleCliToken | b64enc | quote }}
+{{end}}
diff --git a/charts/anyscale-kubernetes-operator/templates/cluster_role.yaml b/charts/anyscale-kubernetes-operator/templates/cluster_role.yaml
new file mode 100755
index 0000000..2a09202
--- /dev/null
+++ b/charts/anyscale-kubernetes-operator/templates/cluster_role.yaml
@@ -0,0 +1,14 @@
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  # Since this is a global resource, we append the namespace to it to support
+  # launching multiple cloud deployments into a single Kubernetes cluster (we
+  # assume that clouds do not share namespaces).
+  name: anyscale-operator-token-reviewer-{{ .Release.Namespace }}
+rules:
+  - apiGroups: ["authentication.k8s.io"]
+    resources: ["tokenreviews"]
+    verbs: ["create"]
+  - apiGroups: [""]
+    resources: ["events", "nodes"]
+    verbs: ["get", "watch", "list"]
diff --git a/charts/anyscale-kubernetes-operator/templates/cluster_role_binding.yaml b/charts/anyscale-kubernetes-operator/templates/cluster_role_binding.yaml
new file mode 100755
index 0000000..e007a08
--- /dev/null
+++ b/charts/anyscale-kubernetes-operator/templates/cluster_role_binding.yaml
@@ -0,0 +1,18 @@
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  # Since this is a global resource, we append the namespace to it to support
+  # launching multiple cloud deployments into a single Kubernetes cluster (we
+  # assume that clouds do not share namespaces).
+  name: anyscale-operator-token-reviewer-{{ .Release.Namespace }}
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  # Since this is a global resource, we append the namespace to it to support
+  # launching multiple cloud deployments into a single Kubernetes cluster (we
+  # assume that clouds do not share namespaces).
+  name: anyscale-operator-token-reviewer-{{ .Release.Namespace }}
+subjects:
+  - kind: ServiceAccount
+    name: anyscale-operator
+    namespace: {{ .Release.Namespace }}
diff --git a/charts/anyscale-kubernetes-operator/templates/configmap_instance_types.yaml b/charts/anyscale-kubernetes-operator/templates/configmap_instance_types.yaml
new file mode 100755
index 0000000..c6daab9
--- /dev/null
+++ b/charts/anyscale-kubernetes-operator/templates/configmap_instance_types.yaml
@@ -0,0 +1,18 @@
+{{if or .Values.defaultInstanceTypes .Values.additionalInstanceTypes}}
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: instance-types
+  namespace: {{ .Release.Namespace }}
+  labels:
+    anyscale.com/name: instance-types
+data:
+  version: v1
+  instance_types.yaml: |-
+    {{- if .Values.defaultInstanceTypes }}
+{{ toYaml .Values.defaultInstanceTypes | indent 4 }}
+    {{- end }}
+    {{- if .Values.additionalInstanceTypes }}
+{{ toYaml .Values.additionalInstanceTypes | indent 4 }}
+    {{- end }}
+{{- end }}
diff --git a/charts/anyscale-kubernetes-operator/templates/configmap_patches.yaml b/charts/anyscale-kubernetes-operator/templates/configmap_patches.yaml
new file mode 100755
index 0000000..c72b468
--- /dev/null
+++ b/charts/anyscale-kubernetes-operator/templates/configmap_patches.yaml
@@ -0,0 +1,162 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: patches
+  namespace: {{ .Release.Namespace }}
+data:
+  version: v1
+  patches.yaml: |-
+    {{- if .Values.workloadServiceAccountName }}
+    ########################################
+    # Service Account Support
+    ########################################
+    - kind: Pod
+      patch:
+        - op: add
+          path: /spec/serviceAccountName
+          value: {{ .Values.workloadServiceAccountName | quote }}
+    {{- end }}
+
+    ########################################
+    # Taint tolerations
+    # NOTE: we handle these separate from affinity
+    ########################################
+    # Tolerations for ALL workloads
+    - kind: Pod
+      patch:
+      {{- range $k, $v := .Values.workloadDefaultTolerances.all }}
+        - op: add
+          path: /spec/tolerations/-
+          value:
+            key: {{ $k | quote }}
+            operator: {{ if $v.value }}Equal{{- else }}Exists{{- end }}
+            {{- if $v.value }}
+            value: {{ $v.value | quote }}
+            {{- end }}
+            effect: {{ $v.effect | default "NoSchedule" | quote }}
+      {{- end }}
+
+    # Tolerations for SPOT workloads
+    - kind: Pod
+      selector: "anyscale.com/market-type in (SPOT)"
+      patch:
+      {{- range $k, $v := .Values.workloadDefaultTolerances.spot }}
+        - op: add
+          path: /spec/tolerations/-
+          value:
+            key: {{ $k | quote }}
+            operator: {{ if $v.value }}Equal{{- else }}Exists{{- end }}
+            {{- if $v.value }}
+            value: {{ $v.value | quote }}
+            {{- end }}
+            effect: {{ $v.effect | default "NoSchedule" | quote }}
+      {{- end }}
+
+    # Tolerations for GPU workloads
+    - kind: Pod
+      selector: "anyscale.com/accelerator-type"
+      patch:
+      {{- range $k, $v := .Values.workloadDefaultTolerances.gpu }}
+        - op: add
+          path: /spec/tolerations/-
+          value:
+            key: {{ $k | quote }}
+            operator: {{ if $v.value }}Equal{{- else }}Exists{{- end }}
+            {{- if $v.value }}
+            value: {{ $v.value | quote }}
+            {{- end }}
+            effect: {{ $v.effect | default "NoSchedule" | quote }}
+      {{- end }}
+
+    ########################################
+    # Market Type Support
+    ########################################
+    {{- if eq .Values.cloudProvider "aws" }}
+    - kind: Pod
+      selector: "anyscale.com/market-type in (ON_DEMAND)"
+      patch:
+        - op: add
+          path: /spec/nodeSelector/eks.amazonaws.com~1capacityType
+          value: "ON_DEMAND"
+        - op: add
+          path: /metadata/annotations/cluster-autoscaler.kubernetes.io~1safe-to-evict
+          value: "false"
+    - kind: Pod
+      selector: "anyscale.com/market-type in (SPOT)"
+      patch:
+        - op: add
+          path: /spec/nodeSelector/eks.amazonaws.com~1capacityType
+          value: "SPOT"
+    {{- else if eq .Values.cloudProvider "gcp" }}
+    - kind: Pod
+      selector: "anyscale.com/market-type in (SPOT)"
+      patch:
+        - op: add
+          path: /spec/nodeSelector/cloud.google.com~1gke-spot
+          value: "true"
+    {{- end }}
+
+    {{- if .Values.enableZoneNodeSelector }}
+    ########################################
+    # Zone Support
+    ########################################
+    - kind: Pod
+      selector: anyscale.com/zone
+      patch:
+        - op: add
+          path: /spec/nodeSelector/topology.kubernetes.io~1zone
+        - op: copy
+          from: /metadata/annotations/anyscale.com~1zone
+          path: /spec/nodeSelector/topology.kubernetes.io~1zone
+    {{- end }}
+
+    ########################################
+    # GPU Support
+    ########################################
+    {{- if eq .Values.cloudProvider "aws" }}
+    # Prevent CPU workloads from being scheduled on GPU nodes.
+    - kind: Pod
+      selector: "!anyscale.com/accelerator-type"
+      patch:
+        - op: add
+          path: /spec/affinity
+          value:
+            nodeAffinity:
+              requiredDuringSchedulingIgnoredDuringExecution:
+                nodeSelectorTerms:
+                  - matchExpressions:
+                      - key: "nvidia.com/gpu.count"
+                        operator: DoesNotExist
+    {{- range $key, $value := .Values.supportedAccelerators.aws }}
+    - kind: Pod
+      selector: "anyscale.com/accelerator-type in ({{ $key }})"
+      patch:
+        - op: add
+          {{- if $.Values.acceleratorNodeSelector }}
+          path: /spec/nodeSelector/{{ $.Values.acceleratorNodeSelector }}
+          {{- else }}
+          path: /spec/nodeSelector/nvidia.com~1gpu.product
+          {{- end }}
+          value: "{{ $value }}"
+    {{- end }}
+    {{- else if eq .Values.cloudProvider "gcp" }}
+    {{- range $key, $value := .Values.supportedAccelerators.gcp }}
+    - kind: Pod
+      selector: "anyscale.com/accelerator-type in ({{ $key }})"
+      patch:
+        - op: add
+          {{- if $.Values.acceleratorNodeSelector }}
+          path: /spec/nodeSelector/{{ $.Values.acceleratorNodeSelector }}
+          {{- else }}
+          path: /spec/nodeSelector/cloud.google.com~1gke-accelerator
+          {{- end }}
+          value: "{{ $value }}"
+    {{- end }}
+    {{- end }}
+
+    ########################################
+    # Additional Patches
+    ########################################
+    {{- if .Values.additionalPatches}}
+{{ toYaml .Values.additionalPatches | indent 4 }}
+    {{ end -}}
diff --git a/charts/anyscale-kubernetes-operator/templates/configmap_vector.yaml b/charts/anyscale-kubernetes-operator/templates/configmap_vector.yaml
new file mode 100755
index 0000000..acefca2
--- /dev/null
+++ b/charts/anyscale-kubernetes-operator/templates/configmap_vector.yaml
@@ -0,0 +1,123 @@
+apiVersion: v1
+data:
+  vector.yaml: |
+    data_dir: /tmp/vector
+    sources:
+      raw_operator_logs:
+        type: file
+        include:
+          - /tmp/anyscale/logs/*.log
+      raw_vector_logs:
+        type: internal_logs
+      raw_metrics:
+        type: prometheus_scrape
+        endpoints:
+          - http://localhost:2112/metrics
+        scrape_interval_secs: 15
+      raw_metrics_grpc:
+        type: prometheus_scrape
+        endpoints:
+          - http://localhost:2112/metrics_grpc
+        scrape_interval_secs: 15
+    transforms:
+      vector_logs:
+        type: remap
+        inputs:
+          - raw_vector_logs
+        source: |-
+          .file = "vector"
+      operator_logs:
+        type: remap
+        inputs:
+          - raw_operator_logs
+        source: |-
+          .file = "anyscale-operator"
+      combined_logs:
+        type: remap
+        inputs:
+          - operator_logs
+          - vector_logs
+        source: |-
+          x = .file
+          if (parsed, err = parse_json(.message); err == null) {
+            . = parsed
+          } else {
+            y = .message
+            . = {}
+            .msg = y
+          }
+          if !exists(.cloud_id) {
+            .cloud_id = "unknown"
+          }
+          .file = x
+          .pod = "${POD_NAME}"
+          .pod_ip = "${POD_IP}"
+          .namespace = "${POD_NAMESPACE}"
+          {{ if .Values.region }}
+          .region = "{{ .Values.region }}"
+          {{ end }}
+      system_metrics:
+        type: remap
+        inputs:
+          - raw_metrics
+          - raw_metrics_grpc
+        source: |-
+          .tags.pod = "${POD_NAME}"
+          .tags.pod_ip = "${POD_IP}"
+          .tags.namespace = "${POD_NAMESPACE}"
+          {{ if .Values.region }}
+          .tags.region = "{{ .Values.region }}"
+          {{ end }}
+          .tags.cloud_provider = "{{ .Values.cloudProvider }}"
+          .tags.cloud_resource_id = "{{ .Values.cloudDeploymentId }}"
+          .tags.cloud_id = get_enrichment_table_record("runtime_metadata", {"key": "cloud_id"}).value ?? "unknown"
+    sinks:
+      # Forward the combined operator and Vector logs to the Loki endpoint.
+      sink_loki:
+        healthcheck: false
+        type: loki
+        inputs:
+          - combined_logs
+        endpoint: http://localhost:3100
+        labels:
+          cloud_id: '{{ "{{" }} cloud_id {{ "}}" }}'
+          pod_ip: "${POD_IP}"
+          cloudProvider: {{ .Values.cloudProvider | quote }}
+          cloud_resource_id: {{ .Values.cloudDeploymentId | quote }}
+          file: '{{ "{{" }} file {{ "}}" }}'
+          source: anyscale-operator
+        encoding:
+          codec: json
+        compression: snappy
+      az_metrics_sink:
+        type: prometheus_remote_write
+        inputs:
+          - system_metrics
+        endpoint: http://localhost:3101/api/v1/push
+        default_namespace: dataplane_aop
+        healthcheck:
+          enabled: false
+    api:
+      enabled: true
+    enrichment_tables:
+      runtime_metadata:
+        type: file
+        file:
+          path: /tmp/config/vector/runtime_metadata.csv
+          encoding:
+            type: csv
+        schema:
+          key: string
+          value: string
+  entrypoint.sh: |
+    #!/bin/sh
+    while [ ! -f '/tmp/config/vector/runtime_metadata.csv' ]; do
+      echo Waiting for metadata
+      sleep 1
+    done
+
+    /usr/bin/vector --watch-config --log-format json --config-yaml /etc/vector/vector.yaml
+kind: ConfigMap
+metadata:
+  name: vector
+  namespace: {{ .Release.Namespace }}
diff --git a/charts/anyscale-kubernetes-operator/templates/deployment.yaml b/charts/anyscale-kubernetes-operator/templates/deployment.yaml
new file mode 100755
index 0000000..6641112
--- /dev/null
+++ b/charts/anyscale-kubernetes-operator/templates/deployment.yaml
@@ -0,0 +1,105 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: anyscale-operator
+  namespace: {{ .Release.Namespace }}
+  labels:
+    app: anyscale-operator
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: anyscale-operator
+  template:
+    metadata:
+      labels:
+        app: anyscale-operator
+    spec:
+      serviceAccountName: anyscale-operator
+      containers:
+      - name: operator
+        image: "{{ required "operatorImage is required" .Values.operatorImage }}"
+        imagePullPolicy: {{or .Values.operatorImagePullPolicy "IfNotPresent"}}
+        command: ["/app/go/infra/kubernetes_manager/kubernetes_manager"]
+        args:
+        - --log-level=info
+        - --log-file=/tmp/anyscale/logs/operator.log
+        - start
+        - --cloud-deployment-id={{ required "cloudDeploymentId is required" .Values.cloudDeploymentId }}
+        - --control-plane-url={{or .Values.controlPlaneURL "https://console.anyscale.com"}}
+        - --cloud-provider={{ .Values.cloudProvider }}
+        {{ if not .Values.anyscaleCliToken }}
+        - --region={{ required "region is required for operator registration if anyscaleCliToken is not provided & cloud-native bootstrap scheme is used; must be set to the cloud provider region of this Kubernetes cluster" .Values.region }}
+        {{ end }}
+        - --patch-config-path=/tmp/config/patches.yaml
+        - --system-logs-ingress-proxy-port=3100
+        - --system-metrics-ingress-proxy-port=3101
+        - --vector-enrichment-table-path=/tmp/config/vector/runtime_metadata.csv
+        resources:
+{{ toYaml .Values.operatorResources.operator | indent 10 }}
+        env:
+        - name: POD_NAMESPACE
+          valueFrom:
+            fieldRef:
+              fieldPath: metadata.namespace
+{{if .Values.anyscaleCliToken}}
+        - name: ANYSCALE_CLI_TOKEN
+          valueFrom:
+            secretKeyRef:
+              name: anyscale-cli-token
+              key: ANYSCALE_CLI_TOKEN
+{{end}}
+        volumeMounts:
+        - name: logs
+          mountPath: /tmp/anyscale/logs/
+        - name: patches
+          mountPath: /tmp/config
+        - name: vector-config
+          mountPath: /tmp/config/vector/
+      - name: vector
+        image: timberio/vector:0.40.0-debian
+        imagePullPolicy: IfNotPresent
+        command: ["/bin/sh"]
+        args:
+        - /etc/vector/entrypoint.sh
+        env:
+        - name: POD_NAME
+          valueFrom:
+            fieldRef:
+              fieldPath: metadata.name
+        - name: POD_NAMESPACE
+          valueFrom:
+            fieldRef:
+              fieldPath: metadata.namespace
+        - name: POD_IP
+          valueFrom:
+            fieldRef:
+              fieldPath: status.podIP
+        resources:
+{{ toYaml .Values.operatorResources.vector | indent 10 }}
+        volumeMounts:
+        - mountPath: /tmp/anyscale/logs/
+          name: logs
+        - mountPath: /etc/vector/
+          name: vector
+          readOnly: true
+        - mountPath: /tmp/vector
+          name: vector-state
+        - mountPath: /tmp/config/vector/
+          name: vector-config
+      volumes:
+      - name: logs
+        emptyDir: {}
+      - name: patches
+        configMap:
+          name: patches
+      - name: vector
+        projected:
+          defaultMode: 420
+          sources:
+          - configMap:
+              name: vector
+      - name: vector-state
+        emptyDir: {}
+      - name: vector-config
+        emptyDir: {}
diff --git a/charts/anyscale-kubernetes-operator/templates/role.yaml b/charts/anyscale-kubernetes-operator/templates/role.yaml
new file mode 100755
index 0000000..cb1ab85
--- /dev/null
+++ b/charts/anyscale-kubernetes-operator/templates/role.yaml
@@ -0,0 +1,12 @@
+apiVersion: rbac.authorization.k8s.io/v1
+kind: Role
+metadata:
+  name: anyscale-operator
+  namespace: {{ .Release.Namespace }}
+rules:
+- apiGroups: [""]
+  resources: ["configmaps", "services", "pods", "secrets", "events"]
+  verbs: ["get", "watch", "list", "create", "update", "patch", "delete"]
+- apiGroups: ["networking.k8s.io"]
+  resources: ["ingresses"]
+  verbs: ["get", "watch", "list", "create", "update", "patch", "delete"]
diff --git a/charts/anyscale-kubernetes-operator/templates/role_binding.yaml b/charts/anyscale-kubernetes-operator/templates/role_binding.yaml
new file mode 100755
index 0000000..8649668
--- /dev/null
+++ b/charts/anyscale-kubernetes-operator/templates/role_binding.yaml
@@ -0,0 +1,13 @@
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+  name: anyscale-operator
+  namespace: {{ .Release.Namespace }}
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: Role
+  name: anyscale-operator
+subjects:
+- kind: ServiceAccount
+  name: anyscale-operator
+  namespace: {{ .Release.Namespace }}
diff --git a/charts/anyscale-kubernetes-operator/templates/service_account.yaml b/charts/anyscale-kubernetes-operator/templates/service_account.yaml
new file mode 100755
index 0000000..7938552
--- /dev/null
+++ b/charts/anyscale-kubernetes-operator/templates/service_account.yaml
@@ -0,0 +1,14 @@
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: anyscale-operator
+  namespace: {{ .Release.Namespace }}
+  {{- if .Values.operatorIamIdentity}}
+  {{- if eq .Values.cloudProvider "aws" }}
+  annotations:
+    eks.amazonaws.com/role-arn: {{ .Values.operatorIamIdentity | quote }}
+  {{- else if eq .Values.cloudProvider "gcp" }}
+  annotations:
+    iam.gke.io/gcp-service-account: {{ .Values.operatorIamIdentity | quote }}
+  {{- end }}
+  {{- end }}
diff --git a/charts/anyscale-kubernetes-operator/templates/validating_webhook.yaml b/charts/anyscale-kubernetes-operator/templates/validating_webhook.yaml
new file mode 100755
index 0000000..06a31cb
--- /dev/null
+++ b/charts/anyscale-kubernetes-operator/templates/validating_webhook.yaml
@@ -0,0 +1,26 @@
+apiVersion: admissionregistration.k8s.io/v1
+kind: ValidatingWebhookConfiguration
+metadata:
+  name: anyscale-operator-{{ .Release.Namespace }}
+webhooks:
+- name: instance-types.{{ .Release.Namespace }}.anyscale-operator.anyscale.com
+  rules:
+  - apiGroups: [""]
+    apiVersions: ["v1"]
+    resources: ["configmaps"]
+    scope: Namespaced
+    operations: ["CREATE", "UPDATE"]
+  clientConfig:
+    url: {{or .Values.controlPlaneURL "https://console.anyscale.com"}}/api/v2/kubernetes_manager/admission/{{ .Values.cloudDeploymentId }}
+  sideEffects: None
+  timeoutSeconds: 30
+  # We are starting with hard-failing here; this can be changed to soft-failing if issues arise (e.g. control plane availability).
+  failurePolicy: Fail
+  namespaceSelector:
+    matchLabels:
+      kubernetes.io/metadata.name: {{ .Release.Namespace | quote }}
+  objectSelector:
+    matchLabels:
+      anyscale.com/name: instance-types
+  admissionReviewVersions:
+  - "v1"
diff --git a/charts/anyscale-kubernetes-operator/values.yaml b/charts/anyscale-kubernetes-operator/values.yaml
new file mode 100755
index 0000000..61c524a
--- /dev/null
+++ b/charts/anyscale-kubernetes-operator/values.yaml
@@ -0,0 +1,114 @@
+# cloudDeploymentId specifies the cloud deployment ID of the AOP.
+cloudDeploymentId: ""
+
+# cloudProvider specifies the cloud provider that the AOP is running on.
+# Supported values are "aws" and "gcp".
+cloudProvider: ""
+
+# An Anyscale CLI Token. If set, the Anyscale Operator will use this token to authenticate with the Anyscale control plane.
+# If not set, the Anyscale Operator will attempt to bootstrap using a native cloud provider identity (only supported on AWS/GCP).
+anyscaleCliToken: ""
+
+# region specifies the region that the Kubernetes cluster is running in.
+region: ""
+
+# operatorImage specifies the Docker image to use for the Anyscale Operator.
+operatorImage: ""
+
+# operatorIamIdentity specifies the IAM identity from the cloud provider to bind to the Anyscale Operator.
+# This is only supported on AWS/GCP. For AWS, this should be the ARN of the IAM role. For GCP, this should be the email of the
+# service account. If not set, the Anyscale Operator will use the IAM identity of the Kubernetes node.
+operatorIamIdentity: ""
+
+# operatorResources specifies the resource limits and requests for Anyscale Operator deployment.
+operatorResources:
+  # 'operator' is the container for the Anyscale Operator.
+  operator:
+    requests:
+      memory: 512Mi
+      cpu: 1
+    limits:
+      memory: 2Gi
+  # 'vector' is a sidecar used to forward telemetry (operator logs/metrics) to the Anyscale Control Plane.
+  vector:
+    requests:
+      cpu: 100m
+      memory: 512Mi
+    limits:
+      memory: 512Mi
+
+# defaultInstanceTypes provides a list of default Pod shapes that can be
+# used in Anyscale workloads (abstracted as virtual "instance types").
+defaultInstanceTypes:
+  2CPU-8GB:
+    resources:
+      CPU: 2
+      memory: 8Gi
+  4CPU-16GB:
+    resources:
+      CPU: 4
+      memory: 16Gi
+  8CPU-32GB:
+    resources:
+      CPU: 8
+      memory: 32Gi
+  8CPU-32GB-1xT4:
+    resources:
+      CPU: 8
+      GPU: 1
+      memory: 32Gi
+      'accelerator_type:T4': 1
+
+# additionalInstanceTypes provides a list of additional Pod shapes that can be
+# used in Anyscale workloads.
+additionalInstanceTypes: {}
+
+# additionalPatches provides a list of additional patches that will be respected
+# by the operator when creating Pods & other types of Kubernetes resources.
+additionalPatches: []
+
+# supportedAccelerators provides a mapping of Ray-supported accelerator types
+# to accelerator names that can be used in a GPU nodeSelector for scheduling.
+supportedAccelerators:
+  aws:
+    T4: "Tesla-T4"
+    A10G: "NVIDIA-A10G"
+  gcp:
+    L4: "nvidia-l4"
+
+# If set to true, then the "topology.kubernetes.io/zone" nodeSelector will be applied
+# to all Anyscale workloads with a request to run in a specific zone specified through
+# the compute configuration.
+#
+# This is disabled by default, since many cluster autoscalers do not respect the zone
+# node selector when autoscaling (e.g. they do not pick the right subnet to launch an
+# instance inside of).
+enableZoneNodeSelector: false
+
+# acceleratorNodeSelector provides the nodeSelector key to use when scheduling
+# Pods with accelerators. If not set, the default key for the cloud provider
+# will be used.
+acceleratorNodeSelector: ""
+
+# If set, this service account will be assigned to Pods running Anyscale workloads.
+workloadServiceAccountName: ""
+
+# Default tolerations - these match the Anyscale recommended NodeGroup configurations,
+# including those provided by the Anyscale Cloud Foundations Terraform Modules
+workloadDefaultTolerances:
+  all:
+    # This may optionally be added to Anyscale workload nodes
+    # Allows scheduling on Anyscale's partition of nodes
+    node.anyscale.com/capacity-type:
+      value: "ON_DEMAND"
+      effect: "NoSchedule"
+  gpu:
+    # GPU workloads can tolerate GPU tainted nodes
+    node.anyscale.com/accelerator-type:
+      value: "GPU"
+      effect: "NoSchedule"
+  spot:
+    # Spot workloads can tolerate SPOT tainted nodes
+    node.anyscale.com/capacity-type:
+      value: "SPOT"
+      effect: "NoSchedule"