diff --git a/docker/sandbox-bundled/Dockerfile.gpu b/docker/sandbox-bundled/Dockerfile.gpu
new file mode 100644
index 0000000000..87e8106ecf
--- /dev/null
+++ b/docker/sandbox-bundled/Dockerfile.gpu
@@ -0,0 +1,86 @@
+# syntax=docker/dockerfile:1.4-labs
+# Stage "builder": runs images/preload over images/manifest.txt; the final stage
+# copies the resulting /build/images/ into the k3s agent image directory.
+FROM --platform=${BUILDPLATFORM} mgoltzsche/podman:minimal AS builder
+
+ARG TARGETARCH
+ENV TARGETARCH="${TARGETARCH}"
+
+WORKDIR /build
+
+COPY images/manifest.txt images/preload ./
+RUN --security=insecure ./preload manifest.txt
+
+
+# Stage "bootstrap": builds flyte-sandbox-bootstrap for linux/${TARGETARCH} with cgo disabled.
+FROM --platform=${BUILDPLATFORM} golang:1.19-bullseye AS bootstrap
+
+ARG TARGETARCH
+ENV CGO_ENABLED=0
+ENV GOARCH="${TARGETARCH}"
+ENV GOOS=linux
+
+WORKDIR /flyteorg/build
+COPY bootstrap/go.mod bootstrap/go.sum ./
+RUN go mod download
+COPY bootstrap/ ./
+RUN --mount=type=cache,target=/root/.cache/go-build --mount=type=cache,target=/root/go/pkg/mod \
+    go build -o dist/flyte-sandbox-bootstrap cmd/bootstrap/main.go
+
+# Stage "k3s": source of the /bin and /etc trees copied into the final image below.
+FROM rancher/k3s:v1.26.4-k3s1 AS k3s
+
+# We may want to have another version with devel in the future (has more features but is huge)
+FROM nvidia/cuda:11.8.0-base-ubuntu22.04
+
+ENV CRICTL_VERSION="v1.26.0"
+ENV FLYTE_GPU="ENABLED"
+ARG TARGETARCH
+
+ARG FLYTE_SANDBOX_VERSION
+ENV FLYTE_SANDBOX_VERSION="${FLYTE_SANDBOX_VERSION}"
+# Fetch the crictl build matching TARGETARCH so both amd64 and arm64 images get a
+# runnable binary. curl -f fails the build on an HTTP error instead of saving the
+# error page, and the apt lists are removed in the same layer that created them.
+RUN apt-get update \
+    && apt-get -y install --no-install-recommends ca-certificates curl gnupg2 nvidia-container-toolkit \
+    && rm -rf /var/lib/apt/lists/* \
+    && chmod 1777 /tmp \
+    && mkdir -p /var/lib/rancher/k3s/agent/etc/containerd \
+    && mkdir -p /var/lib/rancher/k3s/server/manifests \
+    && curl -fL https://github.com/kubernetes-sigs/cri-tools/releases/download/${CRICTL_VERSION}/crictl-${CRICTL_VERSION}-linux-${TARGETARCH}.tar.gz --output crictl-${CRICTL_VERSION}-linux-${TARGETARCH}.tar.gz \
+    && tar zxvf crictl-${CRICTL_VERSION}-linux-${TARGETARCH}.tar.gz -C /usr/local/bin \
+    && rm -f crictl-${CRICTL_VERSION}-linux-${TARGETARCH}.tar.gz \
+    && echo "alias kubectl='k3s kubectl'" >> /root/.bashrc
+
+COPY --from=k3s /bin /bin
+COPY --from=k3s /etc /etc
+
+# Provide custom containerd configuration to configure the nvidia-container-runtime
+COPY config.toml.tmpl /var/lib/rancher/k3s/agent/etc/containerd/config.toml.tmpl
+
+COPY --from=builder /build/images/ /var/lib/rancher/k3s/agent/images/
+COPY images/tar/${TARGETARCH}/ /var/lib/rancher/k3s/agent/images/
+COPY manifests/ /var/lib/rancher/k3s/server/manifests-staging/
+COPY bin/ /bin/
+
+# Install bootstrap
+COPY --from=bootstrap /flyteorg/build/dist/flyte-sandbox-bootstrap /bin/
+
+VOLUME /var/lib/flyte/storage
+
+# Set environment variable for picking up additional CA certificates
+ENV SSL_CERT_DIR=/var/lib/flyte/config/ca-certificates
+
+## START https://github.com/k3s-io/k3s/blob/master/package/Dockerfile#L15
+VOLUME /var/lib/kubelet
+VOLUME /var/lib/rancher/k3s
+VOLUME /var/lib/cni
+VOLUME /var/log
+
+ENV PATH="$PATH:/bin/aux"
+ENV CRI_CONFIG_FILE=/var/lib/rancher/k3s/agent/etc/crictl.yaml
+## END https://github.com/k3s-io/k3s/blob/master/package/Dockerfile#L15
+
+ENTRYPOINT [ "/bin/k3d-entrypoint.sh" ]
+CMD [ "server", "--disable=traefik", "--disable=servicelb" ]
diff --git a/docker/sandbox-bundled/Makefile b/docker/sandbox-bundled/Makefile
index 9ae4197673..0791174cbc 100644
--- a/docker/sandbox-bundled/Makefile
+++ b/docker/sandbox-bundled/Makefile
@@ -43,6 +43,17 @@ build: flyte manifests
 	docker buildx build --builder flyte-sandbox --allow security.insecure --load \
 		--tag flyte-sandbox:latest .
 
+# Build the GPU-enabled sandbox image from Dockerfile.gpu as flyte-sandbox-gpu:latest
+.PHONY: build-gpu
+build-gpu: flyte manifests
+	[ -n "$(shell docker buildx ls | awk '/^flyte-sandbox / {print $$1}')" ] || \
+		docker buildx create --name flyte-sandbox \
+		--driver docker-container --driver-opt image=moby/buildkit:master \
+		--buildkitd-flags '--allow-insecure-entitlement security.insecure' \
+		--platform linux/arm64,linux/amd64
+	docker buildx build --builder flyte-sandbox --allow security.insecure --load \
+		--tag flyte-sandbox-gpu:latest -f Dockerfile.gpu .
+
 # Port map
 # 6443 - k8s API server
 # 30000 - Docker Registry
diff --git a/docker/sandbox-bundled/bin/k3d-entrypoint-cgroupv2.sh b/docker/sandbox-bundled/bin/k3d-entrypoint-cgroupv2.sh
index 88cb669ded..d9892decef 100755
--- a/docker/sandbox-bundled/bin/k3d-entrypoint-cgroupv2.sh
+++ b/docker/sandbox-bundled/bin/k3d-entrypoint-cgroupv2.sh
@@ -14,8 +14,12 @@ if [ -f /sys/fs/cgroup/cgroup.controllers ]; then
   # move the processes from the root group to the /init group,
   # otherwise writing subtree_control fails with EBUSY.
   mkdir -p /sys/fs/cgroup/init
-  busybox xargs -rn1 < /sys/fs/cgroup/cgroup.procs > /sys/fs/cgroup/init/cgroup.procs || :
+  if command -v busybox >/dev/null 2>&1; then
+    busybox xargs -rn1 < /sys/fs/cgroup/cgroup.procs > /sys/fs/cgroup/init/cgroup.procs || :
+  else
+    xargs -rn1 < /sys/fs/cgroup/cgroup.procs > /sys/fs/cgroup/init/cgroup.procs || :
+  fi
   # enable controllers
-  sed -e 's/ / +/g' -e 's/^/+/' <"/sys/fs/cgroup/cgroup.controllers" >"/sys/fs/cgroup/cgroup.subtree_control"
+  sed -e 's/ / +/g' -e 's/^/+/' < /sys/fs/cgroup/cgroup.controllers > /sys/fs/cgroup/cgroup.subtree_control
   echo "[$(date -Iseconds)] [CgroupV2 Fix] Done"
 fi
diff --git a/docker/sandbox-bundled/bin/k3d-entrypoint-gpu-check.sh b/docker/sandbox-bundled/bin/k3d-entrypoint-gpu-check.sh
new file mode 100755
index 0000000000..96a5390fc5
--- /dev/null
+++ b/docker/sandbox-bundled/bin/k3d-entrypoint-gpu-check.sh
@@ -0,0 +1,18 @@
+#!/bin/sh
+# GPU availability check: when FLYTE_GPU is non-empty, verify that nvidia-smi runs
+# successfully; otherwise print a hint on stderr and exit with status 255.
+
+if [ -n "${FLYTE_GPU:-}" ]; then
+  echo "GPU Enabled - checking if it's available"
+  # Test the command directly instead of inspecting $? afterwards, so the failure
+  # branch is still reached if this script is ever run with errexit enabled.
+  if nvidia-smi; then
+    echo "nvidia-smi working"
+  else
+    >&2 echo "NVIDIA not available, enable it in docker like so: https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/user-guide.html"
+    exit 255
+  fi
+
+else
+  echo "GPU not enabled"
+fi
diff --git a/docker/sandbox-bundled/config.toml.tmpl b/docker/sandbox-bundled/config.toml.tmpl
new file mode 100644
index 0000000000..c32ad5afaf
--- /dev/null
+++ b/docker/sandbox-bundled/config.toml.tmpl
@@ -0,0 +1,121 @@
+# Exactly the same as: https://github.com/k3s-io/k3s/blob/master/pkg/agent/templates/templates_linux.go#L10
+# EXCEPT under the heading: [plugins."io.containerd.grpc.v1.cri".containerd] we add: default_runtime_name = "nvidia"
+version = 2
+
+[plugins."io.containerd.internal.v1.opt"]
+  path = "{{ .NodeConfig.Containerd.Opt }}"
+[plugins."io.containerd.grpc.v1.cri"]
+  stream_server_address = "127.0.0.1"
+  stream_server_port = "10010"
+  enable_selinux = {{ .NodeConfig.SELinux }}
+  enable_unprivileged_ports = {{ .EnableUnprivileged }}
+  enable_unprivileged_icmp = {{ .EnableUnprivileged }}
+
+{{- if .DisableCgroup}}
+  disable_cgroup = true
+{{end}}
+{{- if .IsRunningInUserNS }}
+  disable_apparmor = true
+  restrict_oom_score_adj = true
+{{end}}
+
+{{- if .NodeConfig.AgentConfig.PauseImage }}
+  sandbox_image = "{{ .NodeConfig.AgentConfig.PauseImage }}"
+{{end}}
+
+{{- if .NodeConfig.AgentConfig.Snapshotter }}
+[plugins."io.containerd.grpc.v1.cri".containerd]
+  default_runtime_name = "nvidia"
+  snapshotter = "{{ .NodeConfig.AgentConfig.Snapshotter }}"
+  disable_snapshot_annotations = {{ if eq .NodeConfig.AgentConfig.Snapshotter "stargz" }}false{{else}}true{{end}}
+{{ if eq .NodeConfig.AgentConfig.Snapshotter "stargz" }}
+{{ if .NodeConfig.AgentConfig.ImageServiceSocket }}
+[plugins."io.containerd.snapshotter.v1.stargz"]
+cri_keychain_image_service_path = "{{ .NodeConfig.AgentConfig.ImageServiceSocket }}"
+[plugins."io.containerd.snapshotter.v1.stargz".cri_keychain]
+enable_keychain = true
+{{end}}
+{{ if .PrivateRegistryConfig }}
+{{ if .PrivateRegistryConfig.Mirrors }}
+[plugins."io.containerd.snapshotter.v1.stargz".registry.mirrors]{{end}}
+{{range $k, $v := .PrivateRegistryConfig.Mirrors }}
+[plugins."io.containerd.snapshotter.v1.stargz".registry.mirrors."{{$k}}"]
+  endpoint = [{{range $i, $j := $v.Endpoints}}{{if $i}}, {{end}}{{printf "%q" .}}{{end}}]
+{{if $v.Rewrites}}
+  [plugins."io.containerd.snapshotter.v1.stargz".registry.mirrors."{{$k}}".rewrite]
+{{range $pattern, $replace := $v.Rewrites}}
+    "{{$pattern}}" = "{{$replace}}"
+{{end}}
+{{end}}
+{{end}}
+{{range $k, $v := .PrivateRegistryConfig.Configs }}
+{{ if $v.Auth }}
+[plugins."io.containerd.snapshotter.v1.stargz".registry.configs."{{$k}}".auth]
+  {{ if $v.Auth.Username }}username = {{ printf "%q" $v.Auth.Username }}{{end}}
+  {{ if $v.Auth.Password }}password = {{ printf "%q" $v.Auth.Password }}{{end}}
+  {{ if $v.Auth.Auth }}auth = {{ printf "%q" $v.Auth.Auth }}{{end}}
+  {{ if $v.Auth.IdentityToken }}identitytoken = {{ printf "%q" $v.Auth.IdentityToken }}{{end}}
+{{end}}
+{{ if $v.TLS }}
+[plugins."io.containerd.snapshotter.v1.stargz".registry.configs."{{$k}}".tls]
+  {{ if $v.TLS.CAFile }}ca_file = "{{ $v.TLS.CAFile }}"{{end}}
+  {{ if $v.TLS.CertFile }}cert_file = "{{ $v.TLS.CertFile }}"{{end}}
+  {{ if $v.TLS.KeyFile }}key_file = "{{ $v.TLS.KeyFile }}"{{end}}
+  {{ if $v.TLS.InsecureSkipVerify }}insecure_skip_verify = true{{end}}
+{{end}}
+{{end}}
+{{end}}
+{{end}}
+{{end}}
+
+{{- if not .NodeConfig.NoFlannel }}
+[plugins."io.containerd.grpc.v1.cri".cni]
+  bin_dir = "{{ .NodeConfig.AgentConfig.CNIBinDir }}"
+  conf_dir = "{{ .NodeConfig.AgentConfig.CNIConfDir }}"
+{{end}}
+
+[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc]
+  runtime_type = "io.containerd.runc.v2"
+
+[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options]
+  SystemdCgroup = {{ .SystemdCgroup }}
+
+{{ if .PrivateRegistryConfig }}
+{{ if .PrivateRegistryConfig.Mirrors }}
+[plugins."io.containerd.grpc.v1.cri".registry.mirrors]{{end}}
+{{range $k, $v := .PrivateRegistryConfig.Mirrors }}
+[plugins."io.containerd.grpc.v1.cri".registry.mirrors."{{$k}}"]
+  endpoint = [{{range $i, $j := $v.Endpoints}}{{if $i}}, {{end}}{{printf "%q" .}}{{end}}]
+{{if $v.Rewrites}}
+  [plugins."io.containerd.grpc.v1.cri".registry.mirrors."{{$k}}".rewrite]
+{{range $pattern, $replace := $v.Rewrites}}
+    "{{$pattern}}" = "{{$replace}}"
+{{end}}
+{{end}}
+{{end}}
+
+{{range $k, $v := .PrivateRegistryConfig.Configs }}
+{{ if $v.Auth }}
+[plugins."io.containerd.grpc.v1.cri".registry.configs."{{$k}}".auth]
+  {{ if $v.Auth.Username }}username = {{ printf "%q" $v.Auth.Username }}{{end}}
+  {{ if $v.Auth.Password }}password = {{ printf "%q" $v.Auth.Password }}{{end}}
+  {{ if $v.Auth.Auth }}auth = {{ printf "%q" $v.Auth.Auth }}{{end}}
+  {{ if $v.Auth.IdentityToken }}identitytoken = {{ printf "%q" $v.Auth.IdentityToken }}{{end}}
+{{end}}
+{{ if $v.TLS }}
+[plugins."io.containerd.grpc.v1.cri".registry.configs."{{$k}}".tls]
+  {{ if $v.TLS.CAFile }}ca_file = "{{ $v.TLS.CAFile }}"{{end}}
+  {{ if $v.TLS.CertFile }}cert_file = "{{ $v.TLS.CertFile }}"{{end}}
+  {{ if $v.TLS.KeyFile }}key_file = "{{ $v.TLS.KeyFile }}"{{end}}
+  {{ if $v.TLS.InsecureSkipVerify }}insecure_skip_verify = true{{end}}
+{{end}}
+{{end}}
+{{end}}
+
+{{range $k, $v := .ExtraRuntimes}}
+[plugins."io.containerd.grpc.v1.cri".containerd.runtimes."{{$k}}"]
+  runtime_type = "{{$v.RuntimeType}}"
+[plugins."io.containerd.grpc.v1.cri".containerd.runtimes."{{$k}}".options]
+  BinaryName = "{{$v.BinaryName}}"
+  SystemdCgroup = {{ $.SystemdCgroup }}
+{{end}}
diff --git a/docker/sandbox-bundled/device-plugin-daemonset.yaml b/docker/sandbox-bundled/device-plugin-daemonset.yaml
new file mode 100644
index 0000000000..c53386c36c
--- /dev/null
+++ b/docker/sandbox-bundled/device-plugin-daemonset.yaml
@@ -0,0 +1,44 @@
+# Sourced from: https://k3d.io/v5.6.0/usage/advanced/cuda/?h=gpu#the-nvidia-device-plugin
+# Thank you to the k3d team for their work on this.
+
+apiVersion: apps/v1
+kind: DaemonSet
+metadata:
+  name: nvidia-device-plugin-daemonset
+  namespace: kube-system
+spec:
+  selector:
+    matchLabels:
+      name: nvidia-device-plugin-ds
+  template:
+    metadata:
+      # Mark this pod as a critical add-on; when enabled, the critical add-on scheduler
+      # reserves resources for critical add-on pods so that they can be rescheduled after
+      # a failure. This annotation works in tandem with the toleration below.
+      annotations:
+        scheduler.alpha.kubernetes.io/critical-pod: ""
+      labels:
+        name: nvidia-device-plugin-ds
+    spec:
+      tolerations:
+      # Allow this pod to be rescheduled while the node is in "critical add-ons only" mode.
+      # This, along with the annotation above marks this pod as a critical add-on.
+      - key: CriticalAddonsOnly
+        operator: Exists
+      containers:
+      - env:
+        - name: DP_DISABLE_HEALTHCHECKS
+          value: xids
+        image: nvidia/k8s-device-plugin:1.11
+        name: nvidia-device-plugin-ctr
+        securityContext:
+          allowPrivilegeEscalation: true
+          capabilities:
+            drop: ["ALL"]
+        volumeMounts:
+          - name: device-plugin
+            mountPath: /var/lib/kubelet/device-plugins
+      volumes:
+        - name: device-plugin
+          hostPath:
+            path: /var/lib/kubelet/device-plugins
diff --git a/docker/sandbox-bundled/manifests/complete.yaml b/docker/sandbox-bundled/manifests/complete.yaml
index 06f86532c6..619a15a8ce 100644
--- a/docker/sandbox-bundled/manifests/complete.yaml
+++ b/docker/sandbox-bundled/manifests/complete.yaml
@@ -1822,3 +1822,12 @@ spec:
   updateStrategy:
     rollingUpdate: {}
     type: RollingUpdate
+---
+apiVersion: helm.cattle.io/v1
+kind: HelmChart
+metadata:
+  name: nvidia-device-plugin
+  namespace: kube-system
+spec:
+  chart: nvidia-device-plugin
+  repo: https://nvidia.github.io/k8s-device-plugin