From dfec7ccf65d33e545fd0ee1105ed2602853a886e Mon Sep 17 00:00:00 2001 From: Nick Stogner Date: Tue, 16 Apr 2024 16:26:55 -0400 Subject: [PATCH] tpu-provisioner: Add support for Spot Node Pools --- tpu-provisioner/internal/cloud/gke.go | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tpu-provisioner/internal/cloud/gke.go b/tpu-provisioner/internal/cloud/gke.go index 099e15729..31a2018a1 100644 --- a/tpu-provisioner/internal/cloud/gke.go +++ b/tpu-provisioner/internal/cloud/gke.go @@ -262,6 +262,19 @@ func (g *GKE) nodePoolForPod(name string, p *corev1.Pod) (*containerv1beta1.Node } } + var taints []*containerv1beta1.NodeTaint + + spot := p.Spec.NodeSelector["cloud.google.com/gke-spot"] == "true" + if spot { + // Add the taint that NAP would add. + // https://cloud.google.com/kubernetes-engine/docs/concepts/spot-vms#spotvms-nap + taints = append(taints, &containerv1beta1.NodeTaint{ + Key: "cloud.google.com/gke-spot", + Value: "true", + Effect: "NO_SCHEDULE", + }) + } + var secondaryDisks []containerv1beta1.SecondaryBootDisk if g.ClusterContext.NodeSecondaryDisk != "" { secondaryDisks = []containerv1beta1.SecondaryBootDisk{ @@ -288,6 +301,8 @@ func (g *GKE) nodePoolForPod(name string, p *corev1.Pod) (*containerv1beta1.Node MachineType: machineType, ReservationAffinity: reservation, Labels: labels, + Spot: spot, + Taints: taints, }, InitialNodeCount: int64(nodeCount), Locations: []string{g.ClusterContext.NodeZone},