Skip to content

Commit

Permalink
Add annotation to specify which labels to copy to nodes
Browse files Browse the repository at this point in the history
  • Loading branch information
nstogner committed Sep 9, 2024
1 parent fbf2100 commit e00d42d
Show file tree
Hide file tree
Showing 3 changed files with 59 additions and 5 deletions.
3 changes: 3 additions & 0 deletions tpu-provisioner/internal/cloud/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@ const (

LabelProvisionerNodepoolID = "provisioner-nodepool-id"

// AnnotationCopyLabels is the Pod annotation whose value is a comma-separated list of label keys to copy from the Pod to the node pool config (Nodes).
AnnotationCopyLabels = "tpu-provisioner.cloud.google.com/copy-labels"

EventNodePoolCreationStarted = "NodePoolCreationStarted"
EventNodePoolCreationSucceeded = "NodePoolCreationSucceeded"
EventNodePoolCreationFailed = "NodePoolCreationFailed"
Expand Down
18 changes: 18 additions & 0 deletions tpu-provisioner/internal/cloud/gke.go
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,17 @@ func (g *GKE) nodePoolForPod(name string, p *corev1.Pod) (*containerv1beta1.Node
}
}

// Copy labels specified by annotation to the Node.
for _, key := range strings.Split(getAnnotation(p, AnnotationCopyLabels), ",") {
key = strings.TrimSpace(key)
if key == "" {
continue
}
if val, ok := p.Labels[key]; ok {
labels[key] = val
}
}

for labelKey, labelValue := range p.Spec.NodeSelector {
switch labelKey {
case ICIResiliencyLabel:
Expand Down Expand Up @@ -492,3 +503,10 @@ func min(a, b int) int {
}
return b
}

// getAnnotation returns the value stored under key in the annotations of
// Pod p, or the empty string when the Pod carries no such annotation.
func getAnnotation(p *corev1.Pod, key string) string {
	// Indexing a nil map yields the zero value, so a Pod without any
	// annotations needs no separate check.
	return p.Annotations[key]
}
43 changes: 38 additions & 5 deletions tpu-provisioner/internal/cloud/gke_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -220,11 +220,12 @@ func TestPodToNodePoolName(t *testing.T) {
func TestNodePoolForPod(t *testing.T) {
trueVar := true
tests := []struct {
desc string
gkeContext GKEContext
additionalLabels map[string]string
selector map[string]string
want *containerv1beta1.NodePool
desc string
gkeContext GKEContext
additionalLabels map[string]string
additionalAnnotations map[string]string
selector map[string]string
want *containerv1beta1.NodePool
}{
{
desc: "simple case",
Expand Down Expand Up @@ -482,6 +483,38 @@ func TestNodePoolForPod(t *testing.T) {
UpgradeSettings: &container.UpgradeSettings{MaxSurge: 1},
},
},
{
desc: "labels to copy from pod to node by annotation",
additionalLabels: map[string]string{
"copy-me": "val-x",
"dont-copy-me": "val-y",
},
additionalAnnotations: map[string]string{
"tpu-provisioner.cloud.google.com/copy-labels": "copy-me",
},
want: &containerv1beta1.NodePool{
Config: &container.NodeConfig{
Labels: map[string]string{
"google.com/nodepool-manager": "tpu-provisioner",
"google.com/tpu-provisioner-jobset-name": "jobset-test",
"google.com/tpu-provisioner-jobset-namespace": "default",
"google.com/tpu-provisioner-parent-kind": "job",
"google.com/tpu-provisioner-parent-name": "jobset-test-job-1-0",
"google.com/tpu-provisioner-parent-namespace": "default",
"copy-me": "val-x",
},
MachineType: "ct5p-hightpu-4t",
ShieldedInstanceConfig: &container.ShieldedInstanceConfig{EnableIntegrityMonitoring: true},
},
InitialNodeCount: 512,
Locations: []string{""},
Management: &container.NodeManagement{AutoRepair: true, AutoUpgrade: false},
MaxPodsConstraint: &container.MaxPodsConstraint{MaxPodsPerNode: 15},
Name: "test-pool",
PlacementPolicy: &container.PlacementPolicy{TpuTopology: "8x16x16", Type: "COMPACT"},
UpgradeSettings: &container.UpgradeSettings{MaxSurge: 1},
},
},
}
for _, tc := range tests {
t.Run(tc.desc, func(t *testing.T) {
Expand Down

0 comments on commit e00d42d

Please sign in to comment.