From 42e6c00a1509bbf8d13940b4d3952dbb64dcb3d1 Mon Sep 17 00:00:00 2001 From: Marco Braga Date: Wed, 10 Apr 2024 00:16:03 -0300 Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9B=20fix/network/subnets:=20update=20?= =?UTF-8?q?subnets=20before=20tag=20failures?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The subnet information from unmanaged subnets, like Availability Zone name discovered when reconciling subnets, is not updated for additional subnets when the are failures to tag subnets for the preceding subnets. For example, given subnetId1 and subnetId2, if the subnet tag failed when setting tags on subnetId1, the subnetId2 will not be discovered and attributes not correctly set. This change enforce subnet updates before trying to apply tags, and not skip the lopp when checking existing subnets. Additionally, a new option was added to the unit for reconcileSubnets(), allowing to ensure expected SubnetSpec attributes from Subnets{} post reconciliation. --- pkg/cloud/services/network/subnets.go | 24 +-- pkg/cloud/services/network/subnets_test.go | 235 ++++++++++++++++++++- 2 files changed, 243 insertions(+), 16 deletions(-) diff --git a/pkg/cloud/services/network/subnets.go b/pkg/cloud/services/network/subnets.go index 65a70e7445..eb71873a38 100644 --- a/pkg/cloud/services/network/subnets.go +++ b/pkg/cloud/services/network/subnets.go @@ -133,8 +133,17 @@ func (s *Service) reconcileSubnets() error { sub := &subnets[i] existingSubnet := existing.FindEqual(sub) if existingSubnet != nil { - subnetTags := sub.Tags + if len(sub.ID) > 0 { + // NOTE: Describing subnets assumes the subnet.ID is the same as the subnet's identifier (i.e. subnet-), + // if we have a subnet ID specified in the spec, we need to restore it. + existingSubnet.ID = sub.ID + } + + // Update subnet spec with the existing subnet details + existingSubnet.DeepCopyInto(sub) + // Make sure tags are up-to-date. + subnetTags := sub.Tags if err := wait.WaitForWithRetryable(wait.NewBackoff(), func() (bool, error) { buildParams := s.getSubnetTagParams(unmanagedVPC, existingSubnet.GetResourceID(), existingSubnet.IsPublic, existingSubnet.AvailabilityZone, subnetTags) tagsBuilder := tags.New(&buildParams, tags.WithEC2(s.EC2Client)) @@ -151,19 +160,8 @@ func (s *Service) reconcileSubnets() error { // We may not have a permission to tag unmanaged subnets. // When tagging unmanaged subnet fails, record an event and proceed. record.Warnf(s.scope.InfraCluster(), "FailedTagSubnet", "Failed tagging unmanaged Subnet %q: %v", existingSubnet.GetResourceID(), err) - break - } - - // TODO(vincepri): check if subnet needs to be updated. - - if len(sub.ID) > 0 { - // NOTE: Describing subnets assumes the subnet.ID is the same as the subnet's identifier (i.e. subnet-), - // if we have a subnet ID specified in the spec, we need to restore it. - existingSubnet.ID = sub.ID + continue } - - // Update subnet spec with the existing subnet details - existingSubnet.DeepCopyInto(sub) } else if unmanagedVPC { // If there is no existing subnet and we have an umanaged vpc report an error record.Warnf(s.scope.InfraCluster(), "FailedMatchSubnet", "Using unmanaged VPC and failed to find existing subnet for specified subnet id %d, cidr %q", sub.GetResourceID(), sub.CidrBlock) diff --git a/pkg/cloud/services/network/subnets_test.go b/pkg/cloud/services/network/subnets_test.go index 840583a37c..8036b8597c 100644 --- a/pkg/cloud/services/network/subnets_test.go +++ b/pkg/cloud/services/network/subnets_test.go @@ -47,7 +47,9 @@ func TestReconcileSubnets(t *testing.T) { input ScopeBuilder expect func(m *mocks.MockEC2APIMockRecorder) errorExpected bool + errorMessageExpected string tagUnmanagedNetworkResources bool + optionalExpectSubnets infrav1.Subnets }{ { name: "Unmanaged VPC, disable TagUnmanagedNetworkResources, 2 existing subnets in vpc, 2 subnet in spec, subnets match, with routes, should succeed", @@ -486,6 +488,194 @@ func TestReconcileSubnets(t *testing.T) { }, { name: "Unmanaged VPC, 2 existing matching subnets, subnet tagging fails, should succeed", + input: NewClusterScope().WithNetwork(&infrav1.NetworkSpec{ + VPC: infrav1.VPCSpec{ + ID: subnetsVPCID, + }, + Subnets: []infrav1.SubnetSpec{ + { + ID: "subnet-1", + }, + }, + }).WithTagUnmanagedNetworkResources(true), + expect: func(m *mocks.MockEC2APIMockRecorder) { + m.DescribeSubnetsWithContext(context.TODO(), gomock.Eq(&ec2.DescribeSubnetsInput{ + Filters: []*ec2.Filter{ + { + Name: aws.String("state"), + Values: []*string{aws.String("pending"), aws.String("available")}, + }, + { + Name: aws.String("vpc-id"), + Values: []*string{aws.String(subnetsVPCID)}, + }, + }, + })). + Return(&ec2.DescribeSubnetsOutput{ + Subnets: []*ec2.Subnet{ + { + VpcId: aws.String(subnetsVPCID), + SubnetId: aws.String("subnet-1"), + AvailabilityZone: aws.String("us-east-1a"), + CidrBlock: aws.String("10.0.10.0/24"), + MapPublicIpOnLaunch: aws.Bool(false), + }, + }, + }, nil) + + m.DescribeRouteTablesWithContext(context.TODO(), gomock.AssignableToTypeOf(&ec2.DescribeRouteTablesInput{})). + Return(&ec2.DescribeRouteTablesOutput{ + RouteTables: []*ec2.RouteTable{ + { + VpcId: aws.String(subnetsVPCID), + Associations: []*ec2.RouteTableAssociation{ + { + SubnetId: aws.String("subnet-1"), + RouteTableId: aws.String("rt-12345"), + }, + }, + Routes: []*ec2.Route{ + { + GatewayId: aws.String("igw-12345"), + }, + }, + }, + }, + }, nil) + + m.DescribeNatGatewaysPagesWithContext(context.TODO(), + gomock.Eq(&ec2.DescribeNatGatewaysInput{ + Filter: []*ec2.Filter{ + { + Name: aws.String("vpc-id"), + Values: []*string{aws.String(subnetsVPCID)}, + }, + { + Name: aws.String("state"), + Values: []*string{aws.String("pending"), aws.String("available")}, + }, + }, + }), + gomock.Any()).Return(nil) + + m.CreateTagsWithContext(context.TODO(), gomock.Eq(&ec2.CreateTagsInput{ + Resources: aws.StringSlice([]string{"subnet-1"}), + Tags: []*ec2.Tag{ + { + Key: aws.String("kubernetes.io/cluster/test-cluster"), + Value: aws.String("shared"), + }, + { + Key: aws.String("kubernetes.io/role/elb"), + Value: aws.String("1"), + }, + }, + })). + Return(&ec2.CreateTagsOutput{}, fmt.Errorf("tagging failed")) + }, + tagUnmanagedNetworkResources: true, + }, + { + name: "Unmanaged VPC, 2 existing matching subnets, subnet tagging fails with subnet update, should succeed", + input: NewClusterScope().WithNetwork(&infrav1.NetworkSpec{ + VPC: infrav1.VPCSpec{ + ID: subnetsVPCID, + }, + Subnets: []infrav1.SubnetSpec{ + { + ID: "subnet-1", + }, + }, + }).WithTagUnmanagedNetworkResources(true), + optionalExpectSubnets: infrav1.Subnets{ + { + ID: "subnet-1", + ResourceID: "subnet-1", + AvailabilityZone: "us-east-1a", + CidrBlock: "10.0.10.0/24", + IsPublic: true, + Tags: infrav1.Tags{}, + }, + }, + expect: func(m *mocks.MockEC2APIMockRecorder) { + m.DescribeSubnetsWithContext(context.TODO(), gomock.Eq(&ec2.DescribeSubnetsInput{ + Filters: []*ec2.Filter{ + { + Name: aws.String("state"), + Values: []*string{aws.String("pending"), aws.String("available")}, + }, + { + Name: aws.String("vpc-id"), + Values: []*string{aws.String(subnetsVPCID)}, + }, + }, + })). + Return(&ec2.DescribeSubnetsOutput{ + Subnets: []*ec2.Subnet{ + { + VpcId: aws.String(subnetsVPCID), + SubnetId: aws.String("subnet-1"), + AvailabilityZone: aws.String("us-east-1a"), + CidrBlock: aws.String("10.0.10.0/24"), + MapPublicIpOnLaunch: aws.Bool(false), + }, + }, + }, nil) + + m.DescribeRouteTablesWithContext(context.TODO(), gomock.AssignableToTypeOf(&ec2.DescribeRouteTablesInput{})). + Return(&ec2.DescribeRouteTablesOutput{ + RouteTables: []*ec2.RouteTable{ + { + VpcId: aws.String(subnetsVPCID), + Associations: []*ec2.RouteTableAssociation{ + { + SubnetId: aws.String("subnet-1"), + RouteTableId: aws.String("rt-12345"), + }, + }, + Routes: []*ec2.Route{ + { + GatewayId: aws.String("igw-12345"), + }, + }, + }, + }, + }, nil) + + m.DescribeNatGatewaysPagesWithContext(context.TODO(), + gomock.Eq(&ec2.DescribeNatGatewaysInput{ + Filter: []*ec2.Filter{ + { + Name: aws.String("vpc-id"), + Values: []*string{aws.String(subnetsVPCID)}, + }, + { + Name: aws.String("state"), + Values: []*string{aws.String("pending"), aws.String("available")}, + }, + }, + }), + gomock.Any()).Return(nil) + + m.CreateTagsWithContext(context.TODO(), gomock.Eq(&ec2.CreateTagsInput{ + Resources: aws.StringSlice([]string{"subnet-1"}), + Tags: []*ec2.Tag{ + { + Key: aws.String("kubernetes.io/cluster/test-cluster"), + Value: aws.String("shared"), + }, + { + Key: aws.String("kubernetes.io/role/elb"), + Value: aws.String("1"), + }, + }, + })). + Return(&ec2.CreateTagsOutput{}, fmt.Errorf("tagging failed")) + }, + tagUnmanagedNetworkResources: true, + }, + { + name: "Unmanaged VPC, 2 existing matching subnets, subnet tagging fails second call, should succeed", input: NewClusterScope().WithNetwork(&infrav1.NetworkSpec{ VPC: infrav1.VPCSpec{ ID: subnetsVPCID, @@ -524,7 +714,7 @@ func TestReconcileSubnets(t *testing.T) { { VpcId: aws.String(subnetsVPCID), SubnetId: aws.String("subnet-2"), - AvailabilityZone: aws.String("us-east-1a"), + AvailabilityZone: aws.String("us-east-1b"), CidrBlock: aws.String("10.0.20.0/24"), MapPublicIpOnLaunch: aws.Bool(false), }, @@ -548,6 +738,20 @@ func TestReconcileSubnets(t *testing.T) { }, }, }, + { + VpcId: aws.String(subnetsVPCID), + Associations: []*ec2.RouteTableAssociation{ + { + SubnetId: aws.String("subnet-2"), + RouteTableId: aws.String("rt-22222"), + }, + }, + Routes: []*ec2.Route{ + { + GatewayId: aws.String("igw-12345"), + }, + }, + }, }, }, nil) @@ -566,7 +770,7 @@ func TestReconcileSubnets(t *testing.T) { }), gomock.Any()).Return(nil) - m.CreateTagsWithContext(context.TODO(), gomock.Eq(&ec2.CreateTagsInput{ + secondSubnetTag := m.CreateTagsWithContext(context.TODO(), gomock.Eq(&ec2.CreateTagsInput{ Resources: aws.StringSlice([]string{"subnet-1"}), Tags: []*ec2.Tag{ { @@ -579,7 +783,22 @@ func TestReconcileSubnets(t *testing.T) { }, }, })). - Return(&ec2.CreateTagsOutput{}, fmt.Errorf("tagging failed")) + Return(&ec2.CreateTagsOutput{}, nil) + + m.CreateTagsWithContext(context.TODO(), gomock.Eq(&ec2.CreateTagsInput{ + Resources: aws.StringSlice([]string{"subnet-2"}), + Tags: []*ec2.Tag{ + { + Key: aws.String("kubernetes.io/cluster/test-cluster"), + Value: aws.String("shared"), + }, + { + Key: aws.String("kubernetes.io/role/elb"), + Value: aws.String("1"), + }, + }, + })). + Return(&ec2.CreateTagsOutput{}, fmt.Errorf("tagging failed")).After(secondSubnetTag) }, tagUnmanagedNetworkResources: true, }, @@ -2341,6 +2560,16 @@ func TestReconcileSubnets(t *testing.T) { if !tc.errorExpected && err != nil { t.Fatalf("got an unexpected error: %v", err) } + if tc.errorExpected && err != nil && len(tc.errorMessageExpected) > 0 { + if err.Error() != tc.errorMessageExpected { + t.Fatalf("got an unexpected error message: %v", err) + } + } + if len(tc.optionalExpectSubnets) > 0 { + if !cmp.Equal(s.scope.Subnets(), tc.optionalExpectSubnets) { + t.Errorf("got unexpect Subnets():\n%v", cmp.Diff(s.scope.Subnets(), tc.optionalExpectSubnets)) + } + } }) } }