From c1fe93da28673e050bb62333def56e7a435fa595 Mon Sep 17 00:00:00 2001
From: miguelhar <98769216+miguelhar@users.noreply.github.com>
Date: Fri, 25 Aug 2023 20:54:19 -0400
Subject: [PATCH] PLAT-7108: import EIP using eipalloc (#143)

* PLAT-7108: Import EIP using alloc id, bumps tf-aws-eks, refactor initial stack delete
---
 .github/workflows/test.yml                | 20 +++++-
 cdk/domino_cdk/provisioners/efs.py        |  4 +-
 .../provisioners/eks/eks_cluster.py       |  2 +-
 cdk/domino_cdk/provisioners/vpc.py        |  4 +-
 convert/lib/convert.py                    | 65 ++++++++++++++++---
 convert/lib/nuke.py                       |  2 +-
 6 files changed, 79 insertions(+), 18 deletions(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index e8d30bf9..fec9dd6a 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -73,13 +73,20 @@ jobs:
           REGISTRY_PASSWORD: ${{ secrets.REGISTRY_PASSWORD }}
           ACM_CERT_ARN: ${{ secrets.DELTA_ACM_CERT_ARN }}
           BASE_DOMAIN: ${{ secrets.DELTA_BASE_DOMAIN }}
+          JSII_SILENCE_WARNING_DEPRECATED_NODE_VERSION: 1
         run: |
           export NAME=cdk-${GITHUB_SHA:0:6}-$(date +%s)
           echo "NAME=$NAME" >> $GITHUB_ENV
           ./util.py generate_config_template --name $NAME --aws-region=$AWS_REGION --aws-account-id=$AWS_ACCOUNT_ID --dev --platform-nodegroups 2 --registry-username $REGISTRY_USERNAME --registry-password $REGISTRY_PASSWORD --hostname $NAME.$BASE_DOMAIN --acm-cert-arn $ACM_CERT_ARN --disable-flow-logs > config.yaml
           ./util.py load_config -f ./config.yaml
       - name: Test default config (single and nested stacks)
+        env:
+          JSII_SILENCE_WARNING_DEPRECATED_NODE_VERSION: 1
         run: |
+          echo 'CDK acknowledge: AWS CDK v1 End-of-Support June 1, 2023'
+          cdk acknowledge 19836
+          echo 'CDK acknowledge: (eks) eks overly permissive trust policies'
+          cdk acknowledge 25674
           cdk synth --context singlestack=true -q
           cdk synth -q
       - name: Upload distribution artifacts
@@ -95,11 +102,18 @@
             urlfile=$(python -c 'import sys, urllib.parse; print(urllib.parse.quote(sys.stdin.read().strip()))' <<< "$filename")
             echo "Artifact url: https://domino-artifacts.s3.amazonaws.com/cdk/$($DATEDIR)/$urlfile"
           done
+      - name: Bootstrap CDK
+        env:
+          AWS_ACCOUNT_ID: ${{ secrets.DELTA_ACCOUNT_ID }}
+          AWS_REGION: ${{ env.AWS_REGION }}
+          JSII_SILENCE_WARNING_DEPRECATED_NODE_VERSION: 1
+        run: cdk bootstrap "aws://$AWS_ACCOUNT_ID/$AWS_REGION"
       - name: Deploy CDK
         if: contains(github.event.pull_request.labels.*.name, 'deploy-test') || github.ref == 'refs/heads/master'
         env:
           REGISTRY_USERNAME: ${{ secrets.REGISTRY_USERNAME }}
           REGISTRY_PASSWORD: ${{ secrets.REGISTRY_PASSWORD }}
+          JSII_SILENCE_WARNING_DEPRECATED_NODE_VERSION: 1
         run: |
           docker login -u $REGISTRY_USERNAME -p $REGISTRY_PASSWORD quay.io
           cdk deploy --require-approval never --outputs-file outputs.json
@@ -187,15 +201,17 @@
           aws-region: ${{ env.AWS_REGION }}
       - name: Delete stack w/CDK
         if: always() && (contains(github.event.pull_request.labels.*.name, 'deploy-test') || github.ref == 'refs/heads/master')
+        env:
+          JSII_SILENCE_WARNING_DEPRECATED_NODE_VERSION: 1
         working-directory: ./cdk
         run: |
           cdk destroy --force
-      - name: Destroy Infrastructure
+      - name: Destroy Infrastructure tf
         if: always() && (contains(github.event.pull_request.labels.*.name, 'deploy-test') || github.ref == 'refs/heads/master')
         working-directory: ./convert/terraform
         run: |
           terraform destroy -auto-approve
-      - name: Destroy Infrastructure
+      - name: Destroy Infrastructure cf
         if: always() && (contains(github.event.pull_request.labels.*.name, 'deploy-test') || github.ref == 'refs/heads/master')
         working-directory: ./convert/cloudformation-only
         run: |
diff --git a/cdk/domino_cdk/provisioners/efs.py b/cdk/domino_cdk/provisioners/efs.py
index 3d78ff72..d3fd1bbf 100644
--- a/cdk/domino_cdk/provisioners/efs.py
+++ b/cdk/domino_cdk/provisioners/efs.py
@@ -45,7 +45,7 @@ def provision_efs(self, stack_name: str, cfg: config.EFS, vpc: ec2.Vpc, security
             security_group=security_group,
             performance_mode=efs.PerformanceMode.GENERAL_PURPOSE,
             throughput_mode=efs.ThroughputMode.BURSTING,
-            vpc_subnets=ec2.SubnetSelection(subnet_type=ec2.SubnetType.PRIVATE),
+            vpc_subnets=ec2.SubnetSelection(subnet_type=ec2.SubnetType.PRIVATE_WITH_NAT),
         )
 
         self.efs_access_point = self.efs.add_access_point(
@@ -69,7 +69,7 @@ def provision_backup_vault(self, stack_name: str, efs_backup: config.EFS.Backup)
         vault = backup.BackupVault(
             self.scope,
             "efs_backup",
-            backup_vault_name=f'{stack_name}-efs',
+            backup_vault_name=f"{stack_name}-efs",
             removal_policy=cdk.RemovalPolicy[efs_backup.removal_policy or cdk.RemovalPolicy.RETAIN.value],
         )
 
diff --git a/cdk/domino_cdk/provisioners/eks/eks_cluster.py b/cdk/domino_cdk/provisioners/eks/eks_cluster.py
index d6387ca9..ca2ff17b 100644
--- a/cdk/domino_cdk/provisioners/eks/eks_cluster.py
+++ b/cdk/domino_cdk/provisioners/eks/eks_cluster.py
@@ -73,7 +73,7 @@ def provision(
             cluster_name=stack_name,
             vpc=vpc,
             endpoint_access=eks.EndpointAccess.PRIVATE if private_api else None,
-            vpc_subnets=[ec2.SubnetSelection(subnet_type=ec2.SubnetType.PRIVATE)],
+            vpc_subnets=[ec2.SubnetSelection(subnet_type=ec2.SubnetType.PRIVATE_WITH_NAT)],
             version=eks_version,
             default_capacity=0,
             security_group=eks_sg,
diff --git a/cdk/domino_cdk/provisioners/vpc.py b/cdk/domino_cdk/provisioners/vpc.py
index 84a2275d..050c45a8 100644
--- a/cdk/domino_cdk/provisioners/vpc.py
+++ b/cdk/domino_cdk/provisioners/vpc.py
@@ -51,7 +51,7 @@ def provision_vpc(self, stack_name: str, vpc: config.VPC, monitoring_bucket: Opt
                     cidr_mask=vpc.public_cidr_mask,  # can't use token ids
                 ),
                 ec2.SubnetConfiguration(
-                    subnet_type=ec2.SubnetType.PRIVATE,
+                    subnet_type=ec2.SubnetType.PRIVATE_WITH_NAT,
                     name=self.private_subnet_name,
                     cidr_mask=vpc.private_cidr_mask,  # can't use token ids
                 ),
@@ -157,7 +157,7 @@ def provision_vpc(self, stack_name: str, vpc: config.VPC, monitoring_bucket: Opt
                 vpc=self.vpc,
                 security_groups=[endpoint_sg],
                 service=ec2.InterfaceVpcEndpointAwsService(endpoint, port=443),
-                subnets=ec2.SubnetSelection(subnet_type=ec2.SubnetType.PRIVATE),
+                subnets=ec2.SubnetSelection(subnet_type=ec2.SubnetType.PRIVATE_WITH_NAT),
             )
 
         # TODO until https://github.com/aws/aws-cdk/issues/14194
diff --git a/convert/lib/convert.py b/convert/lib/convert.py
index ee4bad7d..e972f0a7 100755
--- a/convert/lib/convert.py
+++ b/convert/lib/convert.py
@@ -41,6 +41,11 @@ def get_stacks(self, stack: str = None, full: bool = False):
         for r in resources:
             logical_id = r["LogicalResourceId"]
             physical_id = r["PhysicalResourceId"]
+            if r["ResourceType"] == cdk_ids.eip.value:
+                # Extract the EIP AllocationId from EC2, since it's not available from CloudFormation
+                response = self.ec2.describe_addresses(PublicIps=[physical_id])
+                if eip_allocation_id := response["Addresses"][0]["AllocationId"]:
+                    physical_id = eip_allocation_id
             if r["ResourceType"] == cdk_ids.cloudformation_stack.value:
                 for mapped_logical_id, name in stack_map.items():
                     if logical_id.startswith(mapped_logical_id):
@@ -65,6 +70,7 @@ def setup(self, full: bool = False, no_stacks: bool = False):
         self.cf_stack_key = re.sub(r"\W", "", self.stack_name)
 
         self.cf = boto3.client("cloudformation", self.region)
+        self.ec2 = boto3.client("ec2", self.region)
 
         if not no_stacks:
             self.sanity()
@@ -586,6 +592,45 @@ def get_nukes(stack_name, stack_resources):
             nuke_queue, self.args.remove_security_group_references
         )
 
+    def _print_stacks_status(self, stacks_names: list):
+        for stack_name in stacks_names:
+            stack_status = self.cf.describe_stacks(StackName=stack_name)["Stacks"][0]["StackStatus"]
+            print("Stack:", stack_name, "Status:", stack_status)
+
+    def _delete_stack_wait_for_fail_state(self, stack_name: str, role_arn: str):
+        stack_details = self.cf.describe_stacks(StackName=stack_name)
+        stack_status = stack_details["Stacks"][0]["StackStatus"]
+
+        if stack_status != "DELETE_FAILED":
+            print(f"Deleting Stack: {stack_name} Status: {stack_status}")
+            self.cf.delete_stack(StackName=stack_name, RoleARN=role_arn)
+
+        while (stack_status := self.cf.describe_stacks(StackName=stack_name)["Stacks"][0]["StackStatus"]) not in [
+            "DELETE_FAILED",
+            "DELETE_COMPLETE",
+        ]:
+            print(
+                f"Waiting for stack {stack_name} to be in `DELETE_FAILED` or `DELETE_COMPLETE`... Currently: {stack_status}"
+            )
+            sleep(5)
+
+        nested_stacks = self._get_nested_stacks(stack_name)
+
+        self._print_stacks_status(nested_stacks)
+
+        for nested_stack in nested_stacks:
+            self._delete_stack_wait_for_fail_state(nested_stack, role_arn)
+
+    def _get_nested_stacks(self, stack_name: str) -> list:
+        stack_resources = self.cf.list_stack_resources(StackName=stack_name)
+
+        nested_stacks = []
+        for resource in stack_resources["StackResourceSummaries"]:
+            if resource["ResourceType"] == cdk_ids.cloudformation_stack.value:
+                nested_stacks.append(resource.get("PhysicalResourceId"))
+
+        return nested_stacks
+
     def delete_stack(self):
         self.setup()
 
@@ -599,15 +644,9 @@ def delete_stack(self):
             exit(1)
 
         if self.args.delete:
-            self.cf.delete_stack(StackName=self.stack_name, RoleARN=cf_only_role["value"])
-
-            while True:
-                desc_output = self.cf.describe_stacks(StackName=self.stack_name)
-                status = desc_output["Stacks"][0]["StackStatus"]
-                if status == "DELETE_IN_PROGRESS":
-                    sleep(5)
-                elif status == "DELETE_FAILED":
-                    break
+            print(f"Forcing {self.stack_name} into `DELETE_FAILED`")
+            self._print_stacks_status([self.stack_name])
+            self._delete_stack_wait_for_fail_state(self.stack_name, cf_only_role["value"])
 
         root_resources = self.cf.describe_stack_resources(StackName=self.stack_name)["StackResources"]
 
@@ -617,7 +656,7 @@ def delete_stack(self):
             ]
         }
 
-        def get_stack_resources(s) -> dict:
+        def get_stack_resources(s):
             child_id = s["PhysicalResourceId"]
             child_name = re.search(r":stack/(.*)/", child_id).group(1)
             try:
@@ -642,6 +681,12 @@ def get_stack_resources(s):
 
         for i, (stack, resources) in enumerate(stacks.items()):
             if self.args.delete:
+                print("Deleting stack:", stack)
+                if (
+                    stack_status := self.cf.describe_stacks(StackName=stack)["Stacks"][0]["StackStatus"]
+                ) and stack_status != "DELETE_FAILED":
+                    raise Exception(f"Expected stack status to be `DELETE_FAILED` but got: `{stack_status}`.")
+
                 self.cf.delete_stack(StackName=stack, RoleARN=cf_only_role["value"], RetainResources=resources)
             else:
                 if i == 0:
diff --git a/convert/lib/nuke.py b/convert/lib/nuke.py
index a4115dd3..cb5a09c7 100644
--- a/convert/lib/nuke.py
+++ b/convert/lib/nuke.py
@@ -104,7 +104,7 @@ def asg(self, group_names: list[str]):
             tries=60,
         )
         def delete_asg(group: str):
-            print(self.autoscaling.delete_auto_scaling_group(AutoScalingGroupName=group))
+            print(self.autoscaling.delete_auto_scaling_group(AutoScalingGroupName=group, ForceDelete=True))
 
         for group in existing_groups:
             delete_asg(group)
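
For reference, a minimal standalone sketch of the allocation-id lookup that get_stacks now performs: CloudFormation reports only an EIP's public IP as its PhysicalResourceId, while the eipalloc-style AllocationId is what the import side expects. The helper name, region, and example values below are illustrative assumptions, not code from the patch:

import boto3

def lookup_eip_allocation_id(public_ip: str, region: str) -> str:
    # CloudFormation exposes the EIP's public IP; EC2 maps it to the
    # eipalloc-... AllocationId used for the import.
    ec2 = boto3.client("ec2", region_name=region)
    addresses = ec2.describe_addresses(PublicIps=[public_ip])["Addresses"]
    return addresses[0]["AllocationId"]

# Hypothetical usage:
#   lookup_eip_allocation_id("203.0.113.10", "us-west-2")  # -> "eipalloc-0123456789abcdef0"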
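
Similarly, a condensed sketch of the delete-and-wait flow that the new _delete_stack_wait_for_fail_state helper applies to the root stack and, recursively, to its nested stacks. The standalone function below is an illustration under assumed inputs (a boto3 CloudFormation client and a scoped role ARN), not the patch's exact code:

import boto3
from time import sleep

def wait_for_delete_failed(cf, stack_name: str, role_arn: str) -> str:
    # Start the delete with the restricted role, then poll until the stack
    # settles in DELETE_FAILED (resources left behind) or DELETE_COMPLETE.
    status = cf.describe_stacks(StackName=stack_name)["Stacks"][0]["StackStatus"]
    if status != "DELETE_FAILED":
        cf.delete_stack(StackName=stack_name, RoleARN=role_arn)
    while status not in ("DELETE_FAILED", "DELETE_COMPLETE"):
        sleep(5)
        status = cf.describe_stacks(StackName=stack_name)["Stacks"][0]["StackStatus"]
    return status

# Hypothetical usage:
#   cf = boto3.client("cloudformation", "us-west-2")
#   wait_for_delete_failed(cf, "my-stack", "arn:aws:iam::123456789012:role/cf-only")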