From ed4ab115b31dea330fe21dc80e362fadeb91ba64 Mon Sep 17 00:00:00 2001 From: Taku Nakajima Date: Tue, 9 May 2023 07:51:54 +0000 Subject: [PATCH 1/2] Count STOPPING state ECS tasks --- drain_instance.py | 35 ++++++++++++++++---- lib/barcelona/network/autoscaling_builder.rb | 1 + 2 files changed, 30 insertions(+), 6 deletions(-) diff --git a/drain_instance.py b/drain_instance.py index 73589d89..162202b6 100644 --- a/drain_instance.py +++ b/drain_instance.py @@ -3,7 +3,9 @@ import os import time import random + from botocore.config import Config +from botocore.exceptions import ClientError session = boto3.session.Session() config = Config( @@ -27,6 +29,25 @@ def ciFor(ec2Id): return None, None +def taskExists(clusterName, ciId): + running_tasks = ecs.list_tasks(cluster=clusterName, containerInstance=ciId, desiredStatus='RUNNING')['taskArns'] + if (len(running_tasks) > 0): + return True + + # Assume there are not more than 100 tasks in a container + stopping_tasks = ecs.list_tasks(cluster=clusterName, containerInstance=ciId, desiredStatus='STOPPED')['taskArns'] + for task_arn in stopping_tasks: + response = ecs.describe_tasks( + cluster=clusterName, + tasks=[task_arn] + ) + status = response['tasks'][0]['lastStatus'] + if status != 'STOPPED': + return True + + print('No tasks, will proceed terminating the instance') + return False + def lambda_handler(event, context): msg = json.loads(event['Records'][0]['Sns']['Message']) ec2Id = msg['EC2InstanceId'] @@ -48,13 +69,15 @@ def lambda_handler(event, context): if status != 'DRAINING': ecs.update_container_instances_state(cluster=clusterName,containerInstances=[ciId],status='DRAINING') - tasks = ecs.list_tasks(cluster=clusterName, containerInstance=ciId)['taskArns'] - if len(tasks) > 0: + if taskExists(clusterName, ciId): time.sleep(5) session.client('sns', config=config).publish(TopicArn=topicArn, Message=json.dumps(msg), Subject='Invoking lambda again') else: session.client('autoscaling', config=config).complete_lifecycle_action(LifecycleHookName=lifecycleHookName, AutoScalingGroupName=asgName, LifecycleActionResult='CONTINUE', InstanceId=ec2Id) - except ecs.exceptions.ThrottlingException: - sec = random.uniform(3, 5) - time.sleep(sec) - session.client('sns').publish(TopicArn=topicArn, Message=json.dumps(msg), Subject='Invoking lambda again') + except ClientError as exception_obj: + if exception_obj.response['Error']['Code'] == 'ThrottlingException': + sec = random.uniform(3, 5) + time.sleep(sec) + session.client('sns').publish(TopicArn=topicArn, Message=json.dumps(msg), Subject='Invoking lambda again') + else: + raise diff --git a/lib/barcelona/network/autoscaling_builder.rb b/lib/barcelona/network/autoscaling_builder.rb index de9d6fe3..affc261b 100644 --- a/lib/barcelona/network/autoscaling_builder.rb +++ b/lib/barcelona/network/autoscaling_builder.rb @@ -96,6 +96,7 @@ def build_resources "ecs:DescribeContainerInstances", "ecs:UpdateContainerInstancesState", "ecs:ListTasks", + "ecs:DescribeTasks", "logs:CreateLogGroup", "logs:CreateLogStream", "logs:PutLogEvents", From 433314d01b008226c59bf2d5c2c41bd21234b050 Mon Sep 17 00:00:00 2001 From: Taku Nakajima Date: Tue, 9 May 2023 08:03:27 +0000 Subject: [PATCH 2/2] Fix a spec --- spec/lib/barcelona/network/network_stack_spec.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/spec/lib/barcelona/network/network_stack_spec.rb b/spec/lib/barcelona/network/network_stack_spec.rb index bd387ad4..29728cad 100644 --- a/spec/lib/barcelona/network/network_stack_spec.rb +++ b/spec/lib/barcelona/network/network_stack_spec.rb @@ -195,6 +195,7 @@ "ecs:DescribeContainerInstances", "ecs:UpdateContainerInstancesState", "ecs:ListTasks", + "ecs:DescribeTasks", "logs:CreateLogGroup", "logs:CreateLogStream", "logs:PutLogEvents",