Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added Instance Maintenance Policy #73

Merged
merged 1 commit into from
Dec 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions modules/graphdb/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,11 @@ resource "aws_autoscaling_group" "graphdb_auto_scaling_group" {

target_group_arns = var.graphdb_target_group_arns

instance_maintenance_policy {
min_healthy_percentage = var.instance_maintenance_policy_min_healthy_percentage
max_healthy_percentage = var.instance_maintenance_policy_max_healthy_percentage
}

launch_template {
id = aws_launch_template.graphdb.id
version = aws_launch_template.graphdb.latest_version
Expand Down Expand Up @@ -119,3 +124,4 @@ resource "aws_autoscaling_group" "graphdb_auto_scaling_group" {
}
}
}

66 changes: 63 additions & 3 deletions modules/graphdb/templates/00_functions.sh
Original file line number Diff line number Diff line change
@@ -1,12 +1,72 @@
#!/usr/bin/env bash

# Generic helper functions

# Function to log messages with a timestamp
log_with_timestamp() {
  # Prefix the message ($1) with the current local time, formatted as
  # "YYYY-MM-DD HH:MM:SS", and write the resulting line to stdout.
  local stamp
  stamp=$(date '+%Y-%m-%d %H:%M:%S')
  echo "${stamp}: $1"
}

#######################################
# Waits until the Auto Scaling Group has settled before letting the caller
# continue: the number of InService instances must not exceed the group's
# desired capacity, and no instance of the group may still be terminating
# (ASG lifecycle state) or shutting down (EC2 instance state).
# Arguments:
#   $1 - name of the Auto Scaling Group to inspect
# Outputs:
#   Progress and error messages through log_with_timestamp
# Returns:
#   0 once the conditions are met. Calls `exit 1` (ending the calling script)
#   when the desired capacity cannot be read as an integer or when the
#   conditions are still unmet after MAX_RETRIES polls, RETRY_DELAY seconds
#   apart.
#######################################
wait_for_asg_nodes() {
  local ASG_NAME="$1"
  local RETRY_DELAY=10
  local MAX_RETRIES=65
  local RETRY_COUNT=0

  # Get the desired capacity of the ASG
  local NODE_COUNT
  NODE_COUNT=$(aws autoscaling describe-auto-scaling-groups \
    --auto-scaling-group-names "$ASG_NAME" \
    --query "AutoScalingGroups[0].DesiredCapacity" \
    --output text)

  # Anything that is not a plain non-negative integer (empty output, "None",
  # ...) means the capacity could not be determined, so give up right away.
  if ! [[ "$NODE_COUNT" =~ ^[0-9]+$ ]]; then
    log_with_timestamp "Error: Unable to retrieve valid Desired Capacity for ASG: $ASG_NAME. Received value: $NODE_COUNT."
    exit 1
  fi

  log_with_timestamp "Checking ASG node count for $ASG_NAME with desired node count: $NODE_COUNT"

  local IN_SERVICE_NODE_COUNT
  local TERMINATING_NODE_COUNT
  local SHUTTING_DOWN_NODE_COUNT

  while true; do
    # Instances the ASG currently reports as InService
    IN_SERVICE_NODE_COUNT=$(aws autoscaling describe-auto-scaling-groups \
      --auto-scaling-group-names "$ASG_NAME" \
      --query "AutoScalingGroups[0].Instances[?LifecycleState=='InService'] | length(@)" \
      --output text)

    # starts_with also catches the Terminating:Wait / Terminating:Proceed
    # states an instance goes through when termination lifecycle hooks exist.
    TERMINATING_NODE_COUNT=$(aws autoscaling describe-auto-scaling-groups \
      --auto-scaling-group-names "$ASG_NAME" \
      --query "AutoScalingGroups[0].Instances[?starts_with(LifecycleState, 'Terminating')] | length(@)" \
      --output text)

    # Only count shutting-down instances launched by this ASG (identified by
    # the aws:autoscaling:groupName tag); unrelated instances shutting down
    # elsewhere in the account must not block this node.
    SHUTTING_DOWN_NODE_COUNT=$(aws ec2 describe-instances \
      --filters "Name=instance-state-name,Values=shutting-down" \
                "Name=tag:aws:autoscaling:groupName,Values=$ASG_NAME" \
      --query "Reservations[].Instances[].InstanceId | length(@)" \
      --output text)

    log_with_timestamp "InService: $IN_SERVICE_NODE_COUNT, Terminating: $TERMINATING_NODE_COUNT, Shutting-down: $SHUTTING_DOWN_NODE_COUNT, Desired: $NODE_COUNT"

    # An empty InService count is accepted on purpose (kept from the original
    # logic) so a query that returns nothing does not stall the boot sequence.
    if [[ -z "$IN_SERVICE_NODE_COUNT" || "$IN_SERVICE_NODE_COUNT" -le "$NODE_COUNT" ]] \
      && [[ "$TERMINATING_NODE_COUNT" -eq 0 ]] \
      && [[ "$SHUTTING_DOWN_NODE_COUNT" -eq 0 ]]; then
      log_with_timestamp "Conditions met: InService <= $NODE_COUNT, no Terminating, no Shutting-down. Proceeding..."
      break
    fi

    if [[ "$RETRY_COUNT" -ge "$MAX_RETRIES" ]]; then
      log_with_timestamp "Error: Maximum retry attempts reached. Exiting..."
      exit 1
    fi

    log_with_timestamp "Conditions not met. Waiting... (InService: $IN_SERVICE_NODE_COUNT, Terminating: $TERMINATING_NODE_COUNT, Shutting-down: $SHUTTING_DOWN_NODE_COUNT)"
    sleep "$RETRY_DELAY"
    RETRY_COUNT=$((RETRY_COUNT + 1))
  done
}

# Function which waits for all DNS records to be created
wait_dns_records() {
local ZONE_ID="$1"
Expand Down
9 changes: 9 additions & 0 deletions modules/graphdb/templates/01_wait_node_count.sh.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,15 @@ echo "#####################################################"
IMDS_TOKEN=$(curl -Ss -H "X-aws-ec2-metadata-token-ttl-seconds: 6000" -XPUT 169.254.169.254/latest/api/token)
AZ=$(curl -Ss -H "X-aws-ec2-metadata-token: $IMDS_TOKEN" 169.254.169.254/latest/meta-data/placement/availability-zone)
ASG_NAME=${name}
GRAPHDB_NODE_COUNT=${node_count}

# Only run the wait_for_asg_nodes function when the GraphDB node count is more than 1.
# NOTE(review): presumably a single-node deployment has no peer nodes to wait for - confirm.
if [ "$GRAPHDB_NODE_COUNT" -gt 1 ]; then
  echo "GraphDB node count is greater than 1. Running wait_for_asg_nodes..."
  wait_for_asg_nodes "$ASG_NAME"
else
  echo "GraphDB node count is 1 or less. Skipping wait_for_asg_nodes."
fi

instance_refresh_status=$(aws autoscaling describe-instance-refreshes --auto-scaling-group-name "$ASG_NAME" --query 'InstanceRefreshes[?Status==`InProgress`]' --output json)

Expand Down
1 change: 1 addition & 0 deletions modules/graphdb/user_data.tf
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ data "cloudinit_config" "graphdb_user_data" {
content_type = "text/x-shellscript"
content = templatefile("${path.module}/templates/01_wait_node_count.sh.tpl", {
name : var.resource_name_prefix
node_count : var.graphdb_node_count
})
}

Expand Down
12 changes: 12 additions & 0 deletions modules/graphdb/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -394,3 +394,15 @@ variable "ebs_default_kms_key" {
description = "Define default KMS key"
type = string
}

variable "instance_maintenance_policy_min_healthy_percentage" {
  # Feeds min_healthy_percentage of the instance_maintenance_policy block on the GraphDB Auto Scaling Group.
  type        = number
  default     = 66
  description = "Define minimum healthy percentage for the Instance Maintenance Policy"
}

variable "instance_maintenance_policy_max_healthy_percentage" {
  # Feeds max_healthy_percentage of the instance_maintenance_policy block on the GraphDB Auto Scaling Group.
  type        = number
  default     = 100
  description = "Define maximum healthy percentage for the Instance Maintenance Policy"
}
Loading