Skip to content

Commit

Permalink
Updated the Backup script to work with single node
Browse files Browse the repository at this point in the history
Removed hardcoded values for DNS zone in userdata scripts
Updated the NSGs ports based on graphdb_node_count
Updated user_data.tf
Changed the name of the LB target group to avoid conflicts when scaling from 1 to 3 AZs
Updated the monitoring to not deploy cluster alarms when a single node is deployed
Updated the availability_tests
Updated how the VPC azs are calculated based on the graphdb_node_count
Added calculations for the subnets based on the graphdb_node_count
Removed route53_availability_content_match from modules/monitoring
Moved route53_availability_http_string_type to root level.
Changed the availability test to support single node deployment.
Added dynamic change of the availability tests http string type based on tls_enabled
Updated the README.md
Updated CHANGELOG.md
  • Loading branch information
viktor-ribchev committed Jul 2, 2024
1 parent c04e033 commit be3b0f1
Show file tree
Hide file tree
Showing 17 changed files with 400 additions and 68 deletions.
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
# GraphDB AWS Terraform Module Changelog

## 1.2.0
* Added support for single node deployment.
* Added new userdata script `10_start_graphdb_services.sh.tpl` for single node setup.
* Made cluster-related userdata scripts executable only when `graphdb_node_count` is greater than 1.
* Removed hardcoded values from the userdata scripts.
* Changed the availability tests' `http_string_type` to be calculated based on whether TLS is enabled.


## 1.0.1

* Updated GraphDB version to [10.6.4](https://graphdb.ontotext.com/documentation/10.6/release-notes.html#graphdb-10-6-4)
Expand Down
9 changes: 9 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -333,6 +333,15 @@ vpc_public_subnet_ids = ["public-subnet-1","public-subnet-2","public-subnet-3"]
vpc_private_subnet_ids = ["private-subnet-1","private-subnet-2","private-subnet-3"]
```

## Single Node Deployment

This Terraform module can also deploy a single instance of GraphDB.
To deploy a single instance, set `graphdb_node_count` to 1; everything else is configured automatically.

**Important:** While scaling from a single node deployment to a cluster (e.g., from 1 node to 3 nodes) is possible,
it is not recommended. Synchronizing the repository across all nodes can be time-consuming,
potentially causing scripts to time out.

## Updating configurations on an active deployment

### Updating Configurations
Expand Down
30 changes: 25 additions & 5 deletions main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,17 @@ data "aws_region" "current" {}

data "aws_caller_identity" "current" {}


locals {
# Reduce to one subnet if node_count is 1
effective_private_subnet_cidrs = var.graphdb_node_count == 1 ? [var.vpc_private_subnet_cidrs[0]] : var.vpc_private_subnet_cidrs
effective_public_subnet_cidrs = var.graphdb_node_count == 1 ? [var.vpc_public_subnet_cidrs[0]] : var.vpc_public_subnet_cidrs
# Determine the appropriate subnets based on node_count
lb_subnets = var.graphdb_node_count == 1 ? (var.vpc_id == "" ? (var.lb_internal ? [module.vpc[0].private_subnet_ids[0]] : [module.vpc[0].public_subnet_ids[0]]) : (var.lb_internal ? [var.vpc_private_subnet_ids[0]] : [var.vpc_public_subnet_ids[0]])) : (var.vpc_id == "" ? (var.lb_internal ? module.vpc[0].private_subnet_ids : module.vpc[0].public_subnet_ids) : (var.lb_internal ? var.vpc_private_subnet_ids : var.vpc_public_subnet_ids))
# Check if node_count is 1 and select only one subnet if true
graphdb_subnets = var.graphdb_node_count == 1 ? [(var.vpc_id != "" ? var.vpc_private_subnet_ids : module.vpc[0].private_subnet_ids)[0]] : (var.vpc_id != "" ? var.vpc_private_subnet_ids : module.vpc[0].private_subnet_ids)
}

module "vpc" {
source = "./modules/vpc"

Expand All @@ -10,8 +21,8 @@ module "vpc" {
resource_name_prefix = var.resource_name_prefix
vpc_dns_hostnames = var.vpc_dns_hostnames
vpc_dns_support = var.vpc_dns_support
vpc_private_subnet_cidrs = var.vpc_private_subnet_cidrs
vpc_public_subnet_cidrs = var.vpc_public_subnet_cidrs
vpc_private_subnet_cidrs = local.effective_private_subnet_cidrs
vpc_public_subnet_cidrs = local.effective_public_subnet_cidrs
vpc_cidr_block = var.vpc_cidr_block
single_nat_gateway = var.single_nat_gateway
enable_nat_gateway = var.enable_nat_gateway
Expand All @@ -21,6 +32,7 @@ module "vpc" {
vpc_endpoint_service_accept_connection_requests = var.vpc_endpoint_service_accept_connection_requests
vpc_enable_flow_logs = var.vpc_enable_flow_logs
vpc_flow_log_bucket_arn = var.vpc_enable_flow_logs && var.deploy_logging_module ? module.logging[0].graphdb_logging_bucket_arn : null
graphdb_node_count = var.graphdb_node_count
}

module "backup" {
Expand Down Expand Up @@ -93,7 +105,7 @@ module "load_balancer" {

resource_name_prefix = var.resource_name_prefix
vpc_id = var.vpc_id != "" ? var.vpc_id : module.vpc[0].vpc_id
lb_subnets = var.vpc_id == "" ? (var.lb_internal ? module.vpc[0].private_subnet_ids : module.vpc[0].public_subnet_ids) : (var.lb_internal ? var.vpc_private_subnet_ids : var.vpc_public_subnet_ids)
lb_subnets = local.lb_subnets
lb_internal = var.lb_internal
lb_deregistration_delay = var.lb_deregistration_delay
lb_health_check_path = var.lb_health_check_path
Expand All @@ -103,6 +115,7 @@ module "load_balancer" {
lb_tls_policy = var.lb_tls_policy
lb_access_logs_bucket_name = var.lb_enable_access_logs && var.deploy_logging_module ? module.logging[0].graphdb_logging_bucket_name : null
lb_enable_access_logs = var.lb_enable_access_logs
graphdb_node_count = var.graphdb_node_count
}

locals {
Expand All @@ -111,6 +124,11 @@ locals {
)
}

locals {
lb_tls_enabled = var.lb_tls_certificate_arn != null ? true : false
calculated_http_string_type = local.lb_tls_enabled == true ? "HTTPS" : "HTTP"
}

module "monitoring" {
source = "./modules/monitoring"
providers = {
Expand All @@ -129,6 +147,8 @@ module "monitoring" {
cloudwatch_log_group_retention_in_days = var.monitoring_log_group_retention_in_days
route53_availability_request_url = module.load_balancer.lb_dns_name
route53_availability_measure_latency = var.monitoring_route53_measure_latency
graphdb_node_count = var.graphdb_node_count
route53_availability_http_string_type = local.calculated_http_string_type
}

module "graphdb" {
Expand All @@ -142,13 +162,13 @@ module "graphdb" {

allowed_inbound_cidrs = var.allowed_inbound_cidrs_lb
allowed_inbound_cidrs_ssh = var.allowed_inbound_cidrs_ssh
graphdb_subnets = var.vpc_id != "" ? var.vpc_private_subnet_ids : module.vpc[0].private_subnet_ids
graphdb_subnets = local.graphdb_subnets
graphdb_target_group_arns = local.graphdb_target_group_arns
vpc_id = var.vpc_id != "" ? var.vpc_id : module.vpc[0].vpc_id

# Network Load Balancer
lb_enable_private_access = var.lb_internal ? var.lb_enable_private_access : false
lb_subnets = var.vpc_id == "" ? (var.lb_internal ? module.vpc[0].private_subnet_ids : module.vpc[0].public_subnet_ids) : (var.lb_internal ? var.vpc_private_subnet_ids : var.vpc_public_subnet_ids)
lb_subnets = local.lb_subnets
graphdb_lb_dns_name = module.load_balancer.lb_dns_name

# GraphDB Configurations
Expand Down
13 changes: 8 additions & 5 deletions modules/graphdb/nsg.tf
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,15 @@ resource "aws_security_group_rule" "graphdb_internal_http" {
description = "Allow GraphDB proxies and nodes to communicate (HTTP)."
security_group_id = aws_security_group.graphdb_security_group.id
type = "ingress"
from_port = 7200
from_port = var.graphdb_node_count == 1 ? 7201 : 7200
to_port = 7201
protocol = "tcp"
cidr_blocks = local.subnet_cidr_blocks
}

resource "aws_security_group_rule" "graphdb_internal_raft" {
count = var.graphdb_node_count != 1 ? 1 : 0

description = "Allow GraphDB proxies and nodes to communicate (Raft)."
security_group_id = aws_security_group.graphdb_security_group.id
type = "ingress"
Expand All @@ -25,7 +27,8 @@ resource "aws_security_group_rule" "graphdb_internal_raft" {
}

resource "aws_security_group_rule" "graphdb_ssh_inbound" {
count = var.allowed_inbound_cidrs_ssh != null ? 1 : 0
count = var.allowed_inbound_cidrs_ssh != null ? 1 : 0

description = "Allow specified CIDRs SSH access to the GraphDB instances."
security_group_id = aws_security_group.graphdb_security_group.id
type = "ingress"
Expand All @@ -51,8 +54,8 @@ resource "aws_security_group_rule" "graphdb_network_lb_ingress" {
description       = "CIDRs allowed to access GraphDB."
security_group_id = aws_security_group.graphdb_security_group.id
type = "ingress"
from_port = 7200
to_port = 7200
from_port = var.graphdb_node_count == 1 ? 7201 : 7200
to_port = var.graphdb_node_count == 1 ? 7201 : 7200
protocol = "tcp"
cidr_blocks = var.allowed_inbound_cidrs
}
Expand All @@ -64,7 +67,7 @@ resource "aws_security_group_rule" "graphdb_lb_healthchecks" {
description = "Allow the load balancer to healthcheck the GraphDB nodes and access the proxies."
security_group_id = aws_security_group.graphdb_security_group.id
type = "ingress"
from_port = 7200
from_port = var.graphdb_node_count == 1 ? 7201 : 7200
to_port = 7201
protocol = "tcp"
cidr_blocks = local.lb_subnet_cidr_blocks
Expand Down
32 changes: 22 additions & 10 deletions modules/graphdb/templates/05_gdb_backup_conf.sh.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -20,18 +20,16 @@ if [ ${deploy_backup} == "true" ]; then
cat <<-EOF >/usr/bin/graphdb_backup
#!/bin/bash
set -euxo pipefail
set -euo pipefail
GRAPHDB_ADMIN_PASSWORD="\$(aws --cli-connect-timeout 300 ssm get-parameter --region ${region} --name "/${name}/graphdb/admin_password" --with-decryption | jq -r .Parameter.Value | base64 -d)"
NODE_STATE="\$(curl --silent --fail --user "admin:\$GRAPHDB_ADMIN_PASSWORD" localhost:7201/rest/cluster/node/status | jq -r .nodeState)"
if [ "\$NODE_STATE" != "LEADER" ]; then
echo "current node is not a leader, but \$NODE_STATE"
exit 0
fi
NODE_STATE="\$(curl --silent -u "admin:\$GRAPHDB_ADMIN_PASSWORD" http://localhost:7201/rest/cluster/node/status | jq -r .nodeState)"
function trigger_backup {
local backup_name="\$(date +'%Y-%m-%d_%H-%M-%S').tar"
current_time=$(date +"%T %Y-%m-%d")
start_time=$(date +%s)
echo "Creating backup $backup_name at $start_time"
curl \
-vvv --fail \
Expand Down Expand Up @@ -59,9 +57,23 @@ function rotate_backups {
done
}
if ! trigger_backup; then
echo "failed to create backup"
exit 1
# Checks if GraphDB is running in cluster
IS_CLUSTER=\$(
curl -s -o /dev/null \
-u "admin:\$GRAPHDB_ADMIN_PASSWORD" \
-w "%%{http_code}" \
http://localhost:7200/rest/monitor/cluster
)
if [ "\$IS_CLUSTER" == 200 ]; then
# Checks if the current GraphDB instance is Leader, otherwise exits.
if [ "\$NODE_STATE" != "LEADER" ]; then
echo "current node is not a leader, but \$NODE_STATE"
exit 0
fi
(trigger_backup && echo "") | tee -a /var/opt/graphdb/node/graphdb_backup.log
elif [ "\$IS_CLUSTER" == 503 ]; then
(trigger_backup && echo "") | tee -a /var/opt/graphdb/node/graphdb_backup.log
fi
rotate_backups
Expand Down
13 changes: 7 additions & 6 deletions modules/graphdb/templates/08_cluster_setup.sh.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ fi

# Function which waits for all DNS records to be created
wait_dns_records() {
local all_dns_records=($(aws route53 list-resource-record-sets --hosted-zone-id "${zone_id}" --query "ResourceRecordSets[?contains(Name, '.graphdb.cluster') == \`true\`].Name" --output text))
local all_dns_records=($(aws route53 list-resource-record-sets --hosted-zone-id "${zone_id}" --query "ResourceRecordSets[?contains(Name, '.${route53_zone_dns_name}') == \`true\`].Name" --output text))
local all_dns_records_count="$${#all_dns_records[@]}"

if [ "$${all_dns_records_count}" -ne $${NODE_COUNT} ]; then
Expand Down Expand Up @@ -77,7 +77,7 @@ check_gdb() {
wait_dns_records

# Existing records are returned with . at the end
EXISTING_DNS_RECORDS=$(aws route53 list-resource-record-sets --hosted-zone-id "${zone_id}" --query "ResourceRecordSets[?contains(Name, '.graphdb.cluster') == \`true\`].Name")
EXISTING_DNS_RECORDS=$(aws route53 list-resource-record-sets --hosted-zone-id "${zone_id}" --query "ResourceRecordSets[?contains(Name, '.${route53_zone_dns_name}') == \`true\`].Name")
# Convert the output into an array
readarray -t EXISTING_DNS_RECORDS_ARRAY <<<$(echo "$EXISTING_DNS_RECORDS" | jq -r '.[] | rtrimstr(".")')
# Builds grpc addresses for all nodes registered in Route53
Expand Down Expand Up @@ -156,7 +156,8 @@ create_cluster() {
elif [ "$is_cluster" == 503 ]; then
# Create the GraphDB cluster configuration if it does not exist.
local cluster_create=$(
curl -X POST -s http://localhost:7201/rest/cluster/config \
# TODO update to use node-1
curl -X POST -s "http://node-1.${route53_zone_dns_name}:7201/rest/cluster/config" \
-o "/dev/null" \
-w "%%{http_code}" \
-H 'Content-type: application/json' \
Expand Down Expand Up @@ -188,7 +189,7 @@ enable_security() {
# Set the admin password
local set_password=$(
curl --location -s -w "%%{http_code}" \
--request PATCH 'http://localhost:7200/rest/security/users/admin' \
--request PATCH 'http://localhost:7201/rest/security/users/admin' \
--header 'Content-Type: application/json' \
--data "{ \"password\": \"$${GRAPHDB_ADMIN_PASSWORD}\" }"
)
Expand All @@ -204,7 +205,7 @@ enable_security() {
curl -X POST -s -w "%%{http_code}" \
--header 'Content-Type: application/json' \
--header 'Accept: */*' \
-d 'true' 'http://localhost:7200/rest/security'
-d 'true' 'http://localhost:7201/rest/security'
)

if [[ "$enable_security" == 200 ]]; then
Expand All @@ -221,7 +222,7 @@ check_security_status() {
curl -s -X GET \
--header 'Accept: application/json' \
-u "admin:$${GRAPHDB_ADMIN_PASSWORD}" \
'http://localhost:7200/rest/security'
'http://localhost:7201/rest/security'
)

# Check if GDB security is enabled
Expand Down
4 changes: 2 additions & 2 deletions modules/graphdb/templates/09_node_join.sh.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ CURRENT_NODE_NAME=$(hostname)
LEADER_NODE=""
RAFT_DIR="/var/opt/graphdb/node/data/raft"

# Get existing DNS records from Route53 which contain .graphdb.cluster in their name
EXISTING_RECORDS=$(aws route53 list-resource-record-sets --hosted-zone-id "${zone_id}" --query "ResourceRecordSets[?contains(Name, '.graphdb.cluster') == \`true\`].Name")
# Get existing DNS records from Route53 which contain .${route53_zone_dns_name} in their name
EXISTING_RECORDS=$(aws route53 list-resource-record-sets --hosted-zone-id "${zone_id}" --query "ResourceRecordSets[?contains(Name, '.${route53_zone_dns_name}') == \`true\`].Name")
# Use jq to process the JSON output, remove the last dot from each element, and convert it to an array
EXISTING_RECORDS=$(echo "$EXISTING_RECORDS" | jq -r '.[] | rtrimstr(".")')
# Convert the output into an array
Expand Down
Loading

0 comments on commit be3b0f1

Please sign in to comment.