From 35bc67bb1df3a06f1f4aaf009f3aac4afb78fb8e Mon Sep 17 00:00:00 2001 From: Steven Platt Date: Tue, 12 Nov 2024 16:18:23 -0500 Subject: [PATCH 1/6] working base cluster configuration --- spartan/terraform/gke-cluster/main.tf | 173 +++++++++++++++++++++ spartan/terraform/gke-cluster/outputs.tf | 9 ++ spartan/terraform/gke-cluster/variables.tf | 7 + 3 files changed, 189 insertions(+) create mode 100644 spartan/terraform/gke-cluster/main.tf create mode 100644 spartan/terraform/gke-cluster/outputs.tf create mode 100644 spartan/terraform/gke-cluster/variables.tf diff --git a/spartan/terraform/gke-cluster/main.tf b/spartan/terraform/gke-cluster/main.tf new file mode 100644 index 000000000000..d8838496fe52 --- /dev/null +++ b/spartan/terraform/gke-cluster/main.tf @@ -0,0 +1,173 @@ +terraform { + backend "s3" { + bucket = "aztec-terraform" + key = "spartan-gke-cluster/terraform.tfstate" + region = "eu-west-2" + } + required_providers { + google = { + source = "hashicorp/google" + version = "~> 5.0" + } + } +} + +# Configure the Google Cloud provider +provider "google" { + project = "testnet-440309" + region = var.region +} + +# Create the service account +resource "google_service_account" "gke_sa" { + account_id = "gke-nodes-sa" + display_name = "GKE Nodes Service Account" + description = "Service account for GKE nodes" +} + +# Add IAM roles to the service account +resource "google_project_iam_member" "gke_sa_roles" { + for_each = toset([ + "roles/logging.logWriter", + "roles/monitoring.metricWriter", + "roles/monitoring.viewer", + "roles/artifactregistry.reader" + ]) + + project = "testnet-440309" + role = each.key + member = "serviceAccount:${google_service_account.gke_sa.email}" +} + +# Create ingress firewall rule for UDP +resource "google_compute_firewall" "udp_ingress" { + name = "allow-udp-ingress-40400-40499" + network = "default" + + allow { + protocol = "udp" + ports = ["40400-40499"] + } + + direction = "INGRESS" + source_ranges = ["0.0.0.0/0"] + target_tags = 
["gke-node"] +} + +# Create egress firewall rule for UDP +resource "google_compute_firewall" "udp_egress" { + name = "allow-udp-egress-40400-40499" + network = "default" + + allow { + protocol = "udp" + ports = ["40400-40499"] + } + + direction = "EGRESS" + destination_ranges = ["0.0.0.0/0"] + target_tags = ["gke-node"] +} + +# Create a GKE cluster +resource "google_container_cluster" "primary" { + name = "spartan-gke" + location = var.zone + initial_node_count = 1 + + # Remove default node pool after cluster creation + remove_default_node_pool = true + + # Kubernetes version + min_master_version = "latest" + + # Network configuration + network = "default" + subnetwork = "default" + + # Master auth configuration + master_auth { + client_certificate_config { + issue_client_certificate = false + } + } +} + +# Create primary node pool with autoscaling +resource "google_container_node_pool" "primary_nodes" { + name = "primary-node-pool" + location = var.zone + cluster = google_container_cluster.primary.name + + # Enable autoscaling + autoscaling { + min_node_count = 1 + max_node_count = 5 + } + + # Node configuration + node_config { + machine_type = "e2-medium" + + service_account = google_service_account.gke_sa.email + oauth_scopes = [ + "https://www.googleapis.com/auth/cloud-platform" + ] + + labels = { + env = "production" + } + + tags = ["gke-node"] + } + + # Management configuration + management { + auto_repair = true + auto_upgrade = true + } +} + +# Create spot instance node pool with autoscaling +resource "google_container_node_pool" "spot_nodes" { + name = "spot-node-pool" + location = var.zone + cluster = google_container_cluster.primary.name + + # Enable autoscaling + autoscaling { + min_node_count = 0 + max_node_count = 10 + } + + # Node configuration + node_config { + machine_type = "e2-medium" + spot = true + + service_account = google_service_account.gke_sa.email + oauth_scopes = [ + "https://www.googleapis.com/auth/cloud-platform" + ] + + labels = { + 
env = "production" + pool = "spot" + } + + tags = ["gke-node", "spot"] + + # Spot instance termination handler + taint { + key = "cloud.google.com/gke-spot" + value = "true" + effect = "NO_SCHEDULE" + } + } + + # Management configuration + management { + auto_repair = true + auto_upgrade = true + } +} diff --git a/spartan/terraform/gke-cluster/outputs.tf b/spartan/terraform/gke-cluster/outputs.tf new file mode 100644 index 000000000000..bfa7b36406e8 --- /dev/null +++ b/spartan/terraform/gke-cluster/outputs.tf @@ -0,0 +1,9 @@ +# Output the cluster endpoint +output "cluster_endpoint" { + value = google_container_cluster.primary.endpoint +} + +# Output the service account email +output "service_account_email" { + value = google_service_account.gke_sa.email +} diff --git a/spartan/terraform/gke-cluster/variables.tf b/spartan/terraform/gke-cluster/variables.tf new file mode 100644 index 000000000000..1961675a71ea --- /dev/null +++ b/spartan/terraform/gke-cluster/variables.tf @@ -0,0 +1,7 @@ +variable "region" { + default = "us-east4" +} + +variable "zone" { + default = "us-east4-a" +} From 14f1474732b15fe875e9a85e3f2ea8d507e72194 Mon Sep 17 00:00:00 2001 From: Steven Platt Date: Tue, 12 Nov 2024 16:27:56 -0500 Subject: [PATCH 2/6] added gcloud project var --- spartan/terraform/gke-cluster/main.tf | 4 ++-- spartan/terraform/gke-cluster/variables.tf | 4 ++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/spartan/terraform/gke-cluster/main.tf b/spartan/terraform/gke-cluster/main.tf index d8838496fe52..32f6c270f175 100644 --- a/spartan/terraform/gke-cluster/main.tf +++ b/spartan/terraform/gke-cluster/main.tf @@ -14,7 +14,7 @@ terraform { # Configure the Google Cloud provider provider "google" { - project = "testnet-440309" + project = var.project region = var.region } @@ -34,7 +34,7 @@ resource "google_project_iam_member" "gke_sa_roles" { "roles/artifactregistry.reader" ]) - project = "testnet-440309" + project = var.project role = each.key member = 
"serviceAccount:${google_service_account.gke_sa.email}" } diff --git a/spartan/terraform/gke-cluster/variables.tf b/spartan/terraform/gke-cluster/variables.tf index 1961675a71ea..555458daa5d0 100644 --- a/spartan/terraform/gke-cluster/variables.tf +++ b/spartan/terraform/gke-cluster/variables.tf @@ -1,3 +1,7 @@ +variable "project" { + default = "testnet-440309" +} + variable "region" { default = "us-east4" } From cfd70959ca14249170bc8df7f2bdb0a7351605dc Mon Sep 17 00:00:00 2001 From: Steven Platt Date: Tue, 12 Nov 2024 17:22:31 -0500 Subject: [PATCH 3/6] updated node types and eks fw policy --- spartan/terraform/eks-cluster/main.tf | 72 ++++++++++++++++++++++----- spartan/terraform/gke-cluster/main.tf | 4 +- 2 files changed, 62 insertions(+), 14 deletions(-) diff --git a/spartan/terraform/eks-cluster/main.tf b/spartan/terraform/eks-cluster/main.tf index 09ef171443ff..5dc2fe235114 100644 --- a/spartan/terraform/eks-cluster/main.tf +++ b/spartan/terraform/eks-cluster/main.tf @@ -1,8 +1,8 @@ terraform { backend "s3" { - bucket = "aztec-terraform" - key = "spartan/terraform.tfstate" - region = "eu-west-2" + bucket = "aztec-terraform" + key = "spartan/terraform.tfstate" + region = "eu-west-2" } required_providers { @@ -26,6 +26,54 @@ data "aws_availability_zones" "available" { } } +# Create security group for node traffic +resource "aws_security_group" "node_traffic" { + name_prefix = "eks-node-traffic" + description = "Security group for EKS node UDP and TCP traffic" + vpc_id = module.vpc.vpc_id + + # Ingress UDP rule + ingress { + from_port = 40400 + to_port = 40499 + protocol = "udp" + cidr_blocks = ["0.0.0.0/0"] + description = "Allow incoming UDP traffic" + } + + # Ingress TCP rule + ingress { + from_port = 40400 + to_port = 40499 + protocol = "tcp" + cidr_blocks = ["0.0.0.0/0"] + description = "Allow incoming TCP traffic" + } + + # Egress UDP rule + egress { + from_port = 40400 + to_port = 40499 + protocol = "udp" + cidr_blocks = ["0.0.0.0/0"] + description = 
"Allow outgoing UDP traffic" + } + + # Egress TCP rule + egress { + from_port = 40400 + to_port = 40499 + protocol = "tcp" + cidr_blocks = ["0.0.0.0/0"] + description = "Allow outgoing TCP traffic" + } + + tags = { + Name = "${var.cluster_name}-node-traffic" + Project = var.cluster_name + } +} + module "vpc" { source = "terraform-aws-modules/vpc/aws" version = "5.8.1" @@ -33,14 +81,14 @@ module "vpc" { name = var.cluster_name cidr = "10.1.0.0/16" - azs = slice(data.aws_availability_zones.available.names, 0, 3) - private_subnets = ["10.1.1.0/24", "10.1.2.0/24"] - public_subnets = ["10.1.3.0/24", "10.1.4.0/24"] + azs = slice(data.aws_availability_zones.available.names, 0, 3) + private_subnets = ["10.1.1.0/24", "10.1.2.0/24"] + public_subnets = ["10.1.3.0/24", "10.1.4.0/24"] enable_nat_gateway = true single_nat_gateway = true enable_dns_hostnames = true - enable_vpn_gateway = true + enable_vpn_gateway = true public_subnet_tags = { "kubernetes.io/role/elb" = 1 @@ -51,7 +99,7 @@ module "vpc" { } tags = { - Project = var.cluster_name + Project = var.cluster_name } } @@ -83,17 +131,17 @@ module "eks" { eks_managed_node_groups = { default = { - name = "node-group-1" + name = "node-group-1" instance_types = ["m6a.2xlarge"] min_size = 1 - max_size = 2 - desired_size = 1 + max_size = 10 + desired_size = 10 } } tags = { - Project = var.cluster_name + Project = var.cluster_name } } diff --git a/spartan/terraform/gke-cluster/main.tf b/spartan/terraform/gke-cluster/main.tf index 32f6c270f175..f39156a7f219 100644 --- a/spartan/terraform/gke-cluster/main.tf +++ b/spartan/terraform/gke-cluster/main.tf @@ -107,7 +107,7 @@ resource "google_container_node_pool" "primary_nodes" { # Node configuration node_config { - machine_type = "e2-medium" + machine_type = "n1-standard-16" service_account = google_service_account.gke_sa.email oauth_scopes = [ @@ -142,7 +142,7 @@ resource "google_container_node_pool" "spot_nodes" { # Node configuration node_config { - machine_type = "e2-medium" + 
machine_type = "n1-standard-16" spot = true service_account = google_service_account.gke_sa.email From d2b27a1befc0ddf28a141234b42081c01e8d65d0 Mon Sep 17 00:00:00 2001 From: Steven Platt Date: Tue, 12 Nov 2024 17:29:08 -0500 Subject: [PATCH 4/6] updated terraform outputs --- spartan/terraform/gke-cluster/outputs.tf | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/spartan/terraform/gke-cluster/outputs.tf b/spartan/terraform/gke-cluster/outputs.tf index bfa7b36406e8..1b8d94cf9212 100644 --- a/spartan/terraform/gke-cluster/outputs.tf +++ b/spartan/terraform/gke-cluster/outputs.tf @@ -7,3 +7,13 @@ output "cluster_endpoint" { output "service_account_email" { value = google_service_account.gke_sa.email } + +output "region" { + description = "Google cloud region" + value = var.region +} + +output "kubernetes_cluster_name" { + description = "GKE Cluster Name" + value = google_container_cluster.primary.name +} From af5efab7ef6e25778df239f3853ca82410eb5604 Mon Sep 17 00:00:00 2001 From: Steven Platt Date: Tue, 12 Nov 2024 17:36:05 -0500 Subject: [PATCH 5/6] comment consistency --- spartan/terraform/gke-cluster/outputs.tf | 2 -- 1 file changed, 2 deletions(-) diff --git a/spartan/terraform/gke-cluster/outputs.tf b/spartan/terraform/gke-cluster/outputs.tf index 1b8d94cf9212..befaa28092e9 100644 --- a/spartan/terraform/gke-cluster/outputs.tf +++ b/spartan/terraform/gke-cluster/outputs.tf @@ -1,9 +1,7 @@ -# Output the cluster endpoint output "cluster_endpoint" { value = google_container_cluster.primary.endpoint } -# Output the service account email output "service_account_email" { value = google_service_account.gke_sa.email } From 97e74b2137f5e2155999dec2ada7ce9bd28b51a9 Mon Sep 17 00:00:00 2001 From: Steven Platt Date: Wed, 13 Nov 2024 08:33:18 -0500 Subject: [PATCH 6/6] updated machine types in gke cluster --- spartan/terraform/gke-cluster/main.tf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spartan/terraform/gke-cluster/main.tf 
b/spartan/terraform/gke-cluster/main.tf index f39156a7f219..46c1a51dc6c2 100644 --- a/spartan/terraform/gke-cluster/main.tf +++ b/spartan/terraform/gke-cluster/main.tf @@ -107,7 +107,7 @@ resource "google_container_node_pool" "primary_nodes" { # Node configuration node_config { - machine_type = "n1-standard-16" + machine_type = "t2d-standard-16" service_account = google_service_account.gke_sa.email oauth_scopes = [ @@ -142,7 +142,7 @@ resource "google_container_node_pool" "spot_nodes" { # Node configuration node_config { - machine_type = "n1-standard-16" + machine_type = "t2d-standard-16" spot = true service_account = google_service_account.gke_sa.email