-
Notifications
You must be signed in to change notification settings - Fork 15
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
k8s: Terraform deployment for GKE clusters
This provides a Terraform configuration for deploying our Kubernetes clusters to GKE. We deploy an identical cluster to each of a list of regions, with one small node for admin purposes (due to a requirement to not use spot instances for the main node group) and two autoscaling groups: one with small 8 core nodes for most jobs and one with bigger nodes for the more resource intensive ones. This is different to our current scheme, where each cluster has a single node group and we direct jobs in Jenkins. With this scheme we allow the Kubernetes scheduler to place jobs, or we can still direct them to specific node sizes using nodeSelector in the jobs and the labels that are assigned to the node groups. This is a more Kubernetes way of doing things and decouples further from Jenkins. Signed-off-by: Mark Brown <[email protected]>
- Loading branch information
Showing
4 changed files
with
170 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,127 @@ | ||
# FIXME: For real deployment we should store the terraform state | ||
# in cloud storage rather than just the current directory, terraform | ||
# supports Google Cloud Storage directly. This means configuration | ||
# doesn't need to be on a single machine somewhere. | ||
# | ||
# See https://developer.hashicorp.com/terraform/language/settings/backends/gcs | ||
# | ||
# The gcs backend is configured with a bucket and a prefix; the names | ||
# below are placeholders and must match a bucket that actually exists. | ||
# | ||
#terraform { | ||
# backend "gcs" { | ||
# bucket = "kernelci-tf" | ||
# prefix = "workers" | ||
# } | ||
#} | ||
|
||
#variable "gke_username" { | ||
# default = "" | ||
# description = "gke username" | ||
#} | ||
|
||
#variable "gke_password" { | ||
# default = "" | ||
# description = "gke password" | ||
#} | ||
|
||
# Regions to deploy into. The for_each resources in this configuration | ||
# iterate over this set, so each entry gets its own copy of them. | ||
locals { | ||
regions = toset(["us-central1", "europe-west2"]) | ||
} | ||
|
||
# GKE cluster: one per entry in local.regions, named "<region>-workers". | ||
resource "google_container_cluster" "primary" { | ||
for_each = local.regions | ||
|
||
name = "${each.key}-workers" | ||
location = each.key | ||
|
||
# We can't create a cluster with no node pool defined, but we want to only use | ||
# separately managed node pools. So we create the smallest possible default | ||
# node pool and immediately delete it. | ||
remove_default_node_pool = true | ||
initial_node_count = 1 | ||
|
||
# Reference the network resources instead of rebuilding their names as | ||
# strings: the resulting values are the same, but the references give | ||
# Terraform a dependency edge so the VPC and subnet are created before | ||
# the cluster (and destroyed after it). | ||
network = google_compute_network.vpc[each.key].name | ||
subnetwork = google_compute_subnetwork.subnet[each.key].name | ||
} | ||
|
||
# Smaller nodes for most jobs | ||
resource "google_container_node_pool" "small_nodes" { | ||
for_each = local.regions | ||
|
||
name = "${each.key}-small-node-pool" | ||
location = each.key | ||
# Reference the cluster resource rather than rebuilding its name so that | ||
# Terraform creates the pool only after the cluster exists. | ||
cluster = google_container_cluster.primary[each.key].name | ||
|
||
node_config { | ||
oauth_scopes = [ | ||
"https://www.googleapis.com/auth/logging.write", | ||
"https://www.googleapis.com/auth/monitoring", | ||
] | ||
|
||
# Kubernetes node labels; jobs can target this pool with a nodeSelector | ||
# on "kernelci/worker-size". | ||
labels = { | ||
"kernelci/worker" = "worker" | ||
"kernelci/worker-size" = "small" | ||
} | ||
|
||
# Standard machine, 8 vCPUs, 30G memory | ||
machine_type = "n1-standard-8" | ||
# NOTE(review): preemptible and spot are both enabled; spot is the newer | ||
# form of preemptible and setting both may be rejected. Confirm against | ||
# the provider/GKE documentation and keep only one if so. | ||
preemptible = true | ||
spot = true | ||
# Network tags must be RFC 1035 names (lowercase letters, digits and | ||
# hyphens), so "/" cannot be used as a separator here. | ||
tags = [ | ||
"kernelci-worker", | ||
"kernelci-small-worker" | ||
] | ||
|
||
metadata = { | ||
disable-legacy-endpoints = "true" | ||
} | ||
} | ||
|
||
autoscaling { | ||
min_node_count = 1 | ||
max_node_count = 10 | ||
} | ||
} | ||
|
||
# Bigger nodes for all*config jobs | ||
resource "google_container_node_pool" "big_nodes" { | ||
for_each = local.regions | ||
|
||
name = "${each.key}-big-node-pool" | ||
location = each.key | ||
# Reference the cluster resource rather than rebuilding its name so that | ||
# Terraform creates the pool only after the cluster exists. | ||
cluster = google_container_cluster.primary[each.key].name | ||
|
||
node_config { | ||
oauth_scopes = [ | ||
"https://www.googleapis.com/auth/logging.write", | ||
"https://www.googleapis.com/auth/monitoring", | ||
] | ||
|
||
# Kubernetes node labels; jobs can target this pool with a nodeSelector | ||
# on "kernelci/worker-size". | ||
labels = { | ||
"kernelci/worker" = "worker" | ||
"kernelci/worker-size" = "big" | ||
} | ||
|
||
# Standard machine, 32 vCPUs, 128G (?) memory | ||
machine_type = "n2-standard-32" | ||
# NOTE(review): preemptible and spot are both enabled; spot is the newer | ||
# form of preemptible and setting both may be rejected. Confirm against | ||
# the provider/GKE documentation and keep only one if so. | ||
preemptible = true | ||
spot = true | ||
# Network tags must be RFC 1035 names (lowercase letters, digits and | ||
# hyphens), so "/" cannot be used as a separator here. | ||
tags = [ | ||
"kernelci-worker", | ||
"kernelci-big-worker" | ||
] | ||
|
||
metadata = { | ||
disable-legacy-endpoints = "true" | ||
} | ||
} | ||
|
||
autoscaling { | ||
min_node_count = 1 | ||
max_node_count = 10 | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
# Echo the project the configuration was applied to. | ||
output "project_id" { | ||
description = "GCloud Project ID" | ||
value = var.project_id | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
# Terraform core and provider version constraints for this configuration. | ||
terraform { | ||
required_version = ">= 0.14" | ||
|
||
# The Google provider is pinned to one exact release. | ||
required_providers { | ||
google = { | ||
version = "4.27.0" | ||
source = "hashicorp/google" | ||
} | ||
} | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
# Google Cloud project that the provider operates on; it has no default, | ||
# so a value must be supplied. | ||
variable "project_id" { | ||
type = string | ||
description = "project id" | ||
} | ||
|
||
# Region passed to the provider block. The subnets and clusters set their | ||
# own region/location from local.regions instead. | ||
variable "region" { | ||
type = string | ||
description = "region" | ||
} | ||
|
||
# Google provider configuration, driven by the two input variables. | ||
provider "google" { | ||
region = var.region | ||
project = var.project_id | ||
} | ||
|
||
# VPC: one network per entry in local.regions, named "<region>-vpc". | ||
resource "google_compute_network" "vpc" { | ||
for_each = local.regions | ||
name = "${each.key}-vpc" | ||
# Boolean literal rather than the string "false". Automatic subnets are | ||
# disabled because the subnet is declared explicitly as | ||
# google_compute_subnetwork.subnet. | ||
auto_create_subnetworks = false | ||
} | ||
|
||
# Subnet: one per entry in local.regions, attached to that region's VPC. | ||
resource "google_compute_subnetwork" "subnet" { | ||
for_each = local.regions | ||
name = "${each.key}-subnet" | ||
region = each.key | ||
# each.key and each.value are identical when iterating a set; each.key is | ||
# used here to match the other resources in this configuration. | ||
network = google_compute_network.vpc[each.key].name | ||
# The same range is used for every region; each subnet is attached to its | ||
# own per-region VPC. | ||
ip_cidr_range = "10.10.0.0/24" | ||
} |