Skip to content

Commit

Permalink
k8s: Terraform deployment for GKE clusters
Browse files Browse the repository at this point in the history
This provides a Terraform configuration for deploying our Kubernetes
clusters to GKE. We deploy an identical cluster to each of a list of
regions, with one small node for admin purposes (due to a requirement
not to use spot instances for the main node group) and two autoscaling
groups: one with small 8-core nodes for most jobs and one with bigger
nodes for the more resource-intensive ones.

This is different to our current scheme where each cluster has a single
node group and we direct jobs in Jenkins. With this scheme we allow the
Kubernetes scheduler to place jobs, or we can still direct them to
specific node sizes using nodeSelector in the jobs and the labels that
are assigned to the nodegroups. This is a more Kubernetes way of doing
things and decouples further from Jenkins.

Signed-off-by: Mark Brown <[email protected]>
  • Loading branch information
broonie committed Nov 28, 2022
1 parent ffe5e7a commit 8c75f81
Show file tree
Hide file tree
Showing 4 changed files with 168 additions and 0 deletions.
127 changes: 127 additions & 0 deletions k8s/gke/gke.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
# FIXME: For real deployment we should store the terraform state
# in cloud storage rather than just the current directory; terraform
# supports Google Cloud Storage directly via the "gcs" backend. This
# means configuration doesn't need to be on a single machine somewhere.
#
# See https://developer.hashicorp.com/terraform/language/settings/backends/gcs
#
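# NOTE: the arguments in the sketch below (resource_group_name,
# storage_account_name, container_name, key) belong to the "azurerm"
# backend. The "gcs" backend is configured with `bucket` (and optionally
# `prefix`) instead, so they must be replaced before this is enabled.
#terraform {
# backend "gcs" {
# resource_group_name = "kernelci-tf-storage"
# storage_account_name = "kernelci-tf"
# container_name = "tfstate"
# key = "workers.terraform.tfstate"
# }
#}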

#variable "gke_username" {
# default = ""
# description = "gke username"
#}

#variable "gke_password" {
# default = ""
# description = "gke password"
#}

# Regions to deploy to. Every resource in this file that uses
# `for_each = local.regions` is instantiated once per entry, keyed by the
# region name.
locals {
  regions = toset(["us-central1", "europe-west2"])
}

# GKE cluster: one per entry in local.regions, named "<region>-workers".
#
# NOTE(review): every cluster is attached to the single subnetwork
# google_compute_subnetwork.subnet, which is created in var.region only,
# while the clusters are located in each of local.regions. GKE presumably
# needs the subnetwork to be in the cluster's own region - confirm, and if
# so create one subnetwork per region.
resource "google_container_cluster" "primary" {
  for_each = local.regions

  location = each.key
  name     = "${each.key}-workers"

  network    = google_compute_network.vpc.name
  subnetwork = google_compute_subnetwork.subnet.name

  # A cluster cannot be created without a node pool, but all real pools
  # are managed as separate google_container_node_pool resources. Create
  # the smallest possible default pool and have it removed straight away.
  initial_node_count       = 1
  remove_default_node_pool = true
}

# Smaller nodes for most jobs.
#
# One autoscaling pool per entry in local.regions, attached to that
# region's cluster. Jobs can be steered here with a nodeSelector on the
# "kernelci/worker-size" = "small" node label.
resource "google_container_node_pool" "small_nodes" {
  for_each = local.regions

  name     = "${each.key}-small-node-pool"
  location = each.key

  # Reference the cluster resource instead of rebuilding its name as a
  # string. The value is the same ("<region>-workers"), but the reference
  # gives Terraform an explicit dependency, so the pool is created after
  # and destroyed before its cluster.
  cluster = google_container_cluster.primary[each.key].name

  node_config {
    oauth_scopes = [
      "https://www.googleapis.com/auth/logging.write",
      "https://www.googleapis.com/auth/monitoring",
    ]

    # Kubernetes node labels, usable from job nodeSelectors.
    labels = {
      "kernelci/worker"      = "worker"
      "kernelci/worker-size" = "small"
    }

    # Standard machine, 8 vCPUs, 30G memory
    machine_type = "n1-standard-8"

    # NOTE(review): both preemptible and spot are enabled; these look like
    # alternative provisioning models - confirm the API accepts both.
    preemptible = true
    spot        = true

    # NOTE(review): these are instance (network) tags; GCE tags are
    # normally restricted to RFC 1035 names (lowercase letters, digits and
    # '-') - confirm that '/' is accepted.
    tags = [
      "kernelci/worker",
      "kernelci/small-worker"
    ]

    metadata = {
      disable-legacy-endpoints = "true"
    }
  }

  # NOTE(review): for a pool whose location is a region these limits are
  # presumably applied per zone - confirm against the intended capacity.
  autoscaling {
    min_node_count = 1
    max_node_count = 10
  }
}

# Bigger nodes for all*config jobs.
#
# One autoscaling pool per entry in local.regions, attached to that
# region's cluster. Jobs can be steered here with a nodeSelector on the
# "kernelci/worker-size" = "big" node label.
resource "google_container_node_pool" "big_nodes" {
  for_each = local.regions

  name     = "${each.key}-big-node-pool"
  location = each.key

  # Reference the cluster resource instead of rebuilding its name as a
  # string. The value is the same ("<region>-workers"), but the reference
  # gives Terraform an explicit dependency, so the pool is created after
  # and destroyed before its cluster.
  cluster = google_container_cluster.primary[each.key].name

  node_config {
    oauth_scopes = [
      "https://www.googleapis.com/auth/logging.write",
      "https://www.googleapis.com/auth/monitoring",
    ]

    # Kubernetes node labels, usable from job nodeSelectors.
    labels = {
      "kernelci/worker"      = "worker"
      "kernelci/worker-size" = "big"
    }

    # Standard machine, 32 vCPUs, 128G memory
    machine_type = "n2-standard-32"

    # NOTE(review): both preemptible and spot are enabled; these look like
    # alternative provisioning models - confirm the API accepts both.
    preemptible = true
    spot        = true

    # NOTE(review): these are instance (network) tags; GCE tags are
    # normally restricted to RFC 1035 names (lowercase letters, digits and
    # '-') - confirm that '/' is accepted.
    tags = [
      "kernelci/worker",
      "kernelci/big-worker"
    ]

    metadata = {
      disable-legacy-endpoints = "true"
    }
  }

  # NOTE(review): for a pool whose location is a region these limits are
  # presumably applied per zone - confirm against the intended capacity.
  autoscaling {
    min_node_count = 1
    max_node_count = 10
  }
}
4 changes: 4 additions & 0 deletions k8s/gke/outputs.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Echo back the project the configuration was applied to.
output "project_id" {
  description = "GCloud Project ID"
  value       = var.project_id
}
11 changes: 11 additions & 0 deletions k8s/gke/versions.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Tooling constraints: minimum Terraform CLI version and the Google
# provider, pinned to one exact release.
terraform {
  required_version = ">= 0.14"

  required_providers {
    google = {
      version = "4.27.0"
      source  = "hashicorp/google"
    }
  }
}

26 changes: 26 additions & 0 deletions k8s/gke/vpc.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Google Cloud project to deploy into. Used for the provider's project
# and as the name prefix of the VPC and subnet. No default: it must be
# supplied by the caller.
variable "project_id" {
  type        = string
  description = "project id"
}

# Region used as the provider's region and as the region of the subnet.
# No default: it must be supplied by the caller.
variable "region" {
  type        = string
  description = "region"
}

# Google provider, configured entirely from the project_id and region
# input variables.
provider "google" {
  region  = var.region
  project = var.project_id
}

# VPC: a single custom-mode network named after the project.
resource "google_compute_network" "vpc" {
  name = "${var.project_id}-vpc"

  # Subnets are declared explicitly in this configuration, so automatic
  # per-region subnet creation is switched off. The argument is a boolean,
  # so use the literal rather than the string "false".
  auto_create_subnetworks = false
}

# Subnet: one /24 inside the VPC, created in var.region.
#
# NOTE(review): this is the only subnetwork in the configuration and it
# lives in a single region, yet it is referenced by the clusters in every
# region of local.regions. Presumably each cluster region needs its own
# subnetwork (with a non-overlapping range) - confirm.
resource "google_compute_subnetwork" "subnet" {
  name          = "${var.project_id}-subnet"
  network       = google_compute_network.vpc.name
  region        = var.region
  ip_cidr_range = "10.10.0.0/24"
}

0 comments on commit 8c75f81

Please sign in to comment.