diff --git a/ml-platform/01_gcp_project/backend.tf b/ml-platform/01_gcp_project/backend.tf index 5b9bff1bd..b54d5aca8 100644 --- a/ml-platform/01_gcp_project/backend.tf +++ b/ml-platform/01_gcp_project/backend.tf @@ -18,4 +18,3 @@ terraform { bucket = "YOUR_STATE_BUCKET" } } - diff --git a/ml-platform/01_gcp_project/main.tf b/ml-platform/01_gcp_project/main.tf index 305bfce2c..1dadd943e 100644 --- a/ml-platform/01_gcp_project/main.tf +++ b/ml-platform/01_gcp_project/main.tf @@ -13,10 +13,10 @@ # limitations under the License. module "gcp-project" { - source = "./modules/projects" - org_id = var.org_id - folder_id = var.folder_id - env = var.env + source = "./modules/projects" + org_id = var.org_id + folder_id = var.folder_id + env = var.env billing_account = var.billing_account - project_name = var.project_name + project_name = var.project_name } diff --git a/ml-platform/01_gcp_project/modules/projects/outputs.tf b/ml-platform/01_gcp_project/modules/projects/outputs.tf index e087e6c85..431fe53dd 100644 --- a/ml-platform/01_gcp_project/modules/projects/outputs.tf +++ b/ml-platform/01_gcp_project/modules/projects/outputs.tf @@ -14,4 +14,4 @@ output "project_ids" { value = "${google_project.project_under_folder}" == {} ? "${google_project.project_under_org}" : "${google_project.project_under_folder}" -} \ No newline at end of file +} diff --git a/ml-platform/01_gcp_project/modules/projects/projects.tf b/ml-platform/01_gcp_project/modules/projects/projects.tf index 55c88ee86..76f7d1ef3 100644 --- a/ml-platform/01_gcp_project/modules/projects/projects.tf +++ b/ml-platform/01_gcp_project/modules/projects/projects.tf @@ -17,18 +17,18 @@ resource "random_id" "random_project_id_suffix" { } resource "google_project" "project_under_folder" { - for_each = var.folder_id != null ? var.env : toset([]) - name = format("%s-%s",var.project_name,each.value) - project_id = format("%s-%s-%s",var.project_name,random_id.random_project_id_suffix.hex,each.value) - folder_id = var.folder_id + for_each = var.folder_id != null ? var.env : toset([]) + name = format("%s-%s", var.project_name, each.value) + project_id = format("%s-%s-%s", var.project_name, random_id.random_project_id_suffix.hex, each.value) + folder_id = var.folder_id billing_account = var.billing_account } resource "google_project" "project_under_org" { - for_each = var.folder_id == null ? var.env : toset([]) - name = format("%s-%s",var.project_name,each.value) - project_id = format("%s-%s-%s",var.project_name,random_id.random_project_id_suffix.hex,each.value) - org_id = var.org_id + for_each = var.folder_id == null ? var.env : toset([]) + name = format("%s-%s", var.project_name, each.value) + project_id = format("%s-%s-%s", var.project_name, random_id.random_project_id_suffix.hex, each.value) + org_id = var.org_id billing_account = var.billing_account } @@ -38,7 +38,7 @@ resource "google_project_service" "project_services" { service = "cloudresourcemanager.googleapis.com" disable_on_destroy = true disable_dependent_services = true - depends_on = [google_project.project_under_folder,google_project.project_under_org] + depends_on = [google_project.project_under_folder, google_project.project_under_org] } resource "google_project_service" "project_services-1" { @@ -47,25 +47,25 @@ resource "google_project_service" "project_services-1" { service = "iam.googleapis.com" disable_on_destroy = true disable_dependent_services = true - depends_on = [google_project.project_under_folder,google_project.project_under_org] + depends_on = [google_project.project_under_folder, google_project.project_under_org] } resource "google_project_service" "project_services-2" { - for_each = var.folder_id == null ? google_project.project_under_org: google_project.project_under_folder + for_each = var.folder_id == null ? google_project.project_under_org : google_project.project_under_folder project = each.value.id service = "container.googleapis.com" disable_on_destroy = true disable_dependent_services = true - depends_on = [google_project.project_under_folder,google_project.project_under_org] + depends_on = [google_project.project_under_folder, google_project.project_under_org] } resource "google_project_service" "project_services-3" { - for_each = var.folder_id == null ? google_project.project_under_org: google_project.project_under_folder + for_each = var.folder_id == null ? google_project.project_under_org : google_project.project_under_folder project = each.value.id service = "compute.googleapis.com" disable_on_destroy = true disable_dependent_services = true - depends_on = [google_project.project_under_folder,google_project.project_under_org] + depends_on = [google_project.project_under_folder, google_project.project_under_org] } resource "google_project_service" "project_services-4" { @@ -74,7 +74,7 @@ resource "google_project_service" "project_services-4" { service = "anthos.googleapis.com" disable_on_destroy = true disable_dependent_services = true - depends_on = [google_project.project_under_folder,google_project.project_under_org] + depends_on = [google_project.project_under_folder, google_project.project_under_org] } resource "google_project_service" "project_services-5" { @@ -83,7 +83,7 @@ resource "google_project_service" "project_services-5" { service = "anthosconfigmanagement.googleapis.com" disable_on_destroy = true disable_dependent_services = true - depends_on = [google_project.project_under_folder,google_project.project_under_org] + depends_on = [google_project.project_under_folder, google_project.project_under_org] } resource "google_project_service" "project_services-6" { @@ -92,5 +92,5 @@ resource "google_project_service" "project_services-6" { service = "gkehub.googleapis.com" disable_on_destroy = true disable_dependent_services = true - depends_on = [google_project.project_under_folder,google_project.project_under_org] -} \ No newline at end of file + depends_on = [google_project.project_under_folder, google_project.project_under_org] +} diff --git a/ml-platform/01_gcp_project/outputs.tf b/ml-platform/01_gcp_project/outputs.tf index 7e4d72a6c..11352c942 100644 --- a/ml-platform/01_gcp_project/outputs.tf +++ b/ml-platform/01_gcp_project/outputs.tf @@ -13,5 +13,5 @@ # limitations under the License. output "project_ids" { - value = {for k, v in "${module.gcp-project.project_ids}" : k => v.project_id} -} \ No newline at end of file + value = { for k, v in "${module.gcp-project.project_ids}" : k => v.project_id } +} diff --git a/ml-platform/01_gcp_project/providers.tf b/ml-platform/01_gcp_project/providers.tf index 1817d23eb..95ff9fe61 100644 --- a/ml-platform/01_gcp_project/providers.tf +++ b/ml-platform/01_gcp_project/providers.tf @@ -19,4 +19,4 @@ terraform { version = "4.72.1" } } -} \ No newline at end of file +} diff --git a/ml-platform/02_gke/main.tf b/ml-platform/02_gke/main.tf index 2d8ed3dea..d8fbc0b21 100644 --- a/ml-platform/02_gke/main.tf +++ b/ml-platform/02_gke/main.tf @@ -13,11 +13,11 @@ # limitations under the License. data "terraform_remote_state" "gcp-projects" { - count = length(keys("${var.project_id}")) == 0 ? 1 : 0 + count = length(keys("${var.project_id}")) == 0 ? 1 : 0 backend = "gcs" config = { - bucket = var.lookup_state_bucket - prefix = "01_gcp_project" + bucket = var.lookup_state_bucket + prefix = "01_gcp_project" } } @@ -26,17 +26,17 @@ locals { } module "create-vpc" { - for_each = local.parsed_project_id - source = "./modules/network" - project_id = each.value - network_name = format("%s-%s",var.network_name,each.key) - routing_mode = var.routing_mode - subnet_01_name = format("%s-%s",var.subnet_01_name,each.key) - subnet_01_ip = var.subnet_01_ip - subnet_01_region = var.subnet_01_region - subnet_02_name = format("%s-%s",var.subnet_02_name,each.key) - subnet_02_ip = var.subnet_02_ip - subnet_02_region = var.subnet_02_region + for_each = local.parsed_project_id + source = "./modules/network" + project_id = each.value + network_name = format("%s-%s", var.network_name, each.key) + routing_mode = var.routing_mode + subnet_01_name = format("%s-%s", var.subnet_01_name, each.key) + subnet_01_ip = var.subnet_01_ip + subnet_01_region = var.subnet_01_region + subnet_02_name = format("%s-%s", var.subnet_02_name, each.key) + subnet_02_ip = var.subnet_02_ip + subnet_02_region = var.subnet_02_region #default_route_name = format("%s-%s","default-route",each.key) } @@ -49,69 +49,69 @@ resource "google_gke_hub_feature" "configmanagement_acm_feature" { } module "gke" { - for_each = local.parsed_project_id - source = "./modules/cluster" - cluster_name = format("%s-%s",var.cluster_name,each.key) - network = module.create-vpc[each.key].vpc - subnet = module.create-vpc[each.key].subnet-1 - project_id = each.value - region = var.subnet_01_region - zone = "${var.subnet_01_region}-a" - master_auth_networks_ipcidr = var.subnet_01_ip - depends_on = [ google_gke_hub_feature.configmanagement_acm_feature ] - env = each.key + for_each = local.parsed_project_id + source = "./modules/cluster" + cluster_name = format("%s-%s", var.cluster_name, each.key) + network = module.create-vpc[each.key].vpc + subnet = module.create-vpc[each.key].subnet-1 + project_id = each.value + region = var.subnet_01_region + zone = "${var.subnet_01_region}-a" + master_auth_networks_ipcidr = var.subnet_01_ip + depends_on = [google_gke_hub_feature.configmanagement_acm_feature] + env = each.key } module "reservation" { - for_each = local.parsed_project_id - source = "./modules/vm-reservations" - cluster_name = module.gke[each.key].cluster_name - zone = "${var.subnet_01_region}-a" - project_id = each.value - depends_on = [ module.gke ] + for_each = local.parsed_project_id + source = "./modules/vm-reservations" + cluster_name = module.gke[each.key].cluster_name + zone = "${var.subnet_01_region}-a" + project_id = each.value + depends_on = [module.gke] } module "node_pool-reserved" { - for_each = local.parsed_project_id - source = "./modules/node-pools" - node_pool_name = "reservation" - project_id = each.value - cluster_name = module.gke[each.key].cluster_name - region = "${var.subnet_01_region}" - taints = var.reserved_taints - resource_type = "reservation" + for_each = local.parsed_project_id + source = "./modules/node-pools" + node_pool_name = "reservation" + project_id = each.value + cluster_name = module.gke[each.key].cluster_name + region = var.subnet_01_region + taints = var.reserved_taints + resource_type = "reservation" reservation_name = module.reservation[each.key].reservation_name } module "node_pool-ondemand" { - for_each = local.parsed_project_id - source = "./modules/node-pools" + for_each = local.parsed_project_id + source = "./modules/node-pools" node_pool_name = "ondemand" - project_id = each.value + project_id = each.value cluster_name = module.gke[each.key].cluster_name - region = "${var.subnet_01_region}" - taints = var.ondemand_taints - resource_type = "ondemand" + region = var.subnet_01_region + taints = var.ondemand_taints + resource_type = "ondemand" } module "node_pool-spot" { - for_each = local.parsed_project_id - source = "./modules/node-pools" + for_each = local.parsed_project_id + source = "./modules/node-pools" node_pool_name = "spot" - project_id = each.value + project_id = each.value cluster_name = module.gke[each.key].cluster_name - region = "${var.subnet_01_region}" - taints = var.spot_taints - resource_type = "spot" + region = var.subnet_01_region + taints = var.spot_taints + resource_type = "spot" } module "cloud-nat" { - for_each = local.parsed_project_id - source = "./modules/cloud-nat" - project_id = each.value - region = split("/", module.create-vpc[each.key].subnet-1)[3] - name = format("%s-%s","nat-for-acm",each.key) - network = module.create-vpc[each.key].vpc - create_router = true - router = format("%s-%s","router-for-acm",each.key) - depends_on = [ module.create-vpc ] + for_each = local.parsed_project_id + source = "./modules/cloud-nat" + project_id = each.value + region = split("/", module.create-vpc[each.key].subnet-1)[3] + name = format("%s-%s", "nat-for-acm", each.key) + network = module.create-vpc[each.key].vpc + create_router = true + router = format("%s-%s", "router-for-acm", each.key) + depends_on = [module.create-vpc] } diff --git a/ml-platform/02_gke/modules/cloud-nat/outputs.tf b/ml-platform/02_gke/modules/cloud-nat/outputs.tf index 86bf7c39d..acd7f8ce6 100644 --- a/ml-platform/02_gke/modules/cloud-nat/outputs.tf +++ b/ml-platform/02_gke/modules/cloud-nat/outputs.tf @@ -31,4 +31,3 @@ output "router_name" { description = "Cloud NAT router name" value = local.router } - diff --git a/ml-platform/02_gke/modules/cloud-nat/versions.tf b/ml-platform/02_gke/modules/cloud-nat/versions.tf index 8422786e6..ee7532c5e 100644 --- a/ml-platform/02_gke/modules/cloud-nat/versions.tf +++ b/ml-platform/02_gke/modules/cloud-nat/versions.tf @@ -16,7 +16,7 @@ terraform { required_providers { google = { - source = "hashicorp/google" + source = "hashicorp/google" #version = ">= 4.51, < 5.0" version = "4.72.1" } diff --git a/ml-platform/02_gke/modules/cluster/gke.tf b/ml-platform/02_gke/modules/cluster/gke.tf index 34186dbc8..418068752 100644 --- a/ml-platform/02_gke/modules/cluster/gke.tf +++ b/ml-platform/02_gke/modules/cluster/gke.tf @@ -107,14 +107,14 @@ resource "google_container_cluster" "gke_batch" { channel = "RAPID" } private_cluster_config { - enable_private_nodes = true + enable_private_nodes = true enable_private_endpoint = true - master_ipv4_cidr_block = "172.16.0.32/28" + master_ipv4_cidr_block = "172.16.0.32/28" } master_authorized_networks_config { cidr_blocks { - cidr_block = var.master_auth_networks_ipcidr + cidr_block = var.master_auth_networks_ipcidr display_name = "vpc-cidr" } } diff --git a/ml-platform/02_gke/modules/cluster/outputs.tf b/ml-platform/02_gke/modules/cluster/outputs.tf index b26d3be8e..57bd8a0de 100644 --- a/ml-platform/02_gke/modules/cluster/outputs.tf +++ b/ml-platform/02_gke/modules/cluster/outputs.tf @@ -30,4 +30,4 @@ output "gke_project_id" { output "env" { value = var.env -} \ No newline at end of file +} diff --git a/ml-platform/02_gke/modules/cluster/variables.tf b/ml-platform/02_gke/modules/cluster/variables.tf index 66e3cda06..5d76462c4 100644 --- a/ml-platform/02_gke/modules/cluster/variables.tf +++ b/ml-platform/02_gke/modules/cluster/variables.tf @@ -36,23 +36,23 @@ variable "zone" { } variable "master_auth_networks_ipcidr" { - type = string + type = string description = "master authorized network" } variable "network" { - type = string + type = string description = "VPC network where the cluster will be created" } variable "subnet" { - type = string + type = string description = "subnetwork where the cluster will be created" } variable "env" { - type = string + type = string description = "environment" -} \ No newline at end of file +} diff --git a/ml-platform/02_gke/modules/cluster/versions.tf b/ml-platform/02_gke/modules/cluster/versions.tf index dc628619e..fc374eab1 100644 --- a/ml-platform/02_gke/modules/cluster/versions.tf +++ b/ml-platform/02_gke/modules/cluster/versions.tf @@ -24,4 +24,3 @@ terraform { } } } - diff --git a/ml-platform/02_gke/modules/network/outputs.tf b/ml-platform/02_gke/modules/network/outputs.tf index bf9d36dad..13026f645 100644 --- a/ml-platform/02_gke/modules/network/outputs.tf +++ b/ml-platform/02_gke/modules/network/outputs.tf @@ -25,4 +25,4 @@ output "subnet-1" { output "subnet-2" { value = google_compute_subnetwork.subnet-2.id description = "subnet2." -} \ No newline at end of file +} diff --git a/ml-platform/02_gke/modules/network/variables.tf b/ml-platform/02_gke/modules/network/variables.tf index e85ab0e48..db344133d 100644 --- a/ml-platform/02_gke/modules/network/variables.tf +++ b/ml-platform/02_gke/modules/network/variables.tf @@ -14,43 +14,46 @@ variable "project_id" { description = "Id of the GCP project where VPC is to be created." - type = string + type = string } + variable "network_name" { description = "Name of the VPC network." - type = string + type = string } + variable "routing_mode" { description = "The network routing mode." - type = string - default = "GLOBAL" + type = string + default = "GLOBAL" } + variable "subnet_01_name" { description = "Name of first subnet." - type = string + type = string } + variable "subnet_01_ip" { description = "IP range of first subnet." - type = string + type = string } + variable "subnet_01_region" { description = "Region of first subnet." - type = string + type = string } variable "subnet_02_name" { description = "Name of the second subnet." - type = string + type = string } + variable "subnet_02_ip" { description = "IP range of second subnet." - type = string + type = string } + variable "subnet_02_region" { description = "Region of second subnet." - type = string + type = string } -//variable "default_route_name" { -// description = "Name of the default route to internet." -// type = string -//} diff --git a/ml-platform/02_gke/modules/network/versions.tf b/ml-platform/02_gke/modules/network/versions.tf index 033f83d8f..c5f8c84a4 100644 --- a/ml-platform/02_gke/modules/network/versions.tf +++ b/ml-platform/02_gke/modules/network/versions.tf @@ -19,4 +19,4 @@ terraform { version = ">= 4.28.0" } } -} \ No newline at end of file +} diff --git a/ml-platform/02_gke/modules/network/vpc.tf b/ml-platform/02_gke/modules/network/vpc.tf index ad7071b5a..a80166be5 100644 --- a/ml-platform/02_gke/modules/network/vpc.tf +++ b/ml-platform/02_gke/modules/network/vpc.tf @@ -13,34 +13,26 @@ # limitations under the License. resource "google_compute_network" "vpc-network" { - project = var.project_id - name = var.network_name - auto_create_subnetworks = false - routing_mode = var.routing_mode + project = var.project_id + name = var.network_name + auto_create_subnetworks = false + routing_mode = var.routing_mode } resource "google_compute_subnetwork" "subnet-1" { - project = var.project_id - name = var.subnet_01_name - ip_cidr_range = var.subnet_01_ip - region = var.subnet_01_region - network = google_compute_network.vpc-network.id + project = var.project_id + name = var.subnet_01_name + ip_cidr_range = var.subnet_01_ip + region = var.subnet_01_region + network = google_compute_network.vpc-network.id private_ip_google_access = true } resource "google_compute_subnetwork" "subnet-2" { - project = var.project_id - name = var.subnet_02_name - ip_cidr_range = var.subnet_02_ip - region = var.subnet_02_region - network = google_compute_network.vpc-network.id + project = var.project_id + name = var.subnet_02_name + ip_cidr_range = var.subnet_02_ip + region = var.subnet_02_region + network = google_compute_network.vpc-network.id private_ip_google_access = true } - -//resource "google_compute_route" "default-route" { -//name = var.default_route_name -//dest_range = "0.0.0.0/0" -//network = google_compute_network.vpc-network.id -//priority = 1000 -//next_hop_gateway = "default-internet-gateway" -//} diff --git a/ml-platform/02_gke/modules/node-pools/nodepools.tf b/ml-platform/02_gke/modules/node-pools/nodepools.tf index 6eec2bc7d..402e45695 100644 --- a/ml-platform/02_gke/modules/node-pools/nodepools.tf +++ b/ml-platform/02_gke/modules/node-pools/nodepools.tf @@ -13,19 +13,19 @@ # limitations under the License. resource "google_container_node_pool" "node-pool" { - name = format("%s-%s",var.cluster_name,var.node_pool_name) - project = var.project_id - cluster = var.cluster_name - location = var.region + name = format("%s-%s", var.cluster_name, var.node_pool_name) + project = var.project_id + cluster = var.cluster_name + location = var.region node_config { machine_type = var.machine_type - taint = var.taints + taint = var.taints labels = { "resource-type" : var.resource_type } guest_accelerator { - type = var.accelerator + type = var.accelerator count = var.accelerator_count } oauth_scopes = [ @@ -33,11 +33,11 @@ resource "google_container_node_pool" "node-pool" { ] dynamic "reservation_affinity" { - for_each = var.reservation_name != "" ? [1] : [ ] + for_each = var.reservation_name != "" ? [1] : [] content { consume_reservation_type = "SPECIFIC_RESERVATION" - key = "compute.googleapis.com/reservation-name" - values = [var.reservation_name] + key = "compute.googleapis.com/reservation-name" + values = [var.reservation_name] } } } @@ -61,4 +61,4 @@ resource "google_container_node_pool" "node-pool" { network_config { enable_private_nodes = true } -} \ No newline at end of file +} diff --git a/ml-platform/02_gke/modules/node-pools/variables.tf b/ml-platform/02_gke/modules/node-pools/variables.tf index f217268b8..973d7a1fe 100644 --- a/ml-platform/02_gke/modules/node-pools/variables.tf +++ b/ml-platform/02_gke/modules/node-pools/variables.tf @@ -13,19 +13,22 @@ # limitations under the License. variable "node_pool_name" { - type = string + type = string description = "Name of the node pool" } + variable "project_id" { type = string description = "The GCP project where the resources will be created" default = "" } + variable "cluster_name" { type = string description = "GKE cluster name" default = "" } + variable "region" { type = string description = "The GCP zone where the reservation will be created" @@ -49,11 +52,10 @@ variable "taints" { variable "resource_type" { description = "ondemand/spot/reserved." - type = string - default = "ondemand" + type = string + default = "ondemand" } - variable "accelerator" { type = string description = "The GPU accelerator to use." @@ -65,6 +67,7 @@ variable "accelerator_count" { description = "The number of accelerators per machine." default = 2 } + variable "machine_reservation_count" { type = number description = "Number of machines reserved instances with GPUs" @@ -72,12 +75,12 @@ variable "machine_reservation_count" { } variable "autoscaling" { - type = map - default = { "total_min_node_count" : 0, "total_max_node_count" : 24, "location_policy" : "ANY"} + type = map(any) + default = { "total_min_node_count" : 0, "total_max_node_count" : 24, "location_policy" : "ANY" } } variable "reservation_name" { description = "reservation name to which the nodepool will be associated" - type = string - default = "" -} \ No newline at end of file + type = string + default = "" +} diff --git a/ml-platform/02_gke/modules/node-pools/versions.tf b/ml-platform/02_gke/modules/node-pools/versions.tf index dc628619e..fc374eab1 100644 --- a/ml-platform/02_gke/modules/node-pools/versions.tf +++ b/ml-platform/02_gke/modules/node-pools/versions.tf @@ -24,4 +24,3 @@ terraform { } } } - diff --git a/ml-platform/02_gke/modules/vm-reservations/outputs.tf b/ml-platform/02_gke/modules/vm-reservations/outputs.tf index 367c796d1..11ffcc6d8 100644 --- a/ml-platform/02_gke/modules/vm-reservations/outputs.tf +++ b/ml-platform/02_gke/modules/vm-reservations/outputs.tf @@ -13,5 +13,5 @@ # limitations under the License. output "reservation_name" { - value = split("/",google_compute_reservation.machine_reservation.id)[5] -} \ No newline at end of file + value = split("/", google_compute_reservation.machine_reservation.id)[5] +} diff --git a/ml-platform/02_gke/modules/vm-reservations/reservations.tf b/ml-platform/02_gke/modules/vm-reservations/reservations.tf index 3e35e47c5..03438d0f7 100644 --- a/ml-platform/02_gke/modules/vm-reservations/reservations.tf +++ b/ml-platform/02_gke/modules/vm-reservations/reservations.tf @@ -15,7 +15,7 @@ resource "google_compute_reservation" "machine_reservation" { project = var.project_id specific_reservation_required = true - name = format("%s-%s",var.cluster_name,"reservation") + name = format("%s-%s", var.cluster_name, "reservation") zone = var.zone specific_reservation { count = var.machine_reservation_count @@ -27,4 +27,4 @@ resource "google_compute_reservation" "machine_reservation" { } } } -} \ No newline at end of file +} diff --git a/ml-platform/02_gke/modules/vm-reservations/variables.tf b/ml-platform/02_gke/modules/vm-reservations/variables.tf index 3a8e3482d..7ca5e5af3 100644 --- a/ml-platform/02_gke/modules/vm-reservations/variables.tf +++ b/ml-platform/02_gke/modules/vm-reservations/variables.tf @@ -17,16 +17,19 @@ variable "project_id" { description = "The GCP project where the resources will be created" default = "" } + variable "cluster_name" { type = string description = "GKE cluster name" default = "" } + variable "zone" { type = string description = "The GCP zone where the reservation will be created" default = "us-central1-a" } + variable "machine_type" { type = string description = "The machine type to use." @@ -44,6 +47,7 @@ variable "accelerator_count" { description = "The number of accelerators per machine." default = 2 } + variable "machine_reservation_count" { type = number description = "Number of machines reserved instances with GPUs" diff --git a/ml-platform/02_gke/modules/vm-reservations/versions.tf b/ml-platform/02_gke/modules/vm-reservations/versions.tf index dc628619e..fc374eab1 100644 --- a/ml-platform/02_gke/modules/vm-reservations/versions.tf +++ b/ml-platform/02_gke/modules/vm-reservations/versions.tf @@ -24,4 +24,3 @@ terraform { } } } - diff --git a/ml-platform/02_gke/outputs.tf b/ml-platform/02_gke/outputs.tf index 76dca95a5..08500e25e 100644 --- a/ml-platform/02_gke/outputs.tf +++ b/ml-platform/02_gke/outputs.tf @@ -14,4 +14,4 @@ output "gke_cluster" { value = module.gke -} \ No newline at end of file +} diff --git a/ml-platform/02_gke/providers.tf b/ml-platform/02_gke/providers.tf index dc628619e..fc374eab1 100644 --- a/ml-platform/02_gke/providers.tf +++ b/ml-platform/02_gke/providers.tf @@ -24,4 +24,3 @@ terraform { } } } - diff --git a/ml-platform/02_gke/variables.tf b/ml-platform/02_gke/variables.tf index 83ccafcb1..05765c043 100644 --- a/ml-platform/02_gke/variables.tf +++ b/ml-platform/02_gke/variables.tf @@ -13,7 +13,7 @@ # limitations under the License. variable "project_id" { - type = map + type = map(any) description = "The GCP project where the resources will be created. It is a map with environments a skeys and project_ids s values" default = {} #Below is an example of not null project_id variable @@ -21,67 +21,76 @@ variable "project_id" { } variable "network_name" { - default = "ml-vpc" + default = "ml-vpc" description = "VPC network where GKE cluster will be created" - type = string + type = string } + variable "routing_mode" { - default = "GLOBAL" + default = "GLOBAL" description = "VPC routing mode." - type = string + type = string } + variable "subnet_01_name" { - default = "ml-vpc-subnet-01" + default = "ml-vpc-subnet-01" description = "Name of the first subnet in the VPC network." - type = string + type = string } + variable "subnet_01_ip" { - default = "10.40.0.0/22" + default = "10.40.0.0/22" description = "CIDR of the first subnet." - type = string + type = string } + variable "subnet_01_region" { - default = "us-central1" + default = "us-central1" description = "Region of the first subnet." - type = string + type = string } + variable "subnet_01_description" { - default = "subnet 01" + default = "subnet 01" description = "Description of the first subnet." - type = string + type = string } variable "subnet_02_name" { - default = "gke-vpc-subnet-02" + default = "gke-vpc-subnet-02" description = "Name of the second subnet in the VPC network." - type = string + type = string } + variable "subnet_02_ip" { - default = "10.12.0.0/22" + default = "10.12.0.0/22" description = "CIDR of the second subnet." - type = string + type = string } + variable "subnet_02_region" { - default = "us-west2" + default = "us-west2" description = "Region of the second subnet." - type = string + type = string } + variable "subnet_02_description" { - default = "subnet 02" + default = "subnet 02" description = "Description of the second subnet." - type = string + type = string } variable "lookup_state_bucket" { description = "GCS bucket to look up TF state from previous steps." - type = string - default = "YOUR_STATE_BUCKET" + type = string + default = "YOUR_STATE_BUCKET" } variable "cluster_name" { description = "Name of the GKE cluster" - default = "gke-ml" - type = string + default = "gke-ml" + type = string } + variable "reserved_taints" { description = "Taints to be applied to the reserved node pool." type = list(object({ @@ -123,4 +132,3 @@ variable "spot_taints" { effect = "NO_SCHEDULE" }] } - diff --git a/ml-platform/03_configsync/main.tf b/ml-platform/03_configsync/main.tf index b8ad93325..671abee5c 100644 --- a/ml-platform/03_configsync/main.tf +++ b/ml-platform/03_configsync/main.tf @@ -15,14 +15,14 @@ data "terraform_remote_state" "gke-clusters" { backend = "gcs" config = { - bucket = var.lookup_state_bucket - prefix = "02_gke" + bucket = var.lookup_state_bucket + prefix = "02_gke" } } locals { parsed_gke_info = data.terraform_remote_state.gke-clusters.outputs.gke_cluster - project_id_list = [for k,v in "${data.terraform_remote_state.gke-clusters.outputs.gke_cluster}" : v.gke_project_id] + project_id_list = [for k, v in "${data.terraform_remote_state.gke-clusters.outputs.gke_cluster}" : v.gke_project_id] } //resource "google_gke_hub_feature" "configmanagement_acm_feature" { @@ -40,20 +40,20 @@ resource "google_gke_hub_membership" "membership" { membership_id = each.value["cluster_name"] endpoint { gke_cluster { - resource_link = format("%s/%s","//container.googleapis.com",each.value["cluster_id"]) + resource_link = format("%s/%s", "//container.googleapis.com", each.value["cluster_id"]) } } lifecycle { ignore_changes = [ - "labels","description" + "labels", "description" ] } #depends_on = [ google_gke_hub_feature.configmanagement_acm_feature ] } resource "github_repository" "acm_repo" { - name = var.configsync_repo_name - description = "Repo for Config Sync" + name = var.configsync_repo_name + description = "Repo for Config Sync" visibility = "private" has_issues = false has_projects = false @@ -63,27 +63,27 @@ resource "github_repository" "acm_repo" { allow_squash_merge = true allow_rebase_merge = true delete_branch_on_merge = false - auto_init = true - vulnerability_alerts = true + auto_init = true + vulnerability_alerts = true } //Create a branch for each env resource "github_branch" "branch" { for_each = local.parsed_gke_info - repository = split("/",github_repository.acm_repo.full_name)[1] + repository = split("/", github_repository.acm_repo.full_name)[1] branch = each.key depends_on = [github_repository.acm_repo] } //Set default branch as the lowest env resource "github_branch_default" "default_branch" { - repository = split("/",github_repository.acm_repo.full_name)[1] + repository = split("/", github_repository.acm_repo.full_name)[1] branch = tostring(keys(local.parsed_gke_info)[0]) #rename = true depends_on = [github_branch.branch] } #Protect branches other than the default branch resource "github_branch_protection_v3" "branch_protection" { - for_each = local.parsed_gke_info - repository = split("/",github_repository.acm_repo.full_name)[1] + for_each = local.parsed_gke_info + repository = split("/", github_repository.acm_repo.full_name)[1] branch = each.key required_pull_request_reviews { required_approving_review_count = 1 @@ -98,7 +98,7 @@ resource "github_branch_protection_v3" "branch_protection" { resource "google_gke_hub_feature_membership" "feature_member" { provider = google-beta - for_each = local.parsed_gke_info + for_each = local.parsed_gke_info project = each.value["gke_project_id"] location = "global" feature = "configmanagement" @@ -108,21 +108,21 @@ resource "google_gke_hub_feature_membership" "feature_member" { config_sync { source_format = "unstructured" git { - sync_repo = "https://github.com/${github_repository.acm_repo.full_name}.git" + sync_repo = "https://github.com/${github_repository.acm_repo.full_name}.git" sync_branch = each.value["env"] policy_dir = "manifests/clusters" secret_type = "token" } } policy_controller { - enabled = true + enabled = true template_library_installed = true - referential_rules_enabled = true + referential_rules_enabled = true } } provisioner "local-exec" { - command = "${path.module}/create_cluster_yamls.sh ${var.github_org} ${github_repository.acm_repo.full_name} ${var.github_user} ${var.github_email} ${each.value["env"]} ${each.value["cluster_name"]} ${index(keys(local.parsed_gke_info),each.key)}" + command = "${path.module}/create_cluster_yamls.sh ${var.github_org} ${github_repository.acm_repo.full_name} ${var.github_user} ${var.github_email} ${each.value["env"]} ${each.value["cluster_name"]} ${index(keys(local.parsed_gke_info), each.key)}" } #depends_on = [ diff --git a/ml-platform/03_configsync/outputs.tf b/ml-platform/03_configsync/outputs.tf index 2e9c6603f..a19b71988 100644 --- a/ml-platform/03_configsync/outputs.tf +++ b/ml-platform/03_configsync/outputs.tf @@ -15,6 +15,7 @@ output "membership" { value = google_gke_hub_membership.membership } -output "val"{ -value = local.parsed_gke_info -} \ No newline at end of file + +output "val" { + value = local.parsed_gke_info +} diff --git a/ml-platform/03_configsync/variables.tf b/ml-platform/03_configsync/variables.tf index 000789e66..f04844d23 100644 --- a/ml-platform/03_configsync/variables.tf +++ b/ml-platform/03_configsync/variables.tf @@ -14,32 +14,35 @@ variable "lookup_state_bucket" { description = "GCS bucket to look up TF state from previous steps." - type = string - default = "YOUR_STATE_BUCKET" + type = string + default = "YOUR_STATE_BUCKET" } variable "configsync_repo_name" { - type = string + type = string description = "Name of the GitHub repo that will be synced to the cluster with Config sync." - default = "config-sync-repo" + default = "config-sync-repo" } variable "github_user" { description = "GitHub user name." - type = string - default = "YOUR_GIT_USER" + type = string + default = "YOUR_GIT_USER" } + variable "github_email" { description = "GitHub user email." - type = string - default = "YOUR_GIT_USER_EMAIL" + type = string + default = "YOUR_GIT_USER_EMAIL" } + variable "github_org" { - type = string + type = string description = "GitHub org." - default = "YOUR_GIT_ORG" + default = "YOUR_GIT_ORG" } + variable "github_token" { - type = string + type = string description = "GitHub token. It is a token with write permissions as it will create a repo in the GitHub org." }