diff --git a/README.md b/README.md index 474b6d6..a14ef1f 100644 --- a/README.md +++ b/README.md @@ -42,6 +42,7 @@ resources that lack official modules. | Name | Version | |------|---------| | [azurerm](#provider\_azurerm) | ~> 3.17 | +| [external](#provider\_external) | n/a | ## Modules @@ -65,6 +66,7 @@ resources that lack official modules. | Name | Type | |------|------| | [azurerm_subscription.current](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/data-sources/subscription) | data source | +| [external_external.az_zones](https://registry.terraform.io/providers/hashicorp/external/latest/docs/data-sources/external) | data source | ## Inputs @@ -79,7 +81,7 @@ resources that lack official modules. | [create\_private\_link](#input\_create\_private\_link) | Use for the azure private link. | `bool` | `false` | no | | [create\_redis](#input\_create\_redis) | Boolean indicating whether to provision an redis instance (true) or not (false). | `bool` | `false` | no | | [database\_availability\_mode](#input\_database\_availability\_mode) | n/a | `string` | `"SameZone"` | no | -| [database\_sku\_name](#input\_database\_sku\_name) | Specifies the SKU Name for this MySQL Server | `string` | `"GP_Standard_D4ds_v4"` | no | +| [database\_sku\_name](#input\_database\_sku\_name) | Specifies the SKU Name for this MySQL Server. Defaults to null and value from deployment-size.tf is used | `string` | `null` | no | | [database\_version](#input\_database\_version) | Version for MySQL | `string` | `"5.7"` | no | | [deletion\_protection](#input\_deletion\_protection) | If the instance should have deletion protection enabled. The database / Bucket can't be deleted when this value is set to `true`. | `bool` | `true` | no | | [disable\_storage\_vault\_key\_id](#input\_disable\_storage\_vault\_key\_id) | Flag to disable the `customer_managed_key` block, the properties 'encryption.identity, encryption.keyvaultproperties' cannot be updated in a single operation. 
| `bool` | `false` | no | @@ -87,21 +89,22 @@ resources that lack official modules. | [enable\_database\_vault\_key](#input\_enable\_database\_vault\_key) | Flag to enable managed key encryption for the database. Once enabled, cannot be disabled. | `bool` | `false` | no | | [enable\_storage\_vault\_key](#input\_enable\_storage\_vault\_key) | Flag to enable managed key encryption for the storage account. | `bool` | `false` | no | | [external\_bucket](#input\_external\_bucket) | config an external bucket | `any` | `null` | no | -| [kubernetes\_instance\_type](#input\_kubernetes\_instance\_type) | Use for the Kubernetes cluster. | `string` | `"Standard_D4a_v4"` | no | -| [kubernetes\_node\_count](#input\_kubernetes\_node\_count) | n/a | `number` | `2` | no | +| [kubernetes\_instance\_type](#input\_kubernetes\_instance\_type) | Instance type for primary node group. Defaults to null and value from deployment-size.tf is used | `string` | `null` | no | +| [kubernetes\_max\_node\_count](#input\_kubernetes\_max\_node\_count) | Maximum number of nodes for the AKS cluster. Defaults to null and value from deployment-size.tf is used | `number` | `null` | no | +| [kubernetes\_min\_node\_count](#input\_kubernetes\_min\_node\_count) | Minimum number of nodes for the AKS cluster. Defaults to null and value from deployment-size.tf is used | `number` | `null` | no | | [license](#input\_license) | Your wandb/local license | `string` | n/a | yes | | [location](#input\_location) | n/a | `string` | n/a | yes | | [namespace](#input\_namespace) | String used for prefix resources. | `string` | n/a | yes | | [node\_max\_pods](#input\_node\_max\_pods) | Maximum number of pods per node | `number` | `30` | no | -| [node\_pool\_zones](#input\_node\_pool\_zones) | Availability zones for the node pool | `list(string)` |
[| no | +| [node\_pool\_zones](#input\_node\_pool\_zones) | Availability zones for the node pool | `list(string)` | `null` | no | | [oidc\_auth\_method](#input\_oidc\_auth\_method) | OIDC auth method | `string` | `"implicit"` | no | | [oidc\_client\_id](#input\_oidc\_client\_id) | The Client ID of application in your identity provider | `string` | `""` | no | | [oidc\_issuer](#input\_oidc\_issuer) | A url to your Open ID Connect identity provider, i.e. https://cognito-idp.us-east-1.amazonaws.com/us-east-1_uiIFNdacd | `string` | `""` | no | | [oidc\_secret](#input\_oidc\_secret) | The Client secret of application in your identity provider | `string` | `""` | no | | [other\_wandb\_env](#input\_other\_wandb\_env) | Extra environment variables for W&B | `map(any)` | `{}` | no | | [parquet\_wandb\_env](#input\_parquet\_wandb\_env) | Extra environment variables for W&B | `map(string)` | `{}` | no | -| [redis\_capacity](#input\_redis\_capacity) | Number indicating size of an redis instance | `number` | `2` | no | -| [size](#input\_size) | Deployment size | `string` | `null` | no | +| [redis\_capacity](#input\_redis\_capacity) | Number indicating size of an redis instance. Defaults to null and value from deployment-size.tf is used | `number` | `null` | no | +| [size](#input\_size) | Deployment size | `string` | `"small"` | no | | [ssl](#input\_ssl) | Enable SSL certificate | `bool` | `true` | no | | [storage\_account](#input\_storage\_account) | Azure storage account name | `string` | `""` | no | | [storage\_key](#input\_storage\_key) | Azure primary storage access key | `string` | `""` | no | @@ -117,7 +120,8 @@ resources that lack official modules. 
| Name | Description | |------|-------------| | [address](#output\_address) | n/a | -| [aks\_node\_count](#output\_aks\_node\_count) | n/a | +| [aks\_max\_node\_count](#output\_aks\_max\_node\_count) | n/a | +| [aks\_min\_node\_count](#output\_aks\_min\_node\_count) | n/a | | [aks\_node\_instance\_type](#output\_aks\_node\_instance\_type) | n/a | | [client\_id](#output\_client\_id) | n/a | | [cluster\_ca\_certificate](#output\_cluster\_ca\_certificate) | n/a | diff --git a/deployment-size.tf b/deployment-size.tf index 9d9698e..c3a1055 100644 --- a/deployment-size.tf +++ b/deployment-size.tf @@ -2,34 +2,39 @@ locals { # Specifications for t-shirt sized deployments deployment_size = { small = { - db = "MO_Standard_E2ds_v4", - node_count = 2, - node_instance = "Standard_E4s_v5" - cache = "3" + db = "MO_Standard_E2ds_v4", + min_node_count = 2, + max_node_count = 3, + node_instance = "Standard_E4s_v5" + cache = "3" }, medium = { - db = "MO_Standard_E4ds_v4", - node_count = 2, - node_instance = "Standard_E4s_v5" - cache = "3" + db = "MO_Standard_E4ds_v4", + min_node_count = 2, + max_node_count = 4, + node_instance = "Standard_E4s_v5" + cache = "3" }, large = { - db = "MO_Standard_E8ds_v4", - node_count = 3, - node_instance = "Standard_E8s_v5" - cache = "4" + db = "MO_Standard_E8ds_v4", + min_node_count = 2, + max_node_count = 3, + node_instance = "Standard_E8s_v5" + cache = "4" }, xlarge = { - db = "MO_Standard_E16ds_v4", - node_count = 3, - node_instance = "Standard_E8s_v5" - cache = "4" + db = "MO_Standard_E16ds_v4", + min_node_count = 3, + max_node_count = 4, + node_instance = "Standard_E8s_v5" + cache = "4" }, xxlarge = { - db = "MO_Standard_E32ds_v4", - node_count = 3, - node_instance = "Standard_E16s_v5" - cache = "5" + db = "MO_Standard_E32ds_v4", + min_node_count = 3, + max_node_count = 5, + node_instance = "Standard_E16s_v5" + cache = "5" } } } \ No newline at end of file diff --git a/main.tf b/main.tf index c3005a0..5b816e4 100644 --- a/main.tf +++ b/main.tf @@ 
-2,6 +2,12 @@ locals { fqdn = var.subdomain == null ? var.domain_name : "${var.subdomain}.${var.domain_name}" url_prefix = var.ssl ? "https" : "http" url = "${local.url_prefix}://${local.fqdn}" + + redis_capacity = coalesce(var.redis_capacity, local.deployment_size[var.size].cache) + database_sku_name = coalesce(var.database_sku_name, local.deployment_size[var.size].db) + kubernetes_instance_type = coalesce(var.kubernetes_instance_type, local.deployment_size[var.size].node_instance) + kubernetes_min_node_count = coalesce(var.kubernetes_min_node_count, local.deployment_size[var.size].min_node_count) + kubernetes_max_node_count = coalesce(var.kubernetes_max_node_count, local.deployment_size[var.size].max_node_count) } resource "azurerm_resource_group" "default" { @@ -40,7 +46,7 @@ module "database" { database_version = var.database_version database_private_dns_zone_id = module.networking.database_private_dns_zone.id database_subnet_id = module.networking.database_subnet.id - sku_name = try(local.deployment_size[var.size].db, var.database_sku_name) + sku_name = local.database_sku_name deletion_protection = var.deletion_protection database_key_id = try(module.vault.vault_internal_keys[module.vault.vault_key_map.database].id, null) @@ -58,7 +64,7 @@ module "redis" { namespace = var.namespace resource_group_name = azurerm_resource_group.default.name location = azurerm_resource_group.default.location - capacity = try(local.deployment_size[var.size].cache, var.redis_capacity) + capacity = local.redis_capacity depends_on = [module.networking] } @@ -107,24 +113,33 @@ module "app_lb" { tags = var.tags } +data "external" "az_zones" { + program = ["bash", "${path.module}/vmtype_to_az.sh", local.kubernetes_instance_type, azurerm_resource_group.default.location] +} + +locals { + node_pool_zones = (var.node_pool_zones == null) ? 
jsondecode(data.external.az_zones.result.zones) : var.node_pool_zones +} + module "app_aks" { source = "./modules/app_aks" depends_on = [module.app_lb] - cluster_subnet_id = module.networking.private_subnet.id - etcd_key_vault_key_id = module.vault.etcd_key_id - gateway = module.app_lb.gateway - identity = module.identity.identity - location = azurerm_resource_group.default.location - namespace = var.namespace - node_pool_vm_count = try(local.deployment_size[var.size].node_count, var.kubernetes_node_count) - node_pool_vm_size = try(local.deployment_size[var.size].node_instance, var.kubernetes_instance_type) - node_pool_zones = var.node_pool_zones - public_subnet = module.networking.public_subnet - resource_group = azurerm_resource_group.default - sku_tier = var.cluster_sku_tier - max_pods = var.node_max_pods - tags = var.tags + cluster_subnet_id = module.networking.private_subnet.id + etcd_key_vault_key_id = module.vault.etcd_key_id + gateway = module.app_lb.gateway + identity = module.identity.identity + location = azurerm_resource_group.default.location + namespace = var.namespace + node_pool_min_vm_count = local.kubernetes_min_node_count + node_pool_max_vm_count = local.kubernetes_max_node_count + node_pool_vm_size = local.kubernetes_instance_type + node_pool_zones = local.node_pool_zones + public_subnet = module.networking.public_subnet + resource_group = azurerm_resource_group.default + sku_tier = var.cluster_sku_tier + max_pods = var.node_max_pods + tags = var.tags } locals { service_account_name = "wandb-app" @@ -247,7 +262,7 @@ module "wandb" { host = local.url license = var.license cloudProvider = "azure" - bucket = local.bucket_config == null ? { + bucket = local.bucket_config == null ? 
{ provider = "az" name = module.storage[0].account.name path = module.storage[0].container.name diff --git a/modules/app_aks/main.tf b/modules/app_aks/main.tf index 0364275..1858b56 100644 --- a/modules/app_aks/main.tf +++ b/modules/app_aks/main.tf @@ -18,10 +18,12 @@ resource "azurerm_kubernetes_cluster" "default" { } default_node_pool { - enable_auto_scaling = false + enable_auto_scaling = true max_pods = var.max_pods name = "default" - node_count = var.node_pool_vm_count + node_count = var.node_pool_min_vm_count + max_count = var.node_pool_max_vm_count + min_count = var.node_pool_min_vm_count temporary_name_for_rotation = "rotating" type = "VirtualMachineScaleSets" vm_size = var.node_pool_vm_size @@ -57,21 +59,21 @@ locals { } resource "azurerm_role_assignment" "gateway" { - depends_on = [ local.ingress_gateway_principal_id ] + depends_on = [local.ingress_gateway_principal_id] scope = var.gateway.id role_definition_name = "Contributor" principal_id = local.ingress_gateway_principal_id } resource "azurerm_role_assignment" "resource_group" { - depends_on = [ local.ingress_gateway_principal_id ] + depends_on = [local.ingress_gateway_principal_id] scope = var.resource_group.id role_definition_name = "Reader" principal_id = local.ingress_gateway_principal_id } resource "azurerm_role_assignment" "public_subnet" { - depends_on = [ local.ingress_gateway_principal_id ] + depends_on = [local.ingress_gateway_principal_id] scope = var.public_subnet.id role_definition_name = "Contributor" principal_id = local.ingress_gateway_principal_id diff --git a/modules/app_aks/variables.tf b/modules/app_aks/variables.tf index 772fc35..38d26e9 100644 --- a/modules/app_aks/variables.tf +++ b/modules/app_aks/variables.tf @@ -46,7 +46,11 @@ variable "node_pool_vm_size" { type = string } -variable "node_pool_vm_count" { +variable "node_pool_min_vm_count" { + type = number +} + +variable "node_pool_max_vm_count" { type = number } diff --git a/modules/app_lb/main.tf b/modules/app_lb/main.tf 
index b046b30..518f200 100644 --- a/modules/app_lb/main.tf +++ b/modules/app_lb/main.tf @@ -17,7 +17,7 @@ locals { listener_name = "${var.network.name}-httplstn" request_routing_rule_name = "${var.network.name}-rqrt" redirect_configuration_name = "${var.network.name}-rdrcfg" - app_gateway_name = var.private_link ? "${var.namespace}-ag-private-link" : "${var.namespace}-ag" + app_gateway_name = var.private_link ? "${var.namespace}-ag-private-link" : "${var.namespace}-ag" } diff --git a/modules/app_lb/variables.tf b/modules/app_lb/variables.tf index 01c19f6..4c25a71 100644 --- a/modules/app_lb/variables.tf +++ b/modules/app_lb/variables.tf @@ -39,6 +39,6 @@ variable "private_subnet" { } variable "private_link" { - type = bool + type = bool description = "Specifies the Azure private link creation" } \ No newline at end of file diff --git a/modules/networking/main.tf b/modules/networking/main.tf index a5f2bc5..1528407 100644 --- a/modules/networking/main.tf +++ b/modules/networking/main.tf @@ -9,10 +9,10 @@ resource "azurerm_virtual_network" "default" { } resource "azurerm_subnet" "private" { - name = "${var.namespace}-private" - resource_group_name = var.resource_group_name - address_prefixes = [var.network_private_subnet_cidr] - virtual_network_name = azurerm_virtual_network.default.name + name = "${var.namespace}-private" + resource_group_name = var.resource_group_name + address_prefixes = [var.network_private_subnet_cidr] + virtual_network_name = azurerm_virtual_network.default.name private_link_service_network_policies_enabled = var.private_link ? 
false : true service_endpoints = concat( diff --git a/modules/networking/variables.tf b/modules/networking/variables.tf index af10679..81735e7 100644 --- a/modules/networking/variables.tf +++ b/modules/networking/variables.tf @@ -56,7 +56,7 @@ variable "tags" { } variable "private_link" { - type = bool + type = bool description = "Private link flag for multi region storage endpoint access" } diff --git a/outputs.tf b/outputs.tf index 3176fb0..08bde10 100644 --- a/outputs.tf +++ b/outputs.tf @@ -45,16 +45,20 @@ output "standardized_size" { value = var.size } -output "aks_node_count" { - value = try(local.deployment_size[var.size].node_count, var.kubernetes_node_count) +output "aks_min_node_count" { + value = local.kubernetes_min_node_count +} + +output "aks_max_node_count" { + value = local.kubernetes_max_node_count } output "aks_node_instance_type" { - value = try(local.deployment_size[var.size].node_instance, var.kubernetes_instance_type) + value = local.kubernetes_instance_type } output "database_instance_type" { - value = try(local.deployment_size[var.size].db, var.database_sku_name) + value = local.database_sku_name } output "client_id" { diff --git a/variables.tf b/variables.tf index a8771c0..c339420 100644 --- a/variables.tf +++ b/variables.tf @@ -29,7 +29,7 @@ variable "use_internal_queue" { } variable "size" { - default = null + default = "small" description = "Deployment size" nullable = true type = string @@ -131,8 +131,8 @@ variable "database_availability_mode" { variable "database_sku_name" { type = string - default = "GP_Standard_D4ds_v4" - description = "Specifies the SKU Name for this MySQL Server" + default = null + description = "Specifies the SKU Name for this MySQL Server. 
Defaults to null and value from deployment-size.tf is used" } ########################################## @@ -146,8 +146,8 @@ variable "create_redis" { variable "redis_capacity" { type = number - description = "Number indicating size of an redis instance" - default = 2 + description = "Number indicating size of an redis instance. Defaults to null and value from deployment-size.tf is used" + default = null } ########################################## @@ -185,14 +185,21 @@ variable "external_bucket" { # K8s # ########################################## variable "kubernetes_instance_type" { + description = "Instance type for primary node group. Defaults to null and value from deployment-size.tf is used" type = string - description = "Use for the Kubernetes cluster." - default = "Standard_D4a_v4" + default = null } -variable "kubernetes_node_count" { - default = 2 - type = number +variable "kubernetes_min_node_count" { + description = "Minimum number of nodes for the AKS cluster. Defaults to null and value from deployment-size.tf is used" + type = number + default = null +} + +variable "kubernetes_max_node_count" { + description = "Maximum number of nodes for the AKS cluster. Defaults to null and value from deployment-size.tf is used" + type = number + default = null } variable "cluster_sku_tier" { @@ -204,7 +211,7 @@ variable "cluster_sku_tier" { variable "node_pool_zones" { type = list(string) description = "Availability zones for the node pool" - default = ["1", "2"] + default = null } variable "node_max_pods" { diff --git a/vmtype_to_az.sh b/vmtype_to_az.sh new file mode 100755 index 0000000..b69a196 --- /dev/null +++ b/vmtype_to_az.sh @@ -0,0 +1,32 @@ +#! 
/usr/bin/env bash + +# Given an Azure VM instance type and a region, return the availability zones that support the instance type + +# Example: +# ./vmtype_to_az.sh Standard_D2_v3 westeurope +# +# Output: +# ["1", "2", "3"] + +# Copy script arguments to named environment variables +VM_TYPE="$1" +REGION="$2" + +# Check if both arguments are provided +if [ -z "$VM_TYPE" ] || [ -z "$REGION" ]; then + echo "Error: Both VM type and region must be provided." >&2 + echo "Usage: $0
"1",
"2"
]